forked from vitalif/vitastor
Compare commits
74 Commits
msgr-iothr
...
v1.8.0
Author | SHA1 | Date | |
---|---|---|---|
ce359c5a69 | |||
521e867b10 | |||
333c54ebbf | |||
58d3da95c8 | |||
4e90e752eb | |||
09342d7189 | |||
eb3e8b8c19 | |||
e2ca3ad99e | |||
dd4b0aed2b | |||
42851a061c | |||
8e0f242d30 | |||
0daa8ea39b | |||
b263d311ef | |||
8720185780 | |||
20584414d8 | |||
306a3db7f3 | |||
5b0aebada4 | |||
d6f0b480c8 | |||
f1f8531fd4 | |||
8d79d59964 | |||
551a209a50 | |||
06cafd7702 | |||
3018352443 | |||
f8edfb4a71 | |||
8239ea2356 | |||
e898335b8d | |||
e7869611fa | |||
e1c2500b60 | |||
42cf3a11df | |||
4d9293f0e9 | |||
7a13f85ae2 | |||
fc219b8602 | |||
989d73f874 | |||
f0630722ce | |||
93b0947720 | |||
9c628646fa | |||
cf476a3b95 | |||
23f9273ba3 | |||
74b88bf8ba | |||
1254d5a0de | |||
f87bece253 | |||
ba85d0ef16 | |||
17a909ea3a | |||
a4dfc220ab | |||
26426dd95e | |||
9f38b7e5c1 | |||
20057defbe | |||
b4e9140755 | |||
413959e75a | |||
8973982570 | |||
990c3ba7eb | |||
1771d2ef36 | |||
d88ab76636 | |||
c010a0aa54 | |||
0d42712d29 | |||
66b438106a | |||
3aef6682fb | |||
8535bccf4c | |||
0487b3b239 | |||
a54ef97f5d | |||
10434a9b2b | |||
c6be194508 | |||
df668286fb | |||
667c5999c9 | |||
8ad63465cd | |||
976290e6a9 | |||
79f1d1969b | |||
918e1f83b0 | |||
abbba6ade4 | |||
21d1171ba4 | |||
![]() |
8f83086889 | ||
ceb18f25db | |||
ed51a89f70 | |||
f59456f22d |
@@ -16,6 +16,7 @@ env:
|
||||
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
|
||||
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
|
||||
OSD_ARGS: '--etcd_quick_timeout 2000'
|
||||
USE_RAMDISK: 1
|
||||
|
||||
concurrency:
|
||||
group: ci-${{ github.ref }}
|
||||
@@ -197,6 +198,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_etcd_fail_antietcd:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: ANTIETCD=1 /root/vitastor/tests/test_etcd_fail.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_interrupted_rebalance:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -665,6 +684,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_antietcd:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: ANTIETCD=1 /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_32k_dmj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -34,6 +34,10 @@ for my $line (<>)
|
||||
{
|
||||
$test_name .= '_imm';
|
||||
}
|
||||
elsif ($1 eq 'ANTIETCD')
|
||||
{
|
||||
$test_name .= '_antietcd';
|
||||
}
|
||||
else
|
||||
{
|
||||
$test_name .= '_'.lc($1).'_'.$2;
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "1.6.1")
|
||||
set(VITASTOR_VERSION "1.8.0")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -1,9 +1,9 @@
|
||||
VERSION ?= v1.6.1
|
||||
VITASTOR_VERSION ?= v1.8.0
|
||||
|
||||
all: build push
|
||||
|
||||
build:
|
||||
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
|
||||
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
|
||||
|
||||
push:
|
||||
@docker push vitalif/vitastor-csi:$(VERSION)
|
||||
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v1.6.1
|
||||
image: vitalif/vitastor-csi:v1.8.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -121,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v1.6.1
|
||||
image: vitalif/vitastor-csi:v1.8.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "1.6.1"
|
||||
vitastorCSIDriverVersion = "1.8.0"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (1.6.1-1) unstable; urgency=medium
|
||||
vitastor (1.8.0-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
2
debian/vitastor-mon.install
vendored
2
debian/vitastor-mon.install
vendored
@@ -1,3 +1,3 @@
|
||||
mon usr/lib/vitastor/mon
|
||||
mon usr/lib/vitastor/
|
||||
mon/scripts/make-etcd usr/lib/vitastor/mon
|
||||
mon/scripts/vitastor-mon.service /lib/systemd/system
|
||||
|
2
debian/vitastor-mon.postinst
vendored
2
debian/vitastor-mon.postinst
vendored
@@ -6,4 +6,6 @@ if [ "$1" = "configure" ]; then
|
||||
addgroup --system --quiet vitastor
|
||||
adduser --system --quiet --ingroup vitastor --no-create-home --home /nonexistent vitastor
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
fi
|
||||
|
33
debian/vitastor.Dockerfile
vendored
33
debian/vitastor.Dockerfile
vendored
@@ -9,12 +9,12 @@ ARG REL=
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
|
||||
echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
|
||||
echo >> /etc/apt/preferences; \
|
||||
echo 'Package: *' >> /etc/apt/preferences; \
|
||||
echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
|
||||
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
|
||||
RUN set -e -x; \
|
||||
if [ "$REL" = "buster" ]; then \
|
||||
apt-get update; \
|
||||
apt-get -y install wget; \
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
|
||||
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
|
||||
fi; \
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
|
||||
@@ -22,10 +22,9 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl
|
||||
RUN apt-get -y build-dep fio
|
||||
RUN apt-get --download-only source fio
|
||||
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev
|
||||
|
||||
ADD . /root/vitastor
|
||||
RUN set -e -x; \
|
||||
@@ -37,8 +36,10 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-1.6.1; \
|
||||
cd vitastor-1.6.1; \
|
||||
FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
VER=${FULLVER%%-*}; \
|
||||
cp -r /root/vitastor vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
@@ -50,10 +51,14 @@ RUN set -e -x; \
|
||||
echo fio-headers.patch >> debian/patches/series; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL/vitastor-$VER; \
|
||||
mkdir mon/node_modules; \
|
||||
cd mon/node_modules; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.6.1.orig.tar.xz vitastor-1.6.1; \
|
||||
cd vitastor-1.6.1; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
rm -rf /root/packages/vitastor-$REL/vitastor-*/
|
||||
|
@@ -9,6 +9,7 @@
|
||||
These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
|
||||
affect their interaction with the cluster.
|
||||
|
||||
- [client_iothread_count](#client_iothread_count)
|
||||
- [client_retry_interval](#client_retry_interval)
|
||||
- [client_eio_retry_interval](#client_eio_retry_interval)
|
||||
- [client_retry_enospc](#client_retry_enospc)
|
||||
@@ -23,6 +24,23 @@ affect their interaction with the cluster.
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
- Type: integer
|
||||
- Default: 0
|
||||
|
||||
Number of separate threads for handling TCP network I/O at client library
|
||||
side. Enabling 4 threads usually allows increasing peak performance of each
|
||||
client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
|
||||
100-150 to 400 thousand iops, but at the same time it increases latency.
|
||||
Latency increase depends on CPU: with CPU power saving disabled latency
|
||||
only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
|
||||
with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
|
||||
iops decrease from 2000 to 1000). RDMA isn't affected by this option.
|
||||
|
||||
It's recommended to enable client I/O threads if you don't use RDMA and want
|
||||
to increase peak client performance.
|
||||
|
||||
## client_retry_interval
|
||||
|
||||
- Type: milliseconds
|
||||
|
@@ -9,6 +9,7 @@
|
||||
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
|
||||
затрагивают логику их работы с кластером.
|
||||
|
||||
- [client_iothread_count](#client_iothread_count)
|
||||
- [client_retry_interval](#client_retry_interval)
|
||||
- [client_eio_retry_interval](#client_eio_retry_interval)
|
||||
- [client_retry_enospc](#client_retry_enospc)
|
||||
@@ -23,6 +24,24 @@
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 0
|
||||
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
|
||||
клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
|
||||
производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
|
||||
чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
|
||||
секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
|
||||
при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
|
||||
падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
|
||||
и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
|
||||
RDMA данная опция не влияет.
|
||||
|
||||
Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
|
||||
RDMA и хотите повысить пиковую производительность клиентов.
|
||||
|
||||
## client_retry_interval
|
||||
|
||||
- Тип: миллисекунды
|
||||
|
@@ -56,14 +56,24 @@ Can't be smaller than the OSD data device sector.
|
||||
## immediate_commit
|
||||
|
||||
- Type: string
|
||||
- Default: false
|
||||
- Default: all
|
||||
|
||||
Another parameter which is really important for performance.
|
||||
One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
|
||||
|
||||
This parameter is also really important for performance.
|
||||
|
||||
TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
|
||||
power loss protection (nonvolatile write-through cache) and also for most HDDs.
|
||||
"none" or "small" should be only selected if you use desktop SSDs without
|
||||
capacitors or drives with slow write-back cache that can't be disabled. Check
|
||||
immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
|
||||
|
||||
Detailed explanation:
|
||||
|
||||
Desktop SSDs are very fast (100000+ iops) for simple random writes
|
||||
without cache flush. However, they are really slow (only around 1000 iops)
|
||||
if you try to fsync() each write, that is, when you want to guarantee that
|
||||
each change gets immediately persisted to the physical media.
|
||||
if you try to fsync() each write, that is, if you want to guarantee that
|
||||
each change gets actually persisted to the physical media.
|
||||
|
||||
Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
|
||||
"Supercapacitor-based Power Loss Protection", on the other hand, are equally
|
||||
@@ -75,8 +85,8 @@ really slow when used with desktop SSDs. Vitastor, however, can also
|
||||
efficiently utilize desktop SSDs by postponing fsync until the client calls
|
||||
it explicitly.
|
||||
|
||||
This is what this parameter regulates. When it's set to "all" the whole
|
||||
Vitastor cluster commits each change to disks immediately and clients just
|
||||
This is what this parameter regulates. When it's set to "all" Vitastor
|
||||
cluster commits each change to disks immediately and clients just
|
||||
ignore fsyncs because they know for sure that they're unneeded. This reduces
|
||||
the amount of network roundtrips performed by clients and improves
|
||||
performance. So it's always better to use server grade SSDs with
|
||||
@@ -99,9 +109,5 @@ Setting this parameter to "all" or "small" in OSD parameters requires enabling
|
||||
[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
|
||||
[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
|
||||
"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
|
||||
|
||||
TLDR: For optimal performance, set immediate_commit to "all" if you only use
|
||||
SSDs with supercapacitor-based power loss protection (nonvolatile
|
||||
write-through cache) for both data and journals in the whole Vitastor
|
||||
cluster. Set it to "small" if you only use such SSDs for journals. Leave
|
||||
empty if your drives have write-back cache.
|
||||
vitastor-disk tries to do that by default, first checking/disabling drive cache.
|
||||
If it can't disable drive cache, OSDs get initialized with "none".
|
||||
|
@@ -57,9 +57,18 @@ amplification) и эффективность распределения нагр
|
||||
## immediate_commit
|
||||
|
||||
- Тип: строка
|
||||
- Значение по умолчанию: false
|
||||
- Значение по умолчанию: all
|
||||
|
||||
Ещё один важный для производительности параметр.
|
||||
Одно из значений "none", "small" или "all". Глобальное значение, может быть
|
||||
переопределено [на уровне пула](pool.ru.md#immediate_commit).
|
||||
|
||||
Данный параметр тоже важен для производительности.
|
||||
|
||||
Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
|
||||
суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
|
||||
устанавливать только при использовании SSD настольного класса без
|
||||
суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
|
||||
Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
|
||||
|
||||
Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
|
||||
секунду) при простой случайной записи без сбросов кэша. Однако они очень
|
||||
@@ -80,7 +89,7 @@ Power Loss Protection" - одинаково быстрые и со сбросо
|
||||
эффективно утилизировать настольные SSD.
|
||||
|
||||
Данный параметр влияет как раз на это. Когда он установлен в значение "all",
|
||||
весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
|
||||
знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
|
||||
по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
|
||||
@@ -106,10 +115,3 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
|
||||
включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
|
||||
[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
|
||||
также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
|
||||
|
||||
Итого, вкратце: для оптимальной производительности установите
|
||||
immediate_commit в значение "all", если вы используете в кластере только SSD
|
||||
с суперконденсаторами и для данных, и для журналов. Если вы используете
|
||||
такие SSD для всех журналов, но не для данных - можете установить параметр
|
||||
в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
|
||||
оставьте параметр пустым.
|
||||
|
@@ -8,6 +8,14 @@
|
||||
|
||||
These parameters only apply to Monitors.
|
||||
|
||||
- [use_antietcd](#use_antietcd)
|
||||
- [enable_prometheus](#enable_prometheus)
|
||||
- [mon_http_port](#mon_http_port)
|
||||
- [mon_http_ip](#mon_http_ip)
|
||||
- [mon_https_cert](#mon_https_cert)
|
||||
- [mon_https_key](#mon_https_key)
|
||||
- [mon_https_client_auth](#mon_https_client_auth)
|
||||
- [mon_https_ca](#mon_https_ca)
|
||||
- [etcd_mon_ttl](#etcd_mon_ttl)
|
||||
- [etcd_mon_timeout](#etcd_mon_timeout)
|
||||
- [etcd_mon_retries](#etcd_mon_retries)
|
||||
@@ -17,6 +25,87 @@ These parameters only apply to Monitors.
|
||||
- [placement_levels](#placement_levels)
|
||||
- [use_old_pg_combinator](#use_old_pg_combinator)
|
||||
|
||||
## use_antietcd
|
||||
|
||||
- Type: boolean
|
||||
- Default: false
|
||||
|
||||
Enable experimental built-in etcd replacement (clustered key-value database):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
When set to true, monitor runs internal antietcd automatically if it finds
|
||||
a network interface with an IP address matching one of addresses in the
|
||||
`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
|
||||
the monitor command line). If there are multiple matching addresses, it also
|
||||
checks `antietcd_port` and antietcd is started for the address with the matching port.
|
||||
By default, antietcd accepts connections on the selected IP address, but it
|
||||
can also be overridden manually in the `antietcd_ip` option.
|
||||
|
||||
When antietcd is started, monitor stores cluster metadata itself and exposes
|
||||
an etcd-compatible REST API. On disk, these metadata are stored in
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
|
||||
or antietcd_data_dir options). All other antietcd parameters
|
||||
(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
|
||||
cluster, cluster_key, persist_filter, stale_read can also be set in
|
||||
Vitastor configuration with `antietcd_` prefix.
|
||||
|
||||
You can dump/load data to or from antietcd using Antietcd `anticli` tool:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
|
||||
## enable_prometheus
|
||||
|
||||
- Type: boolean
|
||||
- Default: true
|
||||
|
||||
Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
|
||||
|
||||
Note that only the active (master) monitor exposes metrics, others return
|
||||
HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
|
||||
|
||||
Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
|
||||
## mon_http_port
|
||||
|
||||
- Type: integer
|
||||
- Default: 8060
|
||||
|
||||
HTTP port for monitors to listen on (including metrics exporter)
|
||||
|
||||
## mon_http_ip
|
||||
|
||||
- Type: string
|
||||
|
||||
IP address for monitors to listen on (all addresses by default)
|
||||
|
||||
## mon_https_cert
|
||||
|
||||
- Type: string
|
||||
|
||||
Path to PEM SSL certificate file for monitor to listen using HTTPS
|
||||
|
||||
## mon_https_key
|
||||
|
||||
- Type: string
|
||||
|
||||
Path to PEM SSL private key file for monitor to listen using HTTPS
|
||||
|
||||
## mon_https_client_auth
|
||||
|
||||
- Type: boolean
|
||||
- Default: false
|
||||
|
||||
Enable HTTPS client certificate-based authorization for monitor connections
|
||||
|
||||
## mon_https_ca
|
||||
|
||||
- Type: string
|
||||
|
||||
Path to CA certificate for client HTTPS authorization
|
||||
|
||||
## etcd_mon_ttl
|
||||
|
||||
- Type: seconds
|
||||
|
@@ -8,6 +8,14 @@
|
||||
|
||||
Данные параметры используются только мониторами Vitastor.
|
||||
|
||||
- [use_antietcd](#use_antietcd)
|
||||
- [enable_prometheus](#enable_prometheus)
|
||||
- [mon_http_port](#mon_http_port)
|
||||
- [mon_http_ip](#mon_http_ip)
|
||||
- [mon_https_cert](#mon_https_cert)
|
||||
- [mon_https_key](#mon_https_key)
|
||||
- [mon_https_client_auth](#mon_https_client_auth)
|
||||
- [mon_https_ca](#mon_https_ca)
|
||||
- [etcd_mon_ttl](#etcd_mon_ttl)
|
||||
- [etcd_mon_timeout](#etcd_mon_timeout)
|
||||
- [etcd_mon_retries](#etcd_mon_retries)
|
||||
@@ -17,6 +25,89 @@
|
||||
- [placement_levels](#placement_levels)
|
||||
- [use_old_pg_combinator](#use_old_pg_combinator)
|
||||
|
||||
## use_antietcd
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: false
|
||||
|
||||
Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
Если параметр установлен в true, монитор запускает antietcd автоматически,
|
||||
если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
|
||||
конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
|
||||
командной строки монитора). Если таких адресов несколько, также проверяется
|
||||
опция `antietcd_port` и antietcd запускается для адреса с соответствующим
|
||||
портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
|
||||
IP, но его также можно определить вручную опцией `antietcd_ip`.
|
||||
|
||||
При запуске antietcd монитор сам хранит центральные метаданные кластера и
|
||||
выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
|
||||
antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
|
||||
(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
|
||||
node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
|
||||
в конфигурации Vitastor с префиксом `antietcd_`.
|
||||
|
||||
Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
|
||||
`anticli`:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
|
||||
## enable_prometheus
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: true
|
||||
|
||||
Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
|
||||
|
||||
Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
|
||||
возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
|
||||
в задание по сбору метрик Prometheus.
|
||||
|
||||
Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
|
||||
## mon_http_port
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 8060
|
||||
|
||||
Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
|
||||
|
||||
## mon_http_ip
|
||||
|
||||
- Тип: строка
|
||||
|
||||
IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
|
||||
|
||||
## mon_https_cert
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
|
||||
|
||||
## mon_https_key
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
|
||||
|
||||
## mon_https_client_auth
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: false
|
||||
|
||||
Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
|
||||
|
||||
## mon_https_ca
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
|
||||
|
||||
## etcd_mon_ttl
|
||||
|
||||
- Тип: секунды
|
||||
|
@@ -10,6 +10,7 @@ These parameters only apply to OSDs, are not fixed at the moment of OSD drive
|
||||
initialization and can be changed - either with an OSD restart or, for some of
|
||||
them, even without restarting by updating configuration in etcd.
|
||||
|
||||
- [osd_iothread_count](#osd_iothread_count)
|
||||
- [etcd_report_interval](#etcd_report_interval)
|
||||
- [etcd_stats_interval](#etcd_stats_interval)
|
||||
- [run_primary](#run_primary)
|
||||
@@ -61,6 +62,18 @@ them, even without restarting by updating configuration in etcd.
|
||||
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
|
||||
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
|
||||
|
||||
## osd_iothread_count
|
||||
|
||||
- Type: integer
|
||||
- Default: 0
|
||||
|
||||
TCP network I/O thread count for OSD. When non-zero, a single OSD process
|
||||
may handle more TCP I/O, but at a cost of increased latency because thread
|
||||
switching overhead occurs. RDMA isn't affected by this option.
|
||||
|
||||
Because of latency, instead of enabling OSD I/O threads it's recommended to
|
||||
just create multiple OSDs per disk, or use RDMA.
|
||||
|
||||
## etcd_report_interval
|
||||
|
||||
- Type: seconds
|
||||
|
@@ -11,6 +11,7 @@
|
||||
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
|
||||
изменения конфигурации в etcd.
|
||||
|
||||
- [osd_iothread_count](#osd_iothread_count)
|
||||
- [etcd_report_interval](#etcd_report_interval)
|
||||
- [etcd_stats_interval](#etcd_stats_interval)
|
||||
- [run_primary](#run_primary)
|
||||
@@ -62,6 +63,19 @@
|
||||
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
|
||||
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
|
||||
|
||||
## osd_iothread_count
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 0
|
||||
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
|
||||
стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
|
||||
по сети больше данных, но ухудшает задержку из-за накладных расходов
|
||||
переключения потоков. На работу RDMA опция не влияет.
|
||||
|
||||
Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
|
||||
просто создавать по несколько OSD на каждом диске, или использовать RDMA.
|
||||
|
||||
## etcd_report_interval
|
||||
|
||||
- Тип: секунды
|
||||
|
@@ -1,3 +1,32 @@
|
||||
- name: client_iothread_count
|
||||
type: int
|
||||
default: 0
|
||||
online: false
|
||||
info: |
|
||||
Number of separate threads for handling TCP network I/O at client library
|
||||
side. Enabling 4 threads usually allows increasing peak performance of each
|
||||
client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
|
||||
100-150 to 400 thousand iops, but at the same time it increases latency.
|
||||
Latency increase depends on CPU: with CPU power saving disabled latency
|
||||
only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
|
||||
with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
|
||||
iops decrease from 2000 to 1000). RDMA isn't affected by this option.
|
||||
|
||||
It's recommended to enable client I/O threads if you don't use RDMA and want
|
||||
to increase peak client performance.
|
||||
info_ru: |
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
|
||||
клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
|
||||
производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
|
||||
чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
|
||||
секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
|
||||
при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
|
||||
падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
|
||||
и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
|
||||
RDMA данная опция не влияет.
|
||||
|
||||
Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
|
||||
RDMA и хотите повысить пиковую производительность клиентов.
|
||||
- name: client_retry_interval
|
||||
type: ms
|
||||
min: 10
|
||||
|
@@ -47,14 +47,24 @@
|
||||
Не может быть меньше размера сектора дисков данных OSD.
|
||||
- name: immediate_commit
|
||||
type: string
|
||||
default: false
|
||||
default: all
|
||||
info: |
|
||||
Another parameter which is really important for performance.
|
||||
One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
|
||||
|
||||
This parameter is also really important for performance.
|
||||
|
||||
TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
|
||||
power loss protection (nonvolatile write-through cache) and also for most HDDs.
|
||||
"none" or "small" should be only selected if you use desktop SSDs without
|
||||
capacitors or drives with slow write-back cache that can't be disabled. Check
|
||||
immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
|
||||
|
||||
Detailed explanation:
|
||||
|
||||
Desktop SSDs are very fast (100000+ iops) for simple random writes
|
||||
without cache flush. However, they are really slow (only around 1000 iops)
|
||||
if you try to fsync() each write, that is, when you want to guarantee that
|
||||
each change gets immediately persisted to the physical media.
|
||||
if you try to fsync() each write, that is, if you want to guarantee that
|
||||
each change gets actually persisted to the physical media.
|
||||
|
||||
Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
|
||||
"Supercapacitor-based Power Loss Protection", on the other hand, are equally
|
||||
@@ -66,8 +76,8 @@
|
||||
efficiently utilize desktop SSDs by postponing fsync until the client calls
|
||||
it explicitly.
|
||||
|
||||
This is what this parameter regulates. When it's set to "all" the whole
|
||||
Vitastor cluster commits each change to disks immediately and clients just
|
||||
This is what this parameter regulates. When it's set to "all" Vitastor
|
||||
cluster commits each change to disks immediately and clients just
|
||||
ignore fsyncs because they know for sure that they're unneeded. This reduces
|
||||
the amount of network roundtrips performed by clients and improves
|
||||
performance. So it's always better to use server grade SSDs with
|
||||
@@ -90,14 +100,19 @@
|
||||
[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
|
||||
[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
|
||||
"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
|
||||
|
||||
TLDR: For optimal performance, set immediate_commit to "all" if you only use
|
||||
SSDs with supercapacitor-based power loss protection (nonvolatile
|
||||
write-through cache) for both data and journals in the whole Vitastor
|
||||
cluster. Set it to "small" if you only use such SSDs for journals. Leave
|
||||
empty if your drives have write-back cache.
|
||||
vitastor-disk tries to do that by default, first checking/disabling drive cache.
|
||||
If it can't disable drive cache, OSDs get initialized with "none".
|
||||
info_ru: |
|
||||
Ещё один важный для производительности параметр.
|
||||
Одно из значений "none", "small" или "all". Глобальное значение, может быть
|
||||
переопределено [на уровне пула](pool.ru.md#immediate_commit).
|
||||
|
||||
Данный параметр тоже важен для производительности.
|
||||
|
||||
Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
|
||||
суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
|
||||
устанавливать только при использовании SSD настольного класса без
|
||||
суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
|
||||
Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
|
||||
|
||||
Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
|
||||
секунду) при простой случайной записи без сбросов кэша. Однако они очень
|
||||
@@ -118,7 +133,7 @@
|
||||
эффективно утилизировать настольные SSD.
|
||||
|
||||
Данный параметр влияет как раз на это. Когда он установлен в значение "all",
|
||||
весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
кластер Vitastor мгновенно фиксирует каждое изменение на физические
|
||||
носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
|
||||
знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
|
||||
по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
|
||||
@@ -144,10 +159,3 @@
|
||||
включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
|
||||
[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
|
||||
также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
|
||||
|
||||
Итого, вкратце: для оптимальной производительности установите
|
||||
immediate_commit в значение "all", если вы используете в кластере только SSD
|
||||
с суперконденсаторами и для данных, и для журналов. Если вы используете
|
||||
такие SSD для всех журналов, но не для данных - можете установить параметр
|
||||
в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
|
||||
оставьте параметр пустым.
|
||||
|
@@ -1,3 +1,103 @@
|
||||
- name: use_antietcd
|
||||
type: bool
|
||||
default: false
|
||||
info: |
|
||||
Enable experimental built-in etcd replacement (clustered key-value database):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
When set to true, monitor runs internal antietcd automatically if it finds
|
||||
a network interface with an IP address matching one of addresses in the
|
||||
`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
|
||||
the monitor command line). If there are multiple matching addresses, it also
|
||||
checks `antietcd_port` and antietcd is started for address with matching port.
|
||||
By default, antietcd accepts connections on the selected IP address, but it
|
||||
can also be overridden manually in the `antietcd_ip` option.
|
||||
|
||||
When antietcd is started, monitor stores cluster metadata itself and exposes
|
||||
an etcd-compatible REST API. On disk, these metadata are stored in
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
|
||||
or antietcd_data_dir options). All other antietcd parameters
|
||||
(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
|
||||
cluster, cluster_key, persist_filter, stale_read can also be set in
|
||||
Vitastor configuration with `antietcd_` prefix.
|
||||
|
||||
You can dump/load data to or from antietcd using Antietcd `anticli` tool:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
info_ru: |
|
||||
Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
|
||||
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
|
||||
|
||||
Если параметр установлен в true, монитор запускает antietcd автоматически,
|
||||
если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
|
||||
конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
|
||||
командной строки монитора). Если таких адресов несколько, также проверяется
|
||||
опция `antietcd_port` и antietcd запускается для адреса с соответствующим
|
||||
портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
|
||||
IP, но его также можно определить вручную опцией `antietcd_ip`.
|
||||
|
||||
При запуске antietcd монитор сам хранит центральные метаданные кластера и
|
||||
выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
|
||||
`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
|
||||
antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
|
||||
(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
|
||||
node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
|
||||
в конфигурации Vitastor с префиксом `antietcd_`.
|
||||
|
||||
Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
|
||||
`anticli`:
|
||||
|
||||
```
|
||||
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
|
||||
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
|
||||
```
|
||||
- name: enable_prometheus
|
||||
type: bool
|
||||
default: true
|
||||
info: |
|
||||
Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
|
||||
|
||||
Note that only the active (master) monitor exposes metrics, others return
|
||||
HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
|
||||
|
||||
Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
info_ru: |
|
||||
Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
|
||||
|
||||
Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
|
||||
возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
|
||||
в задание по сбору метрик Prometheus.
|
||||
|
||||
Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
|
||||
- name: mon_http_port
|
||||
type: int
|
||||
default: 8060
|
||||
info: HTTP port for monitors to listen on (including metrics exporter)
|
||||
info_ru: Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
|
||||
- name: mon_http_ip
|
||||
type: string
|
||||
info: IP address for monitors to listen on (all addresses by default)
|
||||
info_ru: IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
|
||||
- name: mon_https_cert
|
||||
type: string
|
||||
info: Path to PEM SSL certificate file for monitor to listen using HTTPS
|
||||
info_ru: Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
|
||||
- name: mon_https_key
|
||||
type: string
|
||||
info: Path to PEM SSL private key file for monitor to listen using HTTPS
|
||||
info_ru: Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
|
||||
- name: mon_https_client_auth
|
||||
type: bool
|
||||
default: false
|
||||
info: Enable HTTPS client certificate-based authorization for monitor connections
|
||||
info_ru: Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
|
||||
- name: mon_https_ca
|
||||
type: string
|
||||
info: Path to CA certificate for client HTTPS authorization
|
||||
info_ru: Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
|
||||
- name: etcd_mon_ttl
|
||||
type: sec
|
||||
min: 5
|
||||
|
@@ -1,3 +1,21 @@
|
||||
- name: osd_iothread_count
|
||||
type: int
|
||||
default: 0
|
||||
info: |
|
||||
TCP network I/O thread count for OSD. When non-zero, a single OSD process
|
||||
may handle more TCP I/O, but at a cost of increased latency because thread
|
||||
switching overhead occurs. RDMA isn't affected by this option.
|
||||
|
||||
Because of latency, instead of enabling OSD I/O threads it's recommended to
|
||||
just create multiple OSDs per disk, or use RDMA.
|
||||
info_ru: |
|
||||
Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
|
||||
стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
|
||||
по сети больше данных, но ухудшает задержку из-за накладных расходов
|
||||
переключения потоков. На работу RDMA опция не влияет.
|
||||
|
||||
Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
|
||||
просто создавать по несколько OSD на каждом диске, или использовать RDMA.
|
||||
- name: etcd_report_interval
|
||||
type: sec
|
||||
default: 5
|
||||
|
@@ -16,8 +16,6 @@
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
|
||||
stable version from 0.9.x branch instead of 1.x
|
||||
- For Debian 10 (Buster) also enable backports repository:
|
||||
`deb http://deb.debian.org/debian buster-backports main`
|
||||
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
|
@@ -16,8 +16,6 @@
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
|
||||
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
|
||||
- Для Debian 10 (Buster) также включите репозиторий backports:
|
||||
`deb http://deb.debian.org/debian buster-backports main`
|
||||
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
|
@@ -17,10 +17,10 @@ To enable Vitastor support in Proxmox Virtual Environment (6.4-8.1 are supported
|
||||
- Restart pvedaemon: `systemctl restart pvedaemon`
|
||||
|
||||
`/etc/pve/storage.cfg` example (the only required option is vitastor_pool, all others
|
||||
are listed below with their default values):
|
||||
are listed below with their default values; `vitastor_ssd` is Proxmox storage pool id):
|
||||
|
||||
```
|
||||
vitastor: vitastor
|
||||
vitastor: vitastor_ssd
|
||||
# pool to put new images into
|
||||
vitastor_pool testpool
|
||||
# path to the configuration file
|
||||
|
@@ -16,10 +16,10 @@
|
||||
- Перезапустите демон Proxmox: `systemctl restart pvedaemon`
|
||||
|
||||
Пример `/etc/pve/storage.cfg` (единственная обязательная опция - vitastor_pool, все остальные
|
||||
перечислены внизу для понимания значений по умолчанию):
|
||||
перечислены внизу для понимания значений по умолчанию; `vitastor_ssd` - имя хранилища в Proxmox):
|
||||
|
||||
```
|
||||
vitastor: vitastor
|
||||
vitastor: vitastor_ssd
|
||||
# Пул, в который будут помещаться образы дисков
|
||||
vitastor_pool testpool
|
||||
# Путь к файлу конфигурации
|
||||
|
@@ -34,6 +34,8 @@
|
||||
- [Client write-back cache](../config/client.en.md#client_enable_writeback)
|
||||
- [Intelligent recovery auto-tuning](../config/osd.en.md#recovery_tune_interval)
|
||||
- [Clustered file system](../usage/nfs.en.md#vitastorfs)
|
||||
- [Experimental internal etcd replacement - antietcd](../config/monitor.en.md#use_antietcd)
|
||||
- [Built-in Prometheus metric exporter](../config/monitor.en.md#enable_prometheus)
|
||||
|
||||
## Plugins and tools
|
||||
|
||||
|
@@ -36,6 +36,8 @@
|
||||
- [Буферизация записи на стороне клиента](../config/client.ru.md#client_enable_writeback)
|
||||
- [Интеллектуальная автоподстройка скорости восстановления](../config/osd.ru.md#recovery_tune_interval)
|
||||
- [Кластерная файловая система](../usage/nfs.ru.md#vitastorfs)
|
||||
- [Экспериментальная встроенная замена etcd - antietcd](../config/monitor.ru.md#use_antietcd)
|
||||
- [Встроенный Prometheus-экспортер метрик](../config/monitor.ru.md#enable_prometheus)
|
||||
|
||||
## Драйверы и инструменты
|
||||
|
||||
|
@@ -68,10 +68,6 @@ On the monitor hosts:
|
||||
but some free unpartitioned space must be available because the script creates new partitions for journals.
|
||||
- You can change OSD configuration in units or in `vitastor.conf`.
|
||||
Check [Configuration Reference](../config.en.md) for parameter descriptions.
|
||||
- If all your drives have capacitors, and even if not, but if you ran `vitastor-disk`
|
||||
without `--disable_data_fsync off` at the first step, then put the following
|
||||
setting into etcd: \
|
||||
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
|
||||
- Start all OSDs: `systemctl start vitastor.target`
|
||||
|
||||
## Create a pool
|
||||
@@ -88,6 +84,10 @@ For EC pools the configuration should look like the following:
|
||||
vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
|
||||
```
|
||||
|
||||
Add `--immediate_commit none` if you added `--disable_data_fsync off` at the OSD
|
||||
initialization step, or if `vitastor-disk` complained that it was unable to
|
||||
disable drive cache.
|
||||
|
||||
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
||||
|
||||
If you use HDDs you should also add `"block_size": 1048576` to pool configuration.
|
||||
|
@@ -69,11 +69,6 @@
|
||||
для журналов, на SSD должно быть доступно свободное нераспределённое место.
|
||||
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
|
||||
смотрите в [справке по конфигурации](../config.ru.md).
|
||||
- Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
|
||||
вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
|
||||
не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
|
||||
в глобальную конфигурацию в etcd: \
|
||||
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
|
||||
- Запустите все OSD: `systemctl start vitastor.target`
|
||||
|
||||
## Создайте пул
|
||||
@@ -90,6 +85,10 @@ vitastor-cli create-pool testpool --pg_size 2 --pg_count 256
|
||||
vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
|
||||
```
|
||||
|
||||
Добавьте также опцию `--immediate_commit none`, если вы добавляли `--disable_data_fsync off`
|
||||
на этапе инициализации OSD, либо если `vitastor-disk` ругался на невозможность отключения
|
||||
кэша дисков.
|
||||
|
||||
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
||||
|
||||
Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.
|
||||
|
@@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
|
||||
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
|
||||
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
|
||||
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
|
||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
|
||||
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
|
||||
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
|
||||
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
|
||||
the PG state.
|
||||
@@ -107,16 +107,17 @@ If a PG is active it can also have any number of the following additional states
|
||||
|
||||
## Removing a healthy disk
|
||||
|
||||
Befor removing a healthy disk from the cluster set its OSD weight(s) to 0 to
|
||||
move data away. To do that, add `"reweight":0` to etcd key `/vitastor/config/osd/<OSD_NUMBER>`.
|
||||
For example:
|
||||
Before removing a healthy disk from the cluster set its OSD weight(s) to 0 to
|
||||
move data away. To do that, run `vitastor-cli modify-osd --reweight 0 <OSD_NUMBER>`.
|
||||
|
||||
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
Zero weight can also be put manually into etcd key `/vitastor/config/osd/<OSD_NUMBER>`, for example:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
```
|
||||
|
||||
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
## Removing a failed disk
|
||||
|
||||
If a disk is already dead, its OSD(s) are likely already stopped.
|
||||
@@ -149,7 +150,7 @@ POOL_ID=1
|
||||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||
for i in $(seq 1 $PG_COUNT); do
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||
done
|
||||
```
|
||||
|
||||
@@ -168,21 +169,51 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
|
||||
upgrade and restart servers one by one. However, ideally you should restart VMs too
|
||||
to make them use the new version of the client library.
|
||||
|
||||
Exceptions (specific upgrade instructions):
|
||||
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
|
||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||
without this intermediate step, client I/O will hang until the end of upgrade process.
|
||||
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||
### 1.1.x to 1.2.0
|
||||
|
||||
Rollback:
|
||||
- Version 1.0.0 has a new disk format, so OSDs initiaziled on 1.0.0 can't be rolled
|
||||
back to 0.9.x or previous versions.
|
||||
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
|
||||
start with 0.7.x or 0.6.x. :-)
|
||||
Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
|
||||
to be performed with full downtime: first you should stop all clients, then all OSDs,
|
||||
then upgrade and start everything back — because versions before 1.2.0 have several
|
||||
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
|
||||
|
||||
### 0.8.7 to 0.9.0
|
||||
|
||||
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
|
||||
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
|
||||
without this intermediate step, client I/O will hang until the end of upgrade process.
|
||||
|
||||
### 0.5.x to 0.6.x
|
||||
|
||||
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
|
||||
|
||||
## Downgrade
|
||||
|
||||
Downgrades are also allowed freely, except for the following specific cases:
|
||||
|
||||
### 1.8.0 to 1.7.1
|
||||
|
||||
Before downgrading from version >= 1.8.0 to version <= 1.7.1
|
||||
you have to copy /vitastor/pg/config etcd key to /vitastor/config/pgs:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||
```
|
||||
|
||||
Then you can just install older packages and restart all services.
|
||||
|
||||
If you performed downgrade without first copying that key, run "add all OSDs into the
|
||||
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
|
||||
|
||||
### 1.0.0 to 0.9.x
|
||||
|
||||
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
|
||||
be rolled back to 0.9.x or previous versions.
|
||||
|
||||
### 0.8.0 to 0.7.x
|
||||
|
||||
Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
|
||||
start with older versions (0.4.x - 0.7.x). :-)
|
||||
|
||||
## OSD memory usage
|
||||
|
||||
|
@@ -42,7 +42,7 @@
|
||||
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
|
||||
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
|
||||
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
|
||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
|
||||
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
|
||||
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
|
||||
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
|
||||
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
|
||||
@@ -105,14 +105,16 @@ PG должны очень быстро переходить из них в др
|
||||
## Удаление исправного диска
|
||||
|
||||
Перед удалением исправного диска из кластера установите его OSD вес в 0, чтобы убрать с него данные.
|
||||
Для этого добавьте в ключ `/vitastor/config/osd/<НОМЕР_OSD>` в etcd значение `"reweight":0`, например:
|
||||
Для этого выполните команду `vitastor-cli modify-osd --reweight 0 <НОМЕР_OSD>`.
|
||||
|
||||
Дождитесь завершения перебалансировки данных, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
Также вес 0 можно прописать вручную прямо в etcd в ключ `/vitastor/config/osd/<НОМЕР_OSD>`, например:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
|
||||
```
|
||||
|
||||
Дождитесь завершения ребаланса, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
|
||||
|
||||
## Удаление неисправного диска
|
||||
|
||||
Если диск уже умер, его OSD, скорее всего, уже будет/будут остановлен(ы).
|
||||
@@ -145,7 +147,7 @@ POOL_ID=1
|
||||
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
|
||||
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
|
||||
for i in $(seq 1 $PG_COUNT); do
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'; done
|
||||
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
|
||||
done
|
||||
```
|
||||
|
||||
@@ -164,21 +166,51 @@ done
|
||||
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
|
||||
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
|
||||
|
||||
Исключения (особые указания при обновлении):
|
||||
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||
### 1.1.x -> 1.2.0
|
||||
|
||||
Откат:
|
||||
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||
нельзя откатить до версии 0.9.x и более ранних.
|
||||
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
|
||||
до 0.7.x или 0.6.x. :-)
|
||||
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
|
||||
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
|
||||
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
|
||||
могли приводить к некорректному чтению данных в деградированных EC-пулах.
|
||||
|
||||
### 0.8.7 -> 0.9.0
|
||||
|
||||
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
|
||||
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
|
||||
Иначе клиентский ввод-вывод зависнет до завершения обновления.
|
||||
|
||||
### 0.5.x -> 0.6.x
|
||||
|
||||
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
|
||||
|
||||
## Откат версии
|
||||
|
||||
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
|
||||
|
||||
### 1.8.0 -> 1.7.1
|
||||
|
||||
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
|
||||
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
|
||||
|
||||
```
|
||||
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
|
||||
etcdctl --endpoints=http://... put /vitastor/config/pgs
|
||||
```
|
||||
|
||||
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
|
||||
|
||||
Если вы откатили версию, не скопировав предварительно этот ключ - выполните "добавление всех
|
||||
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
|
||||
|
||||
### 1.0.0 -> 0.9.x
|
||||
|
||||
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
|
||||
нельзя откатить до версии 0.9.x и более ранних.
|
||||
|
||||
### 0.8.0 -> 0.7.x
|
||||
|
||||
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
|
||||
более ранних версиях (0.4.x - 0.7.x). :-)
|
||||
|
||||
## Потребление памяти OSD
|
||||
|
||||
|
@@ -24,6 +24,10 @@ It supports the following commands:
|
||||
- [fix](#fix)
|
||||
- [alloc-osd](#alloc-osd)
|
||||
- [rm-osd](#rm-osd)
|
||||
- [osd-tree](#osd-tree)
|
||||
- [ls-osd](#ls-osd)
|
||||
- [modify-osd](#modify-osd)
|
||||
- [pg-list](#pg-list)
|
||||
- [create-pool](#create-pool)
|
||||
- [modify-pool](#modify-pool)
|
||||
- [ls-pools](#ls-pools)
|
||||
@@ -174,6 +178,7 @@ Remove inode data without changing metadata.
|
||||
--wait-list Retrieve full objects listings before starting to remove objects.
|
||||
Requires more memory, but allows to show correct removal progress.
|
||||
--min-offset Purge only data starting with specified offset.
|
||||
--max-offset Purge only data before specified offset.
|
||||
```
|
||||
|
||||
## merge-data
|
||||
@@ -246,6 +251,82 @@ Refuses to remove OSDs with data without `--force` and `--allow-data-loss`.
|
||||
With `--dry-run` only checks if deletion is possible without data loss and
|
||||
redundancy degradation.
|
||||
|
||||
## osd-tree
|
||||
|
||||
`vitastor-cli osd-tree [-l|--long]`
|
||||
|
||||
Show current OSD tree, optionally with I/O statistics if -l is specified.
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
TYPE NAME UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
host kaveri
|
||||
disk nvme0n1p1
|
||||
osd 3 down 100G 0 % abc,kaveri 1 128k 4k none -
|
||||
osd 4 down 100G 0 % 1 128k 4k none -
|
||||
disk nvme1n1p1
|
||||
osd 5 down 100G 0 % abc,kaveri 1 128k 4k none -
|
||||
osd 6 down 100G 0 % 1 128k 4k none -
|
||||
host stump
|
||||
osd 1 up 100G 37.29 % osdone 1 128k 4k all -
|
||||
osd 2 up 100G 26.8 % abc 1 128k 4k all -
|
||||
osd 7 up 100G 21.84 % 1 128k 4k all -
|
||||
osd 8 up 100G 21.63 % 1 128k 4k all -
|
||||
osd 9 up 100G 20.69 % 1 128k 4k all -
|
||||
osd 10 up 100G 21.61 % 1 128k 4k all -
|
||||
osd 11 up 100G 21.53 % 1 128k 4k all -
|
||||
osd 12 up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## ls-osd
|
||||
|
||||
`vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
|
||||
|
||||
Show current OSDs as a list, optionally with I/O statistics if -l is specified.
|
||||
|
||||
Example output:
|
||||
|
||||
```
|
||||
OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
3 kaveri/nvme0n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
4 kaveri/nvme0n1p1 down 100G 0 % 1 128k 4k none -
|
||||
5 kaveri/nvme1n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
6 kaveri/nvme1n1p1 down 100G 0 % 1 128k 4k none -
|
||||
1 stump up 100G 37.29 % osdone 1 128k 4k all -
|
||||
2 stump up 100G 26.8 % globl 1 128k 4k all -
|
||||
7 stump up 100G 21.84 % 1 128k 4k all -
|
||||
8 stump up 100G 21.63 % 1 128k 4k all -
|
||||
9 stump up 100G 20.69 % 1 128k 4k all -
|
||||
10 stump up 100G 21.61 % 1 128k 4k all -
|
||||
11 stump up 100G 21.53 % 1 128k 4k all -
|
||||
12 stump up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## modify-osd
|
||||
|
||||
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
|
||||
|
||||
Set OSD reweight, tags or noout flag. See the detailed description in [OSD config documentation](../config/pool.en.md#osd-settings).
|
||||
|
||||
## pg-list
|
||||
|
||||
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
|
||||
|
||||
List PGs with any of listed state filters (^ or ! in the beginning is negation). Options:
|
||||
|
||||
```
|
||||
--pool <pool name or number> Only list PGs of the given pool.
|
||||
--min <min pg number> Only list PGs with number >= min.
|
||||
--max <max pg number> Only list PGs with number <= max.
|
||||
```
|
||||
|
||||
Examples:
|
||||
|
||||
`vitastor-cli pg-list active+degraded`
|
||||
|
||||
`vitastor-cli pg-list ^active`
|
||||
|
||||
## create-pool
|
||||
|
||||
`vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`
|
||||
|
@@ -23,6 +23,10 @@ vitastor-cli - интерфейс командной строки для адм
|
||||
- [merge-data](#merge-data)
|
||||
- [alloc-osd](#alloc-osd)
|
||||
- [rm-osd](#rm-osd)
|
||||
- [osd-tree](#osd-tree)
|
||||
- [ls-osd](#ls-osd)
|
||||
- [modify-osd](#modify-osd)
|
||||
- [pg-list](#pg-list)
|
||||
- [create-pool](#create-pool)
|
||||
- [modify-pool](#modify-pool)
|
||||
- [ls-pools](#ls-pools)
|
||||
@@ -182,6 +186,7 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
||||
--wait-list Сначала запросить полный листинг объектов, а потом начать удалять.
|
||||
Требует больше памяти, но позволяет правильно печатать прогресс удаления.
|
||||
--min-offset Удалять только данные, начиная с заданного смещения.
|
||||
--max-offset Удалять только данные до (исключительно) заданного смещения.
|
||||
```
|
||||
|
||||
## merge-data
|
||||
@@ -263,6 +268,83 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
||||
С опцией `--dry-run` только проверяет, возможно ли удаление без потери данных и деградации
|
||||
избыточности.
|
||||
|
||||
## osd-tree
|
||||
|
||||
`vitastor-cli osd-tree [-l|--long]`
|
||||
|
||||
Показать дерево OSD, со статистикой ввода-вывода, если установлено -l.
|
||||
|
||||
Пример вывода:
|
||||
|
||||
```
|
||||
TYPE NAME UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
host kaveri
|
||||
disk nvme0n1p1
|
||||
osd 3 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
osd 4 down 100G 0 % 1 128k 4k none -
|
||||
disk nvme1n1p1
|
||||
osd 5 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
osd 6 down 100G 0 % 1 128k 4k none -
|
||||
host stump
|
||||
osd 1 up 100G 37.29 % osdone 1 128k 4k all -
|
||||
osd 2 up 100G 26.8 % globl 1 128k 4k all -
|
||||
osd 7 up 100G 21.84 % 1 128k 4k all -
|
||||
osd 8 up 100G 21.63 % 1 128k 4k all -
|
||||
osd 9 up 100G 20.69 % 1 128k 4k all -
|
||||
osd 10 up 100G 21.61 % 1 128k 4k all -
|
||||
osd 11 up 100G 21.53 % 1 128k 4k all -
|
||||
osd 12 up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## ls-osd
|
||||
|
||||
`vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
|
||||
|
||||
Показать список OSD, со статистикой ввода-вывода, если установлено -l.
|
||||
|
||||
Пример вывода:
|
||||
|
||||
```
|
||||
OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
|
||||
3 kaveri/nvme0n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
4 kaveri/nvme0n1p1 down 100G 0 % 1 128k 4k none -
|
||||
5 kaveri/nvme1n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
|
||||
6 kaveri/nvme1n1p1 down 100G 0 % 1 128k 4k none -
|
||||
1 stump up 100G 37.29 % osdone 1 128k 4k all -
|
||||
2 stump up 100G 26.8 % globl 1 128k 4k all -
|
||||
7 stump up 100G 21.84 % 1 128k 4k all -
|
||||
8 stump up 100G 21.63 % 1 128k 4k all -
|
||||
9 stump up 100G 20.69 % 1 128k 4k all -
|
||||
10 stump up 100G 21.61 % 1 128k 4k all -
|
||||
11 stump up 100G 21.53 % 1 128k 4k all -
|
||||
12 stump up 100G 22.4 % 1 128k 4k all -
|
||||
```
|
||||
|
||||
## modify-osd
|
||||
|
||||
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
|
||||
|
||||
Установить вес OSD, теги или флаг noout. Смотрите подробное описание в [документации настроек OSD](../config/pool.ru.md#настройки-osd).
|
||||
|
||||
## pg-list
|
||||
|
||||
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
|
||||
|
||||
Вывести список PG с состояниями, удовлетворяющими любому из переданных фильтров (^ или !
|
||||
в начале фильтра означает отрицание). Опции:
|
||||
|
||||
```
|
||||
--pool <pool name or number> Only list PGs of the given pool.
|
||||
--min <min pg number> Only list PGs with number >= min.
|
||||
--max <max pg number> Only list PGs with number <= max.
|
||||
```
|
||||
|
||||
Примеры:
|
||||
|
||||
`vitastor-cli pg-list active+degraded`
|
||||
|
||||
`vitastor-cli pg-list ^active`
|
||||
|
||||
## create-pool
|
||||
|
||||
`vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`
|
||||
|
@@ -11,6 +11,8 @@ Vitastor has two file system implementations. Both can be used via `vitastor-nfs
|
||||
Commands:
|
||||
- [mount](#mount)
|
||||
- [start](#start)
|
||||
- [upgrade](#upgrade)
|
||||
- [defrag](#defrag)
|
||||
|
||||
## Pseudo-FS
|
||||
|
||||
@@ -86,10 +88,6 @@ POSIX features currently not implemented in VitastorFS:
|
||||
- Modification time (`mtime`) is updated lazily every second (like `-o lazytime`)
|
||||
|
||||
Other notable missing features which should be addressed in the future:
|
||||
- Defragmentation of "shared" inodes. Files smaller than pool object size (block_size
|
||||
multiplied by data part count if pool is EC) are internally stored in large block
|
||||
volumes sequentially, one after another, and leave garbage after deleting or resizing.
|
||||
Defragmentator will be implemented to collect this garbage.
|
||||
- Inode ID reuse. Currently inode IDs always grow, the limit is 2^48 inodes, so
|
||||
in theory you may hit it if you create and delete a very large number of files
|
||||
- Compaction of the key-value B-Tree. Current implementation never merges or deletes
|
||||
@@ -139,6 +137,37 @@ Start network NFS server. Options:
|
||||
| `--port <PORT>` | use port \<PORT> for NFS services (default is 2049) |
|
||||
| `--portmap 0` | do not listen on port 111 (portmap/rpcbind, requires root) |
|
||||
|
||||
### upgrade
|
||||
|
||||
`vitastor-nfs --fs <NAME> upgrade`
|
||||
|
||||
Upgrade FS metadata. Can be run online, but server(s) should be restarted after upgrade.
|
||||
|
||||
### defrag
|
||||
|
||||
`vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
|
||||
|
||||
Defragment volumes used for small file storage having more than \<defrag_percent> %
|
||||
of data removed. Can be run online.
|
||||
|
||||
In VitastorFS, small files are stored in large "volumes" / "shared inodes" one
|
||||
after another. When you delete or extend such files, they are moved and garbage is left
|
||||
behind. Defragmentation removes garbage and moves data still in use to new volumes.
|
||||
|
||||
Options:
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|--------------------------|------------------------------------------------------------------------ |
|
||||
| --volume_untouched 86400 | Defragment volumes last appended to at least this number of seconds ago |
|
||||
| --defrag_percent 50 | Defragment volumes with at least this % of removed data |
|
||||
| --defrag_block_count 16 | Read this number of pool blocks at once during defrag |
|
||||
| --defrag_iodepth 16 | Move up to this number of files in parallel during defrag |
|
||||
| --trace | Print verbose defragmentation status |
|
||||
| --dry-run | Skip modifications, only print status |
|
||||
| --recalc-stats | Recalculate all volume statistics |
|
||||
| --include-empty | Include old and empty volumes; make sure to restart NFS servers before using it |
|
||||
| --no-rm | Move, but do not delete data |
|
||||
|
||||
## Common options
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|
@@ -11,6 +11,8 @@
|
||||
Команды:
|
||||
- [mount](#mount)
|
||||
- [start](#start)
|
||||
- [upgrade](#upgrade)
|
||||
- [defrag](#defrag)
|
||||
|
||||
## Псевдо-ФС
|
||||
|
||||
@@ -88,11 +90,6 @@ JSON-формате :-). Для инспекции содержимого БД
|
||||
- Времена модификации (`mtime`) отслеживаются асинхронно (как будто ФС смонтирована с `-o lazytime`)
|
||||
|
||||
Другие недостающие функции, которые нужно добавить в будущем:
|
||||
- Дефрагментация "общих инодов". На уровне реализации ФС файлы, меньшие, чем размер
|
||||
объекта пула (block_size умножить на число частей данных, если пул EC),
|
||||
упаковываются друг за другом в большие "общие" иноды/тома. Если такие файлы удалять
|
||||
или увеличивать, они перемещаются и оставляют за собой "мусор", вот тут-то и нужен
|
||||
дефрагментатор.
|
||||
- Переиспользование номеров инодов. В текущей реализации номера инодов всё время
|
||||
увеличиваются, так что в теории вы можете упереться в лимит, если насоздаёте
|
||||
и наудаляете больше, чем 2^48 файлов.
|
||||
@@ -145,6 +142,40 @@ JSON-формате :-). Для инспекции содержимого БД
|
||||
| `--port <PORT>` | использовать порт \<PORT> для NFS-сервисов (по умолчанию 2049) |
|
||||
| `--portmap 0` | отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий) |
|
||||
|
||||
### upgrade
|
||||
|
||||
`vitastor-nfs --fs <NAME> upgrade`
|
||||
|
||||
Обновить метаданные ФС. Можно запускать онлайн (при запущенных серверах NFS), но после выполнения их всё
|
||||
же желательно перезапустить.
|
||||
|
||||
### defrag
|
||||
|
||||
`vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
|
||||
|
||||
Дефрагментировать тома, используемые для хранения мелких файлов, в которых более, чем
|
||||
\<defrag_percent> процентов данных удалено. Можно запускать онлайн.
|
||||
|
||||
На уровне реализации ФС файлы, меньшие, чем размер объекта пула (block_size умножить на число
|
||||
частей данных, если пул EC), упаковываются друг за другом в большие "тома" / "общие иноды".
|
||||
Когда такие файлы удаляются или увеличиваются, они перемещаются и оставляют за собой "мусор".
|
||||
|
||||
При дефрагментации мусор удаляется, а всё ещё используемые данные перемещаются в новые тома.
|
||||
|
||||
Опции:
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|--------------------------|------------------------------------------------------------------------ |
|
||||
| --volume_untouched 86400 | Дефрагментировать только тома, в которые уже не писали это число секунд |
|
||||
| --defrag_percent 50 | Дефрагментировать только тома, в которых этот % данных удалён |
|
||||
| --defrag_block_count 16 | Читать это количество блоков пула за один раз |
|
||||
| --defrag_iodepth 16 | Перемещать одновременно до этого числа файлов |
|
||||
| --trace | Печатать детальную статистику дефрагментации |
|
||||
| --dry-run | Не производить никаких изменений, только описать выполняемые действия |
|
||||
| --recalc-stats | Пересчитать и сохранить статистику всех томов |
|
||||
| --include-empty | Дефрагментировать старые и пустые тома; обязательно перезапустите NFS-сервера после использования этой опции |
|
||||
| --no-rm | Перемещать, но не удалять данные |
|
||||
|
||||
## Общие опции
|
||||
|
||||
| <!-- --> | <!-- --> |
|
||||
|
188
mon/antietcd_adapter.js
Normal file
188
mon/antietcd_adapter.js
Normal file
@@ -0,0 +1,188 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const AntiEtcd = require('antietcd');
|
||||
|
||||
const vitastor_persist_filter = require('./vitastor_persist_filter.js');
|
||||
const { b64, local_ips } = require('./utils.js');
|
||||
|
||||
// Adapter that lets the monitor use an embedded Antietcd instance through the
// same set of operations it calls on its `etcd` object: parse_config(),
// start_watcher(), stop_watcher(), become_master() and etcd_call().
class AntiEtcdAdapter
{
    // Start an embedded Antietcd node if config.use_antietcd is set and exactly one
    // entry of config.etcd_address points to a local IP (and to config.antietcd_port,
    // when that is given). Resolves to the started instance, or to undefined when
    // Antietcd is disabled or no address is local. Exits the process with code 1
    // when more than one address matches.
    static async start_antietcd(config)
    {
        if (!config.use_antietcd)
        {
            return undefined;
        }
        // etcd_address may be an array or a comma-separated string
        let addresses = config.etcd_address;
        if (!(addresses instanceof Array))
        {
            addresses = addresses ? (''+(addresses||'')).split(/,+/) : [];
        }
        // Reduce every URL to lowercase host[:port] and drop duplicates
        const unique = {};
        addresses.forEach((url) =>
        {
            unique[url.toLowerCase().replace(/^(https?:\/\/)/, '').replace(/\/.*$/, '')] = true;
        });
        const cluster = Object.keys(unique);
        const wanted_port = config.antietcd_port;
        const local = {};
        local_ips(true).forEach((addr) => { local[addr] = true; });
        const matches = cluster
            .map(addr => addr.split(':', 2))
            .filter(([ host, port ]) => local[host] && (!wanted_port || port == wanted_port));
        if (matches.length > 1)
        {
            console.error('More than 1 etcd_address matches local IPs, please specify port');
            process.exit(1);
            return undefined;
        }
        if (matches.length != 1)
        {
            console.log('Antietcd is enabled, but etcd_address does not contain local IPs, proceeding without it');
            return undefined;
        }
        const [ ip, port ] = matches[0];
        const data_file = config.antietcd_data_file ||
            ((config.antietcd_data_dir || '/var/lib/vitastor') + '/mon_'+port+'.json.gz');
        const persist_filter = vitastor_persist_filter({ vitastor_prefix: config.etcd_prefix || '/vitastor' });
        // A single-address setup runs without a peer map
        let peers = null;
        if (cluster.length != 1)
        {
            peers = {};
            for (const addr of cluster)
            {
                peers[addr] = "http://"+addr;
            }
        }
        const antietcd_config = {
            ip,
            port,
            data: data_file,
            persist_filter,
            node_id: ip+':'+port, // node_id = ip:port
            cluster: peers,
            cluster_key: (config.etcd_prefix || '/vitastor'),
            stale_read: 1,
            log_level: 1,
        };
        // Pass through antietcd_* options; of the keys set above only
        // `ip` and `cluster_key` may be overridden this way
        for (const key in config)
        {
            if (!key.startsWith('antietcd_'))
            {
                continue;
            }
            const name = key.slice(9);
            const overridable = !(name in antietcd_config) || name == 'ip' || name == 'cluster_key';
            if (overridable)
            {
                antietcd_config[name] = config[key];
            }
        }
        console.log('Starting Antietcd node '+antietcd_config.node_id);
        const node = new AntiEtcd(antietcd_config);
        await node.start();
        return node;
    }

    // mon: the monitor object using this adapter; antietcd: a started Antietcd instance.
    // Subscribes to 'raftchange' so that become_master() can wait for leadership.
    constructor(mon, antietcd)
    {
        this.mon = mon;
        this.antietcd = antietcd;
        // Callbacks to run once this node reports the 'leader' state
        this.on_leader = [];
        this.on_change = (st) =>
        {
            if (st.state !== 'leader')
            {
                return;
            }
            const waiting = this.on_leader;
            for (let i = 0; i < waiting.length; i++)
            {
                waiting[i]();
            }
            this.on_leader = [];
        };
        this.antietcd.on('raftchange', this.on_change);
    }

    // Nothing to parse for the embedded instance; the method takes a config argument and ignores it
    parse_config(/*config*/)
    {
    }

    // Unsubscribe from raft state changes and cancel the active watch, if any.
    // Cancellation errors are only logged.
    stop_watcher()
    {
        this.antietcd.off('raftchange', this.on_change);
        const active = this.watch_id;
        if (!active)
        {
            return;
        }
        this.watch_id = null;
        this.antietcd.cancel_watch(active).catch(console.error);
    }

    // (Re)create the watch over all keys under etcd_prefix+'/', starting from the
    // monitor's etcd_watch_revision. Watch messages are handed to mon.on_message()
    // through setImmediate().
    async start_watcher()
    {
        if (this.watch_id)
        {
            await this.antietcd.cancel_watch(this.watch_id);
            this.watch_id = null;
        }
        const prefix = this.mon.config.etcd_prefix;
        const request = {
            key: b64(prefix+'/'),
            // '0' is the character following '/', so this bounds the whole prefix
            range_end: b64(prefix+'0'),
            start_revision: ''+this.mon.etcd_watch_revision,
            watch_id: 1,
            progress_notify: true,
        };
        const deliver = (message) =>
        {
            setImmediate(() => this.mon.on_message(message.result));
        };
        const new_watch_id = await this.antietcd.create_watch(request, deliver);
        console.log('Successfully subscribed to antietcd revision '+this.antietcd.etctree.mod_revision);
        this.watch_id = new_watch_id;
    }

    // In clustered mode wait until this node is the raft leader, then write the
    // monitor state to <etcd_prefix>/mon/master attached to the monitor's lease.
    async become_master()
    {
        if (this.antietcd.cluster)
        {
            console.log('Waiting to become master');
            if (this.antietcd.cluster.raft.state !== 'leader')
            {
                // Resolved by on_change when the 'leader' state is reported
                await new Promise(ok => this.on_leader.push(ok));
            }
        }
        else
        {
            console.log('Running in non-clustered mode');
        }
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        const put_master = {
            requestPut: {
                key: b64(this.mon.config.etcd_prefix+'/mon/master'),
                value: b64(JSON.stringify(state)),
                lease: ''+this.mon.etcd_lease_id,
            },
        };
        await this.etcd_call('/kv/txn', { success: [ put_master ] }, this.mon.config.etcd_start_timeout, 0);
        if (this.antietcd.cluster)
        {
            console.log('Became master');
        }
    }

    // Call the Antietcd API method derived from an etcd-style path ('/kv/txn' -> 'kv_txn').
    // A response with a truthy `error` field or a thrown exception counts as a failed attempt.
    // timeout: minimum number of milliseconds between the starts of two attempts.
    // retries: attempt limit; values in [0, 1) mean one attempt, negative means unlimited.
    // Returns the first response without `error`; throws when the monitor is stopped
    // or when the attempts are exhausted.
    async etcd_call(path, body, timeout, retries)
    {
        if (retries >= 0 && retries < 1)
        {
            retries = 1;
        }
        let attempt = 0;
        let last_started = 0;
        const report = (reason) => console.error(
            'Failed to query antietcd '+path+' (retry '+attempt+'/'+retries+'): '+reason
        );
        while (retries < 0 || attempt < retries)
        {
            attempt++;
            if (this.mon.stopped)
            {
                throw new Error('Monitor instance is stopped');
            }
            try
            {
                // Throttle: wait until `timeout` ms have passed since the previous attempt started
                if (Date.now()-last_started < timeout)
                {
                    await new Promise(ok => setTimeout(ok, timeout-(Date.now()-last_started)));
                }
                last_started = Date.now();
                const res = await this.antietcd.api(
                    path.replace(/^\/+/, '').replace(/\/+$/, '').replace(/\/+/g, '_'),
                    body
                );
                if (!res.error)
                {
                    return res;
                }
                report(res.error);
            }
            catch (e)
            {
                report(e.stack);
            }
        }
        throw new Error('Failed to query antietcd ('+retries+' retries)');
    }
}
|
||||
|
||||
module.exports = AntiEtcdAdapter;
|
@@ -3,6 +3,7 @@
|
||||
|
||||
const http = require('http');
|
||||
const WebSocket = require('ws');
|
||||
const { b64, local_ips } = require('./utils.js');
|
||||
|
||||
const MON_STOPPED = 'Monitor instance is stopped';
|
||||
|
||||
@@ -23,7 +24,7 @@ class EtcdAdapter
|
||||
|
||||
parse_etcd_addresses(addrs)
|
||||
{
|
||||
const is_local_ip = this.mon.local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
|
||||
const is_local_ip = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
|
||||
this.etcd_local = [];
|
||||
this.etcd_urls = [];
|
||||
this.selected_etcd_url = null;
|
||||
@@ -348,9 +349,4 @@ function POST(url, body, timeout)
|
||||
});
|
||||
}
|
||||
|
||||
function b64(str)
|
||||
{
|
||||
return Buffer.from(str).toString('base64');
|
||||
}
|
||||
|
||||
module.exports = EtcdAdapter;
|
||||
|
@@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
|
||||
'config/global': 1,
|
||||
'config/node_placement': 1,
|
||||
'config/pools': 1,
|
||||
'config/pgs': 1,
|
||||
'pg/config': 1,
|
||||
'history/last_clean_pgs': 1,
|
||||
'stats': 1,
|
||||
};
|
||||
@@ -15,7 +15,8 @@ const etcd_allow = new RegExp('^'+[
|
||||
'config/node_placement',
|
||||
'config/pools',
|
||||
'config/osd/[1-9]\\d*',
|
||||
'config/pgs',
|
||||
'config/pgs', // old name
|
||||
'pg/config',
|
||||
'config/inode/[1-9]\\d*/[1-9]\\d*',
|
||||
'osd/state/[1-9]\\d*',
|
||||
'osd/stats/[1-9]\\d*',
|
||||
@@ -24,7 +25,8 @@ const etcd_allow = new RegExp('^'+[
|
||||
'mon/master',
|
||||
'mon/member/[a-f0-9]+',
|
||||
'pg/state/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
|
||||
'pgstats/[1-9]\\d*/[1-9]\\d*',
|
||||
'pg/history/[1-9]\\d*/[1-9]\\d*',
|
||||
'history/last_clean_pgs',
|
||||
'inode/stats/[1-9]\\d*/\\d+',
|
||||
@@ -69,7 +71,7 @@ const etcd_tree = {
|
||||
block_size: 131072,
|
||||
disk_alignment: 4096,
|
||||
bitmap_granularity: 4096,
|
||||
immediate_commit: false, // 'all' or 'small'
|
||||
immediate_commit: 'all', // 'none', 'all' or 'small'
|
||||
// client - configurable online
|
||||
client_max_dirty_bytes: 33554432,
|
||||
client_max_dirty_ops: 1024,
|
||||
@@ -189,7 +191,7 @@ const etcd_tree = {
|
||||
block_size: 131072,
|
||||
bitmap_granularity: 4096,
|
||||
// 'all'/'small'/'none', same as in OSD options
|
||||
immediate_commit: 'none',
|
||||
immediate_commit: 'all',
|
||||
pg_stripe_size: 0,
|
||||
root_node?: 'rack1',
|
||||
// restrict pool to OSDs having all of these tags
|
||||
@@ -205,19 +207,6 @@ const etcd_tree = {
|
||||
osd: {
|
||||
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
|
||||
},
|
||||
/* pgs: {
|
||||
hash: string,
|
||||
items: {
|
||||
<pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set: [ 1, 2, 3 ],
|
||||
primary: 1,
|
||||
pause: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}, */
|
||||
pgs: {},
|
||||
/* inode: {
|
||||
<pool_id>: {
|
||||
<inode_t>: {
|
||||
@@ -245,6 +234,9 @@ const etcd_tree = {
|
||||
stats: {
|
||||
/* <osd_num_t>: {
|
||||
time: number, // unix time
|
||||
data_block_size: uint64_t, // bytes
|
||||
bitmap_granularity: uint64_t, // bytes
|
||||
immediate_commit: "all"|"small"|"none",
|
||||
blockstore_ready: boolean,
|
||||
size: uint64_t, // bytes
|
||||
free: uint64_t, // bytes
|
||||
@@ -282,11 +274,24 @@ const etcd_tree = {
|
||||
master: {
|
||||
/* ip: [ string ], id: uint64_t */
|
||||
},
|
||||
standby: {
|
||||
member: {
|
||||
/* <uint64_t>: { ip: [ string ] }, */
|
||||
},
|
||||
},
|
||||
pg: {
|
||||
/* config: {
|
||||
hash: string,
|
||||
items: {
|
||||
<pool_id>: {
|
||||
<pg_id>: {
|
||||
osd_set: [ 1, 2, 3 ],
|
||||
primary: 1,
|
||||
pause: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
}, */
|
||||
config: {},
|
||||
state: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
@@ -297,18 +302,6 @@ const etcd_tree = {
|
||||
}
|
||||
}, */
|
||||
},
|
||||
stats: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
object_count: uint64_t,
|
||||
clean_count: uint64_t,
|
||||
misplaced_count: uint64_t,
|
||||
degraded_count: uint64_t,
|
||||
incomplete_count: uint64_t,
|
||||
write_osd_set: osd_num_t[],
|
||||
},
|
||||
}, */
|
||||
},
|
||||
history: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
@@ -320,6 +313,18 @@ const etcd_tree = {
|
||||
}, */
|
||||
},
|
||||
},
|
||||
pgstats: {
|
||||
/* <pool_id>: {
|
||||
<pg_id>: {
|
||||
object_count: uint64_t,
|
||||
clean_count: uint64_t,
|
||||
misplaced_count: uint64_t,
|
||||
degraded_count: uint64_t,
|
||||
incomplete_count: uint64_t,
|
||||
write_osd_set: osd_num_t[],
|
||||
},
|
||||
}, */
|
||||
},
|
||||
inode: {
|
||||
stats: {
|
||||
/* <pool_id>: {
|
||||
|
50
mon/http_server.js
Normal file
50
mon/http_server.js
Normal file
@@ -0,0 +1,50 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const fsp = require('fs').promises;
|
||||
const http = require('http');
|
||||
const https = require('https');
|
||||
|
||||
// Create the monitor's HTTP or HTTPS server and start listening.
//
// cfg fields read here:
//   mon_https_cert, mon_https_key - paths to the TLS certificate and key;
//                                   HTTPS is used when mon_https_cert is set
//   mon_https_ca                  - optional path to a CA certificate file
//   mon_https_client_auth         - when truthy, a client certificate is requested
//   mon_http_port, mon_http_ip    - listen port (default 8060) and listen address
// handler is the (req, res) request listener.
//
// Resolves to the server object, or to null when listen() throws synchronously.
// Bind failures reported later through the 'error' event are logged; the server
// object is still returned in that case, as before.
async function create_http_server(cfg, handler)
{
    const port = cfg.mon_http_port || 8060;
    // Use the port actually passed to listen() so the message cannot drift from it
    const report_listen_error = (e) => console.error(
        'HTTP server disabled because listen at address: '+
        (cfg.mon_http_ip || '')+':'+port+' failed with error: '+e
    );
    let server;
    if (cfg.mon_https_cert)
    {
        const tls = {
            key: await fsp.readFile(cfg.mon_https_key),
            cert: await fsp.readFile(cfg.mon_https_cert),
        };
        if (cfg.mon_https_ca)
        {
            // The TLS option is named `ca`; an unknown key would be silently ignored
            tls.ca = await fsp.readFile(cfg.mon_https_ca);
        }
        if (cfg.mon_https_client_auth)
        {
            tls.requestCert = true;
        }
        server = https.createServer(tls, handler);
    }
    else
    {
        server = http.createServer(handler);
    }
    // Errors like EADDRINUSE are emitted asynchronously after listen() returns,
    // so they have to be reported from the event handler
    server.once('error', report_listen_error);
    try
    {
        server.listen(port, cfg.mon_http_ip || undefined);
    }
    catch (e)
    {
        // Synchronous failures, e.g. an invalid port value
        report_listen_error(e);
        return null;
    }
    return server;
}
|
||||
|
||||
module.exports = { create_http_server };
|
@@ -23,4 +23,4 @@ for (let i = 2; i < process.argv.length; i++)
|
||||
}
|
||||
}
|
||||
|
||||
Mon.run_forever(options);
|
||||
Mon.run_forever(options).catch(console.error);
|
||||
|
250
mon/mon.js
250
mon/mon.js
@@ -1,27 +1,43 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const { URL } = require('url');
|
||||
const fs = require('fs');
|
||||
const crypto = require('crypto');
|
||||
const os = require('os');
|
||||
const AntiEtcdAdapter = require('./antietcd_adapter.js');
|
||||
const EtcdAdapter = require('./etcd_adapter.js');
|
||||
const { create_http_server } = require('./http_server.js');
|
||||
const { export_prometheus_metrics } = require('./prometheus.js');
|
||||
const { etcd_tree, etcd_allow, etcd_nonempty_keys } = require('./etcd_schema.js');
|
||||
const { validate_pool_cfg } = require('./pool_config.js');
|
||||
const { sum_op_stats, sum_object_counts, sum_inode_stats, serialize_bigints } = require('./stats.js');
|
||||
const stableStringify = require('./stable-stringify.js');
|
||||
const { scale_pg_history } = require('./pg_utils.js');
|
||||
const { get_osd_tree } = require('./osd_tree.js');
|
||||
const { b64, de64, local_ips } = require('./utils.js');
|
||||
const { recheck_primary, save_new_pgs_txn, generate_pool_pgs } = require('./pg_gen.js');
|
||||
|
||||
class Mon
|
||||
{
|
||||
static run_forever(config)
|
||||
static async run_forever(config)
|
||||
{
|
||||
let mergedConfig = config;
|
||||
if (fs.existsSync(config.config_path||'/etc/vitastor/vitastor.conf'))
|
||||
{
|
||||
const fileConfig = JSON.parse(fs.readFileSync(config.config_path||'/etc/vitastor/vitastor.conf', { encoding: 'utf-8' }));
|
||||
mergedConfig = { ...fileConfig, ...config };
|
||||
}
|
||||
let antietcd = await AntiEtcdAdapter.start_antietcd(mergedConfig);
|
||||
let mon;
|
||||
const run = () =>
|
||||
{
|
||||
console.log('Starting Monitor');
|
||||
const my_mon = new Mon(config);
|
||||
my_mon.etcd = antietcd
|
||||
? new AntiEtcdAdapter(my_mon, antietcd)
|
||||
: new EtcdAdapter(my_mon);
|
||||
my_mon.etcd.parse_config(my_mon.config);
|
||||
mon = my_mon;
|
||||
my_mon.on_die = () =>
|
||||
{
|
||||
@@ -58,24 +74,57 @@ class Mon
|
||||
this.state = JSON.parse(JSON.stringify(etcd_tree));
|
||||
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
||||
this.recheck_pgs_active = false;
|
||||
this.etcd = new EtcdAdapter(this);
|
||||
this.etcd.parse_config(this.config);
|
||||
this.watcher_active = false;
|
||||
this.old_pg_config = false;
|
||||
this.old_pg_stats_seen = false;
|
||||
}
|
||||
|
||||
async start()
|
||||
{
|
||||
if (this.config.enable_prometheus || !('enable_prometheus' in this.config))
|
||||
{
|
||||
this.http = await create_http_server(this.config, (req, res) =>
|
||||
{
|
||||
const u = new URL(req.url, 'http://'+(req.headers.host || 'localhost'));
|
||||
if (u.pathname.replace(/\/+$/, '') == (this.config.prometheus_path||'/metrics'))
|
||||
{
|
||||
if (!this.watcher_active)
|
||||
{
|
||||
res.writeHead(503);
|
||||
res.write('Monitor is in standby mode. Please retrieve metrics from master monitor instance\n');
|
||||
}
|
||||
else
|
||||
{
|
||||
res.write(export_prometheus_metrics(this.state));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res.writeHead(404);
|
||||
res.write('Not found. Metrics path: '+(this.config.prometheus_path||'/metrics\n'));
|
||||
}
|
||||
res.end();
|
||||
});
|
||||
this.http_connections = new Set();
|
||||
this.http.on('connection', conn =>
|
||||
{
|
||||
this.http_connections.add(conn);
|
||||
conn.once('close', () => this.http_connections.delete(conn));
|
||||
});
|
||||
}
|
||||
await this.load_config();
|
||||
await this.get_lease();
|
||||
await this.etcd.become_master();
|
||||
await this.load_cluster_state();
|
||||
await this.etcd.start_watcher(this.config.etcd_mon_retries);
|
||||
this.watcher_active = true;
|
||||
for (const pool_id in this.state.config.pools)
|
||||
{
|
||||
if (!this.state.pool.stats[pool_id] ||
|
||||
!Number(this.state.pool.stats[pool_id].pg_real_size))
|
||||
{
|
||||
// Generate missing data in etcd
|
||||
this.state.config.pgs.hash = null;
|
||||
this.state.pg.config.hash = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -147,17 +196,22 @@ class Mon
|
||||
this.etcd_watch_revision = BigInt(msg.header.revision)+BigInt(1);
|
||||
for (const e of msg.events||[])
|
||||
{
|
||||
this.parse_kv(e.kv);
|
||||
const key = e.kv.key.substr(this.config.etcd_prefix.length);
|
||||
const kv = this.parse_kv(e.kv);
|
||||
const key = kv.key.substr(this.config.etcd_prefix.length);
|
||||
if (key.substr(0, 11) == '/osd/state/')
|
||||
{
|
||||
stats_changed = true;
|
||||
changed = true;
|
||||
}
|
||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
|
||||
{
|
||||
stats_changed = true;
|
||||
}
|
||||
else if (key.substr(0, 10) == '/pg/stats/')
|
||||
{
|
||||
this.old_pg_stats_seen = true;
|
||||
stats_changed = true;
|
||||
}
|
||||
else if (key.substr(0, 10) == '/pg/state/')
|
||||
{
|
||||
pg_states_changed = true;
|
||||
@@ -168,7 +222,7 @@ class Mon
|
||||
}
|
||||
if (this.config.verbose)
|
||||
{
|
||||
console.log(JSON.stringify(e));
|
||||
console.log(JSON.stringify({ ...e, kv: kv || undefined }));
|
||||
}
|
||||
}
|
||||
if (pg_states_changed)
|
||||
@@ -238,7 +292,7 @@ class Mon
|
||||
continue next_pool;
|
||||
}
|
||||
}
|
||||
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
|
||||
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
|
||||
}
|
||||
this.state.history.last_clean_pgs = new_clean_pgs;
|
||||
await this.etcd.etcd_call('/kv/txn', {
|
||||
@@ -252,7 +306,7 @@ class Mon
|
||||
|
||||
get_mon_state()
|
||||
{
|
||||
return { ip: this.local_ips(), hostname: os.hostname() };
|
||||
return { ip: local_ips(), hostname: os.hostname() };
|
||||
}
|
||||
|
||||
async get_lease()
|
||||
@@ -284,6 +338,16 @@ class Mon
|
||||
async on_stop()
|
||||
{
|
||||
console.log('Stopping Monitor');
|
||||
if (this.http)
|
||||
{
|
||||
await new Promise(ok =>
|
||||
{
|
||||
this.http.close(ok);
|
||||
for (const conn of this.http_connections)
|
||||
conn.destroy();
|
||||
});
|
||||
this.http = null;
|
||||
}
|
||||
this.etcd.stop_watcher();
|
||||
if (this.save_last_clean_timer)
|
||||
{
|
||||
@@ -339,6 +403,50 @@ class Mon
|
||||
this.parse_kv(kv);
|
||||
}
|
||||
}
|
||||
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
|
||||
{
|
||||
// Support seamless upgrade to new OSDs
|
||||
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
|
||||
{
|
||||
const pgs = JSON.stringify(this.state.config.pgs);
|
||||
this.state.pg.config = JSON.parse(pgs);
|
||||
const res = await this.etcd.etcd_call('/kv/txn', {
|
||||
success: [
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
|
||||
],
|
||||
compare: [
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
],
|
||||
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||
if (!res.succeeded)
|
||||
throw new Error('Failed to duplicate old PG config to new PG config');
|
||||
}
|
||||
this.old_pg_config = true;
|
||||
this.old_pg_config_timer = setInterval(() => this.check_clear_old_config().catch(console.error),
|
||||
this.config.old_pg_config_clear_interval||3600000);
|
||||
}
|
||||
}
|
||||
|
||||
async check_clear_old_config()
|
||||
{
|
||||
if (this.old_pg_config && this.old_pg_stats_seen)
|
||||
{
|
||||
this.old_pg_stats_seen = false;
|
||||
return;
|
||||
}
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
await this.etcd.etcd_call('/kv/txn', { success: [
|
||||
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/config/pgs') } },
|
||||
{ requestDeleteRange: { key: b64(this.config.etcd_prefix+'/pg/stats/'), range_end: b64(this.config.etcd_prefix+'/pg/stats0') } },
|
||||
] }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
|
||||
this.old_pg_config = false;
|
||||
}
|
||||
if (this.old_pg_config_timer)
|
||||
{
|
||||
clearInterval(this.old_pg_config_timer);
|
||||
this.old_pg_config_timer = null;
|
||||
}
|
||||
}
|
||||
|
||||
all_osds()
|
||||
@@ -349,7 +457,7 @@ class Mon
|
||||
async stop_all_pgs(pool_id)
|
||||
{
|
||||
let has_online = false, paused = true;
|
||||
for (const pg in this.state.config.pgs.items[pool_id]||{})
|
||||
for (const pg in this.state.pg.config.items[pool_id]||{})
|
||||
{
|
||||
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
|
||||
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
|
||||
@@ -357,7 +465,7 @@ class Mon
|
||||
{
|
||||
has_online = true;
|
||||
}
|
||||
if (!this.state.config.pgs.items[pool_id][pg].pause)
|
||||
if (!this.state.pg.config.items[pool_id][pg].pause)
|
||||
{
|
||||
paused = false;
|
||||
}
|
||||
@@ -365,7 +473,7 @@ class Mon
|
||||
if (!paused)
|
||||
{
|
||||
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
|
||||
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||
for (const pg in new_cfg.items[pool_id])
|
||||
{
|
||||
new_cfg.items[pool_id][pg].pause = true;
|
||||
@@ -373,22 +481,26 @@ class Mon
|
||||
// Check that no OSDs change their state before we pause PGs
|
||||
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
|
||||
// and can't see the old PG configuration
|
||||
const checks = [];
|
||||
const checks = [
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
];
|
||||
for (const osd_num of this.all_osds())
|
||||
{
|
||||
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
|
||||
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
|
||||
}
|
||||
await this.etcd.etcd_call('/kv/txn', {
|
||||
compare: [
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
...checks,
|
||||
],
|
||||
const txn = {
|
||||
compare: checks,
|
||||
success: [
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
|
||||
],
|
||||
}, this.config.etcd_mon_timeout, 0);
|
||||
};
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
|
||||
}
|
||||
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
|
||||
return false;
|
||||
}
|
||||
return !has_online;
|
||||
@@ -416,7 +528,7 @@ class Mon
|
||||
pools: this.state.config.pools,
|
||||
};
|
||||
const tree_hash = sha1hex(stableStringify(tree_cfg));
|
||||
if (this.state.config.pgs.hash != tree_hash)
|
||||
if (this.state.pg.config.hash != tree_hash)
|
||||
{
|
||||
// Something has changed
|
||||
console.log('Pool configuration or OSD tree changed, re-optimizing');
|
||||
@@ -457,10 +569,10 @@ class Mon
|
||||
else
|
||||
{
|
||||
// Nothing changed, but we still want to recheck the distribution of primaries
|
||||
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||
if (new_config_pgs)
|
||||
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
|
||||
if (new_pg_config)
|
||||
{
|
||||
const ok = await this.save_pg_config(new_config_pgs);
|
||||
const ok = await this.save_pg_config(new_pg_config);
|
||||
if (ok)
|
||||
console.log('PG configuration successfully changed');
|
||||
else
|
||||
@@ -475,12 +587,12 @@ class Mon
|
||||
|
||||
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
|
||||
{
|
||||
for (const pool_id in (this.state.config.pgs||{}).items||{})
|
||||
for (const pool_id in (this.state.pg.config||{}).items||{})
|
||||
{
|
||||
// We should stop all PGs when deleting a pool or changing its PG count
|
||||
if (!this.state.config.pools[pool_id] ||
|
||||
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
|
||||
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
|
||||
{
|
||||
if (!await this.stop_all_pgs(pool_id))
|
||||
{
|
||||
@@ -488,22 +600,22 @@ class Mon
|
||||
}
|
||||
}
|
||||
}
|
||||
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
|
||||
const etcd_request = { compare: [], success: [] };
|
||||
for (const pool_id in (new_config_pgs||{}).items||{})
|
||||
for (const pool_id in (new_pg_config||{}).items||{})
|
||||
{
|
||||
if (!this.state.config.pools[pool_id])
|
||||
{
|
||||
const prev_pgs = [];
|
||||
for (const pg in new_config_pgs.items[pool_id]||{})
|
||||
for (const pg in new_pg_config.items[pool_id]||{})
|
||||
{
|
||||
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
|
||||
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
|
||||
}
|
||||
// Also delete pool statistics
|
||||
etcd_request.success.push({ requestDeleteRange: {
|
||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
} });
|
||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
||||
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
||||
}
|
||||
}
|
||||
@@ -512,7 +624,7 @@ class Mon
|
||||
const pool_id = pool_res.pool_id;
|
||||
const pool_cfg = this.state.config.pools[pool_id];
|
||||
let pg_history = [];
|
||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
||||
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
if (this.state.pg.history[pool_id] &&
|
||||
this.state.pg.history[pool_id][pg])
|
||||
@@ -521,9 +633,9 @@ class Mon
|
||||
}
|
||||
}
|
||||
const real_prev_pgs = [];
|
||||
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
|
||||
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
|
||||
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
|
||||
}
|
||||
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
|
||||
{
|
||||
@@ -534,8 +646,8 @@ class Mon
|
||||
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
|
||||
// Drop stats
|
||||
etcd_request.success.push({ requestDeleteRange: {
|
||||
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
|
||||
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
|
||||
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
|
||||
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
|
||||
} });
|
||||
}
|
||||
const stats = {
|
||||
@@ -546,22 +658,26 @@ class Mon
|
||||
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(stats)),
|
||||
} });
|
||||
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
|
||||
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
|
||||
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
|
||||
}
|
||||
new_config_pgs.hash = tree_hash;
|
||||
return await this.save_pg_config(new_config_pgs, etcd_request);
|
||||
new_pg_config.hash = tree_hash;
|
||||
return await this.save_pg_config(new_pg_config, etcd_request);
|
||||
}
|
||||
|
||||
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
|
||||
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
|
||||
{
|
||||
etcd_request.compare.push(
|
||||
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||
);
|
||||
etcd_request.success.push(
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
|
||||
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
|
||||
);
|
||||
if (this.old_pg_config)
|
||||
{
|
||||
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
|
||||
}
|
||||
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
||||
return txn_res.succeeded;
|
||||
}
|
||||
@@ -690,15 +806,16 @@ class Mon
|
||||
{
|
||||
if (!kv || !kv.key)
|
||||
{
|
||||
return;
|
||||
return kv;
|
||||
}
|
||||
kv = { ...kv };
|
||||
kv.key = de64(kv.key);
|
||||
kv.value = kv.value ? de64(kv.value) : null;
|
||||
let key = kv.key.substr(this.config.etcd_prefix.length+1);
|
||||
if (!etcd_allow.exec(key))
|
||||
{
|
||||
console.log('Bad key in etcd: '+kv.key+' = '+kv.value);
|
||||
return;
|
||||
return kv;
|
||||
}
|
||||
try
|
||||
{
|
||||
@@ -707,7 +824,7 @@ class Mon
|
||||
catch (e)
|
||||
{
|
||||
console.log('Bad value in etcd: '+kv.key+' = '+kv.value);
|
||||
return;
|
||||
return kv;
|
||||
}
|
||||
let key_parts = key.split('/');
|
||||
let cur = this.state;
|
||||
@@ -721,7 +838,14 @@ class Mon
|
||||
kv.value = kv.value || {};
|
||||
}
|
||||
const old = cur[key_parts[key_parts.length-1]];
|
||||
cur[key_parts[key_parts.length-1]] = kv.value;
|
||||
if (kv.value == null)
|
||||
{
|
||||
delete cur[key_parts[key_parts.length-1]];
|
||||
}
|
||||
else
|
||||
{
|
||||
cur[key_parts[key_parts.length-1]] = kv.value;
|
||||
}
|
||||
if (key === 'config/global')
|
||||
{
|
||||
this.config = { ...this.fileConfig, ...this.state.config.global, ...this.cliConfig };
|
||||
@@ -757,6 +881,7 @@ class Mon
|
||||
!this.state.osd.stats[osd_num] ? 0 : this.state.osd.stats[osd_num].time+this.config.osd_out_time
|
||||
);
|
||||
}
|
||||
return kv;
|
||||
}
|
||||
|
||||
_die(err)
|
||||
@@ -766,33 +891,6 @@ class Mon
|
||||
this.on_stop().catch(console.error);
|
||||
this.on_die();
|
||||
}
|
||||
|
||||
local_ips(all)
|
||||
{
|
||||
const ips = [];
|
||||
const ifaces = os.networkInterfaces();
|
||||
for (const ifname in ifaces)
|
||||
{
|
||||
for (const iface of ifaces[ifname])
|
||||
{
|
||||
if (iface.family == 'IPv4' && !iface.internal || all)
|
||||
{
|
||||
ips.push(iface.address);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ips;
|
||||
}
|
||||
}
|
||||
|
||||
function b64(str)
|
||||
{
|
||||
return Buffer.from(str).toString('base64');
|
||||
}
|
||||
|
||||
function de64(str)
|
||||
{
|
||||
return Buffer.from(str, 'base64').toString();
|
||||
}
|
||||
|
||||
function sha1hex(str)
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "1.6.1",
|
||||
"version": "1.8.0",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
@@ -9,6 +9,7 @@
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "UNLICENSED",
|
||||
"dependencies": {
|
||||
"antietcd": "^1.1.0",
|
||||
"sprintf-js": "^1.1.2",
|
||||
"ws": "^7.2.5"
|
||||
},
|
||||
|
@@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
|
||||
|
||||
function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||
{
|
||||
let new_config_pgs;
|
||||
let new_pg_config;
|
||||
for (const pool_id in state.config.pools)
|
||||
{
|
||||
const pool_cfg = state.config.pools[pool_id];
|
||||
@@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
|
||||
reset_rng();
|
||||
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
|
||||
{
|
||||
if (!state.config.pgs.items[pool_id])
|
||||
if (!state.pg.config.items[pool_id])
|
||||
{
|
||||
continue;
|
||||
}
|
||||
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
|
||||
const pg_cfg = state.pg.config.items[pool_id][pg_num];
|
||||
if (pg_cfg)
|
||||
{
|
||||
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
|
||||
if (pg_cfg.primary != new_primary)
|
||||
{
|
||||
if (!new_config_pgs)
|
||||
if (!new_pg_config)
|
||||
{
|
||||
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
|
||||
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
|
||||
}
|
||||
console.log(
|
||||
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
||||
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
||||
);
|
||||
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
|
||||
new_pg_config.items[pool_id][pg_num].primary = new_primary;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return new_config_pgs;
|
||||
return new_pg_config;
|
||||
}
|
||||
|
||||
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
||||
@@ -174,7 +174,7 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||
state.osd.stats,
|
||||
pool_cfg.block_size || global_config.block_size || 131072,
|
||||
pool_cfg.bitmap_granularity || global_config.bitmap_granularity || 4096,
|
||||
pool_cfg.immediate_commit || global_config.immediate_commit || 'none'
|
||||
pool_cfg.immediate_commit || global_config.immediate_commit || 'all'
|
||||
);
|
||||
pool_tree = make_hier_tree(global_config, pool_tree);
|
||||
// First try last_clean_pgs to minimize data movement
|
||||
@@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||
}
|
||||
if (!prev_pgs.length)
|
||||
{
|
||||
// Fall back to config/pgs if it's empty
|
||||
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
|
||||
// Fall back to pg/config if it's empty
|
||||
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
|
||||
{
|
||||
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
|
||||
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
|
||||
}
|
||||
}
|
||||
const old_pg_count = prev_pgs.length;
|
||||
@@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
|
||||
ordered: pool_cfg.scheme != 'replicated',
|
||||
};
|
||||
let optimize_result;
|
||||
// Re-shuffle PGs if config/pgs.hash is empty
|
||||
if (old_pg_count > 0 && state.config.pgs.hash)
|
||||
// Re-shuffle PGs if pg/config.hash is empty
|
||||
if (old_pg_count > 0 && state.pg.config.hash)
|
||||
{
|
||||
if (prev_pgs.length != pool_cfg.pg_count)
|
||||
{
|
||||
|
220
mon/prometheus.js
Normal file
220
mon/prometheus.js
Normal file
@@ -0,0 +1,220 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
// Static "# HELP" / "# TYPE" metadata that is emitted before the samples.
// Every metric gets exactly one HELP line and one TYPE line.
const metric_help =
`# HELP vitastor_object_bytes Total size of objects in cluster in bytes
# TYPE vitastor_object_bytes gauge
# HELP vitastor_object_count Total number of objects in cluster
# TYPE vitastor_object_count gauge
# HELP vitastor_stat_count Total operation count
# TYPE vitastor_stat_count counter
# HELP vitastor_stat_usec Total operation latency in usec
# TYPE vitastor_stat_usec counter
# HELP vitastor_stat_bytes Total operation size in bytes
# TYPE vitastor_stat_bytes counter

# HELP vitastor_image_raw_used Image raw used size in bytes
# TYPE vitastor_image_raw_used counter
# HELP vitastor_image_stat_count Per-image total operation count
# TYPE vitastor_image_stat_count counter
# HELP vitastor_image_stat_usec Per-image total operation latency
# TYPE vitastor_image_stat_usec counter
# HELP vitastor_image_stat_bytes Per-image total operation size in bytes
# TYPE vitastor_image_stat_bytes counter

# HELP vitastor_osd_status OSD up/down status
# TYPE vitastor_osd_status gauge
# HELP vitastor_osd_size_bytes OSD total space in bytes
# TYPE vitastor_osd_size_bytes gauge
# HELP vitastor_osd_free_bytes OSD free space in bytes
# TYPE vitastor_osd_free_bytes gauge
# HELP vitastor_osd_stat_count Per-OSD total operation count
# TYPE vitastor_osd_stat_count counter
# HELP vitastor_osd_stat_usec Per-OSD total operation latency
# TYPE vitastor_osd_stat_usec counter
# HELP vitastor_osd_stat_bytes Per-OSD total operation size in bytes
# TYPE vitastor_osd_stat_bytes counter

# HELP vitastor_monitor_info Monitor info, 1 is master, 0 is standby
# TYPE vitastor_monitor_info gauge

# HELP vitastor_pool_info Pool configuration (in labels)
# TYPE vitastor_pool_info gauge
# HELP vitastor_pool_status Pool up/down status
# TYPE vitastor_pool_status gauge
# HELP vitastor_pool_raw_to_usable Raw to usable space ratio
# TYPE vitastor_pool_raw_to_usable gauge
# HELP vitastor_pool_space_efficiency Pool space usage efficiency
# TYPE vitastor_pool_space_efficiency gauge
# HELP vitastor_pool_total_raw_tb Total raw space in pool in TB
# TYPE vitastor_pool_total_raw_tb gauge
# HELP vitastor_pool_used_raw_tb Used raw space in pool in TB
# TYPE vitastor_pool_used_raw_tb gauge
# HELP vitastor_pg_count PG counts by state
# TYPE vitastor_pg_count gauge

`;
|
||||
|
||||
// Render the monitor state <st> as Prometheus text exposition output.
//
// Reads st.stats, st.inode.stats, st.config.inode, st.osd.stats, st.osd.state,
// st.mon.member, st.mon.master, st.config.pools, st.pool.stats, st.pg.config
// and st.pg.state. Returns one string: the static metric_help header followed
// by one "name{labels} value" line per sample.
function export_prometheus_metrics(st)
{
    let res = metric_help;

    // Global statistics

    for (const k in st.stats.object_bytes)
    {
        res += `vitastor_object_bytes{object_type="${k}"} ${st.stats.object_bytes[k]}\n`;
    }

    for (const k in st.stats.object_counts)
    {
        res += `vitastor_object_count{object_type="${k}"} ${st.stats.object_counts[k]}\n`;
    }

    for (const typ of [ 'op', 'subop', 'recovery' ])
    {
        for (const op in st.stats[typ+"_stats"]||{})
        {
            const op_stat = st.stats[typ+"_stats"][op];
            for (const key of [ 'count', 'usec', 'bytes' ])
            {
                res += `vitastor_stat_${key}{op="${op}",op_type="${typ}"} ${op_stat[key]||0}\n`;
            }
        }
    }

    // Per-image statistics

    for (const pool in st.inode.stats)
    {
        for (const inode in st.inode.stats[pool])
        {
            const ist = st.inode.stats[pool][inode];
            // Images without a config entry are exported with an empty name
            const inode_name = ((st.config.inode[pool]||{})[inode]||{}).name||'';
            const inode_label = `image_name="${addslashes(inode_name)}",inode_num="${inode}",pool_id="${pool}"`;
            res += `vitastor_image_raw_used{${inode_label}} ${ist.raw_used||0}\n`;
            for (const op of [ 'read', 'write', 'delete' ])
            {
                for (const k of [ 'count', 'usec', 'bytes' ])
                {
                    if (ist[op])
                    {
                        res += `vitastor_image_stat_${k}{${inode_label},op="${op}"} ${ist[op][k]||0}\n`;
                    }
                }
            }
        }
    }

    // Per-OSD statistics

    for (const osd in st.osd.stats)
    {
        const osd_stat = st.osd.stats[osd];
        // An OSD is "up" only if it has a state entry whose state is 'up'
        const up = st.osd.state[osd] && st.osd.state[osd].state == 'up' ? 1 : 0;
        res += `vitastor_osd_status{host="${addslashes(osd_stat.host)}",osd_num="${osd}"} ${up}\n`;
        res += `vitastor_osd_size_bytes{osd_num="${osd}"} ${osd_stat.size||0}\n`;
        res += `vitastor_osd_free_bytes{osd_num="${osd}"} ${osd_stat.free||0}\n`;
        for (const op in osd_stat.op_stats)
        {
            const ist = osd_stat.op_stats[op];
            for (const k of [ 'count', 'usec', 'bytes' ])
            {
                res += `vitastor_osd_stat_${k}{osd_num="${osd}",op="${op}",op_type="op"} ${ist[k]||0}\n`;
            }
        }
        for (const op in osd_stat.subop_stats)
        {
            const ist = osd_stat.subop_stats[op];
            for (const k of [ 'count', 'usec', 'bytes' ])
            {
                res += `vitastor_osd_stat_${k}{osd_num="${osd}",op="${op}",op_type="subop"} ${ist[k]||0}\n`;
            }
        }
    }

    // Monitor statistics

    for (const mon_id in st.mon.member)
    {
        const mon = st.mon.member[mon_id];
        const master = st.mon.master && st.mon.master.id == mon_id ? 1 : 0;
        // mon.ip may be either a single value or an array; export the first one
        const ip = (mon.ip instanceof Array ? mon.ip[0] : mon.ip) || '';
        res += `vitastor_monitor_info{monitor_hostname="${addslashes(mon.hostname)}",monitor_id="${mon_id}",monitor_ip="${addslashes(ip)}"} ${master}\n`;
    }

    // Per-pool statistics

    for (const pool_id in st.config.pools)
    {
        const pool_cfg = st.config.pools[pool_id];
        const pool_label = `pool_id="${pool_id}",pool_name="${addslashes(pool_cfg.name)}"`;
        const pool_stat = st.pool.stats[pool_id];
        res += `vitastor_pool_info{${pool_label}`+
            `,pool_scheme="${addslashes(pool_cfg.scheme)}"`+
            `,pg_size="${pool_cfg.pg_size||0}",pg_minsize="${pool_cfg.pg_minsize||0}"`+
            `,parity_chunks="${pool_cfg.parity_chunks||0}",pg_count="${pool_cfg.pg_count||0}"`+
            `,failure_domain="${addslashes(pool_cfg.failure_domain)}"`+
            `} 1\n`;
        if (!pool_stat)
        {
            // Without pool statistics only vitastor_pool_info is exported for this pool
            continue;
        }
        res += `vitastor_pool_raw_to_usable{${pool_label}} ${pool_stat.raw_to_usable||0}\n`;
        res += `vitastor_pool_space_efficiency{${pool_label}} ${pool_stat.space_efficiency||0}\n`;
        res += `vitastor_pool_total_raw_tb{${pool_label}} ${pool_stat.total_raw_tb||0}\n`;
        res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;

        // PG states and pool up/down status
        // The number of configured PGs wins over pool_cfg.pg_count when it is non-zero
        const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
        const per_state = {
            active: 0,
            starting: 0,
            peering: 0,
            incomplete: 0,
            repeering: 0,
            stopping: 0,
            offline: 0,
            degraded: 0,
            has_inconsistent: 0,
            has_corrupted: 0,
            has_incomplete: 0,
            has_degraded: 0,
            has_misplaced: 0,
            has_unclean: 0,
            has_invalid: 0,
            left_on_dead: 0,
            scrubbing: 0,
        };
        const pool_pg_states = st.pg.state[pool_id] || {};
        for (let i = 1; i <= real_pg_count; i++)
        {
            if (!pool_pg_states[i])
            {
                // A PG without any reported state is counted as offline
                per_state['offline'] = 1 + (per_state['offline']|0);
            }
            else
            {
                // Tolerate a state entry without a "state" list instead of throwing
                for (const st_name of pool_pg_states[i].state||[])
                {
                    per_state[st_name] = 1 + (per_state[st_name]|0);
                }
            }
        }
        for (const st_name in per_state)
        {
            res += `vitastor_pg_count{pg_state="${st_name}",${pool_label}} ${per_state[st_name]}\n`;
        }
        // The pool is reported as up only when every PG is active
        const pool_active = per_state['active'] >= real_pg_count ? 1 : 0;
        res += `vitastor_pool_status{${pool_label}} ${pool_active}\n`;
    }

    return res;
}
|
||||
|
||||
// Escape a value for use inside a double-quoted Prometheus label.
// Falsy input becomes an empty string, anything else is converted to a string.
// '"' and '\' are prefixed with a backslash; a line feed is written as the two
// characters '\' 'n', because a raw newline would break the sample line.
function addslashes(str)
{
    return ((str||'')+'').replace(/["\n\\]/g, (c) => (c === '\n' ? '\\n' : '\\'+c));
}
|
||||
|
||||
module.exports = { export_prometheus_metrics };
|
2818
mon/scripts/Vitastor-Grafana-6+.json
Normal file
2818
mon/scripts/Vitastor-Grafana-6+.json
Normal file
File diff suppressed because it is too large
Load Diff
38
mon/stats.js
38
mon/stats.js
@@ -3,10 +3,10 @@
|
||||
|
||||
function derive_osd_stats(st, prev, prev_diff)
|
||||
{
|
||||
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
|
||||
const diff = prev_diff || { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
|
||||
if (!st || !st.time || !prev || !prev.time || prev.time >= st.time)
|
||||
{
|
||||
return prev_diff || diff;
|
||||
return diff;
|
||||
}
|
||||
const timediff = BigInt(st.time*1000 - prev.time*1000);
|
||||
for (const op in st.op_stats||{})
|
||||
@@ -17,8 +17,7 @@ function derive_osd_stats(st, prev, prev_diff)
|
||||
const b = c.bytes - BigInt(pr && pr.bytes||0);
|
||||
const us = c.usec - BigInt(pr && pr.usec||0);
|
||||
const n = c.count - BigInt(pr && pr.count||0);
|
||||
if (n > 0)
|
||||
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
|
||||
diff.op_stats[op] = { ...c, bps: n > 0 ? b*1000n/timediff : 0n, iops: n > 0 ? n*1000n/timediff : 0n, lat: n > 0 ? us/n : 0n };
|
||||
}
|
||||
for (const op in st.subop_stats||{})
|
||||
{
|
||||
@@ -27,8 +26,7 @@ function derive_osd_stats(st, prev, prev_diff)
|
||||
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
|
||||
const us = c.usec - BigInt(pr && pr.usec||0);
|
||||
const n = c.count - BigInt(pr && pr.count||0);
|
||||
if (n > 0)
|
||||
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
|
||||
diff.subop_stats[op] = { ...c, iops: n > 0 ? n*1000n/timediff : 0n, lat: n > 0 ? us/n : 0n };
|
||||
}
|
||||
for (const op in st.recovery_stats||{})
|
||||
{
|
||||
@@ -37,8 +35,7 @@ function derive_osd_stats(st, prev, prev_diff)
|
||||
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
|
||||
const b = c.bytes - BigInt(pr && pr.bytes||0);
|
||||
const n = c.count - BigInt(pr && pr.count||0);
|
||||
if (n > 0)
|
||||
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
|
||||
diff.recovery_stats[op] = { ...c, bps: n > 0 ? b*1000n/timediff : 0n, iops: n > 0 ? n*1000n/timediff : 0n };
|
||||
}
|
||||
for (const pool_id in st.inode_stats||{})
|
||||
{
|
||||
@@ -53,9 +50,9 @@ function derive_osd_stats(st, prev, prev_diff)
|
||||
prev.inode_stats[pool_id][inode_num] && prev.inode_stats[pool_id][inode_num][op];
|
||||
const n = BigInt(c.count||0) - BigInt(pr && pr.count||0);
|
||||
inode_diff[op] = {
|
||||
bps: (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff,
|
||||
iops: n*1000n/timediff,
|
||||
lat: (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/(n || 1n),
|
||||
bps: n > 0 ? (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff : 0n,
|
||||
iops: n > 0 ? n*1000n/timediff : 0n,
|
||||
lat: n > 0 ? (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/n : 0n,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -74,7 +71,7 @@ function sum_op_stats(all_osd, prev_stats)
|
||||
);
|
||||
prev_stats.osd_stats[osd] = cur;
|
||||
}
|
||||
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
|
||||
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: { degraded: {}, misplaced: {} } };
|
||||
// Sum derived values instead of deriving summed
|
||||
for (const osd in all_osd.state)
|
||||
{
|
||||
@@ -103,10 +100,19 @@ function sum_object_counts(state, global_config)
|
||||
{
|
||||
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
|
||||
for (const pool_id in state.pg.stats)
|
||||
let pgstats = state.pgstats;
|
||||
if (state.pg.stats)
|
||||
{
|
||||
// Merge with old stats for seamless transition to new stats
|
||||
for (const pool_id in state.pg.stats)
|
||||
{
|
||||
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
|
||||
}
|
||||
}
|
||||
for (const pool_id in pgstats)
|
||||
{
|
||||
let object_size = 0;
|
||||
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
|
||||
for (const osd_num of pgstats[pool_id].write_osd_set||[])
|
||||
{
|
||||
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
|
||||
{
|
||||
@@ -124,9 +130,9 @@ function sum_object_counts(state, global_config)
|
||||
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
|
||||
}
|
||||
object_size = BigInt(object_size);
|
||||
for (const pg_num in state.pg.stats[pool_id])
|
||||
for (const pg_num in pgstats[pool_id])
|
||||
{
|
||||
const st = state.pg.stats[pool_id][pg_num];
|
||||
const st = pgstats[pool_id][pg_num];
|
||||
if (st)
|
||||
{
|
||||
for (const k in object_counts)
|
||||
|
37
mon/utils.js
Normal file
37
mon/utils.js
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
const os = require('os');
|
||||
|
||||
// List addresses of the local network interfaces.
// With a truthy <all> every address reported by os.networkInterfaces() is
// returned; otherwise only non-internal IPv4 addresses are returned.
function local_ips(all)
{
    const ips = [];
    const ifaces = os.networkInterfaces();
    for (const ifname in ifaces)
    {
        for (const iface of ifaces[ifname])
        {
            // Node.js 18.0-18.3 reports the family as the number 4 instead of 'IPv4'
            const is_ipv4 = iface.family == 'IPv4' || iface.family == 4;
            if (is_ipv4 && !iface.internal || all)
            {
                ips.push(iface.address);
            }
        }
    }
    return ips;
}
|
||||
|
||||
// Return the base64 text of <str> (passed through Buffer.from() unchanged)
function b64(str)
{
    const raw = Buffer.from(str);
    return raw.toString('base64');
}
|
||||
|
||||
// Decode base64 text <str> back into a string (Buffer's default encoding)
function de64(str)
{
    const decoded = Buffer.from(str, 'base64');
    return decoded.toString();
}
|
||||
|
||||
module.exports = {
|
||||
b64,
|
||||
de64,
|
||||
local_ips,
|
||||
};
|
49
mon/vitastor_persist_filter.js
Normal file
49
mon/vitastor_persist_filter.js
Normal file
@@ -0,0 +1,49 @@
|
||||
// AntiEtcd persistence filter for Vitastor
|
||||
// (c) Vitaliy Filippov, 2024
|
||||
// License: Mozilla Public License 2.0 or Vitastor Network Public License 1.1
|
||||
|
||||
// Build the (key, value) => value filter for the given configuration.
// The key prefix is cfg.vitastor_prefix, or '/vitastor' when that is not set.
// The returned function maps a key/value pair to:
//   - <prefix>/osd/stats/*: a JSON string holding only bitmap_granularity,
//     data_block_size, host and immediate_commit (falsy fields are omitted),
//     '{}' when the value cannot be processed as JSON, or undefined when the
//     value is empty;
//   - other <prefix>/osd/*, inode/stats, pg/stats, pgstats, pool/stats keys
//     and <prefix>/stats itself: undefined;
//   - everything else: the value unchanged.
function vitastor_persist_filter(cfg)
{
    const prefix = cfg.vitastor_prefix || '/vitastor';
    const osd_stats_prefix = prefix+'/osd/stats/';
    const dropped_prefixes = [
        prefix+'/osd/',
        prefix+'/inode/stats/',
        prefix+'/pg/stats/', // old name
        prefix+'/pgstats/',
        prefix+'/pool/stats/',
    ];
    return (key, value) =>
    {
        if (key.startsWith(osd_stats_prefix))
        {
            if (!value)
            {
                return undefined;
            }
            try
            {
                value = JSON.parse(value);
                value = JSON.stringify({
                    bitmap_granularity: value.bitmap_granularity || undefined,
                    data_block_size: value.data_block_size || undefined,
                    host: value.host || undefined,
                    immediate_commit: value.immediate_commit || undefined,
                });
            }
            catch (e)
            {
                console.error('invalid JSON in '+key+' = '+value+': '+e);
                value = '{}';
            }
            return value;
        }
        if (key == prefix+'/stats' || dropped_prefixes.some(p => key.startsWith(p)))
        {
            return undefined;
        }
        return value;
    };
}
|
||||
|
||||
module.exports = vitastor_persist_filter;
|
80
node-binding/addon.cc
Normal file
80
node-binding/addon.cc
Normal file
@@ -0,0 +1,80 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "addon.h"
|
||||
|
||||
// Initialize the node addon
|
||||
NAN_MODULE_INIT(InitAddon)
{
    // Gives a freshly created class template its name and one internal field
    const auto name_class = [](v8::Local<v8::FunctionTemplate> cls, const char *class_name)
    {
        cls->SetClassName(Nan::New(class_name).ToLocalChecked());
        cls->InstanceTemplate()->SetInternalFieldCount(1);
    };
    // Publishes the constructor function of a class template on the module object
    const auto export_class = [&target](v8::Local<v8::FunctionTemplate> cls, const char *class_name)
    {
        Nan::Set(target, Nan::New(class_name).ToLocalChecked(), Nan::GetFunction(cls).ToLocalChecked());
    };
    // Publishes an errno code on the module object as a negative Int32
    const auto export_errno = [&target](const char *errno_name, int code)
    {
        Nan::Set(target, Nan::New(errno_name).ToLocalChecked(), Nan::New<v8::Int32>(-code));
    };

    // vitastor.Client
    v8::Local<v8::FunctionTemplate> tpl = Nan::New<v8::FunctionTemplate>(NodeVitastor::Create);
    name_class(tpl, "Client");
    Nan::SetPrototypeMethod(tpl, "read", NodeVitastor::Read);
    Nan::SetPrototypeMethod(tpl, "write", NodeVitastor::Write);
    Nan::SetPrototypeMethod(tpl, "sync", NodeVitastor::Sync);
    Nan::SetPrototypeMethod(tpl, "read_bitmap", NodeVitastor::ReadBitmap);
    //Nan::SetPrototypeMethod(tpl, "destroy", NodeVitastor::Destroy);
    export_class(tpl, "Client");

    // vitastor.Image (opened image)
    tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorImage::Create);
    name_class(tpl, "Image");
    Nan::SetPrototypeMethod(tpl, "read", NodeVitastorImage::Read);
    Nan::SetPrototypeMethod(tpl, "write", NodeVitastorImage::Write);
    Nan::SetPrototypeMethod(tpl, "sync", NodeVitastorImage::Sync);
    Nan::SetPrototypeMethod(tpl, "get_info", NodeVitastorImage::GetInfo);
    Nan::SetPrototypeMethod(tpl, "read_bitmap", NodeVitastorImage::ReadBitmap);
    export_class(tpl, "Image");

    // vitastor.KV
    tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorKV::Create);
    name_class(tpl, "KV");
    Nan::SetPrototypeMethod(tpl, "open", NodeVitastorKV::Open);
    Nan::SetPrototypeMethod(tpl, "set_config", NodeVitastorKV::SetConfig);
    Nan::SetPrototypeMethod(tpl, "close", NodeVitastorKV::Close);
    Nan::SetPrototypeMethod(tpl, "get_size", NodeVitastorKV::GetSize);
    Nan::SetPrototypeMethod(tpl, "get", NodeVitastorKV::Get);
    Nan::SetPrototypeMethod(tpl, "get_cached", NodeVitastorKV::GetCached);
    Nan::SetPrototypeMethod(tpl, "set", NodeVitastorKV::Set);
    Nan::SetPrototypeMethod(tpl, "del", NodeVitastorKV::Del);
    Nan::SetPrototypeMethod(tpl, "list", NodeVitastorKV::List);
    export_class(tpl, "KV");

    // Error codes, exported negated
    export_errno("ENOENT", ENOENT);
    export_errno("EIO", EIO);
    export_errno("EINVAL", EINVAL);
    export_errno("EROFS", EROFS);
    export_errno("ENOSPC", ENOSPC);
    export_errno("EINTR", EINTR);
    export_errno("EILSEQ", EILSEQ);
    export_errno("ENOTBLK", ENOTBLK);
    export_errno("ENOSYS", ENOSYS);
    export_errno("EAGAIN", EAGAIN);

    // Listing handle
    tpl = Nan::New<v8::FunctionTemplate>(NodeVitastorKVListing::Create);
    name_class(tpl, "KVListing");
    Nan::SetPrototypeMethod(tpl, "next", NodeVitastorKVListing::Next);
    Nan::SetPrototypeMethod(tpl, "close", NodeVitastorKVListing::Close);
    export_class(tpl, "KVListing");

    // Keep the listing constructor in NodeVitastorKV::listing_class as well
    NodeVitastorKV::listing_class.Reset(Nan::GetFunction(tpl).ToLocalChecked());
}
|
||||
|
||||
NODE_MODULE(addon, (void*)InitAddon)
|
20
node-binding/addon.h
Normal file
20
node-binding/addon.h
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#ifndef NODE_VITASTOR_ADDON_H
|
||||
#define NODE_VITASTOR_ADDON_H
|
||||
|
||||
#include <nan.h>
|
||||
#include <vitastor_c.h>
|
||||
|
||||
#include "client.h"
|
||||
|
||||
// Diagnostic helpers that print one line to stderr.
// Each macro expands to a single statement WITHOUT a trailing semicolon
// (do { ... } while (0)), so the caller's own `;` terminates it and
// `if (x) TRACE("a"); else ...` parses as expected.
// ERRORF/TRACEF take a printf-style format literal and at least one argument.
#define ERRORF(format, ...) do { fprintf(stderr, format "\n", __VA_ARGS__); } while (0)

#define TRACEF(format, ...) do { fprintf(stderr, format "\n", __VA_ARGS__); } while (0)
// TRACE prints a plain C string followed by a newline.
#define TRACE(msg) do { fprintf(stderr, "%s\n", msg); } while (0)
|
||||
|
||||
//#define TRACEF(format, arg) ;
|
||||
//#define TRACE(msg) ;
|
||||
|
||||
#endif
|
21
node-binding/binding.gyp
Normal file
21
node-binding/binding.gyp
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'addon',
|
||||
'sources': [
|
||||
'client.cc',
|
||||
'addon.cc'
|
||||
],
|
||||
'include_dirs': [
|
||||
'<!(node -e "require(\'nan\')")'
|
||||
],
|
||||
'cflags': [
|
||||
'<!(pkg-config --cflags vitastor)'
|
||||
],
|
||||
'libraries': [
|
||||
'<!(pkg-config --libs vitastor)',
|
||||
'-lvitastor_kv'
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
981
node-binding/client.cc
Normal file
981
node-binding/client.cc
Normal file
@@ -0,0 +1,981 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "addon.h"
|
||||
|
||||
// Operation codes stored in NodeVitastorRequest::op for image requests;
// NodeVitastorImage::exec_request() dispatches on them.
#define NODE_VITASTOR_READ 1
|
||||
#define NODE_VITASTOR_WRITE 2
|
||||
#define NODE_VITASTOR_SYNC 3
|
||||
#define NODE_VITASTOR_READ_BITMAP 4
|
||||
#define NODE_VITASTOR_GET_INFO 5
|
||||
|
||||
// Fallback inode-id helpers, defined only when INODE_POOL is not already provided.
// A 64-bit inode id carries the pool id in its top POOL_ID_BITS bits.
#ifndef INODE_POOL
|
||||
// Extracts the pool id (upper POOL_ID_BITS bits) as uint32_t.
#define INODE_POOL(inode) (uint32_t)((inode) >> (64 - POOL_ID_BITS))
|
||||
// Masks the pool bits away, leaving the inode number within the pool.
#define INODE_NO_POOL(inode) (uint64_t)((inode) & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
|
||||
// Combines a pool id with an inode number (pool bits of `inode` are dropped first).
#define INODE_WITH_POOL(pool_id, inode) (((uint64_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
|
||||
#endif
|
||||
|
||||
class NodeVitastorRequest: public Nan::AsyncResource
|
||||
{
|
||||
public:
|
||||
NodeVitastorRequest(NodeVitastor *cli, v8::Local<v8::Function> cb): Nan::AsyncResource("NodeVitastorRequest")
|
||||
{
|
||||
this->cli = cli;
|
||||
callback.Reset(cb);
|
||||
}
|
||||
|
||||
iovec iov;
|
||||
std::vector<iovec> iov_list;
|
||||
NodeVitastor *cli = NULL;
|
||||
NodeVitastorImage *img = NULL;
|
||||
int op = 0;
|
||||
uint64_t offset = 0, len = 0, version = 0;
|
||||
bool with_parents = false;
|
||||
Nan::Persistent<v8::Function> callback;
|
||||
};
|
||||
|
||||
// Converts a JS value to an unsigned 64-bit integer.
// BigInt values are read with BigInt::Uint64Value(); any other value is
// converted through Nan::To<int64_t>().
static uint64_t get_ui64(const v8::Local<v8::Value> & val)
{
    if (!val->IsBigInt())
    {
        return Nan::To<int64_t>(val).FromJust();
    }
    auto big = val->ToBigInt(Nan::GetCurrentContext()).ToLocalChecked();
    return big->Uint64Value();
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastor
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// Creates an empty client wrapper.
// The poll watcher's user data points back at this object so that
// on_io_readable() can recover the instance from the libuv handle.
NodeVitastor::NodeVitastor()
    : Nan::ObjectWrap()
{
    TRACE("NodeVitastor: constructor");
    this->poll_watcher.data = this;
}
|
||||
|
||||
// Stops polling the eventfd, then destroys the underlying C client.
NodeVitastor::~NodeVitastor()
{
    TRACE("NodeVitastor: destructor");
    // Stop the watcher first so no event is dispatched to a destroyed client
    uv_poll_stop(&this->poll_watcher);
    vitastor_c_destroy(this->c);
    this->c = NULL;
}
|
||||
|
||||
// constructor({ ...config })
// Creates the C client from the own properties of the config object (each key
// and value is stringified), registers the io_uring eventfd and starts polling
// it on the default libuv loop. Throws a JS error on failure.
NAN_METHOD(NodeVitastor::Create)
{
    TRACE("NodeVitastor::Create");
    if (info.Length() < 1 || !info[0]->IsObject())
    {
        Nan::ThrowTypeError("Client constructor expects a configuration object");
        return;
    }
    v8::Local<v8::Object> jsParams = info[0].As<v8::Object>();
    v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
    // Flat list: key0, value0, key1, value1, ...
    std::vector<std::string> cfg;
    for (uint32_t i = 0; i < keys->Length(); i++)
    {
        auto key = Nan::Get(keys, i).ToLocalChecked();
        cfg.push_back(std::string(*Nan::Utf8String(key)));
        cfg.push_back(std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked())));
    }

    // Pointers into cfg; cfg must stay alive until the client is created
    std::vector<const char*> c_cfg;
    c_cfg.reserve(cfg.size());
    for (const auto & item: cfg)
    {
        c_cfg.push_back(item.c_str());
    }
    NodeVitastor* cli = new NodeVitastor();
    cli->c = vitastor_c_create_uring_json(c_cfg.data(), c_cfg.size());

    int res = vitastor_c_uring_register_eventfd(cli->c);
    if (res >= 0)
    {
        cli->eventfd = res;
        res = uv_poll_init(uv_default_loop(), &cli->poll_watcher, cli->eventfd);
        if (res >= 0)
            res = uv_poll_start(&cli->poll_watcher, UV_READABLE, on_io_readable);
        // Process events that may already be pending, but only if polling really started
        if (res >= 0)
            on_io_readable(&cli->poll_watcher, 0, UV_READABLE);
    }
    if (res < 0)
    {
        // Report the code of the step that failed (res), not the eventfd value
        ERRORF("NodeVitastor: failed to create and register io_uring eventfd in libuv: %s", strerror(-res));
        vitastor_c_destroy(cli->c);
        cli->c = NULL;
        // NOTE(review): cli itself is not freed here, because ~NodeVitastor()
        // calls uv_poll_stop() on a watcher that may never have been initialised.
        Nan::ThrowError("failed to create and register io_uring eventfd");
        return;
    }

    cli->Wrap(info.This());
    info.GetReturnValue().Set(info.This());
}
|
||||
|
||||
// libuv poll callback for the io_uring eventfd.
// Lets the C client process completed events whenever the fd is readable.
void NodeVitastor::on_io_readable(uv_poll_t* handle, int status, int revents)
{
    TRACEF("NodeVitastor::on_io_readable status/revents %d %d", status, revents);
    if (!(revents & UV_READABLE))
    {
        return;
    }
    // handle->data was set to the owning NodeVitastor in the constructor
    NodeVitastor *owner = (NodeVitastor*)handle->data;
    vitastor_c_uring_handle_events(owner->c);
}
|
||||
|
||||
// Builds a read request from JS arguments (offset, len, callback) starting at argpos.
// Allocates a malloc()ed destination buffer of `len` bytes and stores it in req->iov.
// Returns NULL after throwing a JS error if the buffer cannot be allocated.
NodeVitastorRequest* NodeVitastor::get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
    const uint64_t read_offset = get_ui64(info[argpos+0]);
    const uint64_t read_len = get_ui64(info[argpos+1]);
    void *data = malloc(read_len);
    if (data == NULL)
    {
        Nan::ThrowError("failed to allocate memory");
        return NULL;
    }

    NodeVitastorRequest *req = new NodeVitastorRequest(this, info[argpos+2].As<v8::Function>());
    req->offset = read_offset;
    req->len = read_len;
    req->iov.iov_base = data;
    req->iov.iov_len = read_len;
    return req;
}
|
||||
|
||||
// read(pool, inode, offset, len, callback(err, buffer, version))
|
||||
NAN_METHOD(NodeVitastor::Read)
|
||||
{
|
||||
TRACE("NodeVitastor::Read");
|
||||
if (info.Length() < 5)
|
||||
Nan::ThrowError("Not enough arguments to read(pool, inode, offset, len, callback(err, buffer, version))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
uint64_t pool = get_ui64(info[0]);
|
||||
uint64_t inode = get_ui64(info[1]);
|
||||
|
||||
auto req = self->get_read_request(info, 2);
|
||||
|
||||
self->Ref();
|
||||
vitastor_c_read(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, &req->iov, 1, on_read_finish, req);
|
||||
}
|
||||
|
||||
// Builds a write request from JS arguments starting at argpos:
//   (offset, buf: Buffer | Buffer[], { version }?, callback)
// The optional options object is recognised as a non-function object in the
// third position; its `version` property is stored in req->version.
// The request references the Buffers' memory directly (no copy is made).
// Returns NULL after throwing a JS TypeError if `buf` is neither a Buffer nor
// an array of Buffers; callers must check for NULL.
NodeVitastorRequest* NodeVitastor::get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
    uint64_t offset = get_ui64(info[argpos+0]);
    const auto & bufarg = info[argpos+1];
    uint64_t version = 0;
    if (!info[argpos+2].IsEmpty() &&
        !info[argpos+2]->IsFunction() &&
        info[argpos+2]->IsObject())
    {
        auto key = Nan::New<v8::String>("version").ToLocalChecked();
        auto params = info[argpos+2].As<v8::Object>();
        auto versionObj = Nan::Get(params, key).ToLocalChecked();
        version = get_ui64(versionObj);
        // The callback follows the options object
        argpos++;
    }

    // Validate and collect the buffers before allocating the request,
    // so nothing has to be cleaned up on the error paths
    std::vector<iovec> iov_list;
    iovec single = {};
    uint64_t total_len = 0;
    if (bufarg->IsArray())
    {
        auto buffers = bufarg.As<v8::Array>();
        for (uint32_t i = 0; i < buffers->Length(); i++)
        {
            auto buffer_obj = Nan::Get(buffers, i).ToLocalChecked();
            if (!node::Buffer::HasInstance(buffer_obj))
            {
                Nan::ThrowTypeError("buf must be a Buffer or an array of Buffers");
                return NULL;
            }
            char *buf = node::Buffer::Data(buffer_obj);
            uint64_t len = node::Buffer::Length(buffer_obj);
            iov_list.push_back({ .iov_base = buf, .iov_len = len });
            total_len += len;
        }
    }
    else
    {
        if (!node::Buffer::HasInstance(bufarg))
        {
            Nan::ThrowTypeError("buf must be a Buffer or an array of Buffers");
            return NULL;
        }
        char *buf = node::Buffer::Data(bufarg);
        uint64_t len = node::Buffer::Length(bufarg);
        single = { .iov_base = buf, .iov_len = len };
        total_len = len;
    }

    v8::Local<v8::Function> callback = info[argpos+2].As<v8::Function>();
    auto req = new NodeVitastorRequest(this, callback);

    req->offset = offset;
    req->version = version;
    req->len = total_len;
    // Always assigned: callers fall back to req->iov when iov_list is empty
    // (e.g. for an empty array), so it must never hold garbage
    req->iov = single;
    req->iov_list.swap(iov_list);

    return req;
}
|
||||
|
||||
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
|
||||
NAN_METHOD(NodeVitastor::Write)
|
||||
{
|
||||
TRACE("NodeVitastor::Write");
|
||||
if (info.Length() < 5)
|
||||
Nan::ThrowError("Not enough arguments to write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
uint64_t pool = get_ui64(info[0]);
|
||||
uint64_t inode = get_ui64(info[1]);
|
||||
|
||||
auto req = self->get_write_request(info, 2);
|
||||
|
||||
self->Ref();
|
||||
vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
|
||||
req->iov_list.size() ? req->iov_list.data() : &req->iov,
|
||||
req->iov_list.size() ? req->iov_list.size() : 1,
|
||||
on_write_finish, req);
|
||||
}
|
||||
|
||||
// sync(callback(err))
|
||||
NAN_METHOD(NodeVitastor::Sync)
|
||||
{
|
||||
TRACE("NodeVitastor::Sync");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to sync(callback(err))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(self, callback);
|
||||
|
||||
self->Ref();
|
||||
vitastor_c_sync(self->c, on_write_finish, req);
|
||||
}
|
||||
|
||||
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
NAN_METHOD(NodeVitastor::ReadBitmap)
|
||||
{
|
||||
TRACE("NodeVitastor::ReadBitmap");
|
||||
if (info.Length() < 6)
|
||||
Nan::ThrowError("Not enough arguments to read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))");
|
||||
|
||||
NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());
|
||||
|
||||
uint64_t pool = get_ui64(info[0]);
|
||||
uint64_t inode = get_ui64(info[1]);
|
||||
uint64_t offset = get_ui64(info[2]);
|
||||
uint64_t len = get_ui64(info[3]);
|
||||
bool with_parents = Nan::To<bool>(info[4]).FromJust();
|
||||
v8::Local<v8::Function> callback = info[5].As<v8::Function>();
|
||||
|
||||
auto req = new NodeVitastorRequest(self, callback);
|
||||
self->Ref();
|
||||
vitastor_c_read_bitmap(self->c, ((pool << (64-POOL_ID_BITS)) | inode), offset, len, with_parents, on_read_bitmap_finish, req);
|
||||
}
|
||||
|
||||
// Invokes the JS callback with a single argument: null when retval is 0,
// otherwise retval as an Int32.
static void on_error(NodeVitastorRequest *req, Nan::Callback & nanCallback, long retval)
{
    // Legal errors: EINVAL, EIO, EROFS, ENOSPC, EINTR, ENOENT
    v8::Local<v8::Value> err;
    if (retval)
    {
        err = Nan::New<v8::Int32>((int32_t)retval);
    }
    else
    {
        err = Nan::Null();
    }
    nanCallback.Call(1, &err, req);
}
|
||||
|
||||
void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
|
||||
{
|
||||
TRACE("NodeVitastor::on_read_finish");
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
if (retval == -ENOENT)
|
||||
{
|
||||
free(req->iov.iov_base);
|
||||
nanCallback.Call(0, NULL, req);
|
||||
}
|
||||
else if (retval < 0 || (uint64_t)retval != req->len)
|
||||
{
|
||||
free(req->iov.iov_base);
|
||||
on_error(req, nanCallback, retval);
|
||||
}
|
||||
else
|
||||
{
|
||||
v8::Local<v8::Value> args[3];
|
||||
args[0] = Nan::Null();
|
||||
args[1] = Nan::NewBuffer((char*)req->iov.iov_base, req->iov.iov_len).ToLocalChecked();
|
||||
args[2] = v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), version);
|
||||
nanCallback.Call(3, args, req);
|
||||
}
|
||||
req->cli->Unref();
|
||||
delete req;
|
||||
}
|
||||
|
||||
void NodeVitastor::on_write_finish(void *opaque, long retval)
|
||||
{
|
||||
TRACE("NodeVitastor::on_write_finish");
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
if (retval < 0 || (uint64_t)retval != req->len)
|
||||
{
|
||||
on_error(req, nanCallback, retval);
|
||||
}
|
||||
else
|
||||
{
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = Nan::Null();
|
||||
nanCallback.Call(1, args, req);
|
||||
}
|
||||
req->cli->Unref();
|
||||
delete req;
|
||||
}
|
||||
|
||||
void NodeVitastor::on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap)
|
||||
{
|
||||
TRACE("NodeVitastor::on_read_bitmap_finish");
|
||||
Nan::HandleScope scope;
|
||||
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
if (retval == -ENOENT)
|
||||
nanCallback.Call(0, NULL, req);
|
||||
else if (retval < 0)
|
||||
on_error(req, nanCallback, retval);
|
||||
else
|
||||
{
|
||||
v8::Local<v8::Value> args[2];
|
||||
args[0] = Nan::Null();
|
||||
args[1] = Nan::NewBuffer((char*)bitmap, (retval+7)/8).ToLocalChecked();
|
||||
nanCallback.Call(2, args, req);
|
||||
}
|
||||
req->cli->Unref();
|
||||
delete req;
|
||||
}
|
||||
|
||||
//NAN_METHOD(NodeVitastor::Destroy)
|
||||
//{
|
||||
// TRACE("NodeVitastor::Destroy");
|
||||
//}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorImage
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// constructor(client, name)
// Wraps a new image object and starts watching the named inode; requests made
// before the watch is ready are queued (see exec_or_wait()).
NAN_METHOD(NodeVitastorImage::Create)
{
    TRACE("NodeVitastorImage::Create");
    if (info.Length() < 2)
    {
        // ThrowError only schedules the JS exception, so return explicitly
        Nan::ThrowError("Not enough arguments to Image(client, name)");
        return;
    }

    v8::Local<v8::Object> parent = info[0].As<v8::Object>();
    std::string name = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
    NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);

    NodeVitastorImage *img = new NodeVitastorImage();
    img->Wrap(info.This());

    img->cli = cli;
    img->name = name;

    // img is released in on_watch_start(), cli in ~NodeVitastorImage()
    img->Ref();
    cli->Ref();
    vitastor_c_watch_inode(cli->c, (char*)img->name.c_str(), on_watch_start, img);

    info.GetReturnValue().Set(info.This());
}
|
||||
|
||||
// Closes the inode watch (if one was obtained) and drops the reference on the
// client taken in Create().
NodeVitastorImage::~NodeVitastorImage()
{
    if (watch != NULL)
    {
        vitastor_c_close_watch(cli->c, watch);
        watch = NULL;
    }
    cli->Unref();
}
|
||||
|
||||
// read(offset, len, callback(err, buffer, version))
|
||||
NAN_METHOD(NodeVitastorImage::Read)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Read");
|
||||
if (info.Length() < 3)
|
||||
Nan::ThrowError("Not enough arguments to read(offset, len, callback(err, buffer, version))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
auto req = img->cli->get_read_request(info, 0);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_READ;
|
||||
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// write(offset, buffer, { version }?, callback(err))
|
||||
NAN_METHOD(NodeVitastorImage::Write)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Write");
|
||||
if (info.Length() < 3)
|
||||
Nan::ThrowError("Not enough arguments to write(offset, buffer, { version }?, callback(err))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
auto req = img->cli->get_write_request(info, 0);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_WRITE;
|
||||
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// sync(callback(err))
|
||||
NAN_METHOD(NodeVitastorImage::Sync)
|
||||
{
|
||||
TRACE("NodeVitastorImage::Sync");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to sync(callback(err))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(img->cli, callback);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_SYNC;
|
||||
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
NAN_METHOD(NodeVitastorImage::ReadBitmap)
|
||||
{
|
||||
TRACE("NodeVitastorImage::ReadBitmap");
|
||||
if (info.Length() < 4)
|
||||
Nan::ThrowError("Not enough arguments to read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
uint64_t offset = get_ui64(info[0]);
|
||||
uint64_t len = get_ui64(info[1]);
|
||||
bool with_parents = Nan::To<bool>(info[2]).FromJust();
|
||||
v8::Local<v8::Function> callback = info[3].As<v8::Function>();
|
||||
|
||||
auto req = new NodeVitastorRequest(img->cli, callback);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_READ_BITMAP;
|
||||
req->offset = offset;
|
||||
req->len = len;
|
||||
req->with_parents = with_parents;
|
||||
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))
|
||||
NAN_METHOD(NodeVitastorImage::GetInfo)
|
||||
{
|
||||
TRACE("NodeVitastorImage::GetInfo");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))");
|
||||
|
||||
NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(img->cli, callback);
|
||||
req->img = img;
|
||||
req->op = NODE_VITASTOR_GET_INFO;
|
||||
|
||||
img->exec_or_wait(req);
|
||||
}
|
||||
|
||||
// Runs the request immediately when the inode watch is available; otherwise
// queues it in on_init until on_watch_start() fires.
void NodeVitastorImage::exec_or_wait(NodeVitastorRequest *req)
{
    if (watch)
    {
        exec_request(req);
        return;
    }
    // Need to wait for initialisation
    on_init.push_back(req);
}
|
||||
|
||||
// Executes an image request according to req->op.
// READ / WRITE / SYNC / READ_BITMAP take a reference on the client and hand
// the request to the matching NodeVitastor completion handler, which releases
// both. GET_INFO is answered synchronously from the watch data and deletes
// the request itself. Unknown op codes are ignored.
void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
{
    switch (req->op)
    {
    case NODE_VITASTOR_READ:
    {
        const uint64_t ino = vitastor_c_inode_get_num(watch);
        cli->Ref();
        vitastor_c_read(cli->c, ino, req->offset, req->len, &req->iov, 1, NodeVitastor::on_read_finish, req);
        break;
    }
    case NODE_VITASTOR_WRITE:
    {
        const uint64_t ino = vitastor_c_inode_get_num(watch);
        cli->Ref();
        // Use the buffer list when present, otherwise the single iovec
        const bool vectored = req->iov_list.size() != 0;
        vitastor_c_write(cli->c, ino, req->offset, req->len, req->version,
            vectored ? req->iov_list.data() : &req->iov,
            vectored ? req->iov_list.size() : 1,
            NodeVitastor::on_write_finish, req);
        break;
    }
    case NODE_VITASTOR_SYNC:
    {
        const uint64_t ino = vitastor_c_inode_get_num(watch);
        const uint32_t imm = vitastor_c_inode_get_immediate_commit(cli->c, ino);
        cli->Ref();
        if (imm == IMMEDIATE_ALL)
        {
            // No sync is issued in this mode; complete the request right away
            NodeVitastor::on_write_finish(req, 0);
        }
        else
        {
            vitastor_c_sync(cli->c, NodeVitastor::on_write_finish, req);
        }
        break;
    }
    case NODE_VITASTOR_READ_BITMAP:
    {
        const uint64_t ino = vitastor_c_inode_get_num(watch);
        cli->Ref();
        vitastor_c_read_bitmap(cli->c, ino, req->offset, req->len, req->with_parents, NodeVitastor::on_read_bitmap_finish, req);
        break;
    }
    case NODE_VITASTOR_GET_INFO:
    {
        uint64_t size = vitastor_c_inode_get_size(watch);
        uint64_t num = vitastor_c_inode_get_num(watch);
        uint32_t block_size = vitastor_c_inode_get_block_size(cli->c, num);
        uint32_t bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(cli->c, num);
        int readonly = vitastor_c_inode_get_readonly(watch);
        uint32_t immediate_commit = vitastor_c_inode_get_immediate_commit(cli->c, num);
        uint64_t parent_id = vitastor_c_inode_get_parent_id(watch);
        char *meta = vitastor_c_inode_get_meta(watch);
        uint64_t mod_revision = vitastor_c_inode_get_mod_revision(watch);

        Nan::HandleScope scope;

        v8::Local<v8::Object> res = Nan::New<v8::Object>();
        // Helper: res[key] = value
        auto put = [&res](const char *key, v8::Local<v8::Value> value)
        {
            Nan::Set(res, Nan::New<v8::String>(key).ToLocalChecked(), value);
        };
        // 64-bit values are returned as Number below 2^53 and as BigInt above
        const uint64_t number_limit = (uint64_t)1<<53;

        put("pool_id", Nan::New<v8::Number>(INODE_POOL(num)));
        put("inode_num", Nan::New<v8::Number>(INODE_NO_POOL(num)));
        put("size", size < number_limit
            ? v8::Local<v8::Value>(Nan::New<v8::Number>(size))
            : v8::Local<v8::Value>(v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), size)));
        if (parent_id)
        {
            put("parent_pool_id", Nan::New<v8::Number>(INODE_POOL(parent_id)));
            put("parent_inode_num", Nan::New<v8::Number>(INODE_NO_POOL(parent_id)));
        }
        put("readonly", Nan::New((bool)readonly));
        if (meta)
        {
            // meta is a JSON string; expose it as a parsed object
            Nan::JSON nanJSON;
            put("meta", nanJSON.Parse(Nan::New<v8::String>(meta).ToLocalChecked()).ToLocalChecked());
        }
        put("mod_revision", mod_revision < number_limit
            ? v8::Local<v8::Value>(Nan::New<v8::Number>(mod_revision))
            : v8::Local<v8::Value>(v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), mod_revision)));
        put("block_size", Nan::New(block_size));
        put("bitmap_granularity", Nan::New(bitmap_granularity));
        put("immediate_commit", Nan::New(immediate_commit));

        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[1];
        args[0] = res;
        nanCallback.Call(1, args, req);

        delete req;
        break;
    }
    default:
        break;
    }
}
|
||||
|
||||
void NodeVitastorImage::on_watch_start(void *opaque, long retval)
|
||||
{
|
||||
NodeVitastorImage *img = (NodeVitastorImage *)opaque;
|
||||
{
|
||||
img->watch = (void*)retval;
|
||||
auto on_init = std::move(img->on_init);
|
||||
for (auto req: on_init)
|
||||
{
|
||||
img->exec_request(req);
|
||||
}
|
||||
}
|
||||
img->Unref();
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorKV
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
Nan::Persistent<v8::Function> NodeVitastorKV::listing_class;
|
||||
|
||||
// constructor(node_vitastor)
|
||||
NAN_METHOD(NodeVitastorKV::Create)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Create");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to new KV(client)");
|
||||
|
||||
v8::Local<v8::Object> parent = info[0].As<v8::Object>();
|
||||
NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);
|
||||
|
||||
NodeVitastorKV *kv = new NodeVitastorKV();
|
||||
kv->cli = cli;
|
||||
{
|
||||
kv->dbw = new vitastorkv_dbw_t((cluster_client_t*)vitastor_c_get_internal_client(cli->c));
|
||||
}
|
||||
|
||||
kv->Wrap(info.This());
|
||||
cli->Ref();
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
// Destroys the KV database wrapper and drops the client reference taken in Create().
NodeVitastorKV::~NodeVitastorKV()
{
    delete this->dbw;
    this->cli->Unref();
}
|
||||
|
||||
// open(pool_id, inode_num, { ...config }, callback(err))
|
||||
NAN_METHOD(NodeVitastorKV::Open)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Open");
|
||||
if (info.Length() < 4)
|
||||
Nan::ThrowError("Not enough arguments to open(pool_id, inode_num, { ...config }, callback(err))");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
uint64_t inode_id = INODE_WITH_POOL(get_ui64(info[0]), get_ui64(info[1]));
|
||||
|
||||
v8::Local<v8::Object> jsParams = info[2].As<v8::Object>();
|
||||
v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
|
||||
std::map<std::string, std::string> cfg;
|
||||
for (uint32_t i = 0; i < keys->Length(); i++)
|
||||
{
|
||||
auto key = Nan::Get(keys, i).ToLocalChecked();
|
||||
cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
|
||||
}
|
||||
|
||||
v8::Local<v8::Function> callback = info[3].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(kv->cli, callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->open(inode_id, cfg, [kv, req](int res)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
kv->Unref();
|
||||
});
|
||||
}
|
||||
|
||||
// close(callback(err))
|
||||
NAN_METHOD(NodeVitastorKV::Close)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Close");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to close(callback(err))");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
auto req = new NodeVitastorRequest(kv->cli, callback);
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->close([kv, req]()
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
nanCallback.Call(0, NULL, req);
|
||||
delete req;
|
||||
kv->Unref();
|
||||
});
|
||||
}
|
||||
|
||||
// set_config({ ...config })
|
||||
NAN_METHOD(NodeVitastorKV::SetConfig)
|
||||
{
|
||||
TRACE("NodeVitastorKV::SetConfig");
|
||||
if (info.Length() < 1)
|
||||
Nan::ThrowError("Not enough arguments to set_config({ ...config })");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
v8::Local<v8::Object> jsParams = info[0].As<v8::Object>();
|
||||
v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
|
||||
std::map<std::string, std::string> cfg;
|
||||
for (uint32_t i = 0; i < keys->Length(); i++)
|
||||
{
|
||||
auto key = Nan::Get(keys, i).ToLocalChecked();
|
||||
cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
|
||||
}
|
||||
|
||||
kv->dbw->set_config(cfg);
|
||||
}
|
||||
|
||||
// get_size()
|
||||
NAN_METHOD(NodeVitastorKV::GetSize)
|
||||
{
|
||||
TRACE("NodeVitastorKV::GetSize");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
auto size = kv->dbw->get_size();
|
||||
info.GetReturnValue().Set((size < ((uint64_t)1<<53))
|
||||
? v8::Local<v8::Value>(Nan::New<v8::Number>(size))
|
||||
: v8::Local<v8::Value>(v8::BigInt::NewFromUnsigned(info.GetIsolate(), size)));
|
||||
}
|
||||
|
||||
// Shared implementation of get() and get_cached(): looks up info[0] (a string
// key) and calls info[1] as callback(err, value). On success err is null and
// value is a string; on failure err is the integer result code and value is null.
// allow_cache is forwarded to dbw->get().
void NodeVitastorKV::get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache)
{
    NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    // FIXME: Handle Buffer too
    std::string key(*Nan::Utf8String(info[0].As<v8::String>()));

    v8::Local<v8::Function> callback = info[1].As<v8::Function>();
    auto req = new NodeVitastorRequest(kv->cli, callback);

    auto on_done = [kv, req](int res, const std::string & value)
    {
        Nan::HandleScope scope;
        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[2];
        if (res)
        {
            args[0] = Nan::New<v8::Int32>(res);
            args[1] = Nan::Null();
        }
        else
        {
            args[0] = Nan::Null();
            args[1] = Nan::New<v8::String>(value).ToLocalChecked();
        }
        nanCallback.Call(2, args, req);
        delete req;
        kv->Unref();
    };

    // Keep the KV object alive until on_done runs
    kv->Ref();
    kv->dbw->get(key, on_done, allow_cache);
}
|
||||
|
||||
// get(key, callback(err, value))
|
||||
NAN_METHOD(NodeVitastorKV::Get)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Get");
|
||||
if (info.Length() < 2)
|
||||
Nan::ThrowError("Not enough arguments to get(key, callback(err, value))");
|
||||
get_impl(info, false);
|
||||
}
|
||||
|
||||
// get_cached(key, callback(err, value))
|
||||
NAN_METHOD(NodeVitastorKV::GetCached)
|
||||
{
|
||||
TRACE("NodeVitastorKV::GetCached");
|
||||
if (info.Length() < 2)
|
||||
Nan::ThrowError("Not enough arguments to get_cached(key, callback(err, value))");
|
||||
get_impl(info, true);
|
||||
}
|
||||
|
||||
static std::function<bool(int, const std::string &)> make_cas_callback(NodeVitastorRequest *cas_req)
|
||||
{
|
||||
return [cas_req](int res, const std::string & value)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(cas_req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(value).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
Nan::MaybeLocal<v8::Value> ret = nanCallback.Call(1, args, cas_req);
|
||||
if (ret.IsEmpty())
|
||||
return false;
|
||||
return Nan::To<bool>(ret.ToLocalChecked()).FromJust();
|
||||
};
|
||||
}
|
||||
|
||||
// set(key, value, callback(err), cas_compare(old_value)?)
|
||||
NAN_METHOD(NodeVitastorKV::Set)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Set");
|
||||
if (info.Length() < 3)
|
||||
Nan::ThrowError("Not enough arguments to set(key, value, callback(err), cas_compare(old_value)?)");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
// FIXME: Handle Buffer too
|
||||
std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
|
||||
std::string value(*Nan::Utf8String(info[1].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[2].As<v8::Function>();
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(kv->cli, callback), *cas_req = NULL;
|
||||
|
||||
std::function<bool(int, const std::string &)> cas_cb;
|
||||
if (info.Length() > 3 && info[3]->IsObject())
|
||||
{
|
||||
v8::Local<v8::Function> cas_callback = info[3].As<v8::Function>();
|
||||
cas_req = new NodeVitastorRequest(kv->cli, cas_callback);
|
||||
cas_cb = make_cas_callback(cas_req);
|
||||
}
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->set(key, value, [kv, req, cas_req](int res)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
if (cas_req)
|
||||
delete cas_req;
|
||||
kv->Unref();
|
||||
}, cas_cb);
|
||||
}
|
||||
|
||||
// del(key, callback(err), cas_compare(old_value)?)
|
||||
NAN_METHOD(NodeVitastorKV::Del)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Del");
|
||||
if (info.Length() < 2)
|
||||
Nan::ThrowError("Not enough arguments to del(key, callback(err), cas_compare(old_value)?)");
|
||||
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
|
||||
// FIXME: Handle Buffer too
|
||||
std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
|
||||
|
||||
v8::Local<v8::Function> callback = info[1].As<v8::Function>();
|
||||
NodeVitastorRequest *req = new NodeVitastorRequest(kv->cli, callback), *cas_req = NULL;
|
||||
|
||||
std::function<bool(int, const std::string &)> cas_cb;
|
||||
if (info.Length() > 2 && info[2]->IsObject())
|
||||
{
|
||||
v8::Local<v8::Function> cas_callback = info[2].As<v8::Function>();
|
||||
cas_req = new NodeVitastorRequest(kv->cli, cas_callback);
|
||||
cas_cb = make_cas_callback(cas_req);
|
||||
}
|
||||
|
||||
kv->Ref();
|
||||
kv->dbw->del(key, [kv, req, cas_req](int res)
|
||||
{
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
if (cas_req)
|
||||
delete cas_req;
|
||||
kv->Unref();
|
||||
}, cas_cb);
|
||||
}
|
||||
|
||||
// list(start_key?)
|
||||
NAN_METHOD(NodeVitastorKV::List)
|
||||
{
|
||||
TRACE("NodeVitastorKV::List");
|
||||
|
||||
v8::Local<v8::Function> cons = Nan::New(listing_class);
|
||||
v8::Local<v8::Value> args[2];
|
||||
args[0] = info.This();
|
||||
int narg = 1;
|
||||
if (info.Length() > 1 && info[1]->IsString())
|
||||
{
|
||||
args[1] = info[1];
|
||||
narg = 2;
|
||||
}
|
||||
info.GetReturnValue().Set(Nan::NewInstance(cons, narg, args).ToLocalChecked());
|
||||
}
|
||||
|
||||
/*NAN_METHOD(NodeVitastorKV::Destroy)
|
||||
{
|
||||
TRACE("NodeVitastorKV::Destroy");
|
||||
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
|
||||
if (!kv->dead)
|
||||
kv->Unref();
|
||||
}*/
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// NodeVitastorKVListing
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
// constructor(node_vitastor_kv, start_key?)
|
||||
NAN_METHOD(NodeVitastorKVListing::Create)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Create");
|
||||
|
||||
v8::Local<v8::Object> parent = info[0].As<v8::Object>();
|
||||
NodeVitastorKV *kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(parent);
|
||||
|
||||
std::string start_key;
|
||||
// FIXME: Handle Buffer too
|
||||
if (info.Length() > 1 && info[1]->IsString())
|
||||
{
|
||||
start_key = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
|
||||
}
|
||||
|
||||
NodeVitastorKVListing *list = new NodeVitastorKVListing();
|
||||
list->kv = kv;
|
||||
list->handle = list->kv->dbw->list_start(start_key);
|
||||
|
||||
list->Wrap(info.This());
|
||||
kv->Ref();
|
||||
info.GetReturnValue().Set(info.This());
|
||||
}
|
||||
|
||||
// Closes the listing handle if it is still open, frees the stored next()
// request and drops the reference taken on the parent KV object in Create().
NodeVitastorKVListing::~NodeVitastorKVListing()
{
    if (handle != NULL)
    {
        kv->dbw->list_close(handle);
        handle = NULL;
    }
    // delete on a NULL pointer is a no-op, so no separate check is needed
    delete iter;
    iter = NULL;
    kv->Unref();
}
|
||||
|
||||
// next(callback(err, value)?)
|
||||
NAN_METHOD(NodeVitastorKVListing::Next)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Next");
|
||||
|
||||
NodeVitastorKVListing* list = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());
|
||||
|
||||
if (info.Length() > 0)
|
||||
{
|
||||
v8::Local<v8::Function> callback = info[0].As<v8::Function>();
|
||||
if (list->iter)
|
||||
{
|
||||
delete list->iter;
|
||||
}
|
||||
list->iter = new NodeVitastorRequest(list->kv->cli, callback);
|
||||
}
|
||||
if (!list->handle)
|
||||
{
|
||||
// Already closed
|
||||
if (list->iter)
|
||||
{
|
||||
auto req = list->iter;
|
||||
list->iter = NULL;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[1];
|
||||
args[0] = Nan::New<v8::Int32>(-EINVAL);
|
||||
nanCallback.Call(1, args, req);
|
||||
delete req;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
list->kv->Ref();
|
||||
list->kv->dbw->list_next(list->handle, [list](int res, const std::string & key, const std::string & value)
|
||||
{
|
||||
auto req = list->iter;
|
||||
list->iter = NULL;
|
||||
Nan::HandleScope scope;
|
||||
Nan::Callback nanCallback(Nan::New(req->callback));
|
||||
v8::Local<v8::Value> args[3];
|
||||
args[0] = Nan::New<v8::Int32>(res);
|
||||
args[1] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(key).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
args[2] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(value).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
|
||||
nanCallback.Call(3, args, req);
|
||||
if (list->iter)
|
||||
delete req;
|
||||
else
|
||||
list->iter = req;
|
||||
list->kv->Unref();
|
||||
});
|
||||
}
|
||||
|
||||
// close()
|
||||
NAN_METHOD(NodeVitastorKVListing::Close)
|
||||
{
|
||||
TRACE("NodeVitastorKVListing::Close");
|
||||
|
||||
NodeVitastorKVListing* list = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());
|
||||
|
||||
if (list->handle)
|
||||
{
|
||||
list->kv->dbw->list_close(list->handle);
|
||||
list->handle = NULL;
|
||||
}
|
||||
if (list->iter)
|
||||
{
|
||||
delete list->iter;
|
||||
list->iter = NULL;
|
||||
}
|
||||
}
|
136
node-binding/client.h
Normal file
136
node-binding/client.h
Normal file
@@ -0,0 +1,136 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#ifndef NODE_VITASTOR_CLIENT_H
|
||||
#define NODE_VITASTOR_CLIENT_H
|
||||
|
||||
#include <nan.h>
|
||||
#include <vitastor_c.h>
|
||||
#include <vitastor_kv.h>
|
||||
|
||||
class NodeVitastorRequest;
|
||||
|
||||
class NodeVitastor: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor({ ...config })
|
||||
static NAN_METHOD(Create);
|
||||
// read(pool, inode, offset, len, callback(err, buffer, version))
|
||||
static NAN_METHOD(Read);
|
||||
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
|
||||
static NAN_METHOD(Write);
|
||||
// sync(callback(err))
|
||||
static NAN_METHOD(Sync);
|
||||
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
static NAN_METHOD(ReadBitmap);
|
||||
// // destroy()
|
||||
// static NAN_METHOD(Destroy);
|
||||
|
||||
~NodeVitastor();
|
||||
|
||||
private:
|
||||
vitastor_c *c = NULL;
|
||||
int eventfd = -1;
|
||||
uv_poll_t poll_watcher;
|
||||
|
||||
NodeVitastor();
|
||||
|
||||
static void on_io_readable(uv_poll_t* handle, int status, int revents);
|
||||
static void on_read_finish(void *opaque, long retval, uint64_t version);
|
||||
static void on_write_finish(void *opaque, long retval);
|
||||
static void on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap);
|
||||
|
||||
NodeVitastorRequest* get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
|
||||
NodeVitastorRequest* get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
|
||||
|
||||
friend class NodeVitastorImage;
|
||||
friend class NodeVitastorKV;
|
||||
friend class NodeVitastorKVListing;
|
||||
};
|
||||
|
||||
class NodeVitastorImage: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor(node_vitastor, name)
|
||||
static NAN_METHOD(Create);
|
||||
// read(offset, len, callback(err, buffer, version))
|
||||
static NAN_METHOD(Read);
|
||||
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
|
||||
static NAN_METHOD(Write);
|
||||
// sync(callback(err))
|
||||
static NAN_METHOD(Sync);
|
||||
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
|
||||
static NAN_METHOD(ReadBitmap);
|
||||
// get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))
|
||||
static NAN_METHOD(GetInfo);
|
||||
|
||||
~NodeVitastorImage();
|
||||
|
||||
private:
|
||||
NodeVitastor *cli = NULL;
|
||||
std::string name;
|
||||
void *watch = NULL;
|
||||
std::vector<NodeVitastorRequest*> on_init;
|
||||
Nan::Persistent<v8::Object> cliObj;
|
||||
|
||||
static void on_watch_start(void *opaque, long retval);
|
||||
void exec_request(NodeVitastorRequest *req);
|
||||
void exec_or_wait(NodeVitastorRequest *req);
|
||||
};
|
||||
|
||||
class NodeVitastorKV: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor(node_vitastor)
|
||||
static NAN_METHOD(Create);
|
||||
// open(pool_id, inode_num, { ...config }, callback(err))
|
||||
static NAN_METHOD(Open);
|
||||
// set_config({ ...config })
|
||||
static NAN_METHOD(SetConfig);
|
||||
// close(callback())
|
||||
static NAN_METHOD(Close);
|
||||
// get_size()
|
||||
static NAN_METHOD(GetSize);
|
||||
// get(key, callback(err, value))
|
||||
static NAN_METHOD(Get);
|
||||
// get_cached(key, callback(err, value))
|
||||
static NAN_METHOD(GetCached);
|
||||
// set(key, value, callback(err), cas_compare(old_value)?)
|
||||
static NAN_METHOD(Set);
|
||||
// del(key, callback(err), cas_compare(old_value)?)
|
||||
static NAN_METHOD(Del);
|
||||
// list(start_key?)
|
||||
static NAN_METHOD(List);
|
||||
|
||||
~NodeVitastorKV();
|
||||
|
||||
static Nan::Persistent<v8::Function> listing_class;
|
||||
|
||||
private:
|
||||
NodeVitastor *cli = NULL;
|
||||
vitastorkv_dbw_t *dbw = NULL;
|
||||
|
||||
static void get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache);
|
||||
|
||||
friend class NodeVitastorKVListing;
|
||||
};
|
||||
|
||||
class NodeVitastorKVListing: public Nan::ObjectWrap
|
||||
{
|
||||
public:
|
||||
// constructor(node_vitastor_kv, start_key?)
|
||||
static NAN_METHOD(Create);
|
||||
// next(callback(err, value)?)
|
||||
static NAN_METHOD(Next);
|
||||
// close()
|
||||
static NAN_METHOD(Close);
|
||||
|
||||
~NodeVitastorKVListing();
|
||||
|
||||
private:
|
||||
NodeVitastorKV *kv = NULL;
|
||||
void *handle = NULL;
|
||||
NodeVitastorRequest *iter = NULL;
|
||||
};
|
||||
|
||||
#endif
|
1
node-binding/index.js
Normal file
1
node-binding/index.js
Normal file
@@ -0,0 +1 @@
|
||||
// Locate the compiled native addon via the `bindings` helper and re-export it unchanged.
const loadBinding = require('bindings');
module.exports = loadBinding('addon.node');
|
24
node-binding/package.json
Normal file
24
node-binding/package.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"name": "vitastor",
|
||||
"version": "1.7.0",
|
||||
"description": "Low-level native bindings to Vitastor client library",
|
||||
"main": "index.js",
|
||||
"keywords": [
|
||||
"storage",
|
||||
"sds",
|
||||
"vitastor"
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://git.yourcmc.ru/vitalif/vitastor.git"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "node-gyp rebuild"
|
||||
},
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "VNPL-2.0",
|
||||
"dependencies": {
|
||||
"bindings": "1.5.0",
|
||||
"nan": "^2.19.0"
|
||||
}
|
||||
}
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '1.6.1'
|
||||
VITASTOR_VERSION = '1.8.0'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@@ -238,7 +238,7 @@ class VitastorDriver(driver.CloneableImageVD,
|
||||
|
||||
stats = {
|
||||
'vendor_name': 'Vitastor',
|
||||
'driver_version': self.VERSION,
|
||||
'driver_version': VITASTOR_VERSION,
|
||||
'storage_protocol': 'vitastor',
|
||||
'total_capacity_gb': 'unknown',
|
||||
'free_capacity_gb': 'unknown',
|
||||
@@ -707,10 +707,10 @@ class VitastorDriver(driver.CloneableImageVD,
|
||||
return ({}, True)
|
||||
return ({}, False)
|
||||
|
||||
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id):
|
||||
self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True)
|
||||
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id, disable_sparse=False):
    # Delegates to copy_image_to_volume() with encrypted=True.
    # The caller's disable_sparse value is forwarded as-is; it used to be
    # dropped and replaced by a hard-coded False.
    self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True, disable_sparse=disable_sparse)
|
||||
|
||||
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False):
|
||||
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False, disable_sparse=False):
|
||||
tmp_dir = volume_utils.image_conversion_dir()
|
||||
with tempfile.NamedTemporaryFile(dir = tmp_dir) as tmp:
|
||||
image_utils.fetch_to_raw(
|
||||
|
670
patches/libvirt-10.0-vitastor.diff
Normal file
670
patches/libvirt-10.0-vitastor.diff
Normal file
@@ -0,0 +1,670 @@
|
||||
From 571bde71268dcca6446454bb1e895e21bcc7b2a0 Mon Sep 17 00:00:00 2001
|
||||
From: ace <ace@0xace.cc>
|
||||
Date: Sat, 18 May 2024 19:45:49 +0300
|
||||
Subject: [PATCH] Add Vitastor support
|
||||
|
||||
---
|
||||
include/libvirt/libvirt-storage.h | 1 +
|
||||
src/conf/domain_conf.c | 4 +-
|
||||
src/conf/domain_validate.c | 10 +-
|
||||
src/conf/schemas/domaincommon.rng | 30 +++++
|
||||
src/conf/storage_conf.c | 20 ++-
|
||||
src/conf/storage_conf.h | 2 +
|
||||
src/conf/storage_source_conf.c | 2 +
|
||||
src/conf/storage_source_conf.h | 1 +
|
||||
src/conf/virstorageobj.c | 3 +
|
||||
src/libvirt-storage.c | 1 +
|
||||
src/libxl/libxl_conf.c | 1 +
|
||||
src/libxl/xen_xl.c | 1 +
|
||||
src/qemu/qemu_block.c | 45 +++++++
|
||||
src/qemu/qemu_domain.c | 4 +-
|
||||
src/qemu/qemu_snapshot.c | 2 +
|
||||
src/storage/storage_driver.c | 1 +
|
||||
.../storage_source_backingstore.c | 123 ++++++++++++++++++
|
||||
src/test/test_driver.c | 1 +
|
||||
.../storagepoolcapsschemadata/poolcaps-fs.xml | 7 +
|
||||
.../poolcaps-full.xml | 7 +
|
||||
tests/storagepoolxml2argvtest.c | 1 +
|
||||
tools/virsh-pool.c | 3 +
|
||||
22 files changed, 265 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3da1..5f5daa8341 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index 52a5796ad2..089697b2a3 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7191,7 +7191,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -30657,6 +30658,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index faa7659f07..01b907d60d 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -495,6 +495,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -549,10 +550,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
_("<snapshot> element is currently supported only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
+ }
|
||||
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
- _("<config> element is currently supported only with 'rbd' disks"));
|
||||
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index df44cd9857..4bb72fc697 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -1997,6 +1997,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2347,6 +2376,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 68842004b7..1d69a788b6 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957cfe..720c07ef74 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index 959ec5ed40..e751dd4d6a 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -88,6 +88,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1301,6 +1302,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index 05b4bda16c..b5ed143c39 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -129,6 +129,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 59fa5da372..4739167f5f 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index db7660aac4..561df34709 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index 62e1be6672..71a1d42896 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -979,6 +979,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index f175359307..8efcf4c329 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1456,6 +1456,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index c9f5cbbf29..dbbac36836 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -758,6 +758,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValue *
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValue *ret = NULL;
|
||||
+ virStorageNetHostDef *host;
|
||||
+ size_t i;
|
||||
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
|
||||
+ g_autofree char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectAdd(&ret,
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->query,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
|
||||
{
|
||||
@@ -1140,6 +1172,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
@@ -2020,6 +2058,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2400,6 +2439,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index 341c543280..61b248fa2c 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -5207,7 +5207,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -10387,6 +10388,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index 0cac0c4146..4955ebd8d4 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -423,6 +423,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -648,6 +649,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index 314fe930e0..fb615a8b4e 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index 80681924ea..8a3ade9ec0 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSource *src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
|
||||
+ virJSONValue *json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index ed545848af..dbfdbe8476 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7336,6 +7336,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af746..8bd0a57bdd 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a937..852df0de16 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index e8e40d695e..db55fe5f3a 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index 36f00cf643..5f5bd3464e 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1223,6 +1223,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
||||
--
|
||||
2.43.0
|
||||
|
643
patches/libvirt-10.4-vitastor.diff
Normal file
643
patches/libvirt-10.4-vitastor.diff
Normal file
@@ -0,0 +1,643 @@
|
||||
commit 1f7e90e36b2afca0312392979b96d31951a8d66b
|
||||
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Date: Thu Jun 27 01:34:54 2024 +0300
|
||||
|
||||
Add Vitastor support
|
||||
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3da1..5f5daa8341 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index fde594f811..66537db3e3 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7220,7 +7220,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -30734,6 +30735,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index 395e036e8f..8a0190f85b 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -495,6 +495,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -549,10 +550,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
_("<snapshot> element is currently supported only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
+ }
|
||||
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
- _("<config> element is currently supported only with 'rbd' disks"));
|
||||
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index a46a824f88..4c5b720643 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -1997,6 +1997,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2347,6 +2376,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 68842004b7..1d69a788b6 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957cfe..720c07ef74 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index 959ec5ed40..e751dd4d6a 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -88,6 +88,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1301,6 +1302,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index 05b4bda16c..b5ed143c39 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -129,6 +129,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 59fa5da372..4739167f5f 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index db7660aac4..561df34709 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index 62e1be6672..71a1d42896 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -979,6 +979,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index 53f6871efc..c34b8cee1a 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1456,6 +1456,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index 738b72d7ea..5dd082fc89 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -758,6 +758,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValue *
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValue *ret = NULL;
|
||||
+ virStorageNetHostDef *host;
|
||||
+ size_t i;
|
||||
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
|
||||
+ g_autofree char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectAdd(&ret,
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->query,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
|
||||
{
|
||||
@@ -1140,6 +1172,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
@@ -2020,6 +2058,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2400,6 +2439,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index bda62f2e5c..84b4e5f2b8 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -5260,7 +5260,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -10514,6 +10515,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index f5260c4a22..2f9d8406fe 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -423,6 +423,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -648,6 +649,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index 86c03762d2..630c6eff1a 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index 80681924ea..8a3ade9ec0 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSource *src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
|
||||
+ virJSONValue *json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index d2d1bc43e3..31a92e4a01 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7339,6 +7339,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af746..8bd0a57bdd 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a937..852df0de16 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index e8e40d695e..db55fe5f3a 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index f9aad8ded0..64704b4288 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1187,6 +1187,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
288
patches/nova-28.diff
Normal file
288
patches/nova-28.diff
Normal file
@@ -0,0 +1,288 @@
|
||||
diff --git a/nova/virt/image/model.py b/nova/virt/image/model.py
|
||||
index 971f7e9c07..ec3fca72cb 100644
|
||||
--- a/nova/virt/image/model.py
|
||||
+++ b/nova/virt/image/model.py
|
||||
@@ -129,3 +129,22 @@ class RBDImage(Image):
|
||||
self.user = user
|
||||
self.password = password
|
||||
self.servers = servers
|
||||
+
|
||||
+
|
||||
+class VitastorImage(Image):
|
||||
+ """Class for images in a remote Vitastor cluster"""
|
||||
+
|
||||
+ def __init__(self, name, etcd_address = None, etcd_prefix = None, config_path = None):
|
||||
+ """Create a new Vitastor image object
|
||||
+
|
||||
+ :param name: name of the image
|
||||
+ :param etcd_address: etcd URL(s) (optional)
|
||||
+ :param etcd_prefix: etcd prefix (optional)
|
||||
+ :param config_path: path to the configuration (optional)
|
||||
+ """
|
||||
+ super(VitastorImage, self).__init__(FORMAT_RAW)
|
||||
+
|
||||
+ self.name = name
|
||||
+ self.etcd_address = etcd_address
|
||||
+ self.etcd_prefix = etcd_prefix
|
||||
+ self.config_path = config_path
|
||||
diff --git a/nova/virt/images.py b/nova/virt/images.py
|
||||
index 5358f3766a..ebe3d6effb 100644
|
||||
--- a/nova/virt/images.py
|
||||
+++ b/nova/virt/images.py
|
||||
@@ -41,7 +41,7 @@ IMAGE_API = glance.API()
|
||||
|
||||
def qemu_img_info(path, format=None):
|
||||
"""Return an object containing the parsed output from qemu-img info."""
|
||||
- if not os.path.exists(path) and not path.startswith('rbd:'):
|
||||
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
|
||||
raise exception.DiskNotFound(location=path)
|
||||
|
||||
info = nova.privsep.qemu.unprivileged_qemu_img_info(path, format=format)
|
||||
@@ -50,7 +50,7 @@ def qemu_img_info(path, format=None):
|
||||
|
||||
def privileged_qemu_img_info(path, format=None, output_format='json'):
|
||||
"""Return an object containing the parsed output from qemu-img info."""
|
||||
- if not os.path.exists(path) and not path.startswith('rbd:'):
|
||||
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
|
||||
raise exception.DiskNotFound(location=path)
|
||||
|
||||
info = nova.privsep.qemu.privileged_qemu_img_info(path, format=format)
|
||||
diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py
|
||||
index f9475776b3..a2e18aab67 100644
|
||||
--- a/nova/virt/libvirt/config.py
|
||||
+++ b/nova/virt/libvirt/config.py
|
||||
@@ -1060,6 +1060,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
|
||||
self.driver_iommu = False
|
||||
self.source_path = None
|
||||
self.source_protocol = None
|
||||
+ self.source_query = None
|
||||
+ self.source_config = None
|
||||
self.source_name = None
|
||||
self.source_hosts = []
|
||||
self.source_ports = []
|
||||
@@ -1189,6 +1191,10 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
|
||||
source = etree.Element("source", protocol=self.source_protocol)
|
||||
if self.source_name is not None:
|
||||
source.set('name', self.source_name)
|
||||
+ if self.source_query is not None:
|
||||
+ source.set('query', self.source_query)
|
||||
+ if self.source_config is not None:
|
||||
+ source.append(etree.Element('config', file=self.source_config))
|
||||
hosts_info = zip(self.source_hosts, self.source_ports)
|
||||
for name, port in hosts_info:
|
||||
host = etree.Element('host', name=name)
|
||||
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
|
||||
index 391231c527..f38faa1608 100644
|
||||
--- a/nova/virt/libvirt/driver.py
|
||||
+++ b/nova/virt/libvirt/driver.py
|
||||
@@ -179,6 +179,7 @@ VOLUME_DRIVERS = {
|
||||
'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
|
||||
'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
|
||||
'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
|
||||
+ 'vitastor': 'nova.virt.libvirt.volume.vitastor.LibvirtVitastorVolumeDriver',
|
||||
'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
|
||||
'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
|
||||
'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver', # noqa:E501
|
||||
@@ -385,10 +386,10 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# This prevents the risk of one test setting a capability
|
||||
# which bleeds over into other tests.
|
||||
|
||||
- # LVM and RBD require raw images. If we are not configured to
|
||||
+ # LVM, RBD, Vitastor require raw images. If we are not configured to
|
||||
# force convert images into raw format, then we _require_ raw
|
||||
# images only.
|
||||
- raw_only = ('rbd', 'lvm')
|
||||
+ raw_only = ('rbd', 'lvm', 'vitastor')
|
||||
requires_raw_image = (CONF.libvirt.images_type in raw_only and
|
||||
not CONF.force_raw_images)
|
||||
requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
|
||||
@@ -775,12 +776,12 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# Some imagebackends are only able to import raw disk images,
|
||||
# and will fail if given any other format. See the bug
|
||||
# https://bugs.launchpad.net/nova/+bug/1816686 for more details.
|
||||
- if CONF.libvirt.images_type in ('rbd',):
|
||||
+ if CONF.libvirt.images_type in ('rbd', 'vitastor'):
|
||||
if not CONF.force_raw_images:
|
||||
msg = _("'[DEFAULT]/force_raw_images = False' is not "
|
||||
- "allowed with '[libvirt]/images_type = rbd'. "
|
||||
+ "allowed with '[libvirt]/images_type = rbd' or 'vitastor'. "
|
||||
"Please check the two configs and if you really "
|
||||
- "do want to use rbd as images_type, set "
|
||||
+ "do want to use rbd or vitastor as images_type, set "
|
||||
"force_raw_images to True.")
|
||||
raise exception.InvalidConfiguration(msg)
|
||||
|
||||
@@ -2603,6 +2604,16 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
if connection_info['data'].get('auth_enabled'):
|
||||
username = connection_info['data']['auth_username']
|
||||
path = f"rbd:{volume_name}:id={username}"
|
||||
+ elif connection_info['driver_volume_type'] == 'vitastor':
|
||||
+ volume_name = connection_info['data']['name']
|
||||
+ path = 'vitastor:image='+volume_name.replace(':', '\\:')
|
||||
+ for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
|
||||
+ if k in connection_info['data']:
|
||||
+ kk = k
|
||||
+ if kk == 'etcd_address':
|
||||
+ # FIXME use etcd_address in qemu driver
|
||||
+ kk = 'etcd_host'
|
||||
+ path += ":"+kk.replace('_', '-')+"="+connection_info['data'][k].replace(':', '\\:')
|
||||
else:
|
||||
path = 'unknown'
|
||||
raise exception.DiskNotFound(location='unknown')
|
||||
@@ -2827,8 +2838,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
|
||||
image_format = CONF.libvirt.snapshot_image_format or source_type
|
||||
|
||||
- # NOTE(bfilippov): save lvm and rbd as raw
|
||||
- if image_format == 'lvm' or image_format == 'rbd':
|
||||
+ # NOTE(bfilippov): save lvm, rbd and vitastor as raw
|
||||
+ if image_format == 'lvm' or image_format == 'rbd' or image_format == 'vitastor':
|
||||
image_format = 'raw'
|
||||
|
||||
metadata = self._create_snapshot_metadata(instance.image_meta,
|
||||
@@ -2899,7 +2910,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
expected_state=task_states.IMAGE_UPLOADING)
|
||||
|
||||
# TODO(nic): possibly abstract this out to the root_disk
|
||||
- if source_type == 'rbd' and live_snapshot:
|
||||
+ if (source_type == 'rbd' or source_type == 'vitastor') and live_snapshot:
|
||||
# Standard snapshot uses qemu-img convert from RBD which is
|
||||
# not safe to run with live_snapshot.
|
||||
live_snapshot = False
|
||||
@@ -4099,7 +4110,7 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# cleanup rescue volume
|
||||
lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
|
||||
if lvmdisk.endswith('.rescue')])
|
||||
- if CONF.libvirt.images_type == 'rbd':
|
||||
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
|
||||
filter_fn = lambda disk: (disk.startswith(instance.uuid) and
|
||||
disk.endswith('.rescue'))
|
||||
rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
|
||||
@@ -4356,6 +4367,8 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
# TODO(mikal): there is a bug here if images_type has
|
||||
# changed since creation of the instance, but I am pretty
|
||||
# sure that this bug already exists.
|
||||
+ if CONF.libvirt.images_type == 'vitastor':
|
||||
+ return 'vitastor'
|
||||
return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
|
||||
|
||||
@staticmethod
|
||||
@@ -4764,10 +4777,10 @@ class LibvirtDriver(driver.ComputeDriver):
|
||||
finally:
|
||||
# NOTE(mikal): if the config drive was imported into RBD,
|
||||
# then we no longer need the local copy
|
||||
- if CONF.libvirt.images_type == 'rbd':
|
||||
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
|
||||
LOG.info('Deleting local config drive %(path)s '
|
||||
- 'because it was imported into RBD.',
|
||||
- {'path': config_disk_local_path},
|
||||
+ 'because it was imported into %(type)s.',
|
||||
+ {'path': config_disk_local_path, 'type': CONF.libvirt.images_type},
|
||||
instance=instance)
|
||||
os.unlink(config_disk_local_path)
|
||||
|
||||
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
|
||||
index da2a6e8b8a..52c02e72f1 100644
|
||||
--- a/nova/virt/libvirt/utils.py
|
||||
+++ b/nova/virt/libvirt/utils.py
|
||||
@@ -340,6 +340,10 @@ def find_disk(guest: libvirt_guest.Guest) -> ty.Tuple[str, ty.Optional[str]]:
|
||||
disk_path = disk.source_name
|
||||
if disk_path:
|
||||
disk_path = 'rbd:' + disk_path
|
||||
+ elif not disk_path and disk.source_protocol == 'vitastor':
|
||||
+ disk_path = disk.source_name
|
||||
+ if disk_path:
|
||||
+ disk_path = 'vitastor:' + disk_path
|
||||
|
||||
if not disk_path:
|
||||
raise RuntimeError(_("Can't retrieve root device path "
|
||||
@@ -354,6 +358,8 @@ def get_disk_type_from_path(path: str) -> ty.Optional[str]:
|
||||
return 'lvm'
|
||||
elif path.startswith('rbd:'):
|
||||
return 'rbd'
|
||||
+ elif path.startswith('vitastor:'):
|
||||
+ return 'vitastor'
|
||||
elif (os.path.isdir(path) and
|
||||
os.path.exists(os.path.join(path, "DiskDescriptor.xml"))):
|
||||
return 'ploop'
|
||||
diff --git a/nova/virt/libvirt/volume/vitastor.py b/nova/virt/libvirt/volume/vitastor.py
|
||||
new file mode 100644
|
||||
index 0000000000..0256df62c1
|
||||
--- /dev/null
|
||||
+++ b/nova/virt/libvirt/volume/vitastor.py
|
||||
@@ -0,0 +1,75 @@
|
||||
+# Copyright (c) 2021+, Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
+#
|
||||
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
+# not use this file except in compliance with the License. You may obtain
|
||||
+# a copy of the License at
|
||||
+#
|
||||
+# http://www.apache.org/licenses/LICENSE-2.0
|
||||
+#
|
||||
+# Unless required by applicable law or agreed to in writing, software
|
||||
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
+# License for the specific language governing permissions and limitations
|
||||
+# under the License.
|
||||
+
|
||||
+from os_brick import exception as os_brick_exception
|
||||
+from os_brick import initiator
|
||||
+from os_brick.initiator import connector
|
||||
+from oslo_log import log as logging
|
||||
+
|
||||
+import nova.conf
|
||||
+from nova import utils
|
||||
+from nova.virt.libvirt.volume import volume as libvirt_volume
|
||||
+
|
||||
+
|
||||
+CONF = nova.conf.CONF
|
||||
+LOG = logging.getLogger(__name__)
|
||||
+
|
||||
+
|
||||
+class LibvirtVitastorVolumeDriver(libvirt_volume.LibvirtBaseVolumeDriver):
|
||||
+ """Driver to attach Vitastor volumes to libvirt."""
|
||||
+ def __init__(self, host):
|
||||
+ super(LibvirtVitastorVolumeDriver, self).__init__(host, is_block_dev=False)
|
||||
+
|
||||
+ def connect_volume(self, connection_info, instance):
|
||||
+ pass
|
||||
+
|
||||
+ def disconnect_volume(self, connection_info, instance, force=False):
|
||||
+ pass
|
||||
+
|
||||
+ def get_config(self, connection_info, disk_info):
|
||||
+ """Returns xml for libvirt."""
|
||||
+ conf = super(LibvirtVitastorVolumeDriver, self).get_config(connection_info, disk_info)
|
||||
+ conf.source_type = 'network'
|
||||
+ conf.source_protocol = 'vitastor'
|
||||
+ conf.source_name = connection_info['data'].get('name')
|
||||
+ conf.source_query = connection_info['data'].get('etcd_prefix') or None
|
||||
+ conf.source_config = connection_info['data'].get('config_path') or None
|
||||
+ conf.source_hosts = []
|
||||
+ conf.source_ports = []
|
||||
+ addresses = connection_info['data'].get('etcd_address', '')
|
||||
+ if addresses:
|
||||
+ if not isinstance(addresses, list):
|
||||
+ addresses = addresses.split(',')
|
||||
+ for addr in addresses:
|
||||
+ if addr.startswith('https://'):
|
||||
+ raise NotImplementedError('Vitastor block driver does not support SSL for etcd communication yet')
|
||||
+ if addr.startswith('http://'):
|
||||
+ addr = addr[7:]
|
||||
+ addr = addr.rstrip('/')
|
||||
+ if addr.endswith('/v3'):
|
||||
+ addr = addr[0:-3]
|
||||
+ p = addr.find('/')
|
||||
+ if p > 0:
|
||||
+ raise NotImplementedError('libvirt does not support custom URL paths for Vitastor etcd yet. Use /etc/vitastor/vitastor.conf')
|
||||
+ p = addr.find(':')
|
||||
+ port = '2379'
|
||||
+ if p > 0:
|
||||
+ port = addr[p+1:]
|
||||
+ addr = addr[0:p]
|
||||
+ conf.source_hosts.append(addr)
|
||||
+ conf.source_ports.append(port)
|
||||
+ return conf
|
||||
+
|
||||
+ def extend_volume(self, connection_info, instance, requested_size):
|
||||
+ return requested_size
|
193
patches/qemu-8.2-vitastor.patch
Normal file
193
patches/qemu-8.2-vitastor.patch
Normal file
@@ -0,0 +1,193 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index 59ff6d380c..abde3715c2 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -109,6 +109,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 6c77d9687d..390683ee71 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1295,6 +1295,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2157,6 +2177,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4356,6 +4377,7 @@ summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index c9baeda639..85e1df5a56 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index ca390c5700..d2dbaeb279 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3201,7 +3201,7 @@
|
||||
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4255,6 +4255,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4713,6 +4735,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5148,6 +5171,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5370,6 +5407,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
index 76781f17f4..ac5fe3aa08 100755
|
||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -30,7 +30,7 @@
|
||||
--with-suffix="qemu-kvm" \
|
||||
--firmwarepath=/usr/share/qemu-firmware \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -176,6 +176,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 680fa3f581..dab422bf04 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -168,6 +168,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qed qed image format support'
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
|
||||
@@ -445,6 +446,8 @@ _meson_option_parse() {
|
||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
193
patches/qemu-9.0-vitastor.patch
Normal file
193
patches/qemu-9.0-vitastor.patch
Normal file
@@ -0,0 +1,193 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index e1f03fd773..db0cfb2321 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -114,6 +114,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 91a0aa64c6..e8bc710578 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1452,6 +1452,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2250,6 +2270,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4443,6 +4464,7 @@ summary_info += {'fdt support': fdt_opt == 'disabled' ? false : fdt_opt}
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index 0a99a059ec..16dc440118 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index 746d1694c2..199a146a0b 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3203,7 +3203,7 @@
|
||||
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4285,6 +4285,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4741,6 +4763,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5180,6 +5203,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5402,6 +5439,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
index 76781f17f4..ac5fe3aa08 100755
|
||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -30,7 +30,7 @@
|
||||
--with-suffix="qemu-kvm" \
|
||||
--firmwarepath=/usr/share/qemu-firmware \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -176,6 +176,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 680fa3f581..dab422bf04 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -168,6 +168,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qed qed image format support'
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
|
||||
@@ -445,6 +446,8 @@ _meson_option_parse() {
|
||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
@@ -18,10 +18,11 @@ fi
|
||||
cd ~/rpmbuild/SPECS
|
||||
rpmbuild -bp fio.spec
|
||||
cd $VITASTOR
|
||||
VER=$(grep ^Version: rpm/vitastor-el7.spec | awk '{print $2}')
|
||||
ln -s ~/rpmbuild/BUILD/fio*/ fio
|
||||
sh copy-fio-includes.sh
|
||||
rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-1.6.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.6.1$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform "s#^#vitastor-$VER/#" --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-$VER$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -36,7 +36,8 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.6.1.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
VER=$(grep ^Version: vitastor-el7.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.6.1
|
||||
Version: 1.8.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.6.1.el7.tar.gz
|
||||
Source0: vitastor-1.8.0.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
@@ -144,6 +144,8 @@ mkdir -p /etc/vitastor
|
||||
groupadd -r -f vitastor 2>/dev/null ||:
|
||||
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
|
||||
%files -n vitastor-client
|
||||
@@ -161,6 +163,7 @@ mkdir -p /etc/vitastor
|
||||
|
||||
%files -n vitastor-client-devel
|
||||
%_includedir/vitastor_c.h
|
||||
%_includedir/vitastor_kv.h
|
||||
%_libdir/pkgconfig
|
||||
|
||||
|
||||
|
@@ -35,7 +35,8 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.6.1.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
VER=$(grep ^Version: vitastor-el8.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.6.1
|
||||
Version: 1.8.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.6.1.el8.tar.gz
|
||||
Source0: vitastor-1.8.0.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
@@ -141,6 +141,8 @@ mkdir -p /etc/vitastor
|
||||
groupadd -r -f vitastor 2>/dev/null ||:
|
||||
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
|
||||
%files -n vitastor-client
|
||||
@@ -158,6 +160,7 @@ mkdir -p /etc/vitastor
|
||||
|
||||
%files -n vitastor-client-devel
|
||||
%_includedir/vitastor_c.h
|
||||
%_includedir/vitastor_kv.h
|
||||
%_libdir/pkgconfig
|
||||
|
||||
|
||||
|
@@ -18,7 +18,8 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-1.6.1.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
VER=$(grep ^Version: vitastor-el9.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 1.6.1
|
||||
Version: 1.8.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-1.6.1.el9.tar.gz
|
||||
Source0: vitastor-1.8.0.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
@@ -134,6 +134,8 @@ mkdir -p /etc/vitastor
|
||||
groupadd -r -f vitastor 2>/dev/null ||:
|
||||
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||
mkdir -p /etc/vitastor
|
||||
mkdir -p /var/lib/vitastor
|
||||
chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
|
||||
%files -n vitastor-client
|
||||
@@ -151,6 +153,7 @@ mkdir -p /etc/vitastor
|
||||
|
||||
%files -n vitastor-client-devel
|
||||
%_includedir/vitastor_c.h
|
||||
%_includedir/vitastor_kv.h
|
||||
%_libdir/pkgconfig
|
||||
|
||||
|
||||
|
@@ -19,7 +19,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="1.6.1")
|
||||
add_definitions(-DVITASTOR_VERSION="1.8.0")
|
||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
|
@@ -13,7 +13,7 @@ target_link_libraries(vitastor_blk
|
||||
# for timerfd_manager
|
||||
vitastor_common
|
||||
)
|
||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_blk PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
|
||||
if (${WITH_FIO})
|
||||
# libfio_vitastor_blk.so
|
||||
|
@@ -12,6 +12,7 @@ add_library(vitastor_common STATIC
|
||||
msgr_stop.cpp msgr_op.cpp msgr_send.cpp msgr_receive.cpp ../util/ringloop.cpp ../../json11/json11.cpp
|
||||
http_client.cpp osd_ops.cpp pg_states.cpp ../util/timerfd_manager.cpp ../util/str_util.cpp ${MSGR_RDMA}
|
||||
)
|
||||
target_link_libraries(vitastor_common pthread)
|
||||
target_compile_options(vitastor_common PUBLIC -fPIC)
|
||||
|
||||
# libvitastor_client.so
|
||||
@@ -28,7 +29,7 @@ target_link_libraries(vitastor_client
|
||||
${LIBURING_LIBRARIES}
|
||||
${IBVERBS_LIBRARIES}
|
||||
)
|
||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_client PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
configure_file(vitastor.pc.in vitastor.pc @ONLY)
|
||||
|
||||
if (${WITH_FIO})
|
||||
|
@@ -452,11 +452,10 @@ void cluster_client_t::on_change_pg_state_hook(pool_id_t pool_id, pg_num_t pg_nu
|
||||
if (pg_cfg.cur_primary != prev_primary)
|
||||
{
|
||||
// Repeat this PG operations because an OSD which stopped being primary may not fsync operations
|
||||
if (wb->repeat_ops_for(this, 0, pool_id, pg_num) > 0)
|
||||
{
|
||||
continue_ops();
|
||||
}
|
||||
wb->repeat_ops_for(this, 0, pool_id, pg_num);
|
||||
}
|
||||
// Always continue to resume operations hung because of lack of the primary OSD
|
||||
continue_ops();
|
||||
}
|
||||
|
||||
bool cluster_client_t::get_immediate_commit(uint64_t inode)
|
||||
@@ -1066,11 +1065,11 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
||||
!pg_it->second.pause && pg_it->second.cur_primary)
|
||||
{
|
||||
osd_num_t primary_osd = pg_it->second.cur_primary;
|
||||
part->osd_num = primary_osd;
|
||||
auto peer_it = msgr.osd_peer_fds.find(primary_osd);
|
||||
if (peer_it != msgr.osd_peer_fds.end())
|
||||
{
|
||||
int peer_fd = peer_it->second;
|
||||
part->osd_num = primary_osd;
|
||||
part->flags |= PART_SENT;
|
||||
op->inflight_count++;
|
||||
uint64_t pg_bitmap_size = (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8) * (
|
||||
@@ -1287,7 +1286,11 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
||||
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||
{
|
||||
copy_part_bitmap(op, part);
|
||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||
if (op->inode == op->cur_inode)
|
||||
{
|
||||
// Read only returns the version of the uppermost layer
|
||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||
}
|
||||
}
|
||||
else if (op->opcode == OSD_OP_WRITE)
|
||||
{
|
||||
|
@@ -333,7 +333,10 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
etcd_watch_ws = NULL;
|
||||
}
|
||||
if (this->log_level > 1)
|
||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju\n", etcd_address.c_str(), etcd_watch_revision);
|
||||
{
|
||||
fprintf(stderr, "Trying to connect to etcd websocket at %s, watch from revision %ju/%ju/%ju\n", etcd_address.c_str(),
|
||||
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||
}
|
||||
etcd_watch_ws = open_websocket(tfd, etcd_address, etcd_api_path+"/watch", etcd_slow_timeout,
|
||||
[this, cur_addr = selected_etcd_address](const http_response_t *msg)
|
||||
{
|
||||
@@ -348,16 +351,20 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||
if (data["result"]["created"].bool_value())
|
||||
{
|
||||
uint64_t watch_id = data["result"]["watch_id"].uint64_value();
|
||||
if (watch_id == ETCD_CONFIG_WATCH_ID ||
|
||||
watch_id == ETCD_PG_STATE_WATCH_ID ||
|
||||
watch_id == ETCD_PG_HISTORY_WATCH_ID ||
|
||||
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
{
|
||||
etcd_watches_initialised++;
|
||||
}
|
||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && this->log_level > 0)
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju\n", cur_addr.c_str(), etcd_watch_revision);
|
||||
{
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s, revision %ju/%ju/%ju\n", cur_addr.c_str(),
|
||||
etcd_watch_revision_config, etcd_watch_revision_osd, etcd_watch_revision_pg);
|
||||
}
|
||||
}
|
||||
if (data["result"]["canceled"].bool_value())
|
||||
{
|
||||
@@ -375,7 +382,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
data["result"]["compact_revision"].uint64_value());
|
||||
http_close(etcd_watch_ws);
|
||||
etcd_watch_ws = NULL;
|
||||
etcd_watch_revision = 0;
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = 0;
|
||||
on_reload_hook();
|
||||
}
|
||||
return;
|
||||
@@ -393,13 +400,29 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// Save revision only if it's present in the message - because sometimes etcd sends something without a header, like:
|
||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||
if (etcd_watches_initialised == ETCD_TOTAL_WATCHES && !data["result"]["header"]["revision"].is_null())
|
||||
{
|
||||
// Protect against a revision being split into multiple messages and some
|
||||
// of them being lost. Even though I'm not sure if etcd actually splits them
|
||||
// Also sometimes etcd sends something without a header, like:
|
||||
// {"error": {"grpc_code": 14, "http_code": 503, "http_status": "Service Unavailable", "message": "error reading from server: EOF"}}
|
||||
etcd_watch_revision = data["result"]["header"]["revision"].uint64_value();
|
||||
// Restart watchers from the same revision number as in the last received message,
|
||||
// not from the next one to protect against revision being split into multiple messages,
|
||||
// even though etcd guarantees not to do that **within a single watcher** without fragment=true:
|
||||
// https://etcd.io/docs/v3.5/learning/api_guarantees/#watch-apis
|
||||
// Revision contents are ALWAYS split into separate messages for different watchers though!
|
||||
// So generally we have to resume each watcher from its own revision...
|
||||
// Progress messages may have watch_id=-1 if sent on behalf of multiple watchers though.
|
||||
// And antietcd has an advanced semantic which merges the same revision for all watchers
|
||||
// into one message and just omits watch_id.
|
||||
// So we also have to handle the case where watch_id is -1 or not present (0).
|
||||
auto watch_rev = data["result"]["header"]["revision"].uint64_value();
|
||||
if (!watch_id || watch_id == UINT64_MAX)
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = watch_rev;
|
||||
else if (watch_id == ETCD_CONFIG_WATCH_ID)
|
||||
etcd_watch_revision_config = watch_rev;
|
||||
else if (watch_id == ETCD_PG_STATE_WATCH_ID)
|
||||
etcd_watch_revision_pg = watch_rev;
|
||||
else if (watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
etcd_watch_revision_osd = watch_rev;
|
||||
addresses_to_try.clear();
|
||||
}
|
||||
// First gather all changes into a hash to remove multiple overwrites
|
||||
@@ -457,7 +480,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/config0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "start_revision", etcd_watch_revision_config },
|
||||
{ "watch_id", ETCD_CONFIG_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
@@ -466,29 +489,21 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/osd/state/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/osd/state0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "start_revision", etcd_watch_revision_osd },
|
||||
{ "watch_id", ETCD_OSD_STATE_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/state/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg/state0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg0") },
|
||||
{ "start_revision", etcd_watch_revision_pg },
|
||||
{ "watch_id", ETCD_PG_STATE_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
http_post_message(etcd_watch_ws, WS_TEXT, json11::Json(json11::Json::object {
|
||||
{ "create_request", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/history/") },
|
||||
{ "range_end", base64_encode(etcd_prefix+"/pg/history0") },
|
||||
{ "start_revision", etcd_watch_revision },
|
||||
{ "watch_id", ETCD_PG_HISTORY_WATCH_ID },
|
||||
{ "progress_notify", true },
|
||||
} }
|
||||
}).dump());
|
||||
// FIXME: Do not watch /pg/history/ at all in client code (not in OSD)
|
||||
if (on_start_watcher_hook)
|
||||
{
|
||||
on_start_watcher_hook(etcd_watch_ws);
|
||||
@@ -573,7 +588,7 @@ void etcd_state_client_t::load_global_config()
|
||||
{
|
||||
global_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||
}
|
||||
global_immediate_commit = parse_immediate_commit(global_config["immediate_commit"].string_value());
|
||||
global_immediate_commit = parse_immediate_commit(global_config["immediate_commit"].string_value(), IMMEDIATE_ALL);
|
||||
on_load_config_hook(global_config);
|
||||
});
|
||||
}
|
||||
@@ -591,6 +606,11 @@ void etcd_state_client_t::load_pgs()
|
||||
{ "key", base64_encode(etcd_prefix+"/config/pgs") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/pg/config") },
|
||||
} }
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(etcd_prefix+"/config/inode/") },
|
||||
@@ -640,13 +660,10 @@ void etcd_state_client_t::load_pgs()
|
||||
return;
|
||||
}
|
||||
reset_pg_exists();
|
||||
if (!etcd_watch_revision)
|
||||
etcd_watch_revision_config = etcd_watch_revision_osd = etcd_watch_revision_pg = data["header"]["revision"].uint64_value()+1;
|
||||
if (this->log_level > 3)
|
||||
{
|
||||
etcd_watch_revision = data["header"]["revision"].uint64_value()+1;
|
||||
if (this->log_level > 3)
|
||||
{
|
||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision-1);
|
||||
}
|
||||
fprintf(stderr, "Loaded revision %ju of PG configuration\n", etcd_watch_revision_pg-1);
|
||||
}
|
||||
for (auto & res: data["responses"].array_items())
|
||||
{
|
||||
@@ -713,7 +730,7 @@ void etcd_state_client_t::clean_nonexistent_pgs()
|
||||
{
|
||||
if (!pg_cfg.state_exists)
|
||||
{
|
||||
if (this->log_level > 3)
|
||||
if (this->log_level > 3 && (pg_cfg.cur_primary || pg_cfg.cur_state))
|
||||
{
|
||||
fprintf(stderr, "PG %u/%u primary OSD disappeared after reload, forgetting it\n", pool_item.first, pg_it->first);
|
||||
}
|
||||
@@ -723,7 +740,7 @@ void etcd_state_client_t::clean_nonexistent_pgs()
|
||||
}
|
||||
if (!pg_cfg.history_exists)
|
||||
{
|
||||
if (this->log_level > 3)
|
||||
if (this->log_level > 3 && (pg_cfg.target_history.size() || pg_cfg.all_peers.size() || pg_cfg.epoch || pg_cfg.next_scrub))
|
||||
{
|
||||
fprintf(stderr, "PG %u/%u history disappeared after reload, forgetting it\n", pool_item.first, pg_it->first);
|
||||
}
|
||||
@@ -867,7 +884,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
pc.used_for_fs = pool_item.second["used_for_fs"].as_string();
|
||||
// Immediate Commit Mode
|
||||
pc.immediate_commit = pool_item.second["immediate_commit"].is_string()
|
||||
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value())
|
||||
? parse_immediate_commit(pool_item.second["immediate_commit"].string_value(), IMMEDIATE_ALL)
|
||||
: global_immediate_commit;
|
||||
// PG Stripe Size
|
||||
pc.pg_stripe_size = pool_item.second["pg_stripe_size"].uint64_value();
|
||||
@@ -895,8 +912,17 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
on_change_pool_config_hook();
|
||||
}
|
||||
}
|
||||
else if (key == etcd_prefix+"/config/pgs")
|
||||
else if (key == etcd_prefix+"/pg/config" || key == etcd_prefix+"/config/pgs")
|
||||
{
|
||||
if (key == etcd_prefix+"/pg/config")
|
||||
{
|
||||
new_pg_config = !value.is_null();
|
||||
}
|
||||
else if (new_pg_config)
|
||||
{
|
||||
// Ignore old key if the new one is present
|
||||
return;
|
||||
}
|
||||
for (auto & pool_item: this->pool_config)
|
||||
{
|
||||
for (auto & pg_item: pool_item.second.pg_config)
|
||||
@@ -1175,10 +1201,11 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t etcd_state_client_t::parse_immediate_commit(const std::string & immediate_commit_str)
|
||||
uint32_t etcd_state_client_t::parse_immediate_commit(const std::string & immediate_commit_str, uint32_t default_value)
|
||||
{
|
||||
return immediate_commit_str == "all" ? IMMEDIATE_ALL :
|
||||
(immediate_commit_str == "small" ? IMMEDIATE_SMALL : IMMEDIATE_NONE);
|
||||
return (immediate_commit_str == "all" ? IMMEDIATE_ALL :
|
||||
(immediate_commit_str == "small" ? IMMEDIATE_SMALL :
|
||||
(immediate_commit_str == "none" ? IMMEDIATE_NONE : default_value)));
|
||||
}
|
||||
|
||||
uint32_t etcd_state_client_t::parse_scheme(const std::string & scheme)
|
||||
|
@@ -10,10 +10,9 @@
|
||||
#include "timerfd_manager.h"
|
||||
|
||||
#define ETCD_CONFIG_WATCH_ID 1
|
||||
#define ETCD_PG_STATE_WATCH_ID 2
|
||||
#define ETCD_PG_HISTORY_WATCH_ID 3
|
||||
#define ETCD_OSD_STATE_WATCH_ID 4
|
||||
#define ETCD_TOTAL_WATCHES 4
|
||||
#define ETCD_OSD_STATE_WATCH_ID 2
|
||||
#define ETCD_PG_STATE_WATCH_ID 3
|
||||
#define ETCD_TOTAL_WATCHES 3
|
||||
|
||||
#define DEFAULT_BLOCK_SIZE 128*1024
|
||||
#define MIN_DATA_BLOCK_SIZE 4*1024
|
||||
@@ -95,7 +94,7 @@ protected:
|
||||
std::string selected_etcd_address;
|
||||
std::vector<std::string> addresses_to_try;
|
||||
std::vector<inode_watch_t*> watches;
|
||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||
bool new_pg_config = false;
|
||||
int ws_keepalive_timer = -1;
|
||||
int ws_alive = 0;
|
||||
bool rand_initialized = false;
|
||||
@@ -115,8 +114,11 @@ public:
|
||||
int log_level = 0;
|
||||
timerfd_manager_t *tfd = NULL;
|
||||
|
||||
http_co_t *etcd_watch_ws = NULL, *keepalive_client = NULL;
|
||||
int etcd_watches_initialised = 0;
|
||||
uint64_t etcd_watch_revision = 0;
|
||||
uint64_t etcd_watch_revision_config = 0;
|
||||
uint64_t etcd_watch_revision_osd = 0;
|
||||
uint64_t etcd_watch_revision_pg = 0;
|
||||
std::map<pool_id_t, pool_config_t> pool_config;
|
||||
std::map<osd_num_t, json11::Json> peer_states;
|
||||
std::set<osd_num_t> seen_peers;
|
||||
@@ -157,6 +159,6 @@ public:
|
||||
int address_count();
|
||||
~etcd_state_client_t();
|
||||
|
||||
static uint32_t parse_immediate_commit(const std::string & immediate_commit_str);
|
||||
static uint32_t parse_immediate_commit(const std::string & immediate_commit_str, uint32_t default_value);
|
||||
static uint32_t parse_scheme(const std::string & scheme_str);
|
||||
};
|
||||
|
@@ -18,7 +18,7 @@
|
||||
#include <sys/poll.h>
|
||||
|
||||
msgr_iothread_t::msgr_iothread_t():
|
||||
ring(RINGLOOP_DEFAULT_SIZE),
|
||||
ring(RINGLOOP_DEFAULT_SIZE, true),
|
||||
thread(&msgr_iothread_t::run, this)
|
||||
{
|
||||
eventfd = ring.register_eventfd();
|
||||
@@ -282,6 +282,10 @@ void osd_messenger_t::parse_config(const json11::Json & config)
|
||||
this->rdma_max_msg = 129*1024;
|
||||
this->rdma_odp = config["rdma_odp"].bool_value();
|
||||
#endif
|
||||
if (!osd_num)
|
||||
this->iothread_count = (uint32_t)config["client_iothread_count"].uint64_value();
|
||||
else
|
||||
this->iothread_count = (uint32_t)config["osd_iothread_count"].uint64_value();
|
||||
this->receive_buffer_size = (uint32_t)config["tcp_header_buffer_size"].uint64_value();
|
||||
if (!this->receive_buffer_size || this->receive_buffer_size > 1024*1024*1024)
|
||||
this->receive_buffer_size = 65536;
|
||||
|
@@ -161,7 +161,7 @@ protected:
|
||||
int osd_ping_timeout = 0;
|
||||
int log_level = 0;
|
||||
bool use_sync_send_recv = false;
|
||||
int iothread_count = 4;
|
||||
int iothread_count = 0;
|
||||
|
||||
#ifdef WITH_RDMA
|
||||
bool use_rdma = true;
|
||||
|
@@ -179,6 +179,8 @@ bool osd_messenger_t::handle_read_buffer(osd_client_t *cl, void *curbuf, int rem
|
||||
|
||||
bool osd_messenger_t::handle_finished_read(osd_client_t *cl)
|
||||
{
|
||||
cl->ping_time_remaining = 0;
|
||||
cl->idle_time_remaining = osd_idle_timeout;
|
||||
cl->recv_list.reset();
|
||||
if (cl->read_state == CL_READ_HDR)
|
||||
{
|
||||
|
@@ -253,7 +253,7 @@ nla_put_failure:
|
||||
const char *exe_name = NULL;
|
||||
|
||||
const char *help_text =
|
||||
"Vitastor NBD proxy " VERSION "\n"
|
||||
"Vitastor NBD proxy " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2020+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: Vitastor
|
||||
Description: Vitastor client library
|
||||
Version: 1.6.1
|
||||
Version: 1.8.0
|
||||
Libs: -L${libdir} -lvitastor_client
|
||||
Cflags: -I${includedir}
|
||||
|
||||
|
@@ -384,6 +384,28 @@ int vitastor_c_inode_get_readonly(void *handle)
|
||||
return watch->cfg.readonly;
|
||||
}
|
||||
|
||||
uint64_t vitastor_c_inode_get_parent_id(void *handle)
|
||||
{
|
||||
inode_watch_t *watch = (inode_watch_t*)handle;
|
||||
return watch->cfg.parent_id;
|
||||
}
|
||||
|
||||
char* vitastor_c_inode_get_meta(void *handle)
|
||||
{
|
||||
inode_watch_t *watch = (inode_watch_t*)handle;
|
||||
if (watch->cfg.meta.is_null())
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return strdup(watch->cfg.meta.dump().c_str());
|
||||
}
|
||||
|
||||
uint64_t vitastor_c_inode_get_mod_revision(void *handle)
|
||||
{
|
||||
inode_watch_t *watch = (inode_watch_t*)handle;
|
||||
return watch->cfg.mod_revision;
|
||||
}
|
||||
|
||||
uint32_t vitastor_c_inode_get_immediate_commit(vitastor_c *client, uint64_t inode_num)
|
||||
{
|
||||
auto pool_it = client->cli->st_cli.pool_config.find(INODE_POOL(inode_num));
|
||||
|
@@ -69,6 +69,9 @@ void vitastor_c_watch_inode(vitastor_c *client, char *image, VitastorIOHandler c
|
||||
void vitastor_c_close_watch(vitastor_c *client, void *handle);
|
||||
uint64_t vitastor_c_inode_get_size(void *handle);
|
||||
uint64_t vitastor_c_inode_get_num(void *handle);
|
||||
uint64_t vitastor_c_inode_get_parent_id(void *handle);
|
||||
char* vitastor_c_inode_get_meta(void *handle);
|
||||
uint64_t vitastor_c_inode_get_mod_revision(void *handle);
|
||||
uint32_t vitastor_c_inode_get_block_size(vitastor_c *client, uint64_t inode_num);
|
||||
uint32_t vitastor_c_inode_get_bitmap_granularity(vitastor_c *client, uint64_t inode_num);
|
||||
int vitastor_c_inode_get_readonly(void *handle);
|
||||
|
@@ -12,7 +12,9 @@ add_library(vitastor_cli STATIC
|
||||
cli_ls.cpp
|
||||
cli_create.cpp
|
||||
cli_modify.cpp
|
||||
cli_modify_osd.cpp
|
||||
cli_osd_tree.cpp
|
||||
cli_pg_ls.cpp
|
||||
cli_flatten.cpp
|
||||
cli_merge.cpp
|
||||
cli_rm_data.cpp
|
||||
|
@@ -17,7 +17,7 @@
|
||||
static const char *exe_name = NULL;
|
||||
|
||||
static const char* help_text =
|
||||
"Vitastor command-line tool " VERSION "\n"
|
||||
"Vitastor command-line tool " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
@@ -70,6 +70,7 @@ static const char* help_text =
|
||||
" --wait-list Retrieve full objects listings before starting to remove objects.\n"
|
||||
" Requires more memory, but allows to show correct removal progress.\n"
|
||||
" --min-offset Purge only data starting with specified offset.\n"
|
||||
" --max-offset Purge only data before specified offset.\n"
|
||||
"\n"
|
||||
"vitastor-cli merge-data <from> <to> [--target <target>]\n"
|
||||
" Merge layer data without changing metadata. Merge <from>..<to> to <target>.\n"
|
||||
@@ -118,11 +119,23 @@ static const char* help_text =
|
||||
" With --dry-run only checks if deletion is possible without data loss and\n"
|
||||
" redundancy degradation.\n"
|
||||
"\n"
|
||||
"vitastor-cli osd-tree\n"
|
||||
" Show current OSD tree.\n"
|
||||
"vitastor-cli osd-tree [-l|--long]\n"
|
||||
" Show current OSD tree, optionally with I/O statistics if -l is specified.\n"
|
||||
"\n"
|
||||
"vitastor-cli osds|ls-osd|osd-ls\n"
|
||||
" Show current OSDs as list.\n"
|
||||
"vitastor-cli osds|ls-osd|osd-ls [-l|--long]\n"
|
||||
" Show current OSDs as list, optionally with I/O statistics if -l is specified.\n"
|
||||
"\n"
|
||||
"vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>\n"
|
||||
" Set OSD reweight, tags or noout flag.\n"
|
||||
"\n"
|
||||
"vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]\n"
|
||||
" List PGs with any of listed state filters (^ or ! in the beginning is negation). Options:\n"
|
||||
" --pool <pool name or number> Only list PGs of the given pool.\n"
|
||||
" --min <min pg number> Only list PGs with number >= min.\n"
|
||||
" --max <max pg number> Only list PGs with number <= max.\n"
|
||||
" Examples:\n"
|
||||
" vitastor-cli pg-list active+degraded\n"
|
||||
" vitastor-cli pg-list ^active\n"
|
||||
"\n"
|
||||
"vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]\n"
|
||||
" Create a pool. Required parameters:\n"
|
||||
@@ -136,7 +149,7 @@ static const char* help_text =
|
||||
" --osd_tags <tag>[,<tag>]... Put pool only on OSDs tagged with all specified tags\n"
|
||||
" --block_size 128k Put pool only on OSDs with this data block size\n"
|
||||
" --bitmap_granularity 4k Put pool only on OSDs with this logical sector size\n"
|
||||
" --immediate_commit none Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
|
||||
" --immediate_commit all Put pool only on OSDs with this or larger immediate_commit (none < small < all)\n"
|
||||
" --level_placement <rules> Use additional failure domain rules (example: \"dc=112233\")\n"
|
||||
" --raw_placement <rules> Specify raw PG generation rules (see documentation for details)\n"
|
||||
" --primary_affinity_tags tags Prefer to put primary copies on OSDs with all specified tags\n"
|
||||
@@ -406,6 +419,23 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
|
||||
cfg["flat"] = true;
|
||||
action_cb = p->start_osd_tree(cfg);
|
||||
}
|
||||
else if (cmd[0] == "modify-osd")
|
||||
{
|
||||
// Modify OSD configuration
|
||||
if (cmd.size() > 1)
|
||||
cfg["osd_num"] = cmd[1];
|
||||
action_cb = p->start_modify_osd(cfg);
|
||||
}
|
||||
else if (cmd[0] == "pg-list" || cmd[0] == "pg-ls" || cmd[0] == "list-pg" || cmd[0] == "ls-pg" || cmd[0] == "ls-pgs")
|
||||
{
|
||||
// List PGs, optionally filtered by the given state expressions
|
||||
if (cmd.size() > 1)
|
||||
{
|
||||
cmd.erase(cmd.begin(), cmd.begin()+1);
|
||||
cfg["pg_state"] = cmd;
|
||||
}
|
||||
action_cb = p->start_pg_list(cfg);
|
||||
}
|
||||
else if (cmd[0] == "create-pool" || cmd[0] == "pool-create")
|
||||
{
|
||||
// Create a new pool
|
||||
|
@@ -65,7 +65,9 @@ public:
|
||||
std::function<bool(cli_result_t &)> start_ls(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_merge(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_modify(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_modify_osd(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_osd_tree(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_pg_list(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_pool_create(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_pool_modify(json11::Json);
|
||||
std::function<bool(cli_result_t &)> start_pool_rm(json11::Json);
|
||||
|
210
src/cmd/cli_modify_osd.cpp
Normal file
210
src/cmd/cli_modify_osd.cpp
Normal file
@@ -0,0 +1,210 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "cli.h"
|
||||
#include "cluster_client.h"
|
||||
#include "str_util.h"
|
||||
#include "http_client.h"
|
||||
|
||||
// Reweight OSD, change tags or set noout flag
|
||||
struct osd_changer_t
|
||||
{
|
||||
cli_tool_t *parent;
|
||||
|
||||
uint64_t osd_num = 0;
|
||||
bool set_tags = false;
|
||||
std::vector<std::string> new_tags;
|
||||
bool set_reweight = false;
|
||||
double new_reweight = 1;
|
||||
bool set_noout = false;
|
||||
double new_noout = false;
|
||||
bool force = false;
|
||||
|
||||
json11::Json::object osd_cfg;
|
||||
uint64_t osd_cfg_mod_rev = 0;
|
||||
json11::Json::array compare, success;
|
||||
|
||||
int state = 0;
|
||||
std::function<bool(cli_result_t &)> cb;
|
||||
cli_result_t result;
|
||||
|
||||
bool is_done()
|
||||
{
|
||||
return state == 100;
|
||||
}
|
||||
|
||||
void loop()
|
||||
{
|
||||
if (state == 1)
|
||||
goto resume_1;
|
||||
else if (state == 2)
|
||||
goto resume_2;
|
||||
if (!osd_num)
|
||||
{
|
||||
result = (cli_result_t){ .err = EINVAL, .text = "OSD number is missing" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
if (!set_tags && !set_reweight && !set_noout)
|
||||
{
|
||||
result = (cli_result_t){ .err = EINVAL, .text = "Nothing to update" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
if (set_reweight && new_reweight < 0)
|
||||
{
|
||||
result = (cli_result_t){ .err = EINVAL, .text = "Reweight can't be negative" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
parent->etcd_txn(json11::Json::object {
|
||||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/osd/stats/"+std::to_string(osd_num)) },
|
||||
} },
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/config/osd/"+std::to_string(osd_num)) },
|
||||
} },
|
||||
},
|
||||
} },
|
||||
});
|
||||
state = 1;
|
||||
resume_1:
|
||||
if (parent->waiting > 0)
|
||||
return;
|
||||
if (parent->etcd_err.err)
|
||||
{
|
||||
result = parent->etcd_err;
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
{
|
||||
auto osd_stats = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][0]["response_range"]["kvs"][0]).value;
|
||||
if (!osd_stats.is_object() && !force)
|
||||
{
|
||||
result = (cli_result_t){ .err = ENOENT, .text = "OSD "+std::to_string(osd_num)+" does not exist. Use --force to set configuration anyway" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
auto kv = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][1]["response_range"]["kvs"][0]);
|
||||
osd_cfg_mod_rev = kv.mod_revision;
|
||||
osd_cfg = kv.value.object_items();
|
||||
if (set_reweight)
|
||||
{
|
||||
if (new_reweight != 1)
|
||||
osd_cfg["reweight"] = new_reweight;
|
||||
else
|
||||
osd_cfg.erase("reweight");
|
||||
}
|
||||
if (set_tags)
|
||||
{
|
||||
if (new_tags.size())
|
||||
osd_cfg["tags"] = new_tags;
|
||||
else
|
||||
osd_cfg.erase("tags");
|
||||
}
|
||||
if (set_noout)
|
||||
{
|
||||
if (new_noout)
|
||||
osd_cfg["noout"] = true;
|
||||
else
|
||||
osd_cfg.erase("noout");
|
||||
}
|
||||
compare.push_back(json11::Json::object {
|
||||
{ "target", "MOD" },
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/config/osd/"+std::to_string(osd_num)) },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", osd_cfg_mod_rev+1 },
|
||||
});
|
||||
if (!osd_cfg.size())
|
||||
{
|
||||
success.push_back(json11::Json::object {
|
||||
{ "request_delete_range", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/config/osd/"+std::to_string(osd_num)) },
|
||||
} },
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
success.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/config/osd/"+std::to_string(osd_num)) },
|
||||
{ "value", base64_encode(json11::Json(osd_cfg).dump()) },
|
||||
} },
|
||||
});
|
||||
}
|
||||
}
|
||||
parent->etcd_txn(json11::Json::object {
|
||||
{ "compare", compare },
|
||||
{ "success", success },
|
||||
});
|
||||
state = 2;
|
||||
resume_2:
|
||||
if (parent->waiting > 0)
|
||||
return;
|
||||
if (parent->etcd_err.err)
|
||||
{
|
||||
result = parent->etcd_err;
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
if (!parent->etcd_result["succeeded"].bool_value())
|
||||
{
|
||||
result = (cli_result_t){ .err = EAGAIN, .text = "OSD "+std::to_string(osd_num)+" configuration was modified by someone else, please repeat your request" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
result = (cli_result_t){
|
||||
.err = 0,
|
||||
.text = "OSD "+std::to_string(osd_num)+" configuration modified",
|
||||
.data = osd_cfg,
|
||||
};
|
||||
state = 100;
|
||||
}
|
||||
};
|
||||
|
||||
// Builds an osd_changer_t from the JSON request and returns a callback that
// advances it. The callback returns true once the operation is finished; at
// that point the result is copied out and the changer is freed.
// Recognized keys: "osd_num", "tags" (comma-separated string or array),
// "reweight", "noout", "force".
std::function<bool(cli_result_t &)> cli_tool_t::start_modify_osd(json11::Json cfg)
{
    auto op = new osd_changer_t();
    op->parent = this;
    op->osd_num = cfg["osd_num"].uint64_value();
    op->force = cfg["force"].bool_value();

    const json11::Json & tags = cfg["tags"];
    if (!tags.is_null())
    {
        // A present but empty value means "clear tags"
        op->set_tags = true;
        if (tags.is_string())
        {
            if (tags.string_value() != "")
            {
                op->new_tags = explode(",", tags.string_value(), true);
            }
        }
        else if (tags.is_array())
        {
            for (auto & tag: tags.array_items())
            {
                op->new_tags.push_back(tag.as_string());
            }
        }
    }

    const json11::Json & reweight = cfg["reweight"];
    if (!reweight.is_null())
    {
        op->set_reweight = true;
        op->new_reweight = reweight.number_value();
    }

    const json11::Json & noout = cfg["noout"];
    if (!noout.is_null())
    {
        op->set_noout = true;
        op->new_noout = json_is_true(noout);
    }

    return [op](cli_result_t & result)
    {
        op->loop();
        if (!op->is_done())
        {
            return false;
        }
        result = op->result;
        delete op;
        return true;
    };
}
|
@@ -17,6 +17,7 @@ struct placement_osd_t
|
||||
uint64_t free;
|
||||
bool up;
|
||||
double reweight;
|
||||
bool noout;
|
||||
uint32_t block_size, bitmap_granularity, immediate_commit;
|
||||
};
|
||||
|
||||
@@ -132,9 +133,10 @@ resume_1:
|
||||
.free = kv.second["free"].uint64_value(),
|
||||
.up = parent->cli->st_cli.peer_states.find(kv.first) != parent->cli->st_cli.peer_states.end(),
|
||||
.reweight = 1,
|
||||
.noout = false,
|
||||
.block_size = (uint32_t)kv.second["data_block_size"].uint64_value(),
|
||||
.bitmap_granularity = (uint32_t)kv.second["bitmap_granularity"].uint64_value(),
|
||||
.immediate_commit = etcd_state_client_t::parse_immediate_commit(kv.second["immediate_commit"].string_value()),
|
||||
.immediate_commit = etcd_state_client_t::parse_immediate_commit(kv.second["immediate_commit"].string_value(), IMMEDIATE_NONE),
|
||||
};
|
||||
if (tree->nodes.find(osd.parent) == tree->nodes.end())
|
||||
{
|
||||
@@ -154,6 +156,7 @@ resume_1:
|
||||
for (auto & jtag: osd_cfg["tags"].array_items())
|
||||
osd.tags.push_back(jtag.string_value());
|
||||
}
|
||||
osd.noout = osd_cfg["noout"].bool_value();
|
||||
}
|
||||
auto np_it = node_placement.find(std::to_string(osd.num));
|
||||
if (np_it != node_placement.end())
|
||||
@@ -178,7 +181,7 @@ resume_1:
|
||||
return tree;
|
||||
}
|
||||
|
||||
std::string format_tree()
|
||||
void format_tree()
|
||||
{
|
||||
std::vector<std::string> node_seq = { "" };
|
||||
std::vector<int> indents = { -1 };
|
||||
@@ -198,6 +201,39 @@ resume_1:
|
||||
}
|
||||
}
|
||||
json11::Json::array fmt_items;
|
||||
if (parent->json_output)
|
||||
{
|
||||
for (int i = 1; i < node_seq.size(); i++)
|
||||
{
|
||||
auto & node = placement_tree->nodes.at(node_seq[i]);
|
||||
fmt_items.push_back(json11::Json::object{
|
||||
{ "type", node.level },
|
||||
{ "name", node.name },
|
||||
{ "parent", node.parent },
|
||||
});
|
||||
for (uint64_t osd_num: node.child_osds)
|
||||
{
|
||||
auto & osd = placement_tree->osds.at(osd_num);
|
||||
fmt_items.push_back(json11::Json::object{
|
||||
{ "type", "osd" },
|
||||
{ "name", osd.num },
|
||||
{ "parent", node.name },
|
||||
{ "up", osd.up ? "up" : "down" },
|
||||
{ "size", osd.size },
|
||||
{ "free", osd.free },
|
||||
{ "reweight", osd.reweight },
|
||||
{ "noout", osd.noout },
|
||||
{ "tags", osd.tags },
|
||||
{ "block", (uint64_t)osd.block_size },
|
||||
{ "bitmap", (uint64_t)osd.bitmap_granularity },
|
||||
{ "commit", osd.immediate_commit == IMMEDIATE_NONE ? "none" : (osd.immediate_commit == IMMEDIATE_ALL ? "all" : "small") },
|
||||
{ "op_stats", osd_stats[osd_num]["op_stats"] },
|
||||
});
|
||||
}
|
||||
}
|
||||
result.data = fmt_items;
|
||||
return;
|
||||
}
|
||||
for (int i = 1; i < node_seq.size(); i++)
|
||||
{
|
||||
auto & node = placement_tree->nodes.at(node_seq[i]);
|
||||
@@ -229,6 +265,7 @@ resume_1:
|
||||
{ "size", format_size(osd.size, false, true) },
|
||||
{ "used", format_q(100.0*(osd.size - osd.free)/osd.size)+" %" },
|
||||
{ "reweight", format_q(osd.reweight) },
|
||||
{ "noout", osd.noout ? "noout" : "-" },
|
||||
{ "tags", implode(",", osd.tags) },
|
||||
{ "block", format_size(osd.block_size, false, true) },
|
||||
{ "bitmap", format_size(osd.bitmap_granularity, false, true) },
|
||||
@@ -301,6 +338,10 @@ resume_1:
|
||||
{ "key", "commit" },
|
||||
{ "title", "IMM" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "noout" },
|
||||
{ "title", "NOOUT" },
|
||||
});
|
||||
if (show_stats)
|
||||
{
|
||||
cols.push_back(json11::Json::object{
|
||||
@@ -340,7 +381,7 @@ resume_1:
|
||||
{ "title", "LAT" },
|
||||
});
|
||||
}
|
||||
return print_table(fmt_items, cols, parent->color);
|
||||
result.text = print_table(fmt_items, cols, parent->color);
|
||||
}
|
||||
|
||||
void loop()
|
||||
@@ -351,7 +392,7 @@ resume_1:
|
||||
load_osd_tree();
|
||||
if (parent->waiting > 0)
|
||||
return;
|
||||
result.text = format_tree();
|
||||
format_tree();
|
||||
state = 100;
|
||||
}
|
||||
};
|
||||
|
288
src/cmd/cli_pg_ls.cpp
Normal file
288
src/cmd/cli_pg_ls.cpp
Normal file
@@ -0,0 +1,288 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2024
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include "cli.h"
|
||||
#include "cluster_client.h"
|
||||
#include "pg_states.h"
|
||||
#include "str_util.h"
|
||||
|
||||
struct pg_lister_t
|
||||
{
|
||||
cli_tool_t *parent;
|
||||
|
||||
uint64_t pool_id = 0;
|
||||
std::string pool_name;
|
||||
std::vector<std::string> pg_state;
|
||||
uint64_t min_pg_num = 0;
|
||||
uint64_t max_pg_num = 0;
|
||||
|
||||
std::map<pool_pg_num_t, json11::Json> pg_stats;
|
||||
|
||||
int state = 0;
|
||||
cli_result_t result;
|
||||
|
||||
bool is_done() { return state == 100; }
|
||||
|
||||
void load_pg_stats()
|
||||
{
|
||||
if (state == 1)
|
||||
goto resume_1;
|
||||
if (pool_name != "")
|
||||
{
|
||||
pool_id = 0;
|
||||
for (auto & pp: parent->cli->st_cli.pool_config)
|
||||
{
|
||||
if (pp.second.name == pool_name)
|
||||
{
|
||||
pool_id = pp.first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!pool_id)
|
||||
{
|
||||
result = (cli_result_t){ .err = ENOENT, .text = "Pool "+pool_name+" not found" };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
}
|
||||
parent->etcd_txn(json11::Json::object {
|
||||
{ "success", json11::Json::array {
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"/" : "/")) },
|
||||
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/pgstats"+(pool_id ? "/"+std::to_string(pool_id)+"0" : "0")) },
|
||||
} },
|
||||
},
|
||||
} },
|
||||
});
|
||||
state = 1;
|
||||
resume_1:
|
||||
if (parent->waiting > 0)
|
||||
return;
|
||||
if (parent->etcd_err.err)
|
||||
{
|
||||
result = parent->etcd_err;
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/", [&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
{
|
||||
pg_stats[(pool_pg_num_t){ .pool_id = pool_id, .pg_num = (pg_num_t)pg_num }] = value;
|
||||
});
|
||||
}
|
||||
|
||||
void format_pgs()
|
||||
{
|
||||
uint64_t is_not = ((uint64_t)1 << 63);
|
||||
std::vector<uint64_t> masks;
|
||||
if (pg_state.size())
|
||||
{
|
||||
for (auto & st: pg_state)
|
||||
{
|
||||
if (st.size())
|
||||
{
|
||||
uint64_t mask = 0;
|
||||
size_t pos = 0;
|
||||
if (st[0] == '!' || st[0] == '^')
|
||||
{
|
||||
mask |= is_not;
|
||||
pos++;
|
||||
}
|
||||
size_t prev = pos;
|
||||
while (true)
|
||||
{
|
||||
if (pos < st.size() && (st[pos] >= 'a' && st[pos] <= 'z' || st[pos] == '_'))
|
||||
pos++;
|
||||
else
|
||||
{
|
||||
if (pos > prev)
|
||||
{
|
||||
std::string bit = st.substr(prev, pos-prev);
|
||||
bool found = false;
|
||||
for (int i = 0; i < pg_state_bit_count; i++)
|
||||
{
|
||||
if (pg_state_names[i] == bit)
|
||||
{
|
||||
mask |= (uint64_t)1 << i;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
{
|
||||
result = (cli_result_t){ .err = EINVAL, .text = "Unknown PG state "+bit };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
}
|
||||
while (pos < st.size() && !(st[pos] >= 'a' && st[pos] <= 'z' || st[pos] == '_'))
|
||||
pos++;
|
||||
prev = pos;
|
||||
if (pos >= st.size())
|
||||
break;
|
||||
}
|
||||
}
|
||||
masks.push_back(mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
json11::Json::array pgs;
|
||||
for (auto & pp: parent->cli->st_cli.pool_config)
|
||||
{
|
||||
if ((!pool_id || pp.first == pool_id) && (pool_name == "" || pp.second.name == pool_name))
|
||||
{
|
||||
for (auto & pgp: pp.second.pg_config)
|
||||
{
|
||||
if (min_pg_num && pgp.first < min_pg_num || max_pg_num && pgp.first > max_pg_num)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (masks.size())
|
||||
{
|
||||
bool found = false;
|
||||
for (auto mask: masks)
|
||||
{
|
||||
if ((mask & is_not)
|
||||
? (pgp.second.cur_state & (mask & ~is_not)) != (mask & ~is_not)
|
||||
: ((pgp.second.cur_state & mask) == mask))
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found)
|
||||
continue;
|
||||
}
|
||||
json11::Json::array state_names;
|
||||
for (int i = 0; i < pg_state_bit_count; i++)
|
||||
{
|
||||
if (pgp.second.cur_state & (1 << i))
|
||||
{
|
||||
state_names.push_back(std::string(pg_state_names[i]));
|
||||
}
|
||||
}
|
||||
if (!pgp.second.cur_state)
|
||||
{
|
||||
state_names.push_back("offline");
|
||||
}
|
||||
auto stat = pg_stats[(pool_pg_num_t){ .pool_id = pp.first, .pg_num = pgp.first }].object_items();
|
||||
stat.erase("write_osd_set");
|
||||
stat["pool_id"] = (uint64_t)pp.first;
|
||||
stat["pool_name"] = pp.second.name;
|
||||
stat["pg_num"] = (uint64_t)pgp.first;
|
||||
stat["pause"] = pgp.second.pause;
|
||||
stat["state"] = state_names;
|
||||
stat["cur_primary"] = pgp.second.cur_primary;
|
||||
stat["target_primary"] = pgp.second.primary;
|
||||
stat["target_set"] = pgp.second.target_set;
|
||||
stat["target_history"] = pgp.second.target_history;
|
||||
stat["all_peers"] = pgp.second.all_peers;
|
||||
stat["epoch"] = pgp.second.epoch;
|
||||
stat["next_scrub"] = pgp.second.next_scrub;
|
||||
if (!parent->json_output)
|
||||
{
|
||||
stat["fmt_state"] = implode("+", state_names);
|
||||
stat["fmt_primary"] = (!pgp.second.primary && !pgp.second.cur_primary
|
||||
? "-"
|
||||
: (std::to_string(pgp.second.cur_primary) + (pgp.second.primary == pgp.second.cur_primary
|
||||
? ""
|
||||
: "->"+std::to_string(pgp.second.primary))));
|
||||
stat["fmt_target_set"] = implode(",", stat["target_set"]);
|
||||
uint64_t pg_block = pp.second.data_block_size * (pp.second.scheme == POOL_SCHEME_REPLICATED
|
||||
? 1 : (pp.second.pg_size-pp.second.parity_chunks));
|
||||
stat["fmt_clean"] = format_size(stat["clean_count"].uint64_value() * pg_block);
|
||||
stat["fmt_misplaced"] = format_size(stat["misplaced_count"].uint64_value() * pg_block);
|
||||
stat["fmt_degraded"] = format_size(stat["degraded_count"].uint64_value() * pg_block);
|
||||
stat["fmt_incomplete"] = format_size(stat["incomplete_count"].uint64_value() * pg_block);
|
||||
}
|
||||
pgs.push_back(stat);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (parent->json_output)
|
||||
{
|
||||
result.data = pgs;
|
||||
return;
|
||||
}
|
||||
json11::Json::array cols;
|
||||
if (!pool_id)
|
||||
{
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "pool_name" },
|
||||
{ "title", "POOL" },
|
||||
});
|
||||
}
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "pg_num" },
|
||||
{ "title", "NUM" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_target_set" },
|
||||
{ "title", "OSD SET" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_primary" },
|
||||
{ "title", "PRIMARY" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_clean" },
|
||||
{ "title", "DATA CLEAN" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_misplaced" },
|
||||
{ "title", "MISPLACED" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_misplaced" },
|
||||
{ "title", "DEGRADED" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_incomplete" },
|
||||
{ "title", "INCOMPLETE" },
|
||||
});
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "fmt_state" },
|
||||
{ "title", "STATE" },
|
||||
});
|
||||
result.text = print_table(pgs, cols, parent->color);
|
||||
}
|
||||
|
||||
void loop()
|
||||
{
|
||||
if (state == 1)
|
||||
goto resume_1;
|
||||
resume_1:
|
||||
load_pg_stats();
|
||||
if (parent->waiting > 0)
|
||||
return;
|
||||
format_pgs();
|
||||
state = 100;
|
||||
}
|
||||
};
|
||||
|
||||
// Builds a pg_lister_t from the JSON request and returns a callback that
// advances it. The callback returns true once listing is finished; at that
// point the result is copied out and the lister is freed.
// Recognized keys: "pool" (numeric ID or name), "pg_state" (string or array
// of strings), "min" and "max" (PG number range).
std::function<bool(cli_result_t &)> cli_tool_t::start_pg_list(json11::Json cfg)
{
    auto lister = new pg_lister_t();
    lister->parent = this;

    // A non-zero numeric value selects the pool by ID, anything else is treated as a name
    const json11::Json & pool = cfg["pool"];
    if (pool.uint64_value())
    {
        lister->pool_id = pool.uint64_value();
    }
    else
    {
        lister->pool_name = pool.string_value();
    }

    const json11::Json & states = cfg["pg_state"];
    for (auto & item: states.array_items())
    {
        lister->pg_state.push_back(item.string_value());
    }
    if (states.is_string())
    {
        lister->pg_state.push_back(states.string_value());
    }

    lister->min_pg_num = cfg["min"].uint64_value();
    lister->max_pg_num = cfg["max"].uint64_value();

    return [lister](cli_result_t & result)
    {
        lister->loop();
        if (!lister->is_done())
        {
            return false;
        }
        result = lister->result;
        delete lister;
        return true;
    };
}
|
@@ -71,8 +71,7 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
||||
auto & key = kv_it->first;
|
||||
auto & value = kv_it->second;
|
||||
if (key == "pg_size" || key == "parity_chunks" || key == "pg_minsize" ||
|
||||
key == "pg_count" || key == "max_osd_combinations" ||
|
||||
key == "bitmap_granularity" || key == "pg_stripe_size")
|
||||
key == "pg_count" || key == "max_osd_combinations")
|
||||
{
|
||||
if (value.is_number() && value.uint64_value() != value.number_value() ||
|
||||
value.is_string() && !value.uint64_value() && value.string_value() != "0")
|
||||
@@ -81,13 +80,14 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
||||
}
|
||||
value = value.uint64_value();
|
||||
}
|
||||
else if (key == "block_size")
|
||||
else if (key == "block_size" || key == "bitmap_granularity" || key == "pg_stripe_size")
|
||||
{
|
||||
uint64_t block_size = value.is_string() ? parse_size(value.string_value()) : value.uint64_value();
|
||||
if (!block_size)
|
||||
uint64_t sz = value.is_string() ? parse_size(value.string_value()) : value.uint64_value();
|
||||
if (!sz)
|
||||
{
|
||||
return key+" must be an integer with or without size suffix (K/M/G/T)";
|
||||
}
|
||||
value = sz;
|
||||
}
|
||||
else if (key == "name" || key == "scheme" || key == "immediate_commit" ||
|
||||
key == "failure_domain" || key == "root_node" || key == "scrub_interval" || key == "used_for_fs" ||
|
||||
@@ -319,7 +319,7 @@ std::string validate_pool_config(json11::Json::object & new_cfg, json11::Json ol
|
||||
}
|
||||
|
||||
// immediate_commit
|
||||
if (!cfg["immediate_commit"].is_null() && !etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value()))
|
||||
if (!cfg["immediate_commit"].is_null() && etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value(), UINT32_MAX) == UINT32_MAX)
|
||||
{
|
||||
return "immediate_commit must be one of \"all\", \"small\", or \"none\", but it is "+cfg["immediate_commit"].as_string();
|
||||
}
|
||||
|
@@ -19,6 +19,9 @@ struct pool_creator_t
|
||||
bool force = false;
|
||||
bool wait = false;
|
||||
|
||||
uint64_t block_size = 0, bitmap_granularity = 0;
|
||||
uint32_t immediate_commit = 0;
|
||||
|
||||
int state = 0;
|
||||
cli_result_t result;
|
||||
|
||||
@@ -187,13 +190,23 @@ resume_4:
|
||||
|
||||
if (cfg["pg_size"].uint64_value() > max_pg_size)
|
||||
{
|
||||
std::string pool_err = "Not enough matching OSDs to create pool."
|
||||
" Change parameters or add --force to create a degraded pool."
|
||||
"\n\nAt least "+std::to_string(cfg["pg_size"].uint64_value())+
|
||||
" (pg_size="+std::to_string(cfg["pg_size"].uint64_value())+") OSDs should have:"
|
||||
"\n- block_size "+format_size(block_size, false, true)+
|
||||
"\n- bitmap_granularity "+format_size(bitmap_granularity, false, true);
|
||||
if (immediate_commit == IMMEDIATE_ALL)
|
||||
pool_err += "\n- immediate_commit all";
|
||||
else if (immediate_commit == IMMEDIATE_SMALL)
|
||||
pool_err += "\n- immediate_commit all or small";
|
||||
if (cfg["osd_tags"].array_items().size())
|
||||
pool_err += "\n- '"+implode("', '", cfg["osd_tags"])+(cfg["osd_tags"].array_items().size() > 1 ? "' tags" : "' tag");
|
||||
if (failure_domain != "osd")
|
||||
pool_err += "\n- different parent '"+failure_domain+"' nodes";
|
||||
result = (cli_result_t){
|
||||
.err = EINVAL,
|
||||
.text =
|
||||
"There are "+std::to_string(max_pg_size)+" \""+failure_domain+"\" failure domains with OSDs matching tags and"
|
||||
" block_size/bitmap_granularity/immediate_commit parameters, but you want to create a"
|
||||
" pool with "+cfg["pg_size"].as_string()+" OSDs from different failure domains in a PG."
|
||||
" Change parameters or add --force if you want to create a degraded pool and add OSDs later."
|
||||
.text = pool_err,
|
||||
};
|
||||
state = 100;
|
||||
return;
|
||||
@@ -441,14 +454,14 @@ resume_8:
|
||||
// List of accepted osds
|
||||
std::vector<std::string> accepted_osds;
|
||||
|
||||
uint64_t p_block_size = cfg["block_size"].uint64_value()
|
||||
block_size = cfg["block_size"].uint64_value()
|
||||
? cfg["block_size"].uint64_value()
|
||||
: parent->cli->st_cli.global_block_size;
|
||||
uint64_t p_bitmap_granularity = cfg["bitmap_granularity"].uint64_value()
|
||||
bitmap_granularity = cfg["bitmap_granularity"].uint64_value()
|
||||
? cfg["bitmap_granularity"].uint64_value()
|
||||
: parent->cli->st_cli.global_bitmap_granularity;
|
||||
uint32_t p_immediate_commit = cfg["immediate_commit"].is_string()
|
||||
? etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value())
|
||||
immediate_commit = cfg["immediate_commit"].is_string()
|
||||
? etcd_state_client_t::parse_immediate_commit(cfg["immediate_commit"].string_value(), IMMEDIATE_ALL)
|
||||
: parent->cli->st_cli.global_immediate_commit;
|
||||
|
||||
for (size_t i = 0; i < osd_stats.size(); i++)
|
||||
@@ -456,10 +469,10 @@ resume_8:
|
||||
auto & os = osd_stats[i];
|
||||
// Get osd number
|
||||
auto osd_num = osds[i].as_string();
|
||||
if (!os["data_block_size"].is_null() && os["data_block_size"] != p_block_size ||
|
||||
!os["bitmap_granularity"].is_null() && os["bitmap_granularity"] != p_bitmap_granularity ||
|
||||
if (!os["data_block_size"].is_null() && os["data_block_size"] != block_size ||
|
||||
!os["bitmap_granularity"].is_null() && os["bitmap_granularity"] != bitmap_granularity ||
|
||||
!os["immediate_commit"].is_null() &&
|
||||
etcd_state_client_t::parse_immediate_commit(os["immediate_commit"].string_value()) < p_immediate_commit)
|
||||
etcd_state_client_t::parse_immediate_commit(os["immediate_commit"].string_value(), IMMEDIATE_NONE) < immediate_commit)
|
||||
{
|
||||
accepted_nodes.erase(osd_num);
|
||||
}
|
||||
|
@@ -214,10 +214,10 @@ resume_1:
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/stats/"
|
||||
parent->cli->st_cli.etcd_prefix+"/pgstats/"
|
||||
) },
|
||||
{ "range_end", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/stats0"
|
||||
parent->cli->st_cli.etcd_prefix+"/pgstats0"
|
||||
) },
|
||||
} },
|
||||
},
|
||||
@@ -235,7 +235,7 @@ resume_1:
|
||||
}
|
||||
// Calculate recovery percent
|
||||
std::map<pool_id_t, object_counts_t> counts;
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pg/stats/",
|
||||
parent->iterate_kvs_2(parent->etcd_result["responses"][0]["response_range"]["kvs"], "/pgstats/",
|
||||
[&](pool_id_t pool_id, uint64_t pg_num, json11::Json value)
|
||||
{
|
||||
auto & cnt = counts[pool_id];
|
||||
|
@@ -25,6 +25,7 @@ struct rm_inode_t
|
||||
uint64_t inode = 0;
|
||||
pool_id_t pool_id = 0;
|
||||
uint64_t min_offset = 0;
|
||||
uint64_t max_offset = 0;
|
||||
bool down_ok = false;
|
||||
|
||||
cli_tool_t *parent = NULL;
|
||||
@@ -52,7 +53,7 @@ struct rm_inode_t
|
||||
.obj_done = 0,
|
||||
.synced = parent->cli->get_immediate_commit(inode),
|
||||
});
|
||||
if (min_offset == 0)
|
||||
if (min_offset == 0 && max_offset == 0)
|
||||
{
|
||||
total_count += objects.size();
|
||||
}
|
||||
@@ -60,7 +61,7 @@ struct rm_inode_t
|
||||
{
|
||||
for (object_id oid: objects)
|
||||
{
|
||||
if (oid.stripe >= min_offset)
|
||||
if (oid.stripe >= min_offset && (!max_offset || oid.stripe < max_offset))
|
||||
{
|
||||
total_count++;
|
||||
}
|
||||
@@ -116,7 +117,7 @@ struct rm_inode_t
|
||||
}
|
||||
while (cur_list->in_flight < parent->iodepth && cur_list->obj_pos != cur_list->objects.end())
|
||||
{
|
||||
if (cur_list->obj_pos->stripe >= min_offset)
|
||||
if (cur_list->obj_pos->stripe >= min_offset && (!max_offset || cur_list->obj_pos->stripe < max_offset))
|
||||
{
|
||||
osd_op_t *op = new osd_op_t();
|
||||
op->op_type = OSD_OP_OUT;
|
||||
@@ -287,6 +288,7 @@ std::function<bool(cli_result_t &)> cli_tool_t::start_rm_data(json11::Json cfg)
|
||||
remover->down_ok = cfg["down_ok"].bool_value();
|
||||
remover->pool_id = INODE_POOL(remover->inode);
|
||||
remover->min_offset = cfg["min_offset"].uint64_value();
|
||||
remover->max_offset = cfg["max_offset"].uint64_value();
|
||||
return [remover](cli_result_t & result)
|
||||
{
|
||||
remover->loop();
|
||||
|
@@ -176,7 +176,7 @@ struct rm_osd_t
|
||||
json11::Json::object {
|
||||
{ "request_range", json11::Json::object {
|
||||
{ "key", base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/config/pgs"
|
||||
parent->cli->st_cli.etcd_prefix+"/pg/config"
|
||||
) },
|
||||
} },
|
||||
},
|
||||
@@ -229,7 +229,7 @@ struct rm_osd_t
|
||||
}
|
||||
if (!new_pgs.is_null())
|
||||
{
|
||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/config/pgs");
|
||||
auto pgs_key = base64_encode(parent->cli->st_cli.etcd_prefix+"/pg/config");
|
||||
rm_items.push_back(json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", pgs_key },
|
||||
@@ -427,7 +427,7 @@ struct rm_osd_t
|
||||
{ "target", "MOD" },
|
||||
{ "key", history_key },
|
||||
{ "result", "LESS" },
|
||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision+1 },
|
||||
{ "mod_revision", parent->cli->st_cli.etcd_watch_revision_pg+1 },
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@@ -133,7 +133,7 @@ resume_2:
|
||||
}
|
||||
int osd_count = 0, osd_up = 0;
|
||||
uint64_t total_raw = 0, free_raw = 0, free_down_raw = 0, down_raw = 0;
|
||||
parent->iterate_kvs_1(osd_stats, "/osd/stats", [&](uint64_t stat_osd_num, json11::Json value)
|
||||
parent->iterate_kvs_1(osd_stats, "/osd/stats/", [&](uint64_t stat_osd_num, json11::Json value)
|
||||
{
|
||||
osd_count++;
|
||||
auto osd_size = value["size"].uint64_value();
|
||||
|
@@ -5,7 +5,7 @@
|
||||
#include "str_util.h"
|
||||
|
||||
static const char *help_text =
|
||||
"Vitastor disk management tool " VERSION "\n"
|
||||
"Vitastor disk management tool " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2022+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
|
@@ -383,7 +383,7 @@ int disk_tool_t::pre_exec_osd(std::string device)
|
||||
|
||||
int disk_tool_t::purge_devices(const std::vector<std::string> & devices)
|
||||
{
|
||||
std::vector<uint64_t> osd_numbers;
|
||||
std::set<uint64_t> osd_numbers;
|
||||
json11::Json::array superblocks;
|
||||
for (auto & device: devices)
|
||||
{
|
||||
@@ -391,8 +391,11 @@ int disk_tool_t::purge_devices(const std::vector<std::string> & devices)
|
||||
if (!sb.is_null())
|
||||
{
|
||||
uint64_t osd_num = sb["params"]["osd_num"].uint64_value();
|
||||
osd_numbers.push_back(osd_num);
|
||||
superblocks.push_back(sb);
|
||||
if (osd_numbers.find(osd_num) == osd_numbers.end())
|
||||
{
|
||||
osd_numbers.insert(osd_num);
|
||||
superblocks.push_back(sb);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!osd_numbers.size())
|
||||
|
@@ -10,7 +10,7 @@ set_target_properties(vitastor_kv PROPERTIES PUBLIC_HEADER "kv/vitastor_kv.h")
|
||||
target_link_libraries(vitastor_kv
|
||||
vitastor_client
|
||||
)
|
||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VERSION} SOVERSION 0)
|
||||
set_target_properties(vitastor_kv PROPERTIES VERSION ${VITASTOR_VERSION} SOVERSION 0)
|
||||
|
||||
# vitastor-kv
|
||||
add_executable(vitastor-kv
|
||||
|
@@ -25,7 +25,7 @@ public:
|
||||
std::map<std::string, std::string> cfg;
|
||||
std::vector<std::string> cli_cmd;
|
||||
|
||||
kv_dbw_t *db = NULL;
|
||||
vitastorkv_dbw_t *db = NULL;
|
||||
ring_loop_t *ringloop = NULL;
|
||||
epoll_manager_t *epmgr = NULL;
|
||||
cluster_client_t *cli = NULL;
|
||||
@@ -144,7 +144,7 @@ void kv_cli_t::run()
|
||||
ringloop = new ring_loop_t(512);
|
||||
epmgr = new epoll_manager_t(ringloop);
|
||||
cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
|
||||
db = new kv_dbw_t(cli);
|
||||
db = new vitastorkv_dbw_t(cli);
|
||||
// Load image metadata
|
||||
while (!cli->is_ready())
|
||||
{
|
||||
@@ -289,7 +289,7 @@ void kv_cli_t::next_cmd()
|
||||
|
||||
struct kv_cli_list_t
|
||||
{
|
||||
kv_dbw_t *db = NULL;
|
||||
vitastorkv_dbw_t *db = NULL;
|
||||
void *handle = NULL;
|
||||
int format = 0;
|
||||
int n = 0;
|
||||
|
@@ -501,7 +501,7 @@ void kv_block_t::dump(int base_level)
|
||||
|
||||
void kv_db_t::open(inode_t inode_id, json11::Json cfg, std::function<void(int)> cb)
|
||||
{
|
||||
if (block_cache.size() > 0)
|
||||
if (block_cache.size() > 0 || this->inode_id)
|
||||
{
|
||||
cb(-EINVAL);
|
||||
return;
|
||||
@@ -1958,38 +1958,38 @@ void kv_op_t::next_go_up()
|
||||
}
|
||||
}
|
||||
|
||||
kv_dbw_t::kv_dbw_t(cluster_client_t *cli)
|
||||
vitastorkv_dbw_t::vitastorkv_dbw_t(cluster_client_t *cli)
|
||||
{
|
||||
db = new kv_db_t();
|
||||
db->cli = cli;
|
||||
}
|
||||
|
||||
kv_dbw_t::~kv_dbw_t()
|
||||
vitastorkv_dbw_t::~vitastorkv_dbw_t()
|
||||
{
|
||||
delete db;
|
||||
}
|
||||
|
||||
void kv_dbw_t::open(uint64_t inode_id, std::map<std::string, std::string> cfg, std::function<void(int)> cb)
|
||||
void vitastorkv_dbw_t::open(uint64_t inode_id, std::map<std::string, std::string> cfg, std::function<void(int)> cb)
|
||||
{
|
||||
db->open(inode_id, cfg, cb);
|
||||
}
|
||||
|
||||
void kv_dbw_t::set_config(std::map<std::string, std::string> cfg)
|
||||
void vitastorkv_dbw_t::set_config(std::map<std::string, std::string> cfg)
|
||||
{
|
||||
db->set_config(cfg);
|
||||
}
|
||||
|
||||
uint64_t kv_dbw_t::get_size()
|
||||
uint64_t vitastorkv_dbw_t::get_size()
|
||||
{
|
||||
return db->next_free;
|
||||
}
|
||||
|
||||
void kv_dbw_t::close(std::function<void()> cb)
|
||||
void vitastorkv_dbw_t::close(std::function<void()> cb)
|
||||
{
|
||||
db->close(cb);
|
||||
}
|
||||
|
||||
void kv_dbw_t::get(const std::string & key, std::function<void(int res, const std::string & value)> cb, bool cached)
|
||||
void vitastorkv_dbw_t::get(const std::string & key, std::function<void(int res, const std::string & value)> cb, bool cached)
|
||||
{
|
||||
auto *op = new kv_op_t;
|
||||
op->db = db;
|
||||
@@ -2003,7 +2003,7 @@ void kv_dbw_t::get(const std::string & key, std::function<void(int res, const st
|
||||
op->exec();
|
||||
}
|
||||
|
||||
void kv_dbw_t::set(const std::string & key, const std::string & value, std::function<void(int res)> cb,
|
||||
void vitastorkv_dbw_t::set(const std::string & key, const std::string & value, std::function<void(int res)> cb,
|
||||
std::function<bool(int res, const std::string & value)> cas_compare)
|
||||
{
|
||||
auto *op = new kv_op_t;
|
||||
@@ -2023,7 +2023,7 @@ void kv_dbw_t::set(const std::string & key, const std::string & value, std::func
|
||||
op->exec();
|
||||
}
|
||||
|
||||
void kv_dbw_t::del(const std::string & key, std::function<void(int res)> cb,
|
||||
void vitastorkv_dbw_t::del(const std::string & key, std::function<void(int res)> cb,
|
||||
std::function<bool(int res, const std::string & value)> cas_compare)
|
||||
{
|
||||
auto *op = new kv_op_t;
|
||||
@@ -2042,7 +2042,7 @@ void kv_dbw_t::del(const std::string & key, std::function<void(int res)> cb,
|
||||
op->exec();
|
||||
}
|
||||
|
||||
void* kv_dbw_t::list_start(const std::string & start)
|
||||
void* vitastorkv_dbw_t::list_start(const std::string & start)
|
||||
{
|
||||
if (!db->inode_id || db->closing)
|
||||
return NULL;
|
||||
@@ -2055,7 +2055,7 @@ void* kv_dbw_t::list_start(const std::string & start)
|
||||
return op;
|
||||
}
|
||||
|
||||
void kv_dbw_t::list_next(void *handle, std::function<void(int res, const std::string & key, const std::string & value)> cb)
|
||||
void vitastorkv_dbw_t::list_next(void *handle, std::function<void(int res, const std::string & key, const std::string & value)> cb)
|
||||
{
|
||||
kv_op_t *op = (kv_op_t*)handle;
|
||||
if (cb)
|
||||
@@ -2068,7 +2068,7 @@ void kv_dbw_t::list_next(void *handle, std::function<void(int res, const std::st
|
||||
op->next();
|
||||
}
|
||||
|
||||
void kv_dbw_t::list_close(void *handle)
|
||||
void vitastorkv_dbw_t::list_close(void *handle)
|
||||
{
|
||||
kv_op_t *op = (kv_op_t*)handle;
|
||||
delete op;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user