Compare commits

..

2 Commits

Author SHA1 Message Date
Vitaliy Filippov 249a233b37 WIP another experiment - "smart" iothreads
Test / test_snapshot_chain_ec (push) Successful in 2m55s Details
Test / test_rebalance_verify_imm (push) Successful in 3m55s Details
Test / test_root_node (push) Successful in 11s Details
Test / test_rebalance_verify (push) Successful in 4m32s Details
Test / test_switch_primary (push) Successful in 37s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 2m53s Details
Test / test_write (push) Successful in 46s Details
Test / test_write_no_same (push) Successful in 17s Details
Test / test_write_xor (push) Successful in 1m55s Details
Test / test_rebalance_verify_ec (push) Successful in 5m59s Details
Test / test_heal_pg_size_2 (push) Successful in 3m58s Details
Test / test_heal_ec (push) Successful in 3m51s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m47s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m10s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m50s Details
Test / test_osd_tags (push) Successful in 19s Details
Test / test_enospc (push) Successful in 1m11s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m1s Details
Test / test_enospc_xor (push) Successful in 1m11s Details
Test / test_heal_csum_32k (push) Failing after 10m14s Details
Test / test_enospc_imm (push) Successful in 46s Details
Test / test_enospc_imm_xor (push) Successful in 1m3s Details
Test / test_scrub (push) Successful in 28s Details
Test / test_scrub_zero_osd_2 (push) Successful in 28s Details
Test / test_scrub_xor (push) Successful in 34s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 39s Details
Test / test_scrub_pg_size_3 (push) Successful in 49s Details
Test / test_scrub_ec (push) Successful in 25s Details
Test / test_nfs (push) Successful in 16s Details
Test / test_heal_csum_4k (push) Successful in 8m55s Details
2024-07-03 11:05:45 +03:00
Vitaliy Filippov d07e072212 Change bool wr to event mask in epoll_manager 2024-07-01 00:30:59 +03:00
213 changed files with 1493 additions and 12928 deletions

View File

@ -16,7 +16,6 @@ env:
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
OSD_ARGS: '--etcd_quick_timeout 2000'
USE_RAMDISK: 1
concurrency:
group: ci-${{ github.ref }}
@ -198,24 +197,6 @@ jobs:
echo ""
done
test_etcd_fail_antietcd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: ANTIETCD=1 /root/vitastor/tests/test_etcd_fail.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance:
runs-on: ubuntu-latest
needs: build
@ -558,24 +539,6 @@ jobs:
echo ""
done
test_dd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_dd.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_root_node:
runs-on: ubuntu-latest
needs: build
@ -702,24 +665,6 @@ jobs:
echo ""
done
test_heal_antietcd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: ANTIETCD=1 /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_32k_dmj:
runs-on: ubuntu-latest
needs: build

View File

@ -34,10 +34,6 @@ for my $line (<>)
{
$test_name .= '_imm';
}
elsif ($1 eq 'ANTIETCD')
{
$test_name .= '_antietcd';
}
else
{
$test_name .= '_'.lc($1).'_'.$2;

View File

@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VITASTOR_VERSION "1.9.1")
set(VERSION "1.6.1")
add_subdirectory(src)

View File

@ -19,7 +19,7 @@ Vitastor нацелен в первую очередь на SSD и SSD+HDD кл
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, Proxmox, Kubernetes.
Другие драйверы могут также быть легко реализованы.
Подробности смотрите в документации по ссылкам ниже.
@ -42,7 +42,6 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
- Установка
- [Пакеты](docs/installation/packages.ru.md)
- [Proxmox](docs/installation/proxmox.ru.md)
- [OpenNebula](docs/installation/opennebula.ru.md)
- [OpenStack](docs/installation/openstack.ru.md)
- [Kubernetes CSI](docs/installation/kubernetes.ru.md)
- [Сборка из исходных кодов](docs/installation/source.ru.md)

View File

@ -19,7 +19,7 @@ supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
or internal systems of public clouds.
Vitastor supports QEMU, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
Vitastor supports QEMU, NBD, NFS protocols, OpenStack, Proxmox, Kubernetes drivers.
More drivers may be created easily.
Read more details below in the documentation.
@ -42,7 +42,6 @@ Read more details below in the documentation.
- Installation
- [Packages](docs/installation/packages.en.md)
- [Proxmox](docs/installation/proxmox.en.md)
- [OpenNebula](docs/installation/opennebula.en.md)
- [OpenStack](docs/installation/openstack.en.md)
- [Kubernetes CSI](docs/installation/kubernetes.en.md)
- [Building from Source](docs/installation/source.en.md)

View File

@ -1,9 +1,9 @@
VITASTOR_VERSION ?= v1.9.1
VERSION ?= v1.6.1
all: build push
build:
@docker build --rm -t vitalif/vitastor-csi:$(VITASTOR_VERSION) .
@docker build --rm -t vitalif/vitastor-csi:$(VERSION) .
push:
@docker push vitalif/vitastor-csi:$(VITASTOR_VERSION)
@docker push vitalif/vitastor-csi:$(VERSION)

View File

@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v1.9.1
image: vitalif/vitastor-csi:v1.6.1
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@ -121,7 +121,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v1.9.1
image: vitalif/vitastor-csi:v1.6.1
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "1.9.1"
vitastorCSIDriverVersion = "1.6.1"
)
// Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@ -1,4 +1,4 @@
vitastor (1.9.1-1) unstable; urgency=medium
vitastor (1.6.1-1) unstable; urgency=medium
* Bugfixes

6
debian/control vendored
View File

@ -53,9 +53,3 @@ Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client (= ${binary:Version})
Description: Vitastor Proxmox Virtual Environment storage plugin
Vitastor storage plugin for Proxmox Virtual Environment.
Package: vitastor-opennebula
Architecture: amd64
Depends: ${shlibs:Depends}, ${misc:Depends}, vitastor-client, patch, python3, jq
Description: Vitastor OpenNebula storage plugin
Vitastor storage plugin for OpenNebula.

View File

@ -1,3 +1,3 @@
mon usr/lib/vitastor/
mon usr/lib/vitastor/mon
mon/scripts/make-etcd usr/lib/vitastor/mon
mon/scripts/vitastor-mon.service /lib/systemd/system

View File

@ -6,6 +6,4 @@ if [ "$1" = "configure" ]; then
addgroup --system --quiet vitastor
adduser --system --quiet --ingroup vitastor --no-create-home --home /nonexistent vitastor
mkdir -p /etc/vitastor
mkdir -p /var/lib/vitastor
chown vitastor:vitastor /var/lib/vitastor
fi

View File

@ -1,3 +0,0 @@
opennebula/remotes var/lib/one/
opennebula/sudoers.d etc/
opennebula/install.sh var/lib/one/remotes/datastore/vitastor/

View File

@ -1,7 +0,0 @@
#!/bin/sh
set -e
if [ "$1" = "configure" ]; then
/var/lib/one/remotes/datastore/vitastor/install.sh
fi

View File

@ -1,4 +0,0 @@
interest /var/lib/one/remotes/datastore/downloader.sh
interest /etc/one/oned.conf
interest /etc/one/vmm_exec/vmm_execrc
interest /etc/apparmor.d/local/abstractions/libvirt-qemu

View File

@ -9,12 +9,12 @@ ARG REL=
WORKDIR /root
RUN set -e -x; \
if [ "$REL" = "buster" ]; then \
apt-get update; \
apt-get -y install wget; \
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo "deb http://deb.debian.org/debian $REL-backports main" >> /etc/apt/sources.list; \
echo >> /etc/apt/preferences; \
echo 'Package: *' >> /etc/apt/preferences; \
echo "Pin: release a=$REL-backports" >> /etc/apt/preferences; \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
@ -22,9 +22,10 @@ RUN set -e -x; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep fio
RUN apt-get --download-only source fio
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev libnl-3-dev libnl-genl-3-dev
ADD . /root/vitastor
RUN set -e -x; \
@ -36,10 +37,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
VER=${FULLVER%%-*}; \
cp -r /root/vitastor vitastor-$VER; \
cd vitastor-$VER; \
cp -r /root/vitastor vitastor-1.6.1; \
cd vitastor-1.6.1; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@ -51,14 +50,10 @@ RUN set -e -x; \
echo fio-headers.patch >> debian/patches/series; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL/vitastor-$VER; \
mkdir mon/node_modules; \
cd mon/node_modules; \
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
cd vitastor-$VER; \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.6.1.orig.tar.xz vitastor-1.6.1; \
cd vitastor-1.6.1; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
rm -rf /root/packages/vitastor-$REL/vitastor-*/

View File

@ -9,7 +9,6 @@
These parameters apply only to Vitastor clients (QEMU, fio, NBD and so on) and
affect their interaction with the cluster.
- [client_iothread_count](#client_iothread_count)
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_retry_enospc](#client_retry_enospc)
@ -24,23 +23,6 @@ affect their interaction with the cluster.
- [nbd_max_part](#nbd_max_part)
- [osd_nearfull_ratio](#osd_nearfull_ratio)
## client_iothread_count
- Type: integer
- Default: 0
Number of separate threads for handling TCP network I/O at client library
side. Enabling 4 threads usually makes it possible to increase peak performance of each
client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
100-150 to 400 thousand iops, but at the same time it increases latency.
Latency increase depends on CPU: with CPU power saving disabled latency
only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
iops decrease from 2000 to 1000). RDMA isn't affected by this option.
It's recommended to enable client I/O threads if you don't use RDMA and want
to increase peak client performance.
## client_retry_interval
- Type: milliseconds

View File

@ -9,7 +9,6 @@
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD и т.п.) и
затрагивают логику их работы с кластером.
- [client_iothread_count](#client_iothread_count)
- [client_retry_interval](#client_retry_interval)
- [client_eio_retry_interval](#client_eio_retry_interval)
- [client_retry_enospc](#client_retry_enospc)
@ -24,24 +23,6 @@
- [nbd_max_part](#nbd_max_part)
- [osd_nearfull_ratio](#osd_nearfull_ratio)
## client_iothread_count
- Тип: целое число
- Значение по умолчанию: 0
Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
RDMA данная опция не влияет.
Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
RDMA и хотите повысить пиковую производительность клиентов.
## client_retry_interval
- Тип: миллисекунды

View File

@ -56,24 +56,14 @@ Can't be smaller than the OSD data device sector.
## immediate_commit
- Type: string
- Default: all
- Default: false
One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
This parameter is also really important for performance.
TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
power loss protection (nonvolatile write-through cache) and also for most HDDs.
"none" or "small" should be only selected if you use desktop SSDs without
capacitors or drives with slow write-back cache that can't be disabled. Check
immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
Detailed explanation:
Another parameter which is really important for performance.
Desktop SSDs are very fast (100000+ iops) for simple random writes
without cache flush. However, they are really slow (only around 1000 iops)
if you try to fsync() each write, that is, if you want to guarantee that
each change gets actually persisted to the physical media.
if you try to fsync() each write, that is, when you want to guarantee that
each change gets immediately persisted to the physical media.
Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
"Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -85,8 +75,8 @@ really slow when used with desktop SSDs. Vitastor, however, can also
efficiently utilize desktop SSDs by postponing fsync until the client calls
it explicitly.
This is what this parameter regulates. When it's set to "all" Vitastor
cluster commits each change to disks immediately and clients just
This is what this parameter regulates. When it's set to "all" the whole
Vitastor cluster commits each change to disks immediately and clients just
ignore fsyncs because they know for sure that they're unneeded. This reduces
the amount of network roundtrips performed by clients and improves
performance. So it's always better to use server grade SSDs with
@ -109,5 +99,9 @@ Setting this parameter to "all" or "small" in OSD parameters requires enabling
[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
vitastor-disk tries to do that by default, first checking/disabling drive cache.
If it can't disable drive cache, OSDs get initialized with "none".
TLDR: For optimal performance, set immediate_commit to "all" if you only use
SSDs with supercapacitor-based power loss protection (nonvolatile
write-through cache) for both data and journals in the whole Vitastor
cluster. Set it to "small" if you only use such SSDs for journals. Leave
empty if your drives have write-back cache.

View File

@ -57,18 +57,9 @@ amplification) и эффективность распределения нагр
## immediate_commit
- Тип: строка
- Значение по умолчанию: all
- Значение по умолчанию: false
Одно из значений "none", "small" или "all". Глобальное значение, может быть
переопределено [на уровне пула](pool.ru.md#immediate_commit).
Данный параметр тоже важен для производительности.
Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
устанавливать только при использовании SSD настольного класса без
суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
Ещё один важный для производительности параметр.
Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -89,7 +80,7 @@ Power Loss Protection" - одинаково быстрые и со сбросо
эффективно утилизировать настольные SSD.
Данный параметр влияет как раз на это. Когда он установлен в значение "all",
кластер Vitastor мгновенно фиксирует каждое изменение на физические
весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -115,3 +106,10 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
Итого, вкратце: для оптимальной производительности установите
immediate_commit в значение "all", если вы используете в кластере только SSD
с суперконденсаторами и для данных, и для журналов. Если вы используете
такие SSD для всех журналов, но не для данных - можете установить параметр
в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
оставьте параметр пустым.

View File

@ -8,14 +8,6 @@
These parameters only apply to Monitors.
- [use_antietcd](#use_antietcd)
- [enable_prometheus](#enable_prometheus)
- [mon_http_port](#mon_http_port)
- [mon_http_ip](#mon_http_ip)
- [mon_https_cert](#mon_https_cert)
- [mon_https_key](#mon_https_key)
- [mon_https_client_auth](#mon_https_client_auth)
- [mon_https_ca](#mon_https_ca)
- [etcd_mon_ttl](#etcd_mon_ttl)
- [etcd_mon_timeout](#etcd_mon_timeout)
- [etcd_mon_retries](#etcd_mon_retries)
@ -25,87 +17,6 @@ These parameters only apply to Monitors.
- [placement_levels](#placement_levels)
- [use_old_pg_combinator](#use_old_pg_combinator)
## use_antietcd
- Type: boolean
- Default: false
Enable experimental built-in etcd replacement (clustered key-value database):
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
When set to true, monitor runs internal antietcd automatically if it finds
a network interface with an IP address matching one of addresses in the
`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
the monitor command line). If there are multiple matching addresses, it also
checks `antietcd_port` and antietcd is started for address with matching port.
By default, antietcd accepts connection on the selected IP address, but it
can also be overridden manually in the `antietcd_ip` option.
When antietcd is started, monitor stores cluster metadata itself and exposes
an etcd-compatible REST API. On disk, these metadata are stored in
`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
or antietcd_data_dir options). All other antietcd parameters
(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
cluster, cluster_key, persist_filter, stale_read can also be set in
Vitastor configuration with `antietcd_` prefix.
You can dump/load data to or from antietcd using Antietcd `anticli` tool:
```
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
```
## enable_prometheus
- Type: boolean
- Default: true
Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
Note that only the active (master) monitor exposes metrics, others return
HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
## mon_http_port
- Type: integer
- Default: 8060
HTTP port for monitors to listen on (including metrics exporter)
## mon_http_ip
- Type: string
IP address for monitors to listen on (all addresses by default)
## mon_https_cert
- Type: string
Path to PEM SSL certificate file for monitor to listen using HTTPS
## mon_https_key
- Type: string
Path to PEM SSL private key file for monitor to listen using HTTPS
## mon_https_client_auth
- Type: boolean
- Default: false
Enable HTTPS client certificate-based authorization for monitor connections
## mon_https_ca
- Type: string
Path to CA certificate for client HTTPS authorization
## etcd_mon_ttl
- Type: seconds

View File

@ -8,14 +8,6 @@
Данные параметры используются только мониторами Vitastor.
- [use_antietcd](#use_antietcd)
- [enable_prometheus](#enable_prometheus)
- [mon_http_port](#mon_http_port)
- [mon_http_ip](#mon_http_ip)
- [mon_https_cert](#mon_https_cert)
- [mon_https_key](#mon_https_key)
- [mon_https_client_auth](#mon_https_client_auth)
- [mon_https_ca](#mon_https_ca)
- [etcd_mon_ttl](#etcd_mon_ttl)
- [etcd_mon_timeout](#etcd_mon_timeout)
- [etcd_mon_retries](#etcd_mon_retries)
@ -25,89 +17,6 @@
- [placement_levels](#placement_levels)
- [use_old_pg_combinator](#use_old_pg_combinator)
## use_antietcd
- Тип: булево (да/нет)
- Значение по умолчанию: false
Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
Если параметр установлен в true, монитор запускает antietcd автоматически,
если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
командной строки монитора). Если таких адресов несколько, также проверяется
опция `antietcd_port` и antietcd запускается для адреса с соответствующим
портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
IP, но его также можно определить вручную опцией `antietcd_ip`.
При запуске antietcd монитор сам хранит центральные метаданные кластера и
выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
в конфигурации Vitastor с префиксом `antietcd_`.
Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
`anticli`:
```
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
```
## enable_prometheus
- Тип: булево (да/нет)
- Значение по умолчанию: true
Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
в задание по сбору метрик Prometheus.
Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
## mon_http_port
- Тип: целое число
- Значение по умолчанию: 8060
Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
## mon_http_ip
- Тип: строка
IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
## mon_https_cert
- Тип: строка
Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
## mon_https_key
- Тип: строка
Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
## mon_https_client_auth
- Тип: булево (да/нет)
- Значение по умолчанию: false
Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
## mon_https_ca
- Тип: строка
Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
## etcd_mon_ttl
- Тип: секунды

View File

@ -10,7 +10,6 @@ These parameters only apply to OSDs, are not fixed at the moment of OSD drive
initialization and can be changed - either with an OSD restart or, for some of
them, even without restarting by updating configuration in etcd.
- [osd_iothread_count](#osd_iothread_count)
- [etcd_report_interval](#etcd_report_interval)
- [etcd_stats_interval](#etcd_stats_interval)
- [run_primary](#run_primary)
@ -62,18 +61,6 @@ them, even without restarting by updating configuration in etcd.
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
## osd_iothread_count
- Type: integer
- Default: 0
TCP network I/O thread count for OSD. When non-zero, a single OSD process
may handle more TCP I/O, but at a cost of increased latency because thread
switching overhead occurs. RDMA isn't affected by this option.
Because of latency, instead of enabling OSD I/O threads it's recommended to
just create multiple OSDs per disk, or use RDMA.
## etcd_report_interval
- Type: seconds

View File

@ -11,7 +11,6 @@
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
изменения конфигурации в etcd.
- [osd_iothread_count](#osd_iothread_count)
- [etcd_report_interval](#etcd_report_interval)
- [etcd_stats_interval](#etcd_stats_interval)
- [run_primary](#run_primary)
@ -63,19 +62,6 @@
- [recovery_tune_sleep_min_us](#recovery_tune_sleep_min_us)
- [recovery_tune_sleep_cutoff_us](#recovery_tune_sleep_cutoff_us)
## osd_iothread_count
- Тип: целое число
- Значение по умолчанию: 0
Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
по сети больше данных, но ухудшает задержку из-за накладных расходов
переключения потоков. На работу RDMA опция не влияет.
Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
просто создавать по несколько OSD на каждом диске, или использовать RDMA.
## etcd_report_interval
- Тип: секунды

View File

@ -1,32 +1,3 @@
- name: client_iothread_count
type: int
default: 0
online: false
info: |
Number of separate threads for handling TCP network I/O at client library
side. Enabling 4 threads usually makes it possible to increase peak performance of each
client from approx. 2-3 to 7-8 GByte/s linear read/write and from approx.
100-150 to 400 thousand iops, but at the same time it increases latency.
Latency increase depends on CPU: with CPU power saving disabled latency
only increases by ~10 us (equivalent to Q=1 iops decrease from 10500 to 9500),
with CPU power saving enabled it may be as high as 500 us (equivalent to Q=1
iops decrease from 2000 to 1000). RDMA isn't affected by this option.
It's recommended to enable client I/O threads if you don't use RDMA and want
to increase peak client performance.
info_ru: |
Число отдельных потоков для обработки ввода-вывода через TCP сеть на стороне
клиентской библиотеки. Включение 4 потоков обычно позволяет поднять пиковую
производительность каждого клиента примерно с 2-3 до 7-8 Гбайт/с линейного
чтения/записи и примерно с 100-150 до 400 тысяч операций ввода-вывода в
секунду, но ухудшает задержку. Увеличение задержки зависит от процессора:
при отключённом энергосбережении CPU это всего ~10 микросекунд (равносильно
падению iops с Q=1 с 10500 до 9500), а при включённом это может быть
и 500 микросекунд (равносильно падению iops с Q=1 с 2000 до 1000). На работу
RDMA данная опция не влияет.
Рекомендуется включать клиентские потоки ввода-вывода, если вы не используете
RDMA и хотите повысить пиковую производительность клиентов.
- name: client_retry_interval
type: ms
min: 10

View File

@ -47,24 +47,14 @@
Не может быть меньше размера сектора дисков данных OSD.
- name: immediate_commit
type: string
default: all
default: false
info: |
One of "none", "all" or "small". Global value, may be overridden [at pool level](pool.en.md#immediate_commit).
This parameter is also really important for performance.
TLDR: default "all" is optimal for server-grade SSDs with supercapacitor-based
power loss protection (nonvolatile write-through cache) and also for most HDDs.
"none" or "small" should be only selected if you use desktop SSDs without
capacitors or drives with slow write-back cache that can't be disabled. Check
immediate_commit of your OSDs in [ls-osd](../usage/cli.en.md#ls-osd).
Detailed explanation:
Another parameter which is really important for performance.
Desktop SSDs are very fast (100000+ iops) for simple random writes
without cache flush. However, they are really slow (only around 1000 iops)
if you try to fsync() each write, that is, if you want to guarantee that
each change gets actually persisted to the physical media.
if you try to fsync() each write, that is, when you want to guarantee that
each change gets immediately persisted to the physical media.
Server-grade SSDs with "Advanced/Enhanced Power Loss Protection" or with
"Supercapacitor-based Power Loss Protection", on the other hand, are equally
@ -76,8 +66,8 @@
efficiently utilize desktop SSDs by postponing fsync until the client calls
it explicitly.
This is what this parameter regulates. When it's set to "all" Vitastor
cluster commits each change to disks immediately and clients just
This is what this parameter regulates. When it's set to "all" the whole
Vitastor cluster commits each change to disks immediately and clients just
ignore fsyncs because they know for sure that they're unneeded. This reduces
the amount of network roundtrips performed by clients and improves
performance. So it's always better to use server grade SSDs with
@ -100,19 +90,14 @@
[disable_journal_fsync](layout-osd.en.yml#disable_journal_fsync) and
[disable_meta_fsync](layout-osd.en.yml#disable_meta_fsync), setting it to
"all" also requires enabling [disable_data_fsync](layout-osd.en.yml#disable_data_fsync).
vitastor-disk tries to do that by default, first checking/disabling drive cache.
If it can't disable drive cache, OSDs get initialized with "none".
TLDR: For optimal performance, set immediate_commit to "all" if you only use
SSDs with supercapacitor-based power loss protection (nonvolatile
write-through cache) for both data and journals in the whole Vitastor
cluster. Set it to "small" if you only use such SSDs for journals. Leave
empty if your drives have write-back cache.
info_ru: |
Одно из значений "none", "small" или "all". Глобальное значение, может быть
переопределено [на уровне пула](pool.ru.md#immediate_commit).
Данный параметр тоже важен для производительности.
Вкратце: значение по умолчанию "all" оптимально для всех серверных SSD с
суперконденсаторами и также для большинства HDD. "none" и "small" имеет смысл
устанавливать только при использовании SSD настольного класса без
суперконденсаторов или дисков с медленным неотключаемым кэшем записи.
Проверьте настройку immediate_commit своих OSD в выводе команды [ls-osd](../usage/cli.ru.md#ls-osd).
Ещё один важный для производительности параметр.
Модели SSD для настольных компьютеров очень быстрые (100000+ операций в
секунду) при простой случайной записи без сбросов кэша. Однако они очень
@ -133,7 +118,7 @@
эффективно утилизировать настольные SSD.
Данный параметр влияет как раз на это. Когда он установлен в значение "all",
кластер Vitastor мгновенно фиксирует каждое изменение на физические
весь кластер Vitastor мгновенно фиксирует каждое изменение на физические
носители и клиенты могут просто игнорировать запросы fsync, т.к. они точно
знают, что fsync-и не нужны. Это уменьшает число необходимых обращений к OSD
по сети и улучшает производительность. Поэтому даже с Vitastor лучше всегда
@ -159,3 +144,10 @@
включения [disable_journal_fsync](layout-osd.ru.yml#disable_journal_fsync) и
[disable_meta_fsync](layout-osd.ru.yml#disable_meta_fsync), значение "all"
также требует включения [disable_data_fsync](layout-osd.ru.yml#disable_data_fsync).
Итого, вкратце: для оптимальной производительности установите
immediate_commit в значение "all", если вы используете в кластере только SSD
с суперконденсаторами и для данных, и для журналов. Если вы используете
такие SSD для всех журналов, но не для данных - можете установить параметр
в "small". Если и какие-то из дисков журналов имеют волатильный кэш записи -
оставьте параметр пустым.

View File

@ -1,103 +1,3 @@
- name: use_antietcd
type: bool
default: false
info: |
Enable experimental built-in etcd replacement (clustered key-value database):
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
When set to true, monitor runs internal antietcd automatically if it finds
a network interface with an IP address matching one of addresses in the
`etcd_address` configuration option (in `/etc/vitastor/vitastor.conf` or in
the monitor command line). If there are multiple matching addresses, it also
checks `antietcd_port` and antietcd is started for address with matching port.
By default, antietcd accepts connection on the selected IP address, but it
can also be overridden manually in the `antietcd_ip` option.
When antietcd is started, monitor stores cluster metadata itself and exposes
an etcd-compatible REST API. On disk, these metadata are stored in
`/var/lib/vitastor/mon_2379.json.gz` (can be overridden in antietcd_data_file
or antietcd_data_dir options). All other antietcd parameters
(see [here](https://git.yourcmc.ru/vitalif/antietcd/)) except node_id,
cluster, cluster_key, persist_filter, stale_read can also be set in
Vitastor configuration with `antietcd_` prefix.
You can dump/load data to or from antietcd using Antietcd `anticli` tool:
```
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
```
info_ru: |
Включить экспериментальный встроенный заменитель etcd (кластерную БД ключ-значение):
[antietcd](https://git.yourcmc.ru/vitalif/antietcd/).
Если параметр установлен в true, монитор запускает antietcd автоматически,
если обнаруживает сетевой интерфейс с одним из адресов, указанных в опции
конфигурации `etcd_address` (в `/etc/vitastor/vitastor.conf` или в опциях
командной строки монитора). Если таких адресов несколько, также проверяется
опция `antietcd_port` и antietcd запускается для адреса с соответствующим
портом. По умолчанию antietcd принимает подключения по выбранному совпадающему
IP, но его также можно определить вручную опцией `antietcd_ip`.
При запуске antietcd монитор сам хранит центральные метаданные кластера и
выставляет etcd-совместимое REST API. На диске эти метаданные хранятся в файле
`/var/lib/vitastor/mon_2379.json.gz` (можно переопределить параметрами
antietcd_data_file или antietcd_data_dir). Все остальные параметры antietcd
(смотрите [по ссылке](https://git.yourcmc.ru/vitalif/antietcd/)), за исключением
node_id, cluster, cluster_key, persist_filter, stale_read также можно задавать
в конфигурации Vitastor с префиксом `antietcd_`.
Вы можете выгружать/загружать данные в или из antietcd с помощью его инструмента
`anticli`:
```
npm exec anticli -e http://etcd:2379/v3 get --prefix '' --no-temp > dump.json
npm exec anticli -e http://antietcd:2379/v3 load < dump.json
```
- name: enable_prometheus
type: bool
default: true
info: |
Enable built-in Prometheus metrics exporter at mon_http_port (8060 by default).
Note that only the active (master) monitor exposes metrics, others return
HTTP 503. So you should add all monitor URLs to your Prometheus job configuration.
Grafana dashboard suitable for this exporter is here: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
info_ru: |
Включить встроенный Prometheus-экспортер метрик на порту mon_http_port (по умолчанию 8060).
Обратите внимание, что метрики выставляет только активный (главный) монитор, остальные
возвращают статус HTTP 503, поэтому вам следует добавлять адреса всех мониторов
в задание по сбору метрик Prometheus.
Дашборд для Grafana, подходящий для этого экспортера: [Vitastor-Grafana-6+.json](../../mon/scripts/Vitastor-Grafana-6+.json).
- name: mon_http_port
type: int
default: 8060
info: HTTP port for monitors to listen on (including metrics exporter)
info_ru: Порт, на котором мониторы принимают HTTP-соединения (в том числе для отдачи метрик)
- name: mon_http_ip
type: string
info: IP address for monitors to listen on (all addresses by default)
info_ru: IP-адрес, на котором мониторы принимают HTTP-соединения (по умолчанию все адреса)
- name: mon_https_cert
type: string
info: Path to PEM SSL certificate file for monitor to listen using HTTPS
info_ru: Путь к PEM-файлу SSL-сертификата для монитора, чтобы принимать соединения через HTTPS
- name: mon_https_key
type: string
info: Path to PEM SSL private key file for monitor to listen using HTTPS
info_ru: Путь к PEM-файлу секретного SSL-ключа для монитора, чтобы принимать соединения через HTTPS
- name: mon_https_client_auth
type: bool
default: false
info: Enable HTTPS client certificate-based authorization for monitor connections
info_ru: Включить в HTTPS-сервере монитора авторизацию по клиентским сертификатам
- name: mon_https_ca
type: string
info: Path to CA certificate for client HTTPS authorization
info_ru: Путь к удостоверяющему сертификату для авторизации клиентских HTTPS соединений
- name: etcd_mon_ttl
type: sec
min: 5

View File

@ -1,21 +1,3 @@
- name: osd_iothread_count
type: int
default: 0
info: |
TCP network I/O thread count for OSD. When non-zero, a single OSD process
may handle more TCP I/O, but at a cost of increased latency because thread
switching overhead occurs. RDMA isn't affected by this option.
Because of latency, instead of enabling OSD I/O threads it's recommended to
just create multiple OSDs per disk, or use RDMA.
info_ru: |
Число отдельных потоков для обработки ввода-вывода через TCP-сеть на
стороне OSD. Включение опции позволяет каждому отдельному OSD передавать
по сети больше данных, но ухудшает задержку из-за накладных расходов
переключения потоков. На работу RDMA опция не влияет.
Из-за задержек вместо включения потоков ввода-вывода OSD рекомендуется
просто создавать по несколько OSD на каждом диске, или использовать RDMA.
- name: etcd_report_interval
type: sec
default: 5

View File

@ -1,184 +0,0 @@
[Documentation](../../README.md#documentation) → Installation → OpenNebula
-----
[Читать на русском](opennebula.ru.md)
## Automatic Installation
OpenNebula plugin is packaged as `vitastor-opennebula` Debian and RPM package since Vitastor 1.9.0. So:
- Run `apt-get install vitastor-opennebula` or `yum install vitastor-opennebula` after installing OpenNebula on all nodes
- Check that it prints "OK, Vitastor OpenNebula patches successfully applied" or "OK, Vitastor OpenNebula patches are already applied"
- If it does not, refer to [Manual Installation](#manual-installation) and apply configuration file changes manually
- Make sure that Vitastor patched versions of QEMU and libvirt are installed
(`dpkg -l qemu-system-x86`, `dpkg -l | grep libvirt`, `rpm -qa | grep qemu`, `rpm -qa | grep libvirt-libs` should show "vitastor" in version names)
- [Block VM access to Vitastor cluster](#block-vm-access-to-vitastor-cluster)
## Manual Installation
Install OpenNebula. Then, on each node:
- Copy [opennebula/remotes](../../opennebula/remotes) into `/var/lib/one` recursively: `cp -r opennebula/remotes /var/lib/one/`
- Copy [opennebula/sudoers.d](../../opennebula/sudoers.d) to `/etc`: `cp -r opennebula/sudoers.d /etc/`
- Apply [downloader-vitastor.sh.diff](../../opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff) to `/var/lib/one/remotes/datastore/downloader.sh`:
`patch /var/lib/one/remotes/datastore/downloader.sh < opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff` - or read the patch and apply the same change manually
- Add `kvm-vitastor` to `LIVE_DISK_SNAPSHOTS` in `/etc/one/vmm_exec/vmm_execrc`
- If on Debian or Ubuntu (and AppArmor is used), add Vitastor config file path(s) to `/etc/apparmor.d/local/abstractions/libvirt-qemu`: for example,
`echo ' "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu`
- Apply changes to `/etc/one/oned.conf`
### oned.conf changes
1. Add deploy script override in kvm VM_MAD: add `-l deploy=deploy.vitastor` to ARGUMENTS.
```diff
VM_MAD = [
NAME = "kvm",
SUNSTONE_NAME = "KVM",
EXECUTABLE = "one_vmm_exec",
- ARGUMENTS = "-t 15 -r 0 kvm -p",
+ ARGUMENTS = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
DEFAULT = "vmm_exec/vmm_exec_kvm.conf",
TYPE = "kvm",
KEEP_SNAPSHOTS = "yes",
LIVE_RESIZE = "yes",
SUPPORT_SHAREABLE = "yes",
IMPORTED_VMS_ACTIONS = "terminate, terminate-hard, hold, release, suspend,
resume, delete, reboot, reboot-hard, resched, unresched, disk-attach,
disk-detach, nic-attach, nic-detach, snapshot-create, snapshot-delete,
resize, updateconf, update"
]
```
Optional: if you also want to save VM RAM checkpoints to Vitastor, use
`-l deploy=deploy.vitastor,save=save.vitastor,restore=restore.vitastor`
instead of just `-l deploy=deploy.vitastor`.
2. Add `vitastor` to TM_MAD.ARGUMENTS and DATASTORE_MAD.ARGUMENTS:
```diff
TM_MAD = [
EXECUTABLE = "one_tm",
- ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
+ ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
]
DATASTORE_MAD = [
EXECUTABLE = "one_datastore",
- ARGUMENTS = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
+ ARGUMENTS = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
]
```
3. Add INHERIT_DATASTORE_ATTR for two Vitastor attributes:
```
INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
```
4. Add TM_MAD_CONF and DS_MAD_CONF for Vitastor:
```
TM_MAD_CONF = [
NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
]
DS_MAD_CONF = [
NAME = "vitastor",
REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
PERSISTENT_ONLY = "NO",
MARKETPLACE_ACTIONS = "export"
]
```
## Create Datastores
Example Image and System Datastore definitions:
[opennebula/vitastor-imageds.conf](../../opennebula/vitastor-imageds.conf) and
[opennebula/vitastor-systemds.conf](../../opennebula/vitastor-systemds.conf).
Change parameters to your will:
- POOL_NAME is Vitastor pool name to store images.
- IMAGE_PREFIX is a string prepended to all Vitastor image names.
- BRIDGE_LIST is a list of hosts with access to Vitastor cluster, mostly used for image (not system) datastore operations.
- VITASTOR_CONF is the path to cluster configuration. Note that it should be also added to `/etc/apparmor.d/local/abstractions/libvirt-qemu` if you use AppArmor.
- STAGING_DIR is a temporary directory used when importing external images. Should have free space sufficient for downloading external images.
Then create datastores using `onedatastore create vitastor-imageds.conf` and `onedatastore create vitastor-systemds.conf` (or use UI).
## Block VM access to Vitastor cluster
Vitastor doesn't support any authentication yet, so you MUST block VM guest access to the Vitastor cluster at the network level.
If you use VLAN networking for VMs - make sure you use different VLANs for VMs and hypervisor/storage network and
block access between them using your firewall/switch configuration.
If you use something more stupid like bridged networking, you probably have to use manual firewall/iptables setup
to only allow access to Vitastor from hypervisor IPs.
Also you need to switch network to "Bridged & Security Groups" and enable IP spoofing filters in OpenNebula.
Problem is that OpenNebula's IP spoofing filter doesn't affect local interfaces of the hypervisor i.e. when
it's enabled a VM can't talk to other VMs or to the outer world using a spoofed IP, but it CAN talk to the
hypervisor if it takes an IP from its subnet. To fix that you also need some more iptables.
So the complete "stupid" bridged network filter setup could look like the following
(here `10.0.3.0/24` is the VM subnet and `10.0.2.0/24` is the hypervisor subnet):
```
# Allow incoming traffic from physical device
iptables -A INPUT -m physdev --physdev-in eth0 -j ACCEPT
# Drop incoming traffic from the VM bridge whose source IP is not in the VM subnet
iptables -A INPUT ! -s 10.0.3.0/24 -i onebr0 -j DROP
# Drop traffic from VMs to hypervisor/storage subnet
iptables -I FORWARD 1 -s 10.0.3.0/24 -d 10.0.2.0/24 -j DROP
```
## Testing
The OpenNebula plugin includes quite a few bash scripts, so here is a description of each one to give an idea of what they actually do.
| Script | Action | How to Test |
| ----------------------- | ----------------------------------------- | ------------------------------------------------------------------------------------ |
| vmm/kvm/deploy.vitastor | Start a VM | Create and start a VM with Vitastor disk(s): persistent / non-persistent / volatile. |
| vmm/kvm/save.vitastor | Save VM memory checkpoint | Stop a VM using "Stop" command. |
| vmm/kvm/restore.vitastor| Restore VM memory checkpoint | Start a VM back after stopping it. |
| datastore/clone | Copy an image as persistent | Create a VM template and instantiate it as persistent. |
| datastore/cp | Import an external image | Import a VM template with images from Marketplace. |
| datastore/export | Export an image as URL | Probably: export a VM template with images to Marketplace. |
| datastore/mkfs | Create an image with FS | Storage → Images → Create → Type: Datablock, Location: Empty disk image, Filesystem: Not empty. |
| datastore/monitor | Monitor used space in image datastore | Check reported used/free space in image datastore list. |
| datastore/rm | Remove a persistent image | Storage → Images → Select an image → Delete. |
| datastore/snap_delete | Delete a snapshot of a persistent image | Storage → Images → Select an image → Select a snapshot → Delete; <br> To create an image with snapshot: attach a persistent image to a VM; create a snapshot; detach the image. |
| datastore/snap_flatten | Revert an image to snapshot and delete other snapshots | Storage → Images → Select an image → Select a snapshot → Flatten. |
| datastore/snap_revert | Revert an image to snapshot | Storage → Images → Select an image → Select a snapshot → Revert. |
| datastore/stat | Get virtual size of an image in MB | No idea. Seems to be unused both in Vitastor and Ceph datastores. |
| tm/clone | Clone a non-persistent image to a VM disk | Attach a non-persistent image to a VM. |
| tm/context | Generate a contextualisation VM disk | Create a VM with enabled contextualisation (default). Common host FS-based version is used in Vitastor and Ceph datastores. |
| tm/cpds | Copy a VM disk / its snapshot to an image | Select a VM → Select a disk → Optionally select a snapshot → Save as. |
| tm/delete | Delete a cloned or volatile VM disk | Detach a volatile disk or a non-persistent image from a VM. |
| tm/failmigrate | Handle live migration failure | No action. Script is empty in Vitastor and Ceph. In other datastores, should roll back actions done by tm/premigrate. |
| tm/ln | Attach a persistent image to a VM | No action. Script is empty in Vitastor and Ceph. |
| tm/mkimage | Create a volatile disk, maybe with FS | Attach a volatile disk to a VM, with or without file system. |
| tm/mkswap | Create a volatile swap disk | Attach a volatile disk to a VM, formatted as swap. |
| tm/monitor | Monitor used space in system datastore | Check reported used/free space in system datastore list. |
| tm/mv | Move a migrated VM disk between hosts | Migrate a VM between hosts. In Vitastor and Ceph datastores, doesn't do any storage action. |
| tm/mvds | Detach a persistent image from a VM | No action. The opposite of tm/ln. Script is empty in Vitastor and Ceph. In other datastores, script may copy the image from VM host back to the datastore. |
| tm/postbackup | Executed after backup | Seems that the script just removes temporary files after backup. Perform a VM backup and check that temporary files are cleaned up. |
| tm/postbackup_live | Executed after backup of a running VM | Same as tm/postbackup, but for a running VM. |
| tm/postmigrate | Executed after VM live migration | No action. Only executed for system datastore, so the script tries to call other TMs for other disks. Except that, the script does nothing in Vitastor and Ceph datastores. |
| tm/prebackup | Actual backup script: backup VM disks | Set up "rsync" backup datastore → Backup a VM to it. |
| tm/prebackup_live | Backup VM disks of a running VM | Same as tm/prebackup, but also does fsfreeze/thaw. So perform a live backup, restore it and check that disks are consistent. |
| tm/premigrate | Executed before live migration | No action. Only executed for system datastore, so the script tries to call other TMs for other disks. Except that, the script does nothing in Vitastor and Ceph datastores. |
| tm/resize | Resize a VM disk | Select a VM → Select a non-persistent disk → Resize. |
| tm/restore | Restore VM disks from backup | Set up "rsync" backup datastore → Backup a VM to it → Restore it back. |
| tm/snap_create | Create a VM disk snapshot | Select a VM → Select a disk → Create snapshot. |
| tm/snap_create_live | Create a VM disk snapshot for a live VM | Select a running VM → Select a disk → Create snapshot. |
| tm/snap_delete | Delete a VM disk snapshot | Select a VM → Select a disk → Select a snapshot → Delete. |
| tm/snap_revert | Revert a VM disk to a snapshot | Select a VM → Select a disk → Select a snapshot → Revert. |

View File

@ -1,187 +0,0 @@
[Документация](../../README-ru.md#документация) → Установка → OpenNebula
-----
[Read in English](opennebula.en.md)
## Автоматическая установка
Плагин OpenNebula Vitastor распространяется как Debian и RPM пакет `vitastor-opennebula`, начиная с версии Vitastor 1.9.0. Так что:
- Запустите `apt-get install vitastor-opennebula` или `yum install vitastor-opennebula` после установки OpenNebula на всех серверах
- Проверьте, что он выводит "OK, Vitastor OpenNebula patches successfully applied" или "OK, Vitastor OpenNebula patches are already applied" в процессе установки
- Если сообщение не выведено, пройдите по шагам инструкцию [Ручная установка](#ручная-установка) и примените правки файлов конфигурации вручную
- Удостоверьтесь, что установлены версии QEMU и libvirt с изменениями Vitastor
(`dpkg -l qemu-system-x86`, `dpkg -l | grep libvirt`, `rpm -qa | grep qemu`, `rpm -qa | grep libvirt-libs` должны показывать "vitastor" в номере версии)
- [Заблокируйте доступ виртуальных машин в Vitastor](#блокировка-доступа-вм-в-vitastor)
## Ручная установка
Сначала установите саму OpenNebula. После этого, на каждом сервере:
- Скопируйте директорию [opennebula/remotes](../../opennebula/remotes) в `/var/lib/one`: `cp -r opennebula/remotes /var/lib/one/`
- Скопируйте директорию [opennebula/sudoers.d](../../opennebula/sudoers.d) в `/etc`: `cp -r opennebula/sudoers.d /etc/`
- Примените патч [downloader-vitastor.sh.diff](../../opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff) к `/var/lib/one/remotes/datastore/downloader.sh`:
`patch /var/lib/one/remotes/datastore/downloader.sh < opennebula/remotes/datastore/vitastor/downloader-vitastor.sh.diff` - либо прочитайте патч и примените изменение вручную
- Добавьте `kvm-vitastor` в список `LIVE_DISK_SNAPSHOTS` в файле `/etc/one/vmm_exec/vmm_execrc`
- Если вы используете Debian или Ubuntu (и AppArmor), добавьте пути к файлу(ам) конфигурации Vitastor в файл `/etc/apparmor.d/local/abstractions/libvirt-qemu`: например,
`echo ' "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu`
- Примените изменения `/etc/one/oned.conf`
### Изменения oned.conf
1. Добавьте переопределение скрипта deploy в VM_MAD kvm, добавив `-l deploy=deploy.vitastor` в `ARGUMENTS`:
```diff
VM_MAD = [
NAME = "kvm",
SUNSTONE_NAME = "KVM",
EXECUTABLE = "one_vmm_exec",
- ARGUMENTS = "-t 15 -r 0 kvm -p",
+ ARGUMENTS = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
DEFAULT = "vmm_exec/vmm_exec_kvm.conf",
TYPE = "kvm",
KEEP_SNAPSHOTS = "yes",
LIVE_RESIZE = "yes",
SUPPORT_SHAREABLE = "yes",
IMPORTED_VMS_ACTIONS = "terminate, terminate-hard, hold, release, suspend,
resume, delete, reboot, reboot-hard, resched, unresched, disk-attach,
disk-detach, nic-attach, nic-detach, snapshot-create, snapshot-delete,
resize, updateconf, update"
]
```
Опционально: если вы хотите также сохранять снимки памяти ВМ в Vitastor, добавьте
`-l deploy=deploy.vitastor,save=save.vitastor,restore=restore.vitastor`
вместо просто `-l deploy=deploy.vitastor`.
2. Добавьте `vitastor` в значения TM_MAD.ARGUMENTS и DATASTORE_MAD.ARGUMENTS:
```diff
TM_MAD = [
EXECUTABLE = "one_tm",
- ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
+ ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
]
DATASTORE_MAD = [
EXECUTABLE = "one_datastore",
- ARGUMENTS = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
+ ARGUMENTS = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
]
```
3. Добавьте строчки с INHERIT_DATASTORE_ATTR для двух атрибутов Vitastor-хранилищ:
```
INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
```
4. Добавьте TM_MAD_CONF и DS_MAD_CONF для Vitastor:
```
TM_MAD_CONF = [
NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
]
DS_MAD_CONF = [
NAME = "vitastor",
REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
PERSISTENT_ONLY = "NO",
MARKETPLACE_ACTIONS = "export"
]
```
## Создайте хранилища
Примеры настроек хранилищ образов (image) и дисков ВМ (system):
[opennebula/vitastor-imageds.conf](../../opennebula/vitastor-imageds.conf) и
[opennebula/vitastor-systemds.conf](../../opennebula/vitastor-systemds.conf).
Скопируйте настройки и поменяйте следующие параметры так, как вам необходимо:
- POOL_NAME - имя пула Vitastor для сохранения образов дисков.
- IMAGE_PREFIX - строка, добавляемая в начало имён образов дисков.
- BRIDGE_LIST - список серверов с доступом к кластеру Vitastor, используемых для операций с хранилищем образов (image, не system).
- VITASTOR_CONF - путь к конфигурации Vitastor. Имейте в виду, что этот путь также надо добавить в `/etc/apparmor.d/local/abstractions/libvirt-qemu`, если вы используете AppArmor.
- STAGING_DIR - путь к временному каталогу, используемому при импорте внешних образов. Должен иметь достаточно свободного места, чтобы вмещать скачанные образы.
После этого создайте хранилища с помощью команд `onedatastore create vitastor-imageds.conf` и `onedatastore create vitastor-systemds.conf` (либо через UI).
## Блокировка доступа ВМ в Vitastor
Vitastor пока не поддерживает никакую аутентификацию, так что вы ДОЛЖНЫ заблокировать доступ гостевых ВМ
в кластер Vitastor на сетевом уровне.
Если вы используете VLAN-сети для ВМ - удостоверьтесь, что ВМ и гипервизор/сеть хранения помещены в разные
изолированные друг от друга VLAN-ы.
Если вы используете что-то более примитивное, например, мосты (bridge), вам, скорее всего, придётся вручную
настроить iptables / межсетевой экран, чтобы разрешить доступ к Vitastor только с IP гипервизоров.
Также в этом случае нужно будет переключить обычные мосты на "Bridged & Security Groups" и включить фильтр
спуфинга IP в OpenNebula. Правда, реализация этого фильтра пока не полная, и она не блокирует доступ к
локальным интерфейсам гипервизора. То есть, включённый фильтр спуфинга IP запрещает ВМ отправлять трафик
с чужими IP к другим ВМ или во внешний мир, но не запрещает отправлять его напрямую гипервизору. Чтобы
исправить это, тоже нужны дополнительные правила iptables.
Таким образом, более-менее полная блокировка при использовании простой сети на сетевых мостах может
выглядеть так (здесь `10.0.3.0/24` - подсеть ВМ, `10.0.2.0/24` - подсеть гипервизора):
```
# Разрешаем входящий трафик с физического устройства
iptables -A INPUT -m physdev --physdev-in eth0 -j ACCEPT
# Запрещаем входящий трафик с моста ВМ, если IP источника не из подсети ВМ
iptables -A INPUT ! -s 10.0.3.0/24 -i onebr0 -j DROP
# Запрещаем трафик от ВМ к сети гипервизора
iptables -I FORWARD 1 -s 10.0.3.0/24 -d 10.0.2.0/24 -j DROP
```
## Тестирование
Плагин OpenNebula по большей части состоит из bash-скриптов, и чтобы было понятнее, что они
вообще делают - ниже приведены описания процедур, которыми можно протестировать каждый из них.
| Скрипт | Описание | Как протестировать |
| ----------------------- | --------------------------------------------- | ------------------------------------------------------------------------------------ |
| vmm/kvm/deploy.vitastor | Запустить виртуальную машину | Создайте и запустите виртуальную машину с дисками Vitastor: постоянным / непостоянным / волатильным (временным). |
| vmm/kvm/save.vitastor | Сохранить снимок памяти ВМ | Остановите виртуальную машину командой "Остановить". |
| vmm/kvm/restore.vitastor| Восстановить снимок памяти ВМ | Запустите ВМ после остановки обратно. |
| datastore/clone | Скопировать образ как "постоянный" | Создайте шаблон ВМ и создайте из него постоянную ВМ. |
| datastore/cp | Импортировать внешний образ | Импортируйте шаблон ВМ с образами дисков из Магазина OpenNebula. |
| datastore/export | Экспортировать образ как URL | Вероятно: экспортируйте шаблон ВМ с образами в Магазин. |
| datastore/mkfs | Создать образ с файловой системой | Хранилище → Образы → Создать → Тип: базовый блок данных, Расположение: пустой образ диска, Файловая система: любая непустая. |
| datastore/monitor | Вывод статистики места в хранилище образов | Проверьте статистику свободного/занятого места в списке хранилищ образов. |
| datastore/rm | Удалить "постоянный" образ | Хранилище → Образы → Выберите образ → Удалить. |
| datastore/snap_delete | Удалить снимок "постоянного" образа | Хранилище → Образы → Выберите образ → Выберите снимок → Удалить; <br> Чтобы создать образ со снимком: подключите постоянный образ к ВМ, создайте снимок, отключите образ. |
| datastore/snap_flatten | Откатить образ к снимку, удалив другие снимки | Хранилище → Образы → Выберите образ → Выберите снимок → "Выровнять" (flatten). |
| datastore/snap_revert | Откатить образ к снимку | Хранилище → Образы → Выберите образ → Выберите снимок → Откатить. |
| datastore/stat | Показать виртуальный размер образа в МБ | Неизвестно. По-видимому, в плагинах Vitastor и Ceph не используется. |
| tm/clone | Клонировать "непостоянный" образ в диск ВМ | Подключите "непостоянный" образ к ВМ. |
| tm/context | Создать диск контекстуализации ВМ | Создайте ВМ с контекстуализацией, как обычно. Но тестировать особенно нечего: в плагинах Vitastor и Ceph образ контекста хранится в локальной ФС гипервизора. |
| tm/cpds | Копировать диск ВМ/его снимок в новый образ | Выберите ВМ → Выберите диск → Опционально выберите снимок → "Сохранить как". |
| tm/delete | Удалить диск-клон или волатильный диск ВМ | Отключите волатильный или не-постоянный диск от ВМ. |
| tm/failmigrate | Обработать неудачную миграцию | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. В других плагинах скрипт должен откатывать действия tm/premigrate. |
| tm/ln | Подключить "постоянный" образ к ВМ | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. |
| tm/mkimage | Создать волатильный диск, без или с ФС | Подключите волатильный диск к ВМ, с или без файловой системы. |
| tm/mkswap | Создать волатильный диск подкачки | Подключите волатильный диск к ВМ, форматированный как диск подкачки (swap). |
| tm/monitor | Вывод статистики места в хранилище дисков ВМ | Проверьте статистику свободного/занятого места в списке хранилищ дисков ВМ. |
| tm/mv | Мигрировать диск ВМ между хостами | Мигрируйте ВМ между серверами. Правда, с точки зрения хранилища в плагинах Vitastor и Ceph этот скрипт ничего не делает. |
| tm/mvds | Отключить "постоянный" образ от ВМ | Тестировать нечего. Скрипт пуст в плагинах Vitastor и Ceph. В целом же скрипт обратный к tm/ln и в других хранилищах он может, например, копировать образ ВМ с диска гипервизора обратно в хранилище. |
| tm/postbackup | Выполняется после бэкапа | По-видимому, скрипт просто удаляет временные файлы после резервного копирования. Так что можно провести его и проверить, что на серверах не осталось временных файлов. |
| tm/postbackup_live | Выполняется после бэкапа запущенной ВМ | То же, что tm/postbackup, но для запущенной ВМ. |
| tm/postmigrate | Выполняется после миграции ВМ | Тестировать нечего. Однако, OpenNebula запускает скрипт только для системного хранилища, поэтому он вызывает аналогичные скрипты для хранилищ других дисков той же ВМ. Помимо этого в плагинах Vitastor и Ceph скрипт ничего не делает. |
| tm/prebackup | Выполнить резервное копирование дисков ВМ | Создайте хранилище резервных копий типа "rsync" → Забэкапьте в него ВМ. |
| tm/prebackup_live | То же самое для запущенной ВМ | То же, что tm/prebackup, но запускает fsfreeze/thaw (остановку доступа к дискам). Так что смысл теста - проведите резервное копирование и проверьте, что данные скопировались консистентно. |
| tm/premigrate | Выполняется перед миграцией ВМ | Тестировать нечего. Аналогично tm/postmigrate запускается только для системного хранилища. |
| tm/resize | Изменить размер диска ВМ | Выберите ВМ → Выберите непостоянный диск → Измените его размер. |
| tm/restore | Восстановить диски ВМ из бэкапа | Создайте хранилище резервных копий → Забэкапьте в него ВМ → Восстановите её обратно. |
| tm/snap_create | Создать снимок диска ВМ | Выберите ВМ → Выберите диск → Создайте снимок. |
| tm/snap_create_live | Создать снимок диска запущенной ВМ | Выберите запущенную ВМ → Выберите диск → Создайте снимок. |
| tm/snap_delete | Удалить снимок диска ВМ | Выберите ВМ → Выберите диск → Выберите снимок → Удалить. |
| tm/snap_revert | Откатить диск ВМ к снимку | Выберите ВМ → Выберите диск → Выберите снимок → Откатить. |

View File

@ -16,6 +16,8 @@
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
stable version from 0.9.x branch instead of 1.x
- For Debian 10 (Buster) also enable backports repository:
`deb http://deb.debian.org/debian buster-backports main`
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
## CentOS

View File

@ -16,6 +16,8 @@
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
- Для Debian 10 (Buster) также включите репозиторий backports:
`deb http://deb.debian.org/debian buster-backports main`
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
## CentOS

View File

@ -17,10 +17,10 @@ To enable Vitastor support in Proxmox Virtual Environment (6.4-8.1 are supported
- Restart pvedaemon: `systemctl restart pvedaemon`
`/etc/pve/storage.cfg` example (the only required option is vitastor_pool, all others
are listed below with their default values; `vitastor_ssd` is Proxmox storage pool id):
are listed below with their default values):
```
vitastor: vitastor_ssd
vitastor: vitastor
# pool to put new images into
vitastor_pool testpool
# path to the configuration file

View File

@ -16,10 +16,10 @@
- Перезапустите демон Proxmox: `systemctl restart pvedaemon`
Пример `/etc/pve/storage.cfg` (единственная обязательная опция - vitastor_pool, все остальные
перечислены внизу для понимания значений по умолчанию; `vitastor_ssd` - имя хранилища в Proxmox):
перечислены внизу для понимания значений по умолчанию):
```
vitastor: vitastor_ssd
vitastor: vitastor
# Пул, в который будут помещаться образы дисков
vitastor_pool testpool
# Путь к файлу конфигурации

View File

@ -34,15 +34,9 @@
- [Client write-back cache](../config/client.en.md#client_enable_writeback)
- [Intelligent recovery auto-tuning](../config/osd.en.md#recovery_tune_interval)
- [Clustered file system](../usage/nfs.en.md#vitastorfs)
- [Experimental internal etcd replacement - antietcd](../config/monitor.en.md#use_antietcd)
- [Built-in Prometheus metric exporter](../config/monitor.en.md#enable_prometheus)
## Plugins and tools
- [Proxmox storage plugin and packages](../installation/proxmox.en.md)
- [OpenNebula storage plugin](../installation/opennebula.en.md)
- [CSI plugin for Kubernetes](../installation/kubernetes.en.md)
- [OpenStack support: Cinder driver, Nova and libvirt patches](../installation/openstack.en.md)
- [Debian and CentOS packages](../installation/packages.en.md)
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
@ -50,6 +44,9 @@
- [Native QEMU driver](../usage/qemu.en.md)
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
- [NBD proxy for kernel mounts](../usage/nbd.en.md)
- [CSI plugin for Kubernetes](../installation/kubernetes.en.md)
- [OpenStack support: Cinder driver, Nova and libvirt patches](../installation/openstack.en.md)
- [Proxmox storage plugin and packages](../installation/proxmox.en.md)
- [Simplified NFS proxy for file-based image access emulation (suitable for VMWare)](../usage/nfs.en.md#pseudo-fs)
## Roadmap
@ -59,6 +56,7 @@ The following features are planned for the future:
- Control plane optimisation
- Other administrative tools
- Web GUI
- OpenNebula plugin
- iSCSI and NVMeoF gateways
- Multi-threaded client
- Faster failover

View File

@ -36,15 +36,9 @@
- [Буферизация записи на стороне клиента](../config/client.ru.md#client_enable_writeback)
- [Интеллектуальная автоподстройка скорости восстановления](../config/osd.ru.md#recovery_tune_interval)
- [Кластерная файловая система](../usage/nfs.ru.md#vitastorfs)
- [Экспериментальная встроенная замена etcd - antietcd](../config/monitor.ru.md#use_antietcd)
- [Встроенный Prometheus-экспортер метрик](../config/monitor.ru.md#enable_prometheus)
## Драйверы и инструменты
- [Плагин для Proxmox](../installation/proxmox.ru.md)
- [Плагин для OpenNebula](../installation/opennebula.ru.md)
- [CSI-плагин для Kubernetes](../installation/kubernetes.ru.md)
- [Базовая поддержка OpenStack: драйвер Cinder, патчи для Nova и libvirt](../installation/openstack.ru.md)
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
@ -52,6 +46,9 @@
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
- [NBD-прокси для монтирования образов ядром](../usage/nbd.ru.md) ("блочное устройство в режиме пользователя")
- [CSI-плагин для Kubernetes](../installation/kubernetes.ru.md)
- [Базовая поддержка OpenStack: драйвер Cinder, патчи для Nova и libvirt](../installation/openstack.ru.md)
- [Плагин для Proxmox](../installation/proxmox.ru.md)
- [Упрощённая NFS-прокси для эмуляции файлового доступа к образам (подходит для VMWare)](../usage/nfs.ru.md#псевдо-фс)
## Планы развития
@ -59,6 +56,7 @@
- Оптимизация слоя управления
- Другие инструменты администрирования
- Web-интерфейс
- Плагин для OpenNebula
- iSCSI и NVMeoF прокси
- Многопоточный клиент
- Более быстрое переключение при отказах

View File

@ -68,6 +68,10 @@ On the monitor hosts:
but some free unpartitioned space must be available because the script creates new partitions for journals.
- You can change OSD configuration in units or in `vitastor.conf`.
Check [Configuration Reference](../config.en.md) for parameter descriptions.
- If all your drives have capacitors, or even if they don't but you ran `vitastor-disk`
without `--disable_data_fsync off` at the first step, then put the following
setting into etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
- Start all OSDs: `systemctl start vitastor.target`
## Create a pool
@ -84,10 +88,6 @@ For EC pools the configuration should look like the following:
vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
```
Add `--immediate_commit none` if you added `--disable_data_fsync off` at the OSD
initialization step, or if `vitastor-disk` complained that it was unable to
disable the drive cache.
After you do this, one of the monitors will configure PGs and OSDs will start them.
If you use HDDs you should also add `"block_size": 1048576` to pool configuration.

View File

@ -69,6 +69,11 @@
для журналов, на SSD должно быть доступно свободное нераспределённое место.
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
смотрите в [справке по конфигурации](../config.ru.md).
- Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
в глобальную конфигурацию в etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
- Запустите все OSD: `systemctl start vitastor.target`
## Создайте пул
@ -85,10 +90,6 @@ vitastor-cli create-pool testpool --pg_size 2 --pg_count 256
vitastor-cli create-pool testpool --ec 2+2 --pg_count 256
```
Добавьте также опцию `--immediate_commit none`, если вы добавляли `--disable_data_fsync off`
на этапе инициализации OSD, либо если `vitastor-disk` ругался на невозможность отключения
кэша дисков.
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.

View File

@ -42,7 +42,7 @@ PG state always includes exactly 1 of the following base states:
- **offline** — PG isn't activated by any OSD at all. Either primary OSD isn't set for
this PG at all (if the pool is just created), or an unavailable OSD is set as primary,
or the primary OSD refuses to start this PG (for example, because of wrong block_size),
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/pg/config` in etcd.
or the PG is stopped by the monitor using `pause: true` flag in `/vitastor/config/pgs` in etcd.
- **starting** — primary OSD has acquired PG lock in etcd, PG is starting.
- **peering** — primary OSD requests PG object listings from secondary OSDs and calculates
the PG state.
@ -107,17 +107,16 @@ If a PG is active it can also have any number of the following additional states
## Removing a healthy disk
Before removing a healthy disk from the cluster set its OSD weight(s) to 0 to
move data away. To do that, run `vitastor-cli modify-osd --reweight 0 <OSD_NUMBER>`.
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
Zero weight can also be put manually into etcd key `/vitastor/config/osd/<OSD_NUMBER>`, for example:
Before removing a healthy disk from the cluster set its OSD weight(s) to 0 to
move data away. To do that, add `"reweight":0` to etcd key `/vitastor/config/osd/<OSD_NUMBER>`.
For example:
```
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
```
Then wait until rebalance finishes and remove OSD by running `vitastor-disk purge /dev/vitastor/osdN-data`.
## Removing a failed disk
If a disk is already dead, its OSD(s) are likely already stopped.
@ -150,7 +149,7 @@ POOL_ID=1
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
for i in $(seq 1 $PG_COUNT); do
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
done
```
@ -169,63 +168,21 @@ Upgrading is performed without stopping clients (VMs/containers), you just need
upgrade and restart servers one by one. However, ideally you should restart VMs too
to make them use the new version of the client library.
### 1.7.x to 1.8.0
Exceptions (specific upgrade instructions):
- Upgrading <= 1.1.x to 1.2.0 or later, if you use EC n+k with k>=2, is recommended
to be performed with full downtime: first you should stop all clients, then all OSDs,
then upgrade and start everything back — because versions before 1.2.0 have several
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
- Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
without this intermediate step, client I/O will hang until the end of upgrade process.
- Upgrading from <= 0.5.x to >= 0.6.x is not supported.
After upgrading version <= 1.7.x to version >= 1.8.0, BUT <= 1.9.0: restart all clients
(VMs and so on), otherwise they will hang when monitor clears old PG configuration key,
which happens 24 hours after upgrade.
This is fixed in 1.9.1. So, after upgrading version <= 1.7.x directly to version >= 1.9.1,
you DO NOT have to restart all old clients immediately - they will work like before until
you decide to upgrade them too. The downside is that you'll have to remove the old PG
configuration key (`/vitastor/config/pgs`) from etcd by hand when you make sure that all
your clients are restarted.
### 1.1.x to 1.2.0
Upgrading version <= 1.1.x to version >= 1.2.0, if you use EC n+k with k>=2, is recommended
to be performed with full downtime: first you should stop all clients, then all OSDs,
then upgrade and start everything back — because versions before 1.2.0 have several
bugs leading to invalid data being read in EC n+k, k>=2 configurations in degraded pools.
### 0.8.7 to 0.9.0
Versions <= 0.8.7 are incompatible with versions >= 0.9.0, so you should first
upgrade from <= 0.8.7 to 0.8.8 or 0.8.9, and only then to >= 0.9.x. If you upgrade
without this intermediate step, client I/O will hang until the end of upgrade process.
### 0.5.x to 0.6.x
Upgrading from <= 0.5.x to >= 0.6.x is not supported.
## Downgrade
Downgrades are also freely allowed, with the following specific exceptions:
### 1.8.0 to 1.7.1
Before downgrading from version >= 1.8.0 to version <= 1.7.1
you have to copy /vitastor/pg/config etcd key to /vitastor/config/pgs:
```
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
etcdctl --endpoints=http://... put /vitastor/config/pgs
```
Then you can just install older packages and restart all services.
If you performed downgrade without first copying that key, run "add all OSDs into the
history records of all PGs" from [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration).
### 1.0.0 to 0.9.x
Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 or later can't
be rolled back to 0.9.x or previous versions.
### 0.8.0 to 0.7.x
Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
start with older versions (0.4.x - 0.7.x). :-)
Rollback:
- Version 1.0.0 has a new disk format, so OSDs initialized on 1.0.0 can't be rolled
back to 0.9.x or previous versions.
- Versions before 0.8.0 don't have vitastor-disk, so OSDs, initialized by it, won't
start with 0.7.x or 0.6.x. :-)
## OSD memory usage

View File

@ -42,7 +42,7 @@
- **offline** — PG вообще не активирована ни одним OSD. Либо первичный OSD не назначен вообще
(если пул только создан), либо в качестве первичного назначен недоступный OSD, либо
назначенный OSD отказывается запускать эту PG (например, из-за несовпадения block_size),
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/pg/config` в etcd.
либо PG остановлена монитором через флаг `pause: true` в `/vitastor/config/pgs` в etcd.
- **starting** — первичный OSD захватил блокировку PG в etcd, PG запускается.
- **peering** — первичный OSD опрашивает вторичные OSD на предмет списков объектов данной PG и рассчитывает её состояние.
- **repeering** — PG ожидает завершения текущих операций ввода-вывода, после чего перейдёт в состояние **peering**.
@ -105,16 +105,14 @@ PG должны очень быстро переходить из них в др
## Удаление исправного диска
Перед удалением исправного диска из кластера установите его OSD вес в 0, чтобы убрать с него данные.
Для этого выполните команду `vitastor-cli modify-osd --reweight 0 <НОМЕР_OSD>`.
Дождитесь завершения перебалансировки данных, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
Также вес 0 можно прописать вручную прямо в etcd в ключ `/vitastor/config/osd/<НОМЕР_OSD>`, например:
Для этого добавьте в ключ `/vitastor/config/osd/<НОМЕР_OSD>` в etcd значение `"reweight":0`, например:
```
etcdctl --endpoints=http://1.1.1.1:2379/v3 put /vitastor/config/osd/1 '{"reweight":0}'
```
Дождитесь завершения ребаланса, после чего удалите OSD командой `vitastor-disk purge /dev/vitastor/osdN-data`.
## Удаление неисправного диска
Если диск уже умер, его OSD, скорее всего, уже будет/будут остановлен(ы).
@ -147,7 +145,7 @@ POOL_ID=1
ALL_OSDS=$(etcdctl --endpoints=your_etcd_address:2379 get --keys-only --prefix /vitastor/osd/stats/ | \
perl -e '$/ = undef; $a = <>; $a =~ s/\s*$//; $a =~ s!/vitastor/osd/stats/!!g; $a =~ s/\s+/,/g; print $a')
for i in $(seq 1 $PG_COUNT); do
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
etcdctl --endpoints=your_etcd_address:2379 put /vitastor/pg/history/$POOL_ID/$i '{"all_peers":['$ALL_OSDS']}'
done
```
@ -166,63 +164,21 @@ done
достаточно обновлять серверы по одному. Однако, конечно, чтобы запущенные виртуальные машины
начали использовать новую версию клиентской библиотеки, их тоже нужно перезапустить.
### 1.7.x -> 1.8.0
Исключения (особые указания при обновлении):
- Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
могли приводить к некорректному чтению данных в деградированных EC-пулах.
- Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
Иначе клиентский ввод-вывод зависнет до завершения обновления.
- Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
После обновления с версий <= 1.7.x до версий >= 1.8.0, НО <= 1.9.0: перезапустите всех
клиентов (процессы виртуальных машин можно перезапустить путём миграции на другой сервер),
иначе они зависнут, когда монитор удалит старый ключ конфигурации PG, что происходит через
24 часа после обновления.
Однако, это исправлено в 1.9.1. Так что, если вы обновляетесь с <= 1.7.x сразу до >= 1.9.1,
вам НЕ нужно сразу перезапускать всех клиентов - они будут работать, как раньше. Минус,
правда, в том, что старый ключ конфигурации PG (`/vitastor/config/pgs`) будет нужно удалить
вам из etcd вручную - после того, как вы убедитесь, что все клиенты перезапущены.
### 1.1.x -> 1.2.0
Обновляться с версий <= 1.1.x до версий >= 1.2.0, если вы используете EC n+k и k>=2,
рекомендуется с временной остановкой кластера — сначала нужно остановить всех клиентов,
потом все OSD, потом обновить и запустить всё обратно — из-за нескольких багов, которые
могли приводить к некорректному чтению данных в деградированных EC-пулах.
### 0.8.7 -> 0.9.0
Версии <= 0.8.7 несовместимы с версиями >= 0.9.0, поэтому при обновлении с <= 0.8.7
нужно сначала обновиться до 0.8.8 или 0.8.9, а уже потом до любых версий >= 0.9.x.
Иначе клиентский ввод-вывод зависнет до завершения обновления.
### 0.5.x -> 0.6.x
Обновление с версий 0.5.x и более ранних до 0.6.x и более поздних не поддерживается.
## Откат версии
Откат (понижение версии) тоже свободно разрешён, кроме указанных ниже случаев:
### 1.8.0 -> 1.7.1
Перед понижением версии с >= 1.8.0 до <= 1.7.1 вы должны скопировать ключ
etcd `/vitastor/pg/config` в `/vitastor/config/pgs`:
```
etcdctl --endpoints=http://... get --print-value-only /vitastor/pg/config | \
etcdctl --endpoints=http://... put /vitastor/config/pgs
```
После этого можно просто установить более старые пакеты и перезапустить все сервисы.
Если вы откатили версию, не скопировав предварительно этот ключ - выполните "добавление всех
OSD в исторические записи всех PG" из раздела [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов).
### 1.0.0 -> 0.9.x
В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
нельзя откатить до версии 0.9.x и более ранних.
### 0.8.0 -> 0.7.x
В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD не запустятся на
более ранних версиях (0.4.x - 0.7.x). :-)
Откат:
- В версии 1.0.0 поменялся дисковый формат, поэтому OSD, созданные на версии >= 1.0.0,
нельзя откатить до версии 0.9.x и более ранних.
- В версиях ранее 0.8.0 нет vitastor-disk, значит, созданные им OSD нельзя откатить
до 0.7.x или 0.6.x. :-)
## Потребление памяти OSD

View File

@ -16,7 +16,6 @@ It supports the following commands:
- [create](#create)
- [snap-create](#create)
- [modify](#modify)
- [dd](#dd)
- [rm](#rm)
- [flatten](#flatten)
- [rm-data](#rm-data)
@ -25,10 +24,6 @@ It supports the following commands:
- [fix](#fix)
- [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd)
- [osd-tree](#osd-tree)
- [ls-osd](#ls-osd)
- [modify-osd](#modify-osd)
- [pg-list](#pg-list)
- [create-pool](#create-pool)
- [modify-pool](#modify-pool)
- [ls-pools](#ls-pools)
@ -149,60 +144,19 @@ You should resize file system in the image, if present, before shrinking it.
* `-f|--force` - Proceed with shrinking or setting readwrite flag even if the image has children.
* `--down-ok` - Proceed with shrinking even if some data will be left on unavailable OSDs.
## dd
```
vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
[count=N] [seek/oseek=N] [skip/iseek=M] [iodepth=N] [status=progress] \
[conv=nocreat,noerror,nofsync,trunc,nosparse] [iflag=direct] [oflag=direct,append]
```
Copy data between Vitastor images, files and pipes.
Options can be specified in classic dd style (`key=value`) or in the usual style (`--key value`).
| <!-- --> | <!-- --> |
|-----------------|-------------------------------------------------------------------------|
| `iimg=<image>` | Copy from Vitastor image `<image>` |
| `if=<file>` | Copy from file `<file>` |
| `oimg=<image>` | Copy to Vitastor image `<image>` |
| `of=<file>` | Copy to file `<file>` |
| `bs=1M` | Set copy block size |
| `count=N` | Copy only N input blocks. If N ends in B it counts bytes, not blocks |
| `seek/oseek=N` | Skip N output blocks. If N ends in B it counts bytes, not blocks |
| `skip/iseek=N` | Skip N input blocks. If N ends in B it counts bytes, not blocks |
| `iodepth=N` | Send N reads or writes in parallel (default 4) |
| `status=LEVEL` | The LEVEL of information to print to stderr: none/noxfer/progress |
| `size=N` | Specify size for the created output file/image (defaults to input size) |
| `iflag=direct` | For input files only: use direct I/O |
| `oflag=direct` | For output files only: use direct I/O |
| `oflag=append` | For files only: append to output file |
| `conv=nocreat` | Do not create output file/image |
| `conv=trunc` | Truncate output file/image |
| `conv=noerror` | Continue copying after errors |
| `conv=nofsync` | Do not call fsync before finishing (default behaviour is fsync) |
| `conv=nosparse` | Write all output blocks including all-zero blocks |
## rm
`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
`vitastor-cli rm (--exact|--matching) <glob> ...`
Remove `<from>` or all layers between `<from>` and `<to>` (`<to>` must be a child of `<from>`),
rebasing all their children accordingly. --writers-stopped allows merging to be a bit
more effective in case of a single 'slim' read-write child and 'fat' removed parent:
the child is merged into parent and parent is renamed to child in that case.
In other cases parent layers are always merged into children.
Remove layer(s) and rebase all their children accordingly.
Other options:
In the first form, remove `<from>` or layers between `<from>` and its child `<to>`.
In the second form, remove all images with exact or pattern-matched names.
Options:
* `--writers-stopped` allows optimised removal in case of a single 'slim' read-write
child and 'fat' removed parent: the child is merged into parent and parent is renamed
to child in that case. In other cases parent layers are always merged into children.
* `--exact` - remove multiple images with given names.
* `--matching` - remove multiple images with names matching given glob patterns.
* `--down-ok` - continue deletion/merging even if some data will be left on unavailable OSDs.
* `--down-ok` - Continue deletion/merging even if some data will be left on unavailable OSDs.
## flatten
@ -220,7 +174,6 @@ Remove inode data without changing metadata.
--wait-list Retrieve full objects listings before starting to remove objects.
Requires more memory, but allows to show correct removal progress.
--min-offset Purge only data starting with specified offset.
--max-offset Purge only data before specified offset.
```
## merge-data
@ -293,82 +246,6 @@ Refuses to remove OSDs with data without `--force` and `--allow-data-loss`.
With `--dry-run` only checks if deletion is possible without data loss and
redundancy degradation.
## osd-tree
`vitastor-cli osd-tree [-l|--long]`
Show current OSD tree, optionally with I/O statistics if -l is specified.
Example output:
```
TYPE NAME UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
host kaveri
disk nvme0n1p1
osd 3 down 100G 0 % abc,kaveri 1 128k 4k none -
osd 4 down 100G 0 % 1 128k 4k none -
disk nvme1n1p1
osd 5 down 100G 0 % abc,kaveri 1 128k 4k none -
osd 6 down 100G 0 % 1 128k 4k none -
host stump
osd 1 up 100G 37.29 % osdone 1 128k 4k all -
osd 2 up 100G 26.8 % abc 1 128k 4k all -
osd 7 up 100G 21.84 % 1 128k 4k all -
osd 8 up 100G 21.63 % 1 128k 4k all -
osd 9 up 100G 20.69 % 1 128k 4k all -
osd 10 up 100G 21.61 % 1 128k 4k all -
osd 11 up 100G 21.53 % 1 128k 4k all -
osd 12 up 100G 22.4 % 1 128k 4k all -
```
## ls-osd
`vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
Show current OSDs as list, optionally with I/O statistics if -l is specified.
Example output:
```
OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
3 kaveri/nvme0n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
4 kaveri/nvme0n1p1 down 100G 0 % 1 128k 4k none -
5 kaveri/nvme1n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
6 kaveri/nvme1n1p1 down 100G 0 % 1 128k 4k none -
1 stump up 100G 37.29 % osdone 1 128k 4k all -
2 stump up 100G 26.8 % globl 1 128k 4k all -
7 stump up 100G 21.84 % 1 128k 4k all -
8 stump up 100G 21.63 % 1 128k 4k all -
9 stump up 100G 20.69 % 1 128k 4k all -
10 stump up 100G 21.61 % 1 128k 4k all -
11 stump up 100G 21.53 % 1 128k 4k all -
12 stump up 100G 22.4 % 1 128k 4k all -
```
## modify-osd
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
Set OSD reweight, tags or noout flag. See the detailed description in [OSD config documentation](../config/pool.en.md#osd-settings).
## pg-list
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
List PGs with any of listed state filters (^ or ! in the beginning is negation). Options:
```
--pool <pool name or number> Only list PGs of the given pool.
--min <min pg number> Only list PGs with number >= min.
--max <max pg number> Only list PGs with number <= max.
```
Examples:
`vitastor-cli pg-list active+degraded`
`vitastor-cli pg-list ^active`
## create-pool
`vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`

View File

@ -17,17 +17,12 @@ vitastor-cli - интерфейс командной строки для адм
- [create](#create)
- [snap-create](#create)
- [modify](#modify)
- [dd](#dd)
- [rm](#rm)
- [flatten](#flatten)
- [rm-data](#rm-data)
- [merge-data](#merge-data)
- [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd)
- [osd-tree](#osd-tree)
- [ls-osd](#ls-osd)
- [modify-osd](#modify-osd)
- [pg-list](#pg-list)
- [create-pool](#create-pool)
- [modify-pool](#modify-pool)
- [ls-pools](#ls-pools)
@ -152,61 +147,23 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
* `-f|--force` - Разрешить уменьшение или перевод в чтение-запись образа, у которого есть клоны.
* `--down-ok` - Разрешить уменьшение, даже если часть данных останется неудалённой на недоступных OSD.
## dd
```
vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
[count=N] [seek/oseek=N] [skip/iseek=M] [iodepth=N] [status=progress] \
[conv=nocreat,noerror,nofsync,trunc,nosparse] [iflag=direct] [oflag=direct,append]
```
Копировать данные между образами Vitastor, файлами и каналами.
Опции можно передавать в классическом стиле dd (`key=value`) или как обычно (`--key value`).
| <!-- --> | <!-- --> |
|-----------------|-------------------------------------------------------------------------|
| `iimg=<image>` | Копировать из образа Vitastor `<image>` |
| `if=<file>` | Копировать из файла `<file>` |
| `oimg=<image>` | Копировать в образ Vitastor `<image>` |
| `of=<file>` | Копировать в файл `<file>` |
| `bs=1M` | Задать размер блока копирования |
| `count=N` | Копировать не более N блоков. Если N заканчивается на B - то N байт. |
| `seek/oseek=N` | Пропустить N выходных блоков. Если N заканчивается на B - то N байт. |
| `skip/iseek=N` | Пропустить N входных блоков. Если N заканчивается на B - то N байт. |
| `iodepth=N` | Отправлять N чтений/записей параллельно (по умолчанию 4). |
| `status=LEVEL` | Уровень вывода в консоль: none/noxfer/progress |
| `size=N` | Задать размер выходного файла/образа (по умолчанию равен размеру входа).|
| `iflag=direct` | Только для входного файла: использовать прямой ввод-вывод |
| `oflag=direct` | Только для выходного файла: использовать прямой ввод-вывод |
| `oflag=append` | Только для файлов: дописывать в конец выходного файла |
| `conv=nocreat` | Не создавать выходной файл/образ |
| `conv=trunc` | Обрезать выходной файл/образ до размера входа |
| `conv=noerror` | Продолжать копирование после ошибок |
| `conv=nofsync` | Не вызывать fsync перед завершением |
| `conv=nosparse` | Записывать все выходные блоки, включая пустые |
## rm
`vitastor-cli rm <from> [<to>] [--writers-stopped] [--down-ok]`
`vitastor-cli rm (--exact|--matching) <glob> ...`
Удалить образ `<from>` или все слои от `<from>` до `<to>` (`<to>` должен быть дочерним
образом `<from>`), одновременно меняя родительские образы их клонов (если таковые есть).
Удалить образ(ы), корректно перебазируя их дочерние образы.
`--writers-stopped` позволяет чуть более эффективно удалять образы в частом случае, когда
у удаляемой цепочки есть только один дочерний образ, содержащий небольшой объём данных.
В этом случае дочерний образ вливается в родительский и удаляется, а родительский
переименовывается в дочерний.
В первой форме удаляет один образ `<from>` или все слои между `<from>` и его дочерним `<to>`.
В других случаях родительские слои вливаются в дочерние.
Во второй форме, удаляет все образы с точными именами или именами, подходящими под шаблон(ы).
Другие опции:
Опции:
* `--writers-stopped` позволяет чуть более эффективно удалять образы в частом случае, когда
у удаляемой цепочки есть только один дочерний образ, содержащий небольшой объём данных.
В этом случае дочерний образ вливается в родительский и удаляется, а родительский
переименовывается в дочерний.
* `--exact` - удалить все образы с точно заданными именами.
* `--matching` - удалить все образы с именами, подходящими под переданные glob-шаблоны.
* `--down-ok` - продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD.
* `--down-ok` - Продолжать удаление/слияние, даже если часть данных останется неудалённой на недоступных OSD.
## flatten
@ -225,7 +182,6 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
--wait-list Сначала запросить полный листинг объектов, а потом начать удалять.
Требует больше памяти, но позволяет правильно печатать прогресс удаления.
--min-offset Удалять только данные, начиная с заданного смещения.
--max-offset Удалять только данные до (исключительно) заданного смещения.
```
## merge-data
@ -307,83 +263,6 @@ vitastor-cli dd [iimg=<image> | if=<file>] [oimg=<image> | of=<file>] [bs=1M] \
С опцией `--dry-run` только проверяет, возможно ли удаление без потери данных и деградации
избыточности.
## osd-tree
`vitastor-cli osd-tree [-l|--long]`
Показать дерево OSD, со статистикой ввода-вывода, если установлено -l.
Пример вывода:
```
TYPE NAME UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
host kaveri
disk nvme0n1p1
osd 3 down 100G 0 % globl,kaveri 1 128k 4k none -
osd 4 down 100G 0 % 1 128k 4k none -
disk nvme1n1p1
osd 5 down 100G 0 % globl,kaveri 1 128k 4k none -
osd 6 down 100G 0 % 1 128k 4k none -
host stump
osd 1 up 100G 37.29 % osdone 1 128k 4k all -
osd 2 up 100G 26.8 % globl 1 128k 4k all -
osd 7 up 100G 21.84 % 1 128k 4k all -
osd 8 up 100G 21.63 % 1 128k 4k all -
osd 9 up 100G 20.69 % 1 128k 4k all -
osd 10 up 100G 21.61 % 1 128k 4k all -
osd 11 up 100G 21.53 % 1 128k 4k all -
osd 12 up 100G 22.4 % 1 128k 4k all -
```
## ls-osd
`vitastor-cli osds|ls-osd|osd-ls [-l|--long]`
Показать список OSD, со статистикой ввода-вывода, если установлено -l.
Пример вывода:
```
OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP IMM NOOUT
3 kaveri/nvme0n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
4 kaveri/nvme0n1p1 down 100G 0 % 1 128k 4k none -
5 kaveri/nvme1n1p1 down 100G 0 % globl,kaveri 1 128k 4k none -
6 kaveri/nvme1n1p1 down 100G 0 % 1 128k 4k none -
1 stump up 100G 37.29 % osdone 1 128k 4k all -
2 stump up 100G 26.8 % globl 1 128k 4k all -
7 stump up 100G 21.84 % 1 128k 4k all -
8 stump up 100G 21.63 % 1 128k 4k all -
9 stump up 100G 20.69 % 1 128k 4k all -
10 stump up 100G 21.61 % 1 128k 4k all -
11 stump up 100G 21.53 % 1 128k 4k all -
12 stump up 100G 22.4 % 1 128k 4k all -
```
## modify-osd
`vitastor-cli modify-osd [--tags tag1,tag2,...] [--reweight <number>] [--noout true/false] <osd_number>`
Установить вес OSD, теги или флаг noout. Смотрите подробное описание в [документации настроек OSD](../config/pool.ru.md#настройки-osd).
## pg-list
`vitastor-cli pg-list|pg-ls|list-pg|ls-pg|ls-pgs [OPTIONS] [state1+state2] [^state3] [...]`
Вывести список PG с состояниями, удовлетворяющими любому из переданных фильтров (^ или !
в начале фильтра означает отрицание). Опции:
```
--pool <pool name or number> Only list PGs of the given pool.
--min <min pg number> Only list PGs with number >= min.
--max <max pg number> Only list PGs with number <= max.
```
Примеры:
`vitastor-cli pg-list active+degraded`
`vitastor-cli pg-list ^active`
## create-pool
`vitastor-cli create-pool|pool-create <name> (-s <pg_size>|--ec <N>+<K>) -n <pg_count> [OPTIONS]`

View File

@ -11,8 +11,6 @@ Vitastor has two file system implementations. Both can be used via `vitastor-nfs
Commands:
- [mount](#mount)
- [start](#start)
- [upgrade](#upgrade)
- [defrag](#defrag)
## Pseudo-FS
@ -88,6 +86,10 @@ POSIX features currently not implemented in VitastorFS:
- Modification time (`mtime`) is updated lazily every second (like `-o lazytime`)
Other notable missing features which should be addressed in the future:
- Defragmentation of "shared" inodes. Files smaller than pool object size (block_size
multiplied by data part count if pool is EC) are internally stored in large block
volumes sequentially, one after another, and leave garbage after deleting or resizing.
A defragmenter will be implemented to collect this garbage.
- Inode ID reuse. Currently inode IDs always grow, the limit is 2^48 inodes, so
in theory you may hit it if you create and delete a very large number of files
- Compaction of the key-value B-Tree. Current implementation never merges or deletes
@ -137,37 +139,6 @@ Start network NFS server. Options:
| `--port <PORT>` | use port \<PORT> for NFS services (default is 2049) |
| `--portmap 0` | do not listen on port 111 (portmap/rpcbind, requires root) |
### upgrade
`vitastor-nfs --fs <NAME> upgrade`
Upgrade FS metadata. Can be run online, but server(s) should be restarted after upgrade.
### defrag
`vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
Defragment volumes used for small file storage having more than \<defrag_percent> %
of data removed. Can be run online.
In VitastorFS, small files are stored in large "volumes" / "shared inodes" one
after another. When you delete or extend such files, they are moved and garbage is left
behind. Defragmentation removes garbage and moves data still in use to new volumes.
Options:
| <!-- --> | <!-- --> |
|--------------------------|------------------------------------------------------------------------ |
| --volume_untouched 86400 | Defragment volumes last appended to at least this number of seconds ago |
| --defrag_percent 50 | Defragment volumes with at least this % of removed data |
| --defrag_block_count 16 | Read this number of pool blocks at once during defrag |
| --defrag_iodepth 16 | Move up to this number of files in parallel during defrag |
| --trace | Print verbose defragmentation status |
| --dry-run | Skip modifications, only print status |
| --recalc-stats | Recalculate all volume statistics |
| --include-empty | Include old and empty volumes; make sure to restart NFS servers before using it |
| --no-rm | Move, but do not delete data |
## Common options
| <!-- --> | <!-- --> |

View File

@ -11,8 +11,6 @@
Команды:
- [mount](#mount)
- [start](#start)
- [upgrade](#upgrade)
- [defrag](#defrag)
## Псевдо-ФС
@ -90,6 +88,11 @@ JSON-формате :-). Для инспекции содержимого БД
- Времена модификации (`mtime`) отслеживаются асинхронно (как будто ФС смонтирована с `-o lazytime`)
Другие недостающие функции, которые нужно добавить в будущем:
- Дефрагментация "общих инодов". На уровне реализации ФС файлы, меньшие, чем размер
объекта пула (block_size умножить на число частей данных, если пул EC),
упаковываются друг за другом в большие "общие" иноды/тома. Если такие файлы удалять
или увеличивать, они перемещаются и оставляют за собой "мусор", вот тут-то и нужен
дефрагментатор.
- Переиспользование номеров инодов. В текущей реализации номера инодов всё время
увеличиваются, так что в теории вы можете упереться в лимит, если насоздаёте
и наудаляете больше, чем 2^48 файлов.
@ -142,40 +145,6 @@ JSON-формате :-). Для инспекции содержимого БД
| `--port <PORT>` | использовать порт \<PORT> для NFS-сервисов (по умолчанию 2049) |
| `--portmap 0` | отключить сервис portmap/rpcbind на порту 111 (по умолчанию включён и требует root привилегий) |
### upgrade
`vitastor-nfs --fs <NAME> upgrade`
Обновить метаданные ФС. Можно запускать онлайн (при запущенных серверах NFS), но после выполнения их всё
же желательно перезапустить.
### defrag
`vitastor-nfs --fs <NAME> defrag [OPTIONS] [--dry-run]`
Дефрагментировать тома, используемые для хранения мелких файлов, в которых более, чем
<defrag_percent> процентов данных удалено. Можно запускать онлайн.
На уровне реализации ФС файлы, меньшие, чем размер объекта пула (block_size умножить на число
частей данных, если пул EC), упаковываются друг за другом в большие "тома" / "общие иноды".
Когда такие файлы удаляются или увеличиваются, они перемещаются и оставляют за собой "мусор".
При дефрагментации мусор удаляется, а всё ещё используемые данные перемещаются в новые тома.
Опции:
| <!-- --> | <!-- --> |
|--------------------------|------------------------------------------------------------------------ |
| --volume_untouched 86400 | Дефрагментировать только тома, в которые уже не писали это число секунд |
| --defrag_percent 50 | Дефрагментировать только тома, в которых этот % данных удалён |
| --defrag_block_count 16 | Читать это количество блоков пула за один раз |
| --defrag_iodepth 16 | Перемещать одновременно до этого числа файлов |
| --trace | Печатать детальную статистику дефрагментации |
| --dry-run | Не производить никаких изменений, только описать выполняемые действия |
| --recalc-stats | Пересчитать и сохранить статистику всех томов |
| --include-empty | Дефрагментировать старые и пустые тома; обязательно перезапустите NFS-сервера после использования этой опции |
| --no-rm | Перемещать, но не удалять данные |
## Общие опции
| <!-- --> | <!-- --> |

View File

@ -1,188 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const AntiEtcd = require('antietcd');
const vitastor_persist_filter = require('./vitastor_persist_filter.js');
const { b64, local_ips } = require('./utils.js');
// Wraps an embedded Antietcd instance for the monitor: leader election wait,
// watch subscription and API calls with retries.
class AntiEtcdAdapter
{
    // Start an embedded Antietcd node when config.use_antietcd is set and exactly one
    // entry of config.etcd_address refers to a local IP (optionally narrowed down by
    // config.antietcd_port). Resolves to the started instance, or to undefined when
    // Antietcd is disabled or no address is local. Exits the process when more than
    // one address matches.
    static async start_antietcd(config)
    {
        if (!config.use_antietcd)
        {
            return undefined;
        }
        // etcd_address may be an array or a comma-separated string
        let addresses = config.etcd_address;
        if (!(addresses instanceof Array))
        {
            addresses = addresses ? (''+(addresses||'')).split(/,+/) : [];
        }
        // Strip scheme and path from every URL, lowercase it and drop duplicates
        const unique = {};
        addresses.forEach((url) =>
        {
            const host_port = url.toLowerCase().replace(/^(https?:\/\/)/, '').replace(/\/.*$/, '');
            unique[host_port] = true;
        });
        const cluster = Object.keys(unique);
        const wanted_port = config.antietcd_port;
        const local = {};
        local_ips(true).forEach((ip) =>
        {
            local[ip] = true;
        });
        const candidates = cluster
            .map(addr => addr.split(':', 2))
            .filter(([ host, port ]) => local[host] && (!wanted_port || port == wanted_port));
        if (candidates.length > 1)
        {
            console.error('More than 1 etcd_address matches local IPs, please specify port');
            process.exit(1);
            return undefined;
        }
        if (candidates.length != 1)
        {
            console.log('Antietcd is enabled, but etcd_address does not contain local IPs, proceeding without it');
            return undefined;
        }
        const [ ip, port ] = candidates[0];
        // A single address means non-clustered mode (cluster: null)
        let peers = null;
        if (cluster.length != 1)
        {
            peers = {};
            for (const addr of cluster)
            {
                peers[addr] = "http://"+addr;
            }
        }
        const antietcd_config = {
            ip: ip,
            port: port,
            data: config.antietcd_data_file || ((config.antietcd_data_dir || '/var/lib/vitastor') + '/mon_'+port+'.json.gz'),
            persist_filter: vitastor_persist_filter({ vitastor_prefix: config.etcd_prefix || '/vitastor' }),
            node_id: ip+':'+port, // node_id = ip:port
            cluster: peers,
            cluster_key: (config.etcd_prefix || '/vitastor'),
            stale_read: 1,
            log_level: 1,
        };
        // Pass through antietcd_* options. Keys already computed above are kept,
        // except for ip and cluster_key which may be overridden explicitly
        for (const key in config)
        {
            if (!key.startsWith('antietcd_'))
            {
                continue;
            }
            const option = key.slice(9);
            const overridable = !(option in antietcd_config) || option == 'ip' || option == 'cluster_key';
            if (overridable)
            {
                antietcd_config[option] = config[key];
            }
        }
        console.log('Starting Antietcd node '+antietcd_config.node_id);
        const antietcd = new AntiEtcd(antietcd_config);
        await antietcd.start();
        return antietcd;
    }

    // mon: monitor instance, antietcd: started Antietcd instance.
    // Subscribes to 'raftchange' events to release become_master() waiters
    constructor(mon, antietcd)
    {
        this.mon = mon;
        this.antietcd = antietcd;
        // Callbacks to run once this node becomes the raft leader
        this.on_leader = [];
        this.on_change = (st) =>
        {
            if (st.state !== 'leader')
            {
                return;
            }
            for (const notify of this.on_leader)
            {
                notify();
            }
            this.on_leader = [];
        };
        this.antietcd.on('raftchange', this.on_change);
    }

    // Nothing to parse for the embedded instance
    parse_config(/*config*/)
    {
    }

    // Unsubscribe from raft state changes and cancel the active watch, if any
    stop_watcher()
    {
        this.antietcd.off('raftchange', this.on_change);
        const active_watch = this.watch_id;
        if (!active_watch)
        {
            return;
        }
        this.watch_id = null;
        this.antietcd.cancel_watch(active_watch).catch(console.error);
    }

    // (Re)create the watch over all keys under etcd_prefix, starting from the
    // monitor's current watch revision. Messages are handed to mon.on_message()
    async start_watcher()
    {
        if (this.watch_id)
        {
            await this.antietcd.cancel_watch(this.watch_id);
            this.watch_id = null;
        }
        const deliver = (message) =>
        {
            // Deliver on the next event loop iteration
            setImmediate(() => this.mon.on_message(message.result));
        };
        const new_watch = await this.antietcd.create_watch({
            key: b64(this.mon.config.etcd_prefix+'/'),
            range_end: b64(this.mon.config.etcd_prefix+'0'),
            start_revision: ''+this.mon.etcd_watch_revision,
            watch_id: 1,
            progress_notify: true,
        }, deliver);
        console.log('Successfully subscribed to antietcd revision '+this.antietcd.etctree.mod_revision);
        this.watch_id = new_watch;
    }

    // In clustered mode, wait until this node is the raft leader; then write
    // the <etcd_prefix>/mon/master key bound to the monitor's lease
    async become_master()
    {
        if (this.antietcd.cluster)
        {
            console.log('Waiting to become master');
            if (this.antietcd.cluster.raft.state !== 'leader')
            {
                await new Promise(ok => this.on_leader.push(ok));
            }
        }
        else
        {
            console.log('Running in non-clustered mode');
        }
        const state = { ...this.mon.get_mon_state(), id: ''+this.mon.etcd_lease_id };
        const put_master = {
            requestPut: {
                key: b64(this.mon.config.etcd_prefix+'/mon/master'),
                value: b64(JSON.stringify(state)),
                lease: ''+this.mon.etcd_lease_id,
            },
        };
        await this.etcd_call('/kv/txn', { success: [ put_master ] }, this.mon.config.etcd_start_timeout, 0);
        if (this.antietcd.cluster)
        {
            console.log('Became master');
        }
    }

    // Call an Antietcd API method. `path` is an etcd-style path ('/kv/txn'), which is
    // converted to the method name ('kv_txn'). Attempts are spaced at least `timeout`
    // ms apart. A negative `retries` means retry forever; 0 is treated as 1 attempt.
    // Throws when the monitor is stopped or when all attempts fail
    async etcd_call(path, body, timeout, retries)
    {
        if (retries >= 0 && retries < 1)
        {
            retries = 1;
        }
        let attempt = 0;
        let last_start = 0;
        while (retries < 0 || attempt < retries)
        {
            attempt++;
            if (this.mon.stopped)
            {
                throw new Error('Monitor instance is stopped');
            }
            try
            {
                if (Date.now()-last_start < timeout)
                {
                    await new Promise(ok => setTimeout(ok, timeout-(Date.now()-last_start)));
                }
                last_start = Date.now();
                const api_name = path.replace(/^\/+/, '').replace(/\/+$/, '').replace(/\/+/g, '_');
                const res = await this.antietcd.api(api_name, body);
                if (!res.error)
                {
                    return res;
                }
                console.error('Failed to query antietcd '+path+' (retry '+attempt+'/'+retries+'): '+res.error);
            }
            catch (e)
            {
                console.error('Failed to query antietcd '+path+' (retry '+attempt+'/'+retries+'): '+e.stack);
            }
        }
        throw new Error('Failed to query antietcd ('+retries+' retries)');
    }
}
module.exports = AntiEtcdAdapter;

View File

@ -3,7 +3,6 @@
const http = require('http');
const WebSocket = require('ws');
const { b64, local_ips } = require('./utils.js');
const MON_STOPPED = 'Monitor instance is stopped';
@ -24,7 +23,7 @@ class EtcdAdapter
parse_etcd_addresses(addrs)
{
const is_local_ip = local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
const is_local_ip = this.mon.local_ips(true).reduce((a, c) => { a[c] = true; return a; }, {});
this.etcd_local = [];
this.etcd_urls = [];
this.selected_etcd_url = null;
@ -349,4 +348,9 @@ function POST(url, body, timeout)
});
}
// Encode the given string as base64 text
function b64(str)
{
    const raw = Buffer.from(str);
    return raw.toString('base64');
}
module.exports = EtcdAdapter;

View File

@ -6,7 +6,7 @@ const etcd_nonempty_keys = {
'config/global': 1,
'config/node_placement': 1,
'config/pools': 1,
'pg/config': 1,
'config/pgs': 1,
'history/last_clean_pgs': 1,
'stats': 1,
};
@ -15,8 +15,7 @@ const etcd_allow = new RegExp('^'+[
'config/node_placement',
'config/pools',
'config/osd/[1-9]\\d*',
'config/pgs', // old name
'pg/config',
'config/pgs',
'config/inode/[1-9]\\d*/[1-9]\\d*',
'osd/state/[1-9]\\d*',
'osd/stats/[1-9]\\d*',
@ -25,8 +24,7 @@ const etcd_allow = new RegExp('^'+[
'mon/master',
'mon/member/[a-f0-9]+',
'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*', // old name
'pgstats/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*',
'history/last_clean_pgs',
'inode/stats/[1-9]\\d*/\\d+',
@ -71,7 +69,7 @@ const etcd_tree = {
block_size: 131072,
disk_alignment: 4096,
bitmap_granularity: 4096,
immediate_commit: 'all', // 'none', 'all' or 'small'
immediate_commit: false, // 'all' or 'small'
// client - configurable online
client_max_dirty_bytes: 33554432,
client_max_dirty_ops: 1024,
@ -191,7 +189,7 @@ const etcd_tree = {
block_size: 131072,
bitmap_granularity: 4096,
// 'all'/'small'/'none', same as in OSD options
immediate_commit: 'all',
immediate_commit: 'none',
pg_stripe_size: 0,
root_node?: 'rack1',
// restrict pool to OSDs having all of these tags
@ -207,6 +205,19 @@ const etcd_tree = {
osd: {
/* <id>: { reweight?: 1, tags?: [ 'nvme', ... ], noout?: true }, ... */
},
/* pgs: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
pgs: {},
/* inode: {
<pool_id>: {
<inode_t>: {
@ -234,9 +245,6 @@ const etcd_tree = {
stats: {
/* <osd_num_t>: {
time: number, // unix time
data_block_size: uint64_t, // bytes
bitmap_granularity: uint64_t, // bytes
immediate_commit: "all"|"small"|"none",
blockstore_ready: boolean,
size: uint64_t, // bytes
free: uint64_t, // bytes
@ -274,24 +282,11 @@ const etcd_tree = {
master: {
/* ip: [ string ], id: uint64_t */
},
member: {
standby: {
/* <uint64_t>: { ip: [ string ] }, */
},
},
pg: {
/* config: {
hash: string,
items: {
<pool_id>: {
<pg_id>: {
osd_set: [ 1, 2, 3 ],
primary: 1,
pause: false,
}
}
}
}, */
config: {},
state: {
/* <pool_id>: {
<pg_id>: {
@ -302,6 +297,18 @@ const etcd_tree = {
}
}, */
},
stats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
history: {
/* <pool_id>: {
<pg_id>: {
@ -313,18 +320,6 @@ const etcd_tree = {
}, */
},
},
pgstats: {
/* <pool_id>: {
<pg_id>: {
object_count: uint64_t,
clean_count: uint64_t,
misplaced_count: uint64_t,
degraded_count: uint64_t,
incomplete_count: uint64_t,
write_osd_set: osd_num_t[],
},
}, */
},
inode: {
stats: {
/* <pool_id>: {

View File

@ -1,50 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const fsp = require('fs').promises;
const http = require('http');
const https = require('https');
// Create the monitor's HTTP server and start listening.
//
// cfg options read here:
//   mon_https_cert, mon_https_key - when mon_https_cert is set, an HTTPS server is
//     created with this certificate/key pair (both are file paths)
//   mon_https_ca - optional path to a CA bundle, passed to the TLS layer as `ca`
//   mon_https_client_auth - when set, client certificates are requested
//   mon_http_port - listen port (default 8060)
//   mon_http_ip - listen address (default: unspecified)
// handler is the (req, res) request callback.
//
// Returns the server, or null when listen() fails synchronously. Rejects if the
// certificate, key or CA files cannot be read.
async function create_http_server(cfg, handler)
{
    let server;
    if (cfg.mon_https_cert)
    {
        const tls = {
            key: await fsp.readFile(cfg.mon_https_key),
            cert: await fsp.readFile(cfg.mon_https_cert),
        };
        if (cfg.mon_https_ca)
        {
            // The option understood by https.createServer() is `ca`;
            // any other property name is silently ignored
            tls.ca = await fsp.readFile(cfg.mon_https_ca);
        }
        if (cfg.mon_https_client_auth)
        {
            tls.requestCert = true;
        }
        server = https.createServer(tls, handler);
    }
    else
    {
        server = http.createServer(handler);
    }
    // Use one value for both listen() and the error message so they can't diverge
    const port = cfg.mon_http_port || 8060;
    try
    {
        let err;
        server.once('error', e => err = e);
        server.listen(port, cfg.mon_http_ip || undefined);
        // NOTE(review): only errors reported synchronously are seen here. Errors which
        // Node.js emits later through the 'error' event (presumably including an
        // already-used port) are swallowed by the listener above - confirm whether
        // callers should wait for 'listening' instead
        if (err)
            throw err;
    }
    catch (e)
    {
        console.error(
            'HTTP server disabled because listen at address: '+
            (cfg.mon_http_ip || '')+':'+port+' failed with error: '+e
        );
        return null;
    }
    return server;
}
module.exports = { create_http_server };

View File

@ -23,4 +23,4 @@ for (let i = 2; i < process.argv.length; i++)
}
}
Mon.run_forever(options).catch(console.error);
Mon.run_forever(options);

View File

@ -1,43 +1,27 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const { URL } = require('url');
const fs = require('fs');
const crypto = require('crypto');
const os = require('os');
const AntiEtcdAdapter = require('./antietcd_adapter.js');
const EtcdAdapter = require('./etcd_adapter.js');
const { create_http_server } = require('./http_server.js');
const { export_prometheus_metrics } = require('./prometheus.js');
const { etcd_tree, etcd_allow, etcd_nonempty_keys } = require('./etcd_schema.js');
const { validate_pool_cfg } = require('./pool_config.js');
const { sum_op_stats, sum_object_counts, sum_inode_stats, serialize_bigints } = require('./stats.js');
const stableStringify = require('./stable-stringify.js');
const { scale_pg_history } = require('./pg_utils.js');
const { get_osd_tree } = require('./osd_tree.js');
const { b64, de64, local_ips } = require('./utils.js');
const { recheck_primary, save_new_pgs_txn, generate_pool_pgs } = require('./pg_gen.js');
class Mon
{
static async run_forever(config)
static run_forever(config)
{
let mergedConfig = config;
if (fs.existsSync(config.config_path||'/etc/vitastor/vitastor.conf'))
{
const fileConfig = JSON.parse(fs.readFileSync(config.config_path||'/etc/vitastor/vitastor.conf', { encoding: 'utf-8' }));
mergedConfig = { ...fileConfig, ...config };
}
let antietcd = await AntiEtcdAdapter.start_antietcd(mergedConfig);
let mon;
const run = () =>
{
console.log('Starting Monitor');
const my_mon = new Mon(config);
my_mon.etcd = antietcd
? new AntiEtcdAdapter(my_mon, antietcd)
: new EtcdAdapter(my_mon);
my_mon.etcd.parse_config(my_mon.config);
mon = my_mon;
my_mon.on_die = () =>
{
@ -74,57 +58,24 @@ class Mon
this.state = JSON.parse(JSON.stringify(etcd_tree));
this.prev_stats = { osd_stats: {}, osd_diff: {} };
this.recheck_pgs_active = false;
this.watcher_active = false;
this.old_pg_config = false;
this.old_pg_stats_seen = false;
this.etcd = new EtcdAdapter(this);
this.etcd.parse_config(this.config);
}
async start()
{
if (this.config.enable_prometheus || !('enable_prometheus' in this.config))
{
this.http = await create_http_server(this.config, (req, res) =>
{
const u = new URL(req.url, 'http://'+(req.headers.host || 'localhost'));
if (u.pathname.replace(/\/+$/, '') == (this.config.prometheus_path||'/metrics'))
{
if (!this.watcher_active)
{
res.writeHead(503);
res.write('Monitor is in standby mode. Please retrieve metrics from master monitor instance\n');
}
else
{
res.write(export_prometheus_metrics(this.state));
}
}
else
{
res.writeHead(404);
res.write('Not found. Metrics path: '+(this.config.prometheus_path||'/metrics\n'));
}
res.end();
});
this.http_connections = new Set();
this.http.on('connection', conn =>
{
this.http_connections.add(conn);
conn.once('close', () => this.http_connections.delete(conn));
});
}
await this.load_config();
await this.get_lease();
await this.etcd.become_master();
await this.load_cluster_state();
await this.etcd.start_watcher(this.config.etcd_mon_retries);
this.watcher_active = true;
for (const pool_id in this.state.config.pools)
{
if (!this.state.pool.stats[pool_id] ||
!Number(this.state.pool.stats[pool_id].pg_real_size))
{
// Generate missing data in etcd
this.state.pg.config.hash = null;
this.state.config.pgs.hash = null;
break;
}
}
@ -196,37 +147,28 @@ class Mon
this.etcd_watch_revision = BigInt(msg.header.revision)+BigInt(1);
for (const e of msg.events||[])
{
const kv = this.parse_kv(e.kv);
const key = kv.key.substr(this.config.etcd_prefix.length);
this.parse_kv(e.kv);
const key = e.kv.key.substr(this.config.etcd_prefix.length);
if (key.substr(0, 11) == '/osd/state/')
{
stats_changed = true;
changed = true;
}
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 9) == '/pgstats/' || key.substr(0, 16) == '/osd/inodestats/')
else if (key.substr(0, 11) == '/osd/stats/' || key.substr(0, 10) == '/pg/stats/' || key.substr(0, 16) == '/osd/inodestats/')
{
stats_changed = true;
}
else if (key.substr(0, 10) == '/pg/stats/')
{
this.old_pg_stats_seen = true;
stats_changed = true;
}
else if (key.substr(0, 10) == '/pg/state/')
{
pg_states_changed = true;
}
else if (key != '/stats' && key.substr(0, 13) != '/inode/stats/')
{
if (key == '/config/pgs' && !kv.value)
{
this.old_pg_config = false;
}
changed = true;
}
if (this.config.verbose)
{
console.log(JSON.stringify({ ...e, kv: kv || undefined }));
console.log(JSON.stringify(e));
}
}
if (pg_states_changed)
@ -296,7 +238,7 @@ class Mon
continue next_pool;
}
}
new_clean_pgs.items[pool_id] = this.state.pg.config.items[pool_id];
new_clean_pgs.items[pool_id] = this.state.config.pgs.items[pool_id];
}
this.state.history.last_clean_pgs = new_clean_pgs;
await this.etcd.etcd_call('/kv/txn', {
@ -310,7 +252,7 @@ class Mon
get_mon_state()
{
return { ip: local_ips(), hostname: os.hostname() };
return { ip: this.local_ips(), hostname: os.hostname() };
}
async get_lease()
@ -342,16 +284,6 @@ class Mon
async on_stop()
{
console.log('Stopping Monitor');
if (this.http)
{
await new Promise(ok =>
{
this.http.close(ok);
for (const conn of this.http_connections)
conn.destroy();
});
this.http = null;
}
this.etcd.stop_watcher();
if (this.save_last_clean_timer)
{
@ -407,26 +339,6 @@ class Mon
this.parse_kv(kv);
}
}
if (Object.keys((this.state.config.pgs||{}).items||{}).length)
{
// Support seamless upgrade to new OSDs
if (!Object.keys((this.state.pg.config||{}).items||{}).length)
{
const pgs = JSON.stringify(this.state.config.pgs);
this.state.pg.config = JSON.parse(pgs);
const res = await this.etcd.etcd_call('/kv/txn', {
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(pgs) } },
],
compare: [
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
],
}, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
if (!res.succeeded)
throw new Error('Failed to duplicate old PG config to new PG config');
}
this.old_pg_config = true;
}
}
all_osds()
@ -437,7 +349,7 @@ class Mon
async stop_all_pgs(pool_id)
{
let has_online = false, paused = true;
for (const pg in this.state.pg.config.items[pool_id]||{})
for (const pg in this.state.config.pgs.items[pool_id]||{})
{
// FIXME: Change all (||{}) to ?. (optional chaining) at some point
const cur_state = (((this.state.pg.state[pool_id]||{})[pg]||{}).state||[]).join(',');
@ -445,7 +357,7 @@ class Mon
{
has_online = true;
}
if (!this.state.pg.config.items[pool_id][pg].pause)
if (!this.state.config.pgs.items[pool_id][pg].pause)
{
paused = false;
}
@ -453,7 +365,7 @@ class Mon
if (!paused)
{
console.log('Stopping all PGs for pool '+pool_id+' before changing PG count');
const new_cfg = JSON.parse(JSON.stringify(this.state.pg.config));
const new_cfg = JSON.parse(JSON.stringify(this.state.config.pgs));
for (const pg in new_cfg.items[pool_id])
{
new_cfg.items[pool_id][pg].pause = true;
@ -461,26 +373,22 @@ class Mon
// Check that no OSDs change their state before we pause PGs
// Doing this we make sure that OSDs don't wake up in the middle of our "transaction"
// and can't see the old PG configuration
const checks = [
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
];
const checks = [];
for (const osd_num of this.all_osds())
{
const key = b64(this.config.etcd_prefix+'/osd/state/'+osd_num);
checks.push({ key, target: 'MOD', result: 'LESS', mod_revision: ''+this.etcd_watch_revision });
}
const txn = {
compare: checks,
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_cfg)) } },
await this.etcd.etcd_call('/kv/txn', {
compare: [
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
...checks,
],
};
if (this.old_pg_config)
{
txn.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } });
}
await this.etcd.etcd_call('/kv/txn', txn, this.config.etcd_mon_timeout, 0);
success: [
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_cfg)) } },
],
}, this.config.etcd_mon_timeout, 0);
return false;
}
return !has_online;
@ -508,7 +416,7 @@ class Mon
pools: this.state.config.pools,
};
const tree_hash = sha1hex(stableStringify(tree_cfg));
if (this.state.pg.config.hash != tree_hash)
if (this.state.config.pgs.hash != tree_hash)
{
// Something has changed
console.log('Pool configuration or OSD tree changed, re-optimizing');
@ -549,10 +457,10 @@ class Mon
else
{
// Nothing changed, but we still want to recheck the distribution of primaries
let new_pg_config = recheck_primary(this.state, this.config, up_osds, osd_tree);
if (new_pg_config)
let new_config_pgs = recheck_primary(this.state, this.config, up_osds, osd_tree);
if (new_config_pgs)
{
const ok = await this.save_pg_config(new_pg_config);
const ok = await this.save_pg_config(new_config_pgs);
if (ok)
console.log('PG configuration successfully changed');
else
@ -567,12 +475,12 @@ class Mon
async apply_pool_pgs(results, up_osds, osd_tree, tree_hash)
{
for (const pool_id in (this.state.pg.config||{}).items||{})
for (const pool_id in (this.state.config.pgs||{}).items||{})
{
// We should stop all PGs when deleting a pool or changing its PG count
if (!this.state.config.pools[pool_id] ||
this.state.pg.config.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
Object.keys(this.state.pg.config.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
this.state.config.pgs.items[pool_id] && this.state.config.pools[pool_id].pg_count !=
Object.keys(this.state.config.pgs.items[pool_id]).reduce((a, c) => (a < (0|c) ? (0|c) : a), 0))
{
if (!await this.stop_all_pgs(pool_id))
{
@ -580,22 +488,22 @@ class Mon
}
}
}
const new_pg_config = JSON.parse(JSON.stringify(this.state.pg.config));
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
const etcd_request = { compare: [], success: [] };
for (const pool_id in (new_pg_config||{}).items||{})
for (const pool_id in (new_config_pgs||{}).items||{})
{
if (!this.state.config.pools[pool_id])
{
const prev_pgs = [];
for (const pg in new_pg_config.items[pool_id]||{})
for (const pg in new_config_pgs.items[pool_id]||{})
{
prev_pgs[pg-1] = new_pg_config.items[pool_id][pg].osd_set;
prev_pgs[pg-1] = new_config_pgs.items[pool_id][pg].osd_set;
}
// Also delete pool statistics
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
} });
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
this.etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, [], []);
}
}
@ -604,7 +512,7 @@ class Mon
const pool_id = pool_res.pool_id;
const pool_cfg = this.state.config.pools[pool_id];
let pg_history = [];
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
{
if (this.state.pg.history[pool_id] &&
this.state.pg.history[pool_id][pg])
@ -613,9 +521,9 @@ class Mon
}
}
const real_prev_pgs = [];
for (const pg in ((this.state.pg.config.items||{})[pool_id]||{}))
for (const pg in ((this.state.config.pgs.items||{})[pool_id]||{}))
{
real_prev_pgs[pg-1] = [ ...this.state.pg.config.items[pool_id][pg].osd_set ];
real_prev_pgs[pg-1] = [ ...this.state.config.pgs.items[pool_id][pg].osd_set ];
}
if (real_prev_pgs.length > 0 && real_prev_pgs.length != pool_res.pgs.length)
{
@ -626,8 +534,8 @@ class Mon
pg_history = scale_pg_history(pg_history, real_prev_pgs, pool_res.pgs);
// Drop stats
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'/'),
range_end: b64(this.config.etcd_prefix+'/pgstats/'+pool_id+'0'),
key: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'/'),
range_end: b64(this.config.etcd_prefix+'/pg/stats/'+pool_id+'0'),
} });
}
const stats = {
@ -638,26 +546,22 @@ class Mon
key: b64(this.config.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(stats)),
} });
save_new_pgs_txn(new_pg_config, etcd_request, this.state, this.config.etcd_prefix,
save_new_pgs_txn(new_config_pgs, etcd_request, this.state, this.config.etcd_prefix,
this.etcd_watch_revision, pool_id, up_osds, osd_tree, real_prev_pgs, pool_res.pgs, pg_history);
}
new_pg_config.hash = tree_hash;
return await this.save_pg_config(new_pg_config, etcd_request);
new_config_pgs.hash = tree_hash;
return await this.save_pg_config(new_config_pgs, etcd_request);
}
async save_pg_config(new_pg_config, etcd_request = { compare: [], success: [] })
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
{
etcd_request.compare.push(
{ key: b64(this.config.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.config.etcd_prefix+'/pg/config'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
{ key: b64(this.config.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
);
etcd_request.success.push(
{ requestPut: { key: b64(this.config.etcd_prefix+'/pg/config'), value: b64(JSON.stringify(new_pg_config)) } },
{ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
);
if (this.old_pg_config)
{
etcd_request.success.push({ requestPut: { key: b64(this.config.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_pg_config)) } });
}
const txn_res = await this.etcd.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
return txn_res.succeeded;
}
@ -786,16 +690,15 @@ class Mon
{
if (!kv || !kv.key)
{
return kv;
return;
}
kv = { ...kv };
kv.key = de64(kv.key);
kv.value = kv.value ? de64(kv.value) : null;
let key = kv.key.substr(this.config.etcd_prefix.length+1);
if (!etcd_allow.exec(key))
{
console.log('Bad key in etcd: '+kv.key+' = '+kv.value);
return kv;
return;
}
try
{
@ -804,7 +707,7 @@ class Mon
catch (e)
{
console.log('Bad value in etcd: '+kv.key+' = '+kv.value);
return kv;
return;
}
let key_parts = key.split('/');
let cur = this.state;
@ -818,14 +721,7 @@ class Mon
kv.value = kv.value || {};
}
const old = cur[key_parts[key_parts.length-1]];
if (kv.value == null)
{
delete cur[key_parts[key_parts.length-1]];
}
else
{
cur[key_parts[key_parts.length-1]] = kv.value;
}
cur[key_parts[key_parts.length-1]] = kv.value;
if (key === 'config/global')
{
this.config = { ...this.fileConfig, ...this.state.config.global, ...this.cliConfig };
@ -861,7 +757,6 @@ class Mon
!this.state.osd.stats[osd_num] ? 0 : this.state.osd.stats[osd_num].time+this.config.osd_out_time
);
}
return kv;
}
_die(err)
@ -871,6 +766,33 @@ class Mon
this.on_stop().catch(console.error);
this.on_die();
}
local_ips(all)
{
const ips = [];
const ifaces = os.networkInterfaces();
for (const ifname in ifaces)
{
for (const iface of ifaces[ifname])
{
if (iface.family == 'IPv4' && !iface.internal || all)
{
ips.push(iface.address);
}
}
}
return ips;
}
}
// Encode the given string as base64 text
function b64(str)
{
    const raw = Buffer.from(str);
    return raw.toString('base64');
}
// Decode base64 text back into a string (Buffer's default string encoding)
function de64(str)
{
    const decoded = Buffer.from(str, 'base64');
    return decoded.toString();
}
function sha1hex(str)

View File

@ -1,6 +1,6 @@
{
"name": "vitastor-mon",
"version": "1.9.1",
"version": "1.6.1",
"description": "Vitastor SDS monitor service",
"main": "mon-main.js",
"scripts": {
@ -9,7 +9,6 @@
"author": "Vitaliy Filippov",
"license": "UNLICENSED",
"dependencies": {
"antietcd": "^1.1.0",
"sprintf-js": "^1.1.2",
"ws": "^7.2.5"
},

View File

@ -57,7 +57,7 @@ function pick_primary(pool_config, osd_set, up_osds, aff_osds)
function recheck_primary(state, global_config, up_osds, osd_tree)
{
let new_pg_config;
let new_config_pgs;
for (const pool_id in state.config.pools)
{
const pool_cfg = state.config.pools[pool_id];
@ -69,30 +69,30 @@ function recheck_primary(state, global_config, up_osds, osd_tree)
reset_rng();
for (let pg_num = 1; pg_num <= pool_cfg.pg_count; pg_num++)
{
if (!state.pg.config.items[pool_id])
if (!state.config.pgs.items[pool_id])
{
continue;
}
const pg_cfg = state.pg.config.items[pool_id][pg_num];
const pg_cfg = state.config.pgs.items[pool_id][pg_num];
if (pg_cfg)
{
const new_primary = pick_primary(state.config.pools[pool_id], pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_pg_config)
if (!new_config_pgs)
{
new_pg_config = JSON.parse(JSON.stringify(state.pg.config));
new_config_pgs = JSON.parse(JSON.stringify(state.config.pgs));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
new_pg_config.items[pool_id][pg_num].primary = new_primary;
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
}
}
}
}
return new_pg_config;
return new_config_pgs;
}
function save_new_pgs_txn(save_to, request, state, etcd_prefix, etcd_watch_revision, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
@ -174,7 +174,7 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
state.osd.stats,
pool_cfg.block_size || global_config.block_size || 131072,
pool_cfg.bitmap_granularity || global_config.bitmap_granularity || 4096,
pool_cfg.immediate_commit || global_config.immediate_commit || 'all'
pool_cfg.immediate_commit || global_config.immediate_commit || 'none'
);
pool_tree = make_hier_tree(global_config, pool_tree);
// First try last_clean_pgs to minimize data movement
@ -185,10 +185,10 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
}
if (!prev_pgs.length)
{
// Fall back to pg/config if it's empty
for (const pg in ((state.pg.config.items||{})[pool_id]||{}))
// Fall back to config/pgs if it's empty
for (const pg in ((state.config.pgs.items||{})[pool_id]||{}))
{
prev_pgs[pg-1] = [ ...state.pg.config.items[pool_id][pg].osd_set ];
prev_pgs[pg-1] = [ ...state.config.pgs.items[pool_id][pg].osd_set ];
}
}
const old_pg_count = prev_pgs.length;
@ -205,8 +205,8 @@ async function generate_pool_pgs(state, global_config, pool_id, osd_tree, levels
ordered: pool_cfg.scheme != 'replicated',
};
let optimize_result;
// Re-shuffle PGs if pg/config.hash is empty
if (old_pg_count > 0 && state.pg.config.hash)
// Re-shuffle PGs if config/pgs.hash is empty
if (old_pg_count > 0 && state.config.pgs.hash)
{
if (prev_pgs.length != pool_cfg.pg_count)
{

View File

@ -1,220 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
// HELP/TYPE preamble prepended to the Prometheus text output.
// Every exported metric family gets exactly one "# HELP" and one "# TYPE" line.
const metric_help =
`# HELP vitastor_object_bytes Total size of objects in cluster in bytes
# TYPE vitastor_object_bytes gauge
# HELP vitastor_object_count Total number of objects in cluster
# TYPE vitastor_object_count gauge
# HELP vitastor_stat_count Total operation count
# TYPE vitastor_stat_count counter
# HELP vitastor_stat_usec Total operation latency in usec
# TYPE vitastor_stat_usec counter
# HELP vitastor_stat_bytes Total operation size in bytes
# TYPE vitastor_stat_bytes counter
# HELP vitastor_image_raw_used Image raw used size in bytes
# TYPE vitastor_image_raw_used counter
# HELP vitastor_image_stat_count Per-image total operation count
# TYPE vitastor_image_stat_count counter
# HELP vitastor_image_stat_usec Per-image total operation latency
# TYPE vitastor_image_stat_usec counter
# HELP vitastor_image_stat_bytes Per-image total operation size in bytes
# TYPE vitastor_image_stat_bytes counter
# HELP vitastor_osd_status OSD up/down status
# TYPE vitastor_osd_status gauge
# HELP vitastor_osd_size_bytes OSD total space in bytes
# TYPE vitastor_osd_size_bytes gauge
# HELP vitastor_osd_free_bytes OSD free space in bytes
# TYPE vitastor_osd_free_bytes gauge
# HELP vitastor_osd_stat_count Per-OSD total operation count
# TYPE vitastor_osd_stat_count counter
# HELP vitastor_osd_stat_usec Per-OSD total operation latency
# TYPE vitastor_osd_stat_usec counter
# HELP vitastor_osd_stat_bytes Per-OSD total operation size in bytes
# TYPE vitastor_osd_stat_bytes counter
# HELP vitastor_monitor_info Monitor info, 1 is master, 0 is standby
# TYPE vitastor_monitor_info gauge
# HELP vitastor_pool_info Pool configuration (in labels)
# TYPE vitastor_pool_info gauge
# HELP vitastor_pool_status Pool up/down status
# TYPE vitastor_pool_status gauge
# HELP vitastor_pool_raw_to_usable Raw to usable space ratio
# TYPE vitastor_pool_raw_to_usable gauge
# HELP vitastor_pool_space_efficiency Pool space usage efficiency
# TYPE vitastor_pool_space_efficiency gauge
# HELP vitastor_pool_total_raw_tb Total raw space in pool in TB
# TYPE vitastor_pool_total_raw_tb gauge
# HELP vitastor_pool_used_raw_tb Used raw space in pool in TB
# TYPE vitastor_pool_used_raw_tb gauge
# HELP vitastor_pg_count PG counts by state
# TYPE vitastor_pg_count gauge
`;
// Render the cluster state object <st> as Prometheus text exposition output.
// The result starts with the metric_help preamble, followed by one sample line per value
// in this order: global stats, per-image stats, per-OSD stats, monitor info, per-pool stats.
// Label values that come from free-form strings are passed through addslashes().
// Returns the resulting string.
function export_prometheus_metrics(st)
{
let res = metric_help;
// Global statistics
for (const k in st.stats.object_bytes)
{
res += `vitastor_object_bytes{object_type="${k}"} ${st.stats.object_bytes[k]}\n`;
}
for (const k in st.stats.object_counts)
{
res += `vitastor_object_count{object_type="${k}"} ${st.stats.object_counts[k]}\n`;
}
// op_stats, subop_stats and recovery_stats share the same metric names and differ by the op_type label
for (const typ of [ 'op', 'subop', 'recovery' ])
{
for (const op in st.stats[typ+"_stats"]||{})
{
const op_stat = st.stats[typ+"_stats"][op];
for (const key of [ 'count', 'usec', 'bytes' ])
{
res += `vitastor_stat_${key}{op="${op}",op_type="${typ}"} ${op_stat[key]||0}\n`;
}
}
}
// Per-image statistics
for (const pool in st.inode.stats)
{
for (const inode in st.inode.stats[pool])
{
const ist = st.inode.stats[pool][inode];
// Inodes without a config entry are exported with an empty image_name
const inode_name = ((st.config.inode[pool]||{})[inode]||{}).name||'';
const inode_label = `image_name="${addslashes(inode_name)}",inode_num="${inode}",pool_id="${pool}"`;
res += `vitastor_image_raw_used{${inode_label}} ${ist.raw_used||0}\n`;
for (const op of [ 'read', 'write', 'delete' ])
{
for (const k of [ 'count', 'usec', 'bytes' ])
{
// Operations missing from this inode's stats produce no sample lines
if (ist[op])
{
res += `vitastor_image_stat_${k}{${inode_label},op="${op}"} ${ist[op][k]||0}\n`;
}
}
}
}
}
// Per-OSD statistics
for (const osd in st.osd.stats)
{
const osd_stat = st.osd.stats[osd];
// An OSD is reported as up only when st.osd.state has an entry for it with state == 'up'
const up = st.osd.state[osd] && st.osd.state[osd].state == 'up' ? 1 : 0;
res += `vitastor_osd_status{host="${addslashes(osd_stat.host)}",osd_num="${osd}"} ${up}\n`;
res += `vitastor_osd_size_bytes{osd_num="${osd}"} ${osd_stat.size||0}\n`;
res += `vitastor_osd_free_bytes{osd_num="${osd}"} ${osd_stat.free||0}\n`;
for (const op in osd_stat.op_stats)
{
const ist = osd_stat.op_stats[op];
for (const k of [ 'count', 'usec', 'bytes' ])
{
res += `vitastor_osd_stat_${k}{osd_num="${osd}",op="${op}",op_type="op"} ${ist[k]||0}\n`;
}
}
for (const op in osd_stat.subop_stats)
{
const ist = osd_stat.subop_stats[op];
for (const k of [ 'count', 'usec', 'bytes' ])
{
res += `vitastor_osd_stat_${k}{osd_num="${osd}",op="${op}",op_type="subop"} ${ist[k]||0}\n`;
}
}
}
// Monitor statistics
for (const mon_id in st.mon.member)
{
const mon = st.mon.member[mon_id];
// Sample value is 1 for the monitor whose id matches st.mon.master.id, 0 otherwise
const master = st.mon.master && st.mon.master.id == mon_id ? 1 : 0;
// mon.ip may be an array; only its first element is exported
const ip = (mon.ip instanceof Array ? mon.ip[0] : mon.ip) || '';
res += `vitastor_monitor_info{monitor_hostname="${addslashes(mon.hostname)}",monitor_id="${mon_id}",monitor_ip="${addslashes(ip)}"} ${master}\n`;
}
// Per-pool statistics
for (const pool_id in st.config.pools)
{
const pool_cfg = st.config.pools[pool_id];
const pool_label = `pool_id="${pool_id}",pool_name="${addslashes(pool_cfg.name)}"`;
const pool_stat = st.pool.stats[pool_id];
// Pool configuration is exported as labels of a constant-1 sample
res += `vitastor_pool_info{${pool_label}`+
`,pool_scheme="${addslashes(pool_cfg.scheme)}"`+
`,pg_size="${pool_cfg.pg_size||0}",pg_minsize="${pool_cfg.pg_minsize||0}"`+
`,parity_chunks="${pool_cfg.parity_chunks||0}",pg_count="${pool_cfg.pg_count||0}"`+
`,failure_domain="${addslashes(pool_cfg.failure_domain)}"`+
`} 1\n`;
// Pools without an entry in st.pool.stats only get the vitastor_pool_info line
if (!pool_stat)
{
continue;
}
res += `vitastor_pool_raw_to_usable{${pool_label}} ${pool_stat.raw_to_usable||0}\n`;
res += `vitastor_pool_space_efficiency{${pool_label}} ${pool_stat.space_efficiency||0}\n`;
res += `vitastor_pool_total_raw_tb{${pool_label}} ${pool_stat.total_raw_tb||0}\n`;
res += `vitastor_pool_used_raw_tb{${pool_label}} ${pool_stat.used_raw_tb||0}\n`;
// PG states and pool up/down status
// PG count is the number of entries in st.pg.config.items for this pool, falling back to pool_cfg.pg_count
const real_pg_count = (Object.keys(((st.pg.config||{}).items||{})[pool_id]||{}).length) || (0|pool_cfg.pg_count);
// Pre-filled with zeroes so that every listed state is always exported
const per_state = {
active: 0,
starting: 0,
peering: 0,
incomplete: 0,
repeering: 0,
stopping: 0,
offline: 0,
degraded: 0,
has_inconsistent: 0,
has_corrupted: 0,
has_incomplete: 0,
has_degraded: 0,
has_misplaced: 0,
has_unclean: 0,
has_invalid: 0,
left_on_dead: 0,
scrubbing: 0,
};
const pool_pg_states = st.pg.state[pool_id] || {};
for (let i = 1; i <= real_pg_count; i++)
{
// PGs with no entry in st.pg.state are counted as offline
if (!pool_pg_states[i])
{
per_state['offline'] = 1 + (per_state['offline']|0);
}
else
{
// A PG contributes to the counter of each state name in its state list
for (const st_name of pool_pg_states[i].state)
{
per_state[st_name] = 1 + (per_state[st_name]|0);
}
}
}
for (const st_name in per_state)
{
res += `vitastor_pg_count{pg_state="${st_name}",${pool_label}} ${per_state[st_name]}\n`;
}
// The pool is reported as up only when the 'active' counter reaches the PG count
const pool_active = per_state['active'] >= real_pg_count ? 1 : 0;
res += `vitastor_pool_status{${pool_label}} ${pool_active}\n`;
}
return res;
}
// Escape a value for use inside a double-quoted Prometheus label value.
// Falsy input becomes an empty string, everything else is converted to a string first.
// Backslash and double quote get a backslash prefix; a line feed becomes the two
// characters "\n" - a raw newline would split the sample line and break the output.
function addslashes(str)
{
    return ((str||'')+'').replace(/["\n\\]/g, (ch) => (ch === '\n' ? '\\n' : '\\'+ch));
}
module.exports = { export_prometheus_metrics };

File diff suppressed because it is too large Load Diff

View File

@ -3,10 +3,10 @@
function derive_osd_stats(st, prev, prev_diff)
{
const diff = prev_diff || { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
if (!st || !st.time || !prev || !prev.time || prev.time >= st.time)
{
return diff;
return prev_diff || diff;
}
const timediff = BigInt(st.time*1000 - prev.time*1000);
for (const op in st.op_stats||{})
@ -17,7 +17,8 @@ function derive_osd_stats(st, prev, prev_diff)
const b = c.bytes - BigInt(pr && pr.bytes||0);
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
diff.op_stats[op] = { ...c, bps: n > 0 ? b*1000n/timediff : 0n, iops: n > 0 ? n*1000n/timediff : 0n, lat: n > 0 ? us/n : 0n };
if (n > 0)
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.subop_stats||{})
{
@ -26,7 +27,8 @@ function derive_osd_stats(st, prev, prev_diff)
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
diff.subop_stats[op] = { ...c, iops: n > 0 ? n*1000n/timediff : 0n, lat: n > 0 ? us/n : 0n };
if (n > 0)
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.recovery_stats||{})
{
@ -35,7 +37,8 @@ function derive_osd_stats(st, prev, prev_diff)
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const n = c.count - BigInt(pr && pr.count||0);
diff.recovery_stats[op] = { ...c, bps: n > 0 ? b*1000n/timediff : 0n, iops: n > 0 ? n*1000n/timediff : 0n };
if (n > 0)
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
}
for (const pool_id in st.inode_stats||{})
{
@ -50,9 +53,9 @@ function derive_osd_stats(st, prev, prev_diff)
prev.inode_stats[pool_id][inode_num] && prev.inode_stats[pool_id][inode_num][op];
const n = BigInt(c.count||0) - BigInt(pr && pr.count||0);
inode_diff[op] = {
bps: n > 0 ? (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff : 0n,
iops: n > 0 ? n*1000n/timediff : 0n,
lat: n > 0 ? (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/n : 0n,
bps: (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff,
iops: n*1000n/timediff,
lat: (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/(n || 1n),
};
}
}
@ -71,7 +74,7 @@ function sum_op_stats(all_osd, prev_stats)
);
prev_stats.osd_stats[osd] = cur;
}
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: { degraded: {}, misplaced: {} } };
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
// Sum derived values instead of deriving summed
for (const osd in all_osd.state)
{
@ -100,19 +103,10 @@ function sum_object_counts(state, global_config)
{
const object_counts = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
const object_bytes = { object: 0n, clean: 0n, misplaced: 0n, degraded: 0n, incomplete: 0n };
let pgstats = state.pgstats;
if (state.pg.stats)
{
// Merge with old stats for seamless transition to new stats
for (const pool_id in state.pg.stats)
{
pgstats[pool_id] = { ...(state.pg.stats[pool_id] || {}), ...(pgstats[pool_id] || {}) };
}
}
for (const pool_id in pgstats)
for (const pool_id in state.pg.stats)
{
let object_size = 0;
for (const osd_num of pgstats[pool_id].write_osd_set||[])
for (const osd_num of state.pg.stats[pool_id].write_osd_set||[])
{
if (osd_num && state.osd.stats[osd_num] && state.osd.stats[osd_num].block_size)
{
@ -130,9 +124,9 @@ function sum_object_counts(state, global_config)
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
}
object_size = BigInt(object_size);
for (const pg_num in pgstats[pool_id])
for (const pg_num in state.pg.stats[pool_id])
{
const st = pgstats[pool_id][pg_num];
const st = state.pg.stats[pool_id][pg_num];
if (st)
{
for (const k in object_counts)

View File

@ -1,37 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
const os = require('os');
// List addresses of local network interfaces as reported by os.networkInterfaces().
// With a truthy <all> every address is returned, otherwise only non-internal IPv4 ones.
function local_ips(all)
{
    const found = [];
    for (const addrs of Object.values(os.networkInterfaces()))
    {
        for (const addr of addrs)
        {
            const external_v4 = addr.family == 'IPv4' && !addr.internal;
            if (external_v4 || all)
            {
                found.push(addr.address);
            }
        }
    }
    return found;
}
// Convert <str> to its base64 representation
function b64(str)
{
    const bytes = Buffer.from(str);
    const encoded = bytes.toString('base64');
    return encoded;
}
// Convert base64 text <str> back to a string (default Buffer.toString() encoding)
function de64(str)
{
    const bytes = Buffer.from(str, 'base64');
    const text = bytes.toString();
    return text;
}
module.exports = {
b64,
de64,
local_ips,
};

View File

@ -1,49 +0,0 @@
// AntiEtcd persistence filter for Vitastor
// (c) Vitaliy Filippov, 2024
// License: Mozilla Public License 2.0 or Vitastor Network Public License 1.1
// Build a (key, value) => value filter deciding what gets persisted.
// <cfg>.vitastor_prefix is the key prefix ('/vitastor' by default).
// The returned function gives back:
// - for <prefix>/osd/stats/*: a reduced JSON with only bitmap_granularity, data_block_size,
//   host and immediate_commit ('{}' if the value can't be processed, undefined if it is empty)
// - for other <prefix>/osd/*, inode/stats, pg/stats, pgstats, pool/stats keys and <prefix>/stats: undefined
// - for everything else: the value unchanged
function vitastor_persist_filter(cfg)
{
    const prefix = cfg.vitastor_prefix || '/vitastor';
    const osd_stats_prefix = prefix+'/osd/stats/';
    // '/pg/stats/' is the old name of '/pgstats/'
    const dropped_prefixes = [ '/osd/', '/inode/stats/', '/pg/stats/', '/pgstats/', '/pool/stats/' ]
        .map(suffix => prefix+suffix);
    const reduce_osd_stats = (key, value) =>
    {
        if (!value)
        {
            return undefined;
        }
        try
        {
            const full = JSON.parse(value);
            return JSON.stringify({
                bitmap_granularity: full.bitmap_granularity || undefined,
                data_block_size: full.data_block_size || undefined,
                host: full.host || undefined,
                immediate_commit: full.immediate_commit || undefined,
            });
        }
        catch (e)
        {
            console.error('invalid JSON in '+key+' = '+value+': '+e);
            return '{}';
        }
    };
    return (key, value) =>
    {
        if (key.startsWith(osd_stats_prefix))
        {
            return reduce_osd_stats(key, value);
        }
        if (key == prefix+'/stats' || dropped_prefixes.some(p => key.startsWith(p)))
        {
            return undefined;
        }
        return value;
    };
}
module.exports = vitastor_persist_filter;

View File

@ -1,80 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "addon.h"
// Initialize the node addon: exports the Client, Image, KV and KVListing classes
// and a set of negated errno constants on the module object
NAN_MODULE_INIT(InitAddon)
{
    // Create a constructor template called <name> with one internal field
    auto make_class = [](const char *name, Nan::FunctionCallback ctor)
    {
        v8::Local<v8::FunctionTemplate> t = Nan::New<v8::FunctionTemplate>(ctor);
        t->SetClassName(Nan::New(name).ToLocalChecked());
        t->InstanceTemplate()->SetInternalFieldCount(1);
        return t;
    };
    // Attach the constructor function of <t> to the module as <name> and return it
    auto publish = [&target](const char *name, v8::Local<v8::FunctionTemplate> t)
    {
        v8::Local<v8::Function> fn = Nan::GetFunction(t).ToLocalChecked();
        Nan::Set(target, Nan::New(name).ToLocalChecked(), fn);
        return fn;
    };

    // vitastor.Client
    v8::Local<v8::FunctionTemplate> client_tpl = make_class("Client", NodeVitastor::Create);
    Nan::SetPrototypeMethod(client_tpl, "read", NodeVitastor::Read);
    Nan::SetPrototypeMethod(client_tpl, "write", NodeVitastor::Write);
    Nan::SetPrototypeMethod(client_tpl, "sync", NodeVitastor::Sync);
    Nan::SetPrototypeMethod(client_tpl, "read_bitmap", NodeVitastor::ReadBitmap);
    //Nan::SetPrototypeMethod(client_tpl, "destroy", NodeVitastor::Destroy);
    publish("Client", client_tpl);

    // vitastor.Image (opened image)
    v8::Local<v8::FunctionTemplate> image_tpl = make_class("Image", NodeVitastorImage::Create);
    Nan::SetPrototypeMethod(image_tpl, "read", NodeVitastorImage::Read);
    Nan::SetPrototypeMethod(image_tpl, "write", NodeVitastorImage::Write);
    Nan::SetPrototypeMethod(image_tpl, "sync", NodeVitastorImage::Sync);
    Nan::SetPrototypeMethod(image_tpl, "get_info", NodeVitastorImage::GetInfo);
    Nan::SetPrototypeMethod(image_tpl, "read_bitmap", NodeVitastorImage::ReadBitmap);
    publish("Image", image_tpl);

    // vitastor.KV
    v8::Local<v8::FunctionTemplate> kv_tpl = make_class("KV", NodeVitastorKV::Create);
    Nan::SetPrototypeMethod(kv_tpl, "open", NodeVitastorKV::Open);
    Nan::SetPrototypeMethod(kv_tpl, "set_config", NodeVitastorKV::SetConfig);
    Nan::SetPrototypeMethod(kv_tpl, "close", NodeVitastorKV::Close);
    Nan::SetPrototypeMethod(kv_tpl, "get_size", NodeVitastorKV::GetSize);
    Nan::SetPrototypeMethod(kv_tpl, "get", NodeVitastorKV::Get);
    Nan::SetPrototypeMethod(kv_tpl, "get_cached", NodeVitastorKV::GetCached);
    Nan::SetPrototypeMethod(kv_tpl, "set", NodeVitastorKV::Set);
    Nan::SetPrototypeMethod(kv_tpl, "del", NodeVitastorKV::Del);
    Nan::SetPrototypeMethod(kv_tpl, "list", NodeVitastorKV::List);
    publish("KV", kv_tpl);

    // Error codes are exported negated, i.e. vitastor.ENOENT == -ENOENT
    static const struct { const char *name; int32_t value; } error_codes[] = {
        { "ENOENT", -ENOENT },
        { "EIO", -EIO },
        { "EINVAL", -EINVAL },
        { "EROFS", -EROFS },
        { "ENOSPC", -ENOSPC },
        { "EINTR", -EINTR },
        { "EILSEQ", -EILSEQ },
        { "ENOTBLK", -ENOTBLK },
        { "ENOSYS", -ENOSYS },
        { "EAGAIN", -EAGAIN },
    };
    for (const auto & ec: error_codes)
    {
        Nan::Set(target, Nan::New(ec.name).ToLocalChecked(), Nan::New<v8::Int32>(ec.value));
    }

    // Listing handle
    v8::Local<v8::FunctionTemplate> listing_tpl = make_class("KVListing", NodeVitastorKVListing::Create);
    Nan::SetPrototypeMethod(listing_tpl, "next", NodeVitastorKVListing::Next);
    Nan::SetPrototypeMethod(listing_tpl, "close", NodeVitastorKVListing::Close);
    // The KV class keeps the listing constructor in NodeVitastorKV::listing_class
    NodeVitastorKV::listing_class.Reset(publish("KVListing", listing_tpl));
}
NODE_MODULE(addon, (void*)InitAddon)

View File

@ -1,20 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#ifndef NODE_VITASTOR_ADDON_H
#define NODE_VITASTOR_ADDON_H
#include <nan.h>
#include <vitastor_c.h>
#include "client.h"
#define ERRORF(format, ...) fprintf(stderr, format "\n", __VA_ARGS__);
//#define TRACEF(format, ...) fprintf(stderr, format "\n", __VA_ARGS__);
//#define TRACE(msg) fprintf(stderr, "%s\n", msg);
#define TRACEF(format, ...) ;
#define TRACE(msg) ;
#endif

View File

@ -1,21 +0,0 @@
{
'targets': [
{
'target_name': 'addon',
'sources': [
'client.cc',
'addon.cc'
],
'include_dirs': [
'<!(node -e "require(\'nan\')")'
],
'cflags': [
'<!(pkg-config --cflags vitastor)'
],
'libraries': [
'<!(pkg-config --libs vitastor)',
'-lvitastor_kv'
]
}
]
}

View File

@ -1,981 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "addon.h"
#define NODE_VITASTOR_READ 1
#define NODE_VITASTOR_WRITE 2
#define NODE_VITASTOR_SYNC 3
#define NODE_VITASTOR_READ_BITMAP 4
#define NODE_VITASTOR_GET_INFO 5
#ifndef INODE_POOL
#define INODE_POOL(inode) (uint32_t)((inode) >> (64 - POOL_ID_BITS))
#define INODE_NO_POOL(inode) (uint64_t)((inode) & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1))
#define INODE_WITH_POOL(pool_id, inode) (((uint64_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
#endif
// State of one asynchronous call issued from JS: the target client/image,
// operation parameters, data buffers and the JS callback to invoke on completion
class NodeVitastorRequest: public Nan::AsyncResource
{
public:
    NodeVitastorRequest(NodeVitastor *cli, v8::Local<v8::Function> cb):
        Nan::AsyncResource("NodeVitastorRequest"),
        cli(cli)
    {
        callback.Reset(cb);
    }

    // Single data buffer; used when iov_list is empty
    iovec iov;
    // Multiple data buffers (filled for writes that receive an array of Buffers)
    std::vector<iovec> iov_list;
    NodeVitastor *cli = NULL;
    // Set only for requests issued through an Image object
    NodeVitastorImage *img = NULL;
    // One of NODE_VITASTOR_* for Image requests, 0 otherwise
    int op = 0;
    uint64_t offset = 0, len = 0, version = 0;
    bool with_parents = false;
    Nan::Persistent<v8::Function> callback;
};
// Convert a JS value to uint64_t: BigInts are read with Uint64Value(),
// everything else goes through an int64_t conversion
static uint64_t get_ui64(const v8::Local<v8::Value> & val)
{
    if (!val->IsBigInt())
    {
        return Nan::To<int64_t>(val).FromJust();
    }
    v8::Local<v8::BigInt> big = val->ToBigInt(Nan::GetCurrentContext()).ToLocalChecked();
    return big->Uint64Value();
}
//////////////////////////////////////////////////
// NodeVitastor
//////////////////////////////////////////////////
// Construct an unconnected client wrapper; the vitastor client itself is created in Create()
NodeVitastor::NodeVitastor(): Nan::ObjectWrap()
{
TRACE("NodeVitastor: constructor");
// Lets on_io_readable() find this object from the libuv poll handle
poll_watcher.data = this;
}
// Stop polling the io_uring eventfd, then destroy the vitastor client
NodeVitastor::~NodeVitastor()
{
TRACE("NodeVitastor: destructor");
// NOTE(review): the poll handle is only stopped, not closed with uv_close() -
// confirm libuv does not keep referencing poll_watcher after this object is freed
uv_poll_stop(&poll_watcher);
vitastor_c_destroy(c);
c = NULL;
}
// new Client(config): creates a vitastor client from the own properties of the config
// object (each key and value is converted to a string) and hooks its io_uring eventfd
// into the default libuv loop. Throws a JS error if the eventfd can't be registered.
NAN_METHOD(NodeVitastor::Create)
{
    TRACE("NodeVitastor::Create");

    v8::Local<v8::Object> jsParams = info[0].As<v8::Object>();
    v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
    // Flat [ key, value, key, value, ... ] list
    std::vector<std::string> cfg;
    for (uint32_t i = 0; i < keys->Length(); i++)
    {
        auto key = Nan::Get(keys, i).ToLocalChecked();
        cfg.push_back(std::string(*Nan::Utf8String(key)));
        cfg.push_back(std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked())));
    }
    // Pointers stay valid because cfg isn't modified until the client is created
    std::vector<const char*> c_cfg;
    c_cfg.reserve(cfg.size());
    for (const auto & item: cfg)
    {
        c_cfg.push_back(item.c_str());
    }

    NodeVitastor* cli = new NodeVitastor();
    cli->c = vitastor_c_create_uring_json(c_cfg.data(), cfg.size());

    int res = vitastor_c_uring_register_eventfd(cli->c);
    if (res >= 0)
    {
        cli->eventfd = res;
        res = uv_poll_init(uv_default_loop(), &cli->poll_watcher, cli->eventfd);
        if (res >= 0)
            res = uv_poll_start(&cli->poll_watcher, UV_READABLE, on_io_readable);
        // Process events once right away, but only when the watcher is really running
        if (res >= 0)
            on_io_readable(&cli->poll_watcher, 0, UV_READABLE);
    }
    if (res < 0)
    {
        // Report the code of the step that failed; cli->eventfd may not even be set here
        ERRORF("NodeVitastor: failed to create and register io_uring eventfd in libuv: %s", strerror(-res));
        vitastor_c_destroy(cli->c);
        cli->c = NULL;
        // NOTE(review): cli itself is not freed here, because ~NodeVitastor() calls
        // uv_poll_stop() on a watcher that may be uninitialized at this point
        Nan::ThrowError("failed to create and register io_uring eventfd");
        return;
    }

    cli->Wrap(info.This());
    info.GetReturnValue().Set(info.This());
}
// libuv poll callback for the io_uring eventfd: lets the vitastor client
// process completed events when the descriptor is readable
void NodeVitastor::on_io_readable(uv_poll_t* handle, int status, int revents)
{
    TRACEF("NodeVitastor::on_io_readable status/revents %d %d", status, revents);
    if (!(revents & UV_READABLE))
    {
        return;
    }
    NodeVitastor* self = static_cast<NodeVitastor*>(handle->data);
    vitastor_c_uring_handle_events(self->c);
}
// Build a read request from JS arguments (offset, len, callback) starting at info[argpos].
// Allocates a <len>-byte buffer for the data. On allocation failure throws a JS error
// and returns NULL.
NodeVitastorRequest* NodeVitastor::get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
    const uint64_t offset = get_ui64(info[argpos]);
    const uint64_t len = get_ui64(info[argpos+1]);
    void *buf = malloc(len);
    if (buf == NULL)
    {
        Nan::ThrowError("failed to allocate memory");
        return NULL;
    }
    NodeVitastorRequest *req = new NodeVitastorRequest(this, info[argpos+2].As<v8::Function>());
    req->offset = offset;
    req->len = len;
    req->iov.iov_base = buf;
    req->iov.iov_len = len;
    return req;
}
// read(pool, inode, offset, len, callback(err, buffer, version))
NAN_METHOD(NodeVitastor::Read)
{
    TRACE("NodeVitastor::Read");
    if (info.Length() < 5)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to read(pool, inode, offset, len, callback(err, buffer, version))");
        return;
    }

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    uint64_t pool = get_ui64(info[0]);
    uint64_t inode = get_ui64(info[1]);

    auto req = self->get_read_request(info, 2);
    if (!req)
    {
        // Buffer allocation failed, the JS exception is already set
        return;
    }

    // Keep the client alive until on_read_finish() runs
    self->Ref();
    vitastor_c_read(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, &req->iov, 1, on_read_finish, req);
}
// Build a write request from JS arguments (offset, buf | buf[], { version }?, callback)
// starting at info[argpos]. The options object is optional: when the third argument is
// a non-function object, its "version" property is read and the callback is taken from
// the next argument. The request points directly at the memory of the passed Buffer(s).
NodeVitastorRequest* NodeVitastor::get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos)
{
    const uint64_t offset = get_ui64(info[argpos]);
    v8::Local<v8::Value> data = info[argpos+1];
    v8::Local<v8::Value> opt = info[argpos+2];

    int cb_pos = argpos+2;
    uint64_t version = 0;
    const bool has_options = !opt.IsEmpty() && !opt->IsFunction() && opt->IsObject();
    if (has_options)
    {
        auto version_key = Nan::New<v8::String>("version").ToLocalChecked();
        auto version_val = Nan::Get(opt.As<v8::Object>(), version_key).ToLocalChecked();
        if (!version_val.IsEmpty())
        {
            version = get_ui64(version_val);
        }
        cb_pos++;
    }

    NodeVitastorRequest *req = new NodeVitastorRequest(this, info[cb_pos].As<v8::Function>());
    req->offset = offset;
    req->version = version;

    if (!data->IsArray())
    {
        // Single Buffer
        iovec single;
        single.iov_base = node::Buffer::Data(data);
        single.iov_len = node::Buffer::Length(data);
        req->iov = single;
        req->len = single.iov_len;
        return req;
    }

    // Array of Buffers: one iovec per element, total length is their sum
    v8::Local<v8::Array> parts = data.As<v8::Array>();
    const uint32_t part_count = parts->Length();
    req->len = 0;
    for (uint32_t idx = 0; idx < part_count; idx++)
    {
        auto part = Nan::Get(parts, idx).ToLocalChecked();
        iovec piece;
        piece.iov_base = node::Buffer::Data(part);
        piece.iov_len = node::Buffer::Length(part);
        req->iov_list.push_back(piece);
        req->len += piece.iov_len;
    }
    return req;
}
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
NAN_METHOD(NodeVitastor::Write)
{
    TRACE("NodeVitastor::Write");
    if (info.Length() < 5)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))");
        return;
    }

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    uint64_t pool = get_ui64(info[0]);
    uint64_t inode = get_ui64(info[1]);

    auto req = self->get_write_request(info, 2);

    // Keep the client alive until on_write_finish() runs
    self->Ref();
    // A non-empty iov_list means the data was passed as an array of Buffers
    vitastor_c_write(self->c, ((pool << (64-POOL_ID_BITS)) | inode), req->offset, req->len, req->version,
        req->iov_list.size() ? req->iov_list.data() : &req->iov,
        req->iov_list.size() ? req->iov_list.size() : 1,
        on_write_finish, req);
}
// sync(callback(err))
NAN_METHOD(NodeVitastor::Sync)
{
    TRACE("NodeVitastor::Sync");
    if (info.Length() < 1)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to sync(callback(err))");
        return;
    }

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    v8::Local<v8::Function> callback = info[0].As<v8::Function>();
    auto req = new NodeVitastorRequest(self, callback);

    // Keep the client alive until on_write_finish() runs
    self->Ref();
    vitastor_c_sync(self->c, on_write_finish, req);
}
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
NAN_METHOD(NodeVitastor::ReadBitmap)
{
    TRACE("NodeVitastor::ReadBitmap");
    if (info.Length() < 6)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))");
        return;
    }

    NodeVitastor* self = Nan::ObjectWrap::Unwrap<NodeVitastor>(info.This());

    uint64_t pool = get_ui64(info[0]);
    uint64_t inode = get_ui64(info[1]);
    uint64_t offset = get_ui64(info[2]);
    uint64_t len = get_ui64(info[3]);
    bool with_parents = Nan::To<bool>(info[4]).FromJust();
    v8::Local<v8::Function> callback = info[5].As<v8::Function>();

    auto req = new NodeVitastorRequest(self, callback);

    // Keep the client alive until on_read_bitmap_finish() runs
    self->Ref();
    vitastor_c_read_bitmap(self->c, ((pool << (64-POOL_ID_BITS)) | inode), offset, len, with_parents, on_read_bitmap_finish, req);
}
// Invoke the JS callback with a single argument: null when <retval> is 0,
// otherwise <retval> as a 32-bit integer.
// Legal errors: EINVAL, EIO, EROFS, ENOSPC, EINTR, ENOENT
static void on_error(NodeVitastorRequest *req, Nan::Callback & nanCallback, long retval)
{
    v8::Local<v8::Value> err;
    if (retval != 0)
    {
        err = Nan::New<v8::Int32>((int32_t)retval);
    }
    else
    {
        err = Nan::Null();
    }
    nanCallback.Call(1, &err, req);
}
// Completion handler for reads. <opaque> is the NodeVitastorRequest.
// Calls the JS callback with:
// - no arguments when the object doesn't exist (-ENOENT)
// - (err) on failure; a short read is reported as -EIO
// - (null, buffer, version) on success; the buffer takes over the request's memory
// Then releases the client reference taken when the request was started and frees the request.
void NodeVitastor::on_read_finish(void *opaque, long retval, uint64_t version)
{
    TRACE("NodeVitastor::on_read_finish");
    Nan::HandleScope scope;
    NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
    Nan::Callback nanCallback(Nan::New(req->callback));
    if (retval == -ENOENT)
    {
        free(req->iov.iov_base);
        nanCallback.Call(0, NULL, req);
    }
    else if (retval < 0 || (uint64_t)retval != req->len)
    {
        free(req->iov.iov_base);
        // A non-negative retval here means fewer bytes than requested: passing it through
        // would give the callback err == null (or a positive number) without any buffer
        on_error(req, nanCallback, retval < 0 ? retval : -EIO);
    }
    else
    {
        v8::Local<v8::Value> args[3];
        args[0] = Nan::Null();
        args[1] = Nan::NewBuffer((char*)req->iov.iov_base, req->iov.iov_len).ToLocalChecked();
        args[2] = v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), version);
        nanCallback.Call(3, args, req);
    }
    req->cli->Unref();
    delete req;
}
// Completion handler for writes and syncs. <opaque> is the NodeVitastorRequest.
// Calls the JS callback with (null) when <retval> equals the request length
// (0 for syncs) and with (err) otherwise; a short write is reported as -EIO.
// Then releases the client reference taken when the request was started and frees the request.
void NodeVitastor::on_write_finish(void *opaque, long retval)
{
    TRACE("NodeVitastor::on_write_finish");
    Nan::HandleScope scope;
    NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
    Nan::Callback nanCallback(Nan::New(req->callback));
    if (retval < 0 || (uint64_t)retval != req->len)
    {
        // A non-negative retval here means an incomplete write: passing it through
        // would give the callback err == null or a positive number
        on_error(req, nanCallback, retval < 0 ? retval : -EIO);
    }
    else
    {
        v8::Local<v8::Value> args[1];
        args[0] = Nan::Null();
        nanCallback.Call(1, args, req);
    }
    req->cli->Unref();
    delete req;
}
void NodeVitastor::on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap)
{
TRACE("NodeVitastor::on_read_bitmap_finish");
Nan::HandleScope scope;
NodeVitastorRequest *req = (NodeVitastorRequest *)opaque;
Nan::Callback nanCallback(Nan::New(req->callback));
if (retval == -ENOENT)
nanCallback.Call(0, NULL, req);
else if (retval < 0)
on_error(req, nanCallback, retval);
else
{
v8::Local<v8::Value> args[2];
args[0] = Nan::Null();
args[1] = Nan::NewBuffer((char*)bitmap, (retval+7)/8).ToLocalChecked();
nanCallback.Call(2, args, req);
}
req->cli->Unref();
delete req;
}
//NAN_METHOD(NodeVitastor::Destroy)
//{
// TRACE("NodeVitastor::Destroy");
//}
//////////////////////////////////////////////////
// NodeVitastorImage
//////////////////////////////////////////////////
// new Image(client, name): starts watching the inode called <name> through <client>.
// Requests issued before the watch is ready are queued (see exec_or_wait()).
NAN_METHOD(NodeVitastorImage::Create)
{
    TRACE("NodeVitastorImage::Create");
    if (info.Length() < 2)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to Image(client, name)");
        return;
    }

    v8::Local<v8::Object> parent = info[0].As<v8::Object>();
    std::string name = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
    NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);

    NodeVitastorImage *img = new NodeVitastorImage();
    img->Wrap(info.This());

    img->cli = cli;
    img->name = name;

    // Reference both the image and its client while the watch is being set up;
    // the client reference is dropped again in ~NodeVitastorImage()
    img->Ref();
    cli->Ref();
    vitastor_c_watch_inode(cli->c, (char*)img->name.c_str(), on_watch_start, img);

    info.GetReturnValue().Set(info.This());
}
// Close the inode watch (if it was established) and drop the reference
// to the client taken in Create()
NodeVitastorImage::~NodeVitastorImage()
{
if (watch)
{
vitastor_c_close_watch(cli->c, watch);
watch = NULL;
}
// Must come after closing the watch, which still uses cli->c
cli->Unref();
}
// read(offset, len, callback(err, buffer, version))
NAN_METHOD(NodeVitastorImage::Read)
{
    TRACE("NodeVitastorImage::Read");
    if (info.Length() < 3)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to read(offset, len, callback(err, buffer, version))");
        return;
    }

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    auto req = img->cli->get_read_request(info, 0);
    if (!req)
    {
        // Buffer allocation failed, the JS exception is already set
        return;
    }
    req->img = img;
    req->op = NODE_VITASTOR_READ;

    img->exec_or_wait(req);
}
// write(offset, buffer, { version }?, callback(err))
NAN_METHOD(NodeVitastorImage::Write)
{
    TRACE("NodeVitastorImage::Write");
    if (info.Length() < 3)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to write(offset, buffer, { version }?, callback(err))");
        return;
    }

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    auto req = img->cli->get_write_request(info, 0);
    req->img = img;
    req->op = NODE_VITASTOR_WRITE;

    img->exec_or_wait(req);
}
// sync(callback(err))
NAN_METHOD(NodeVitastorImage::Sync)
{
    TRACE("NodeVitastorImage::Sync");
    if (info.Length() < 1)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to sync(callback(err))");
        return;
    }

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    v8::Local<v8::Function> callback = info[0].As<v8::Function>();
    auto req = new NodeVitastorRequest(img->cli, callback);
    req->img = img;
    req->op = NODE_VITASTOR_SYNC;

    img->exec_or_wait(req);
}
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
NAN_METHOD(NodeVitastorImage::ReadBitmap)
{
    TRACE("NodeVitastorImage::ReadBitmap");
    if (info.Length() < 4)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))");
        return;
    }

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    uint64_t offset = get_ui64(info[0]);
    uint64_t len = get_ui64(info[1]);
    bool with_parents = Nan::To<bool>(info[2]).FromJust();
    v8::Local<v8::Function> callback = info[3].As<v8::Function>();

    auto req = new NodeVitastorRequest(img->cli, callback);
    req->img = img;
    req->op = NODE_VITASTOR_READ_BITMAP;
    req->offset = offset;
    req->len = len;
    req->with_parents = with_parents;

    img->exec_or_wait(req);
}
// get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))
NAN_METHOD(NodeVitastorImage::GetInfo)
{
    TRACE("NodeVitastorImage::GetInfo");
    if (info.Length() < 1)
    {
        // ThrowError only schedules the JS exception, so we have to stop here ourselves
        Nan::ThrowError("Not enough arguments to get_info(callback({ num, name, size, parent_id?, readonly?, meta?, mod_revision, block_size, bitmap_granularity, immediate_commit }))");
        return;
    }

    NodeVitastorImage* img = Nan::ObjectWrap::Unwrap<NodeVitastorImage>(info.This());

    v8::Local<v8::Function> callback = info[0].As<v8::Function>();
    auto req = new NodeVitastorRequest(img->cli, callback);
    req->img = img;
    req->op = NODE_VITASTOR_GET_INFO;

    img->exec_or_wait(req);
}
// Runs the request right away when the inode watch is available,
// otherwise parks it in on_init until on_watch_start() replays it.
void NodeVitastorImage::exec_or_wait(NodeVitastorRequest *req)
{
    if (watch)
    {
        exec_request(req);
        return;
    }
    // Need to wait for initialisation
    on_init.push_back(req);
}
// Dispatches a request to the Vitastor C client according to req->op.
// Must only be called once `watch` is set (see exec_or_wait()).
// READ, WRITE, SYNC and READ_BITMAP take a reference on the client wrapper (cli->Ref())
// and pass req as the opaque pointer to a NodeVitastor::on_*_finish completion callback;
// presumably that callback drops the reference and frees req - TODO confirm there.
// GET_INFO is answered synchronously: the JS callback is invoked and req is deleted here.
void NodeVitastorImage::exec_request(NodeVitastorRequest *req)
{
if (req->op == NODE_VITASTOR_READ)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
cli->Ref();
vitastor_c_read(cli->c, ino, req->offset, req->len, &req->iov, 1, NodeVitastor::on_read_finish, req);
}
else if (req->op == NODE_VITASTOR_WRITE)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
cli->Ref();
// Use the iovec list when it is non-empty, otherwise the single req->iov
vitastor_c_write(cli->c, ino, req->offset, req->len, req->version,
req->iov_list.size() ? req->iov_list.data() : &req->iov,
req->iov_list.size() ? req->iov_list.size() : 1,
NodeVitastor::on_write_finish, req);
}
else if (req->op == NODE_VITASTOR_SYNC)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
uint32_t imm = vitastor_c_inode_get_immediate_commit(cli->c, ino);
cli->Ref();
if (imm != IMMEDIATE_ALL)
{
vitastor_c_sync(cli->c, NodeVitastor::on_write_finish, req);
}
else
{
// With immediate_commit == IMMEDIATE_ALL no sync is sent:
// the completion handler is called directly with retval 0
NodeVitastor::on_write_finish(req, 0);
}
}
else if (req->op == NODE_VITASTOR_READ_BITMAP)
{
uint64_t ino = vitastor_c_inode_get_num(watch);
cli->Ref();
vitastor_c_read_bitmap(cli->c, ino, req->offset, req->len, req->with_parents, NodeVitastor::on_read_bitmap_finish, req);
}
else if (req->op == NODE_VITASTOR_GET_INFO)
{
uint64_t size = vitastor_c_inode_get_size(watch);
uint64_t num = vitastor_c_inode_get_num(watch);
uint32_t block_size = vitastor_c_inode_get_block_size(cli->c, num);
uint32_t bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(cli->c, num);
int readonly = vitastor_c_inode_get_readonly(watch);
uint32_t immediate_commit = vitastor_c_inode_get_immediate_commit(cli->c, num);
uint64_t parent_id = vitastor_c_inode_get_parent_id(watch);
// NOTE(review): ownership of the returned meta string is not visible here -
// confirm whether the caller is expected to free it
char *meta = vitastor_c_inode_get_meta(watch);
uint64_t mod_revision = vitastor_c_inode_get_mod_revision(watch);
Nan::HandleScope scope;
v8::Local<v8::Object> res = Nan::New<v8::Object>();
// The inode number is reported split into its pool and in-pool parts
Nan::Set(res, Nan::New<v8::String>("pool_id").ToLocalChecked(), Nan::New<v8::Number>(INODE_POOL(num)));
Nan::Set(res, Nan::New<v8::String>("inode_num").ToLocalChecked(), Nan::New<v8::Number>(INODE_NO_POOL(num)));
// Values below 2^53 are returned as a JS Number, larger ones as a BigInt
if (size < ((uint64_t)1<<53))
Nan::Set(res, Nan::New<v8::String>("size").ToLocalChecked(), Nan::New<v8::Number>(size));
else
Nan::Set(res, Nan::New<v8::String>("size").ToLocalChecked(), v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), size));
// Parent fields are only present when parent_id is non-zero
if (parent_id)
{
Nan::Set(res, Nan::New<v8::String>("parent_pool_id").ToLocalChecked(), Nan::New<v8::Number>(INODE_POOL(parent_id)));
Nan::Set(res, Nan::New<v8::String>("parent_inode_num").ToLocalChecked(), Nan::New<v8::Number>(INODE_NO_POOL(parent_id)));
}
Nan::Set(res, Nan::New<v8::String>("readonly").ToLocalChecked(), Nan::New((bool)readonly));
if (meta)
{
// meta is parsed as JSON; ToLocalChecked() aborts if parsing fails
Nan::JSON nanJSON;
Nan::Set(res, Nan::New<v8::String>("meta").ToLocalChecked(), nanJSON.Parse(Nan::New<v8::String>(meta).ToLocalChecked()).ToLocalChecked());
}
if (mod_revision < ((uint64_t)1<<53))
Nan::Set(res, Nan::New<v8::String>("mod_revision").ToLocalChecked(), Nan::New<v8::Number>(mod_revision));
else
Nan::Set(res, Nan::New<v8::String>("mod_revision").ToLocalChecked(), v8::BigInt::NewFromUnsigned(v8::Isolate::GetCurrent(), mod_revision));
Nan::Set(res, Nan::New<v8::String>("block_size").ToLocalChecked(), Nan::New(block_size));
Nan::Set(res, Nan::New<v8::String>("bitmap_granularity").ToLocalChecked(), Nan::New(bitmap_granularity));
Nan::Set(res, Nan::New<v8::String>("immediate_commit").ToLocalChecked(), Nan::New(immediate_commit));
// The callback receives the info object as its only argument
Nan::Callback nanCallback(Nan::New(req->callback));
v8::Local<v8::Value> args[1];
args[0] = res;
nanCallback.Call(1, args, req);
delete req;
}
}
void NodeVitastorImage::on_watch_start(void *opaque, long retval)
{
NodeVitastorImage *img = (NodeVitastorImage *)opaque;
{
img->watch = (void*)retval;
auto on_init = std::move(img->on_init);
for (auto req: on_init)
{
img->exec_request(req);
}
}
img->Unref();
}
//////////////////////////////////////////////////
// NodeVitastorKV
//////////////////////////////////////////////////
Nan::Persistent<v8::Function> NodeVitastorKV::listing_class;
// constructor(node_vitastor)
// Creates a KV database wrapper on top of an existing client object and
// takes a reference on that client (released in the destructor).
NAN_METHOD(NodeVitastorKV::Create)
{
    TRACE("NodeVitastorKV::Create");
    if (info.Length() < 1)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly,
        // otherwise an absent info[0] would be unwrapped below
        Nan::ThrowError("Not enough arguments to new KV(client)");
        return;
    }

    v8::Local<v8::Object> parent = info[0].As<v8::Object>();
    NodeVitastor *cli = Nan::ObjectWrap::Unwrap<NodeVitastor>(parent);

    NodeVitastorKV *kv = new NodeVitastorKV();
    kv->cli = cli;
    {
        kv->dbw = new vitastorkv_dbw_t((cluster_client_t*)vitastor_c_get_internal_client(cli->c));
    }

    kv->Wrap(info.This());
    cli->Ref();
    info.GetReturnValue().Set(info.This());
}
// Destroys the KV database object and releases the client reference taken in Create().
NodeVitastorKV::~NodeVitastorKV()
{
// dbw was constructed from the client's internal cluster client, so it is
// deleted before the client reference is dropped
delete dbw;
cli->Unref();
}
// open(pool_id, inode_num, { ...config }, callback(err))
// Opens the KV database stored in the given inode. Every own property of the
// config object is converted to a string. The callback receives null on success
// or the integer result code on failure.
NAN_METHOD(NodeVitastorKV::Open)
{
    TRACE("NodeVitastorKV::Open");
    if (info.Length() < 4)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        Nan::ThrowError("Not enough arguments to open(pool_id, inode_num, { ...config }, callback(err))");
        return;
    }

    NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    uint64_t inode_id = INODE_WITH_POOL(get_ui64(info[0]), get_ui64(info[1]));

    v8::Local<v8::Object> jsParams = info[2].As<v8::Object>();
    v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
    std::map<std::string, std::string> cfg;
    for (uint32_t i = 0; i < keys->Length(); i++)
    {
        auto key = Nan::Get(keys, i).ToLocalChecked();
        cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
    }

    v8::Local<v8::Function> callback = info[3].As<v8::Function>();
    auto req = new NodeVitastorRequest(kv->cli, callback);

    // Keep the wrapper referenced until the completion callback has run
    kv->Ref();
    kv->dbw->open(inode_id, cfg, [kv, req](int res)
    {
        Nan::HandleScope scope;
        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[1];
        args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
        nanCallback.Call(1, args, req);
        delete req;
        kv->Unref();
    });
}
// close(callback())
// Closes the database. The callback is invoked without arguments once closing is done.
NAN_METHOD(NodeVitastorKV::Close)
{
    TRACE("NodeVitastorKV::Close");
    if (info.Length() < 1)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        Nan::ThrowError("Not enough arguments to close(callback(err))");
        return;
    }

    NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    v8::Local<v8::Function> callback = info[0].As<v8::Function>();
    auto req = new NodeVitastorRequest(kv->cli, callback);

    // Keep the wrapper referenced until the completion callback has run
    kv->Ref();
    kv->dbw->close([kv, req]()
    {
        Nan::HandleScope scope;
        Nan::Callback nanCallback(Nan::New(req->callback));
        nanCallback.Call(0, NULL, req);
        delete req;
        kv->Unref();
    });
}
// set_config({ ...config })
// Passes every own property of the config object, converted to a string,
// to the database's set_config().
NAN_METHOD(NodeVitastorKV::SetConfig)
{
    TRACE("NodeVitastorKV::SetConfig");
    if (info.Length() < 1)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        Nan::ThrowError("Not enough arguments to set_config({ ...config })");
        return;
    }

    NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    v8::Local<v8::Object> jsParams = info[0].As<v8::Object>();
    v8::Local<v8::Array> keys = Nan::GetOwnPropertyNames(jsParams).ToLocalChecked();
    std::map<std::string, std::string> cfg;
    for (uint32_t i = 0; i < keys->Length(); i++)
    {
        auto key = Nan::Get(keys, i).ToLocalChecked();
        cfg[std::string(*Nan::Utf8String(key))] = std::string(*Nan::Utf8String(Nan::Get(jsParams, key).ToLocalChecked()));
    }

    kv->dbw->set_config(cfg);
}
// get_size()
// Returns the value of dbw->get_size(): as a Number when it is below 2^53,
// as a BigInt otherwise.
NAN_METHOD(NodeVitastorKV::GetSize)
{
    TRACE("NodeVitastorKV::GetSize");

    NodeVitastorKV* self = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
    auto db_size = self->dbw->get_size();

    v8::Local<v8::Value> result;
    if (db_size < ((uint64_t)1<<53))
    {
        result = Nan::New<v8::Number>(db_size);
    }
    else
    {
        result = v8::BigInt::NewFromUnsigned(info.GetIsolate(), db_size);
    }
    info.GetReturnValue().Set(result);
}
// Shared implementation of get() and get_cached(): reads info[0] as the key and
// info[1] as the callback(err, value), then asks the database for the value.
// On success the callback gets (null, value), on failure (result code, null).
void NodeVitastorKV::get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache)
{
    NodeVitastorKV* self = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    // FIXME: Handle Buffer too
    std::string key(*Nan::Utf8String(info[0].As<v8::String>()));

    v8::Local<v8::Function> js_callback = info[1].As<v8::Function>();
    auto request = new NodeVitastorRequest(self->cli, js_callback);

    auto on_done = [self, request](int res, const std::string & value)
    {
        Nan::HandleScope scope;
        v8::Local<v8::Value> args[2];
        if (res)
        {
            args[0] = Nan::New<v8::Int32>(res);
            args[1] = Nan::Null();
        }
        else
        {
            args[0] = Nan::Null();
            args[1] = Nan::New<v8::String>(value).ToLocalChecked();
        }
        Nan::Callback nanCallback(Nan::New(request->callback));
        nanCallback.Call(2, args, request);
        delete request;
        self->Unref();
    };

    // Keep the wrapper referenced until on_done has run
    self->Ref();
    self->dbw->get(key, on_done, allow_cache);
}
// get(key, callback(err, value))
// Reads a key with allow_cache = false.
NAN_METHOD(NodeVitastorKV::Get)
{
    TRACE("NodeVitastorKV::Get");
    if (info.Length() < 2)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        // so get_impl() never reads the missing arguments
        Nan::ThrowError("Not enough arguments to get(key, callback(err, value))");
        return;
    }
    get_impl(info, false);
}
// get_cached(key, callback(err, value))
// Reads a key with allow_cache = true.
NAN_METHOD(NodeVitastorKV::GetCached)
{
    TRACE("NodeVitastorKV::GetCached");
    if (info.Length() < 2)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        // so get_impl() never reads the missing arguments
        Nan::ThrowError("Not enough arguments to get_cached(key, callback(err, value))");
        return;
    }
    get_impl(info, true);
}
// Wraps the JS compare function stored in cas_req into a native predicate.
// The JS function receives the old value (or null when res is non-zero);
// its result converted to bool is returned, or false if the call produced
// no value.
static std::function<bool(int, const std::string &)> make_cas_callback(NodeVitastorRequest *cas_req)
{
    auto compare = [cas_req](int res, const std::string & value) -> bool
    {
        Nan::HandleScope scope;

        v8::Local<v8::Value> old_value;
        if (res)
            old_value = Nan::Null();
        else
            old_value = Nan::New<v8::String>(value).ToLocalChecked();

        Nan::Callback js_compare(Nan::New(cas_req->callback));
        Nan::MaybeLocal<v8::Value> verdict = js_compare.Call(1, &old_value, cas_req);
        return !verdict.IsEmpty() && Nan::To<bool>(verdict.ToLocalChecked()).FromJust();
    };
    return compare;
}
// set(key, value, callback(err), cas_compare(old_value)?)
// Writes a key. When cas_compare is given it is wrapped with make_cas_callback()
// and handed to the database as the compare-and-set predicate.
// The callback receives null on success or the integer result code on failure.
NAN_METHOD(NodeVitastorKV::Set)
{
    TRACE("NodeVitastorKV::Set");
    if (info.Length() < 3)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        Nan::ThrowError("Not enough arguments to set(key, value, callback(err), cas_compare(old_value)?)");
        return;
    }

    NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    // FIXME: Handle Buffer too
    std::string key(*Nan::Utf8String(info[0].As<v8::String>()));
    std::string value(*Nan::Utf8String(info[1].As<v8::String>()));

    v8::Local<v8::Function> callback = info[2].As<v8::Function>();
    NodeVitastorRequest *req = new NodeVitastorRequest(kv->cli, callback), *cas_req = NULL;

    std::function<bool(int, const std::string &)> cas_cb;
    // Only a real function may be used as the compare callback: IsObject() would
    // also accept plain objects, which are not callable
    if (info.Length() > 3 && info[3]->IsFunction())
    {
        v8::Local<v8::Function> cas_callback = info[3].As<v8::Function>();
        cas_req = new NodeVitastorRequest(kv->cli, cas_callback);
        cas_cb = make_cas_callback(cas_req);
    }

    // Keep the wrapper referenced until the completion callback has run
    kv->Ref();
    kv->dbw->set(key, value, [kv, req, cas_req](int res)
    {
        Nan::HandleScope scope;
        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[1];
        args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
        nanCallback.Call(1, args, req);
        delete req;
        if (cas_req)
            delete cas_req;
        kv->Unref();
    }, cas_cb);
}
// del(key, callback(err), cas_compare(old_value)?)
// Deletes a key. When cas_compare is given it is wrapped with make_cas_callback()
// and handed to the database as the compare-and-set predicate.
// The callback receives null on success or the integer result code on failure.
NAN_METHOD(NodeVitastorKV::Del)
{
    TRACE("NodeVitastorKV::Del");
    if (info.Length() < 2)
    {
        // Nan::ThrowError() does not abort the native function - return explicitly
        Nan::ThrowError("Not enough arguments to del(key, callback(err), cas_compare(old_value)?)");
        return;
    }

    NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());

    // FIXME: Handle Buffer too
    std::string key(*Nan::Utf8String(info[0].As<v8::String>()));

    v8::Local<v8::Function> callback = info[1].As<v8::Function>();
    NodeVitastorRequest *req = new NodeVitastorRequest(kv->cli, callback), *cas_req = NULL;

    std::function<bool(int, const std::string &)> cas_cb;
    // Only a real function may be used as the compare callback: IsObject() would
    // also accept plain objects, which are not callable
    if (info.Length() > 2 && info[2]->IsFunction())
    {
        v8::Local<v8::Function> cas_callback = info[2].As<v8::Function>();
        cas_req = new NodeVitastorRequest(kv->cli, cas_callback);
        cas_cb = make_cas_callback(cas_req);
    }

    // Keep the wrapper referenced until the completion callback has run
    kv->Ref();
    kv->dbw->del(key, [kv, req, cas_req](int res)
    {
        Nan::HandleScope scope;
        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[1];
        args[0] = !res ? v8::Local<v8::Value>(Nan::Null()) : v8::Local<v8::Value>(Nan::New<v8::Int32>(res));
        nanCallback.Call(1, args, req);
        delete req;
        if (cas_req)
            delete cas_req;
        kv->Unref();
    }, cas_cb);
}
// list(start_key?)
// Returns a new listing object (listing_class) constructed as
// new Listing(this_kv, start_key?).
NAN_METHOD(NodeVitastorKV::List)
{
    TRACE("NodeVitastorKV::List");

    v8::Local<v8::Function> cons = Nan::New(listing_class);
    v8::Local<v8::Value> args[2];
    args[0] = info.This();
    int narg = 1;
    if (info.Length() > 0 && info[0]->IsString())
    {
        // start_key is the FIRST argument of list(), as documented
        args[1] = info[0];
        narg = 2;
    }
    else if (info.Length() > 1 && info[1]->IsString())
    {
        // Previously the key was (mistakenly) read from the second argument;
        // still accept it there so existing callers keep working
        args[1] = info[1];
        narg = 2;
    }
    info.GetReturnValue().Set(Nan::NewInstance(cons, narg, args).ToLocalChecked());
}
/*NAN_METHOD(NodeVitastorKV::Destroy)
{
TRACE("NodeVitastorKV::Destroy");
NodeVitastorKV* kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info.This());
if (!kv->dead)
kv->Unref();
}*/
//////////////////////////////////////////////////
// NodeVitastorKVListing
//////////////////////////////////////////////////
// constructor(node_vitastor_kv, start_key?)
// Starts a listing on the given KV object, optionally from start_key, and
// takes a reference on the KV wrapper (released in the destructor).
NAN_METHOD(NodeVitastorKVListing::Create)
{
    TRACE("NodeVitastorKVListing::Create");

    NodeVitastorKV *kv = Nan::ObjectWrap::Unwrap<NodeVitastorKV>(info[0].As<v8::Object>());

    // FIXME: Handle Buffer too
    std::string start_key;
    const bool has_start_key = info.Length() > 1 && info[1]->IsString();
    if (has_start_key)
    {
        start_key = std::string(*Nan::Utf8String(info[1].As<v8::String>()));
    }

    NodeVitastorKVListing *listing = new NodeVitastorKVListing();
    listing->kv = kv;
    listing->handle = kv->dbw->list_start(start_key);

    listing->Wrap(info.This());
    kv->Ref();
    info.GetReturnValue().Set(info.This());
}
// Closes the listing handle if it is still open, frees the stored iteration
// request and releases the reference on the parent KV wrapper.
NodeVitastorKVListing::~NodeVitastorKVListing()
{
    if (handle != NULL)
    {
        kv->dbw->list_close(handle);
        handle = NULL;
    }
    // deleting a NULL pointer is a no-op, so no check is needed
    delete iter;
    iter = NULL;
    kv->Unref();
}
// next(callback(err, key, value)?)
// Requests the next listing entry. When a callback is passed it replaces the stored
// one; when omitted, the callback stored by a previous call is reused.
// The callback receives (res, key, value) with key and value set to null when res != 0.
// On an already closed listing the stored callback (if any) is called with -EINVAL.
NAN_METHOD(NodeVitastorKVListing::Next)
{
    TRACE("NodeVitastorKVListing::Next");

    NodeVitastorKVListing* list = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());

    if (info.Length() > 0)
    {
        v8::Local<v8::Function> callback = info[0].As<v8::Function>();
        if (list->iter)
        {
            delete list->iter;
        }
        list->iter = new NodeVitastorRequest(list->kv->cli, callback);
    }
    if (!list->handle)
    {
        // Already closed
        if (list->iter)
        {
            auto req = list->iter;
            list->iter = NULL;
            Nan::Callback nanCallback(Nan::New(req->callback));
            v8::Local<v8::Value> args[1];
            args[0] = Nan::New<v8::Int32>(-EINVAL);
            nanCallback.Call(1, args, req);
            delete req;
        }
        return;
    }
    if (!list->iter)
    {
        // No callback was passed now or on any previous call: the completion
        // handler below would dereference a NULL request
        Nan::ThrowError("Not enough arguments to next(callback(err, key, value)): no callback was set");
        return;
    }

    list->kv->Ref();
    list->kv->dbw->list_next(list->handle, [list](int res, const std::string & key, const std::string & value)
    {
        auto req = list->iter;
        list->iter = NULL;
        if (!req)
        {
            // The stored request was dropped (e.g. by close()) before completion
            list->kv->Unref();
            return;
        }
        Nan::HandleScope scope;
        Nan::Callback nanCallback(Nan::New(req->callback));
        v8::Local<v8::Value> args[3];
        args[0] = Nan::New<v8::Int32>(res);
        args[1] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(key).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
        args[2] = !res ? v8::Local<v8::Value>(Nan::New<v8::String>(value).ToLocalChecked()) : v8::Local<v8::Value>(Nan::Null());
        nanCallback.Call(3, args, req);
        // If the JS callback called next(cb) itself, a new request is already stored;
        // otherwise keep this one so a later next() without arguments can reuse it
        if (list->iter)
            delete req;
        else
            list->iter = req;
        list->kv->Unref();
    });
}
// close()
// Closes the listing handle (if still open) and discards the stored callback request.
NAN_METHOD(NodeVitastorKVListing::Close)
{
    TRACE("NodeVitastorKVListing::Close");

    NodeVitastorKVListing* self = Nan::ObjectWrap::Unwrap<NodeVitastorKVListing>(info.This());

    if (self->handle != NULL)
    {
        self->kv->dbw->list_close(self->handle);
        self->handle = NULL;
    }
    // deleting a NULL pointer is a no-op, so no check is needed
    delete self->iter;
    self->iter = NULL;
}

View File

@ -1,136 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#ifndef NODE_VITASTOR_CLIENT_H
#define NODE_VITASTOR_CLIENT_H
#include <nan.h>
#include <vitastor_c.h>
#include <vitastor_kv.h>
class NodeVitastorRequest;
// JS-visible wrapper around a Vitastor C client handle (vitastor_c).
// Exposes inode-addressed read/write/sync/read_bitmap operations.
class NodeVitastor: public Nan::ObjectWrap
{
public:
// constructor({ ...config })
static NAN_METHOD(Create);
// read(pool, inode, offset, len, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(pool, inode, offset, buf: Buffer | Buffer[], { version }?, callback(err))
static NAN_METHOD(Write);
// sync(callback(err))
static NAN_METHOD(Sync);
// read_bitmap(pool, inode, offset, len, with_parents, callback(err, bitmap_buffer))
static NAN_METHOD(ReadBitmap);
// // destroy()
// static NAN_METHOD(Destroy);
~NodeVitastor();
private:
// Underlying C client handle
vitastor_c *c = NULL;
// presumably the descriptor watched by poll_watcher - TODO confirm in the implementation
int eventfd = -1;
uv_poll_t poll_watcher;
NodeVitastor();
// libuv poll callback
static void on_io_readable(uv_poll_t* handle, int status, int revents);
// Completion callbacks passed to the C client; opaque is the request pointer
static void on_read_finish(void *opaque, long retval, uint64_t version);
static void on_write_finish(void *opaque, long retval);
static void on_read_bitmap_finish(void *opaque, long retval, uint8_t *bitmap);
// Build a request object from JS call arguments starting at index argpos
NodeVitastorRequest* get_read_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
NodeVitastorRequest* get_write_request(const Nan::FunctionCallbackInfo<v8::Value> & info, int argpos);
friend class NodeVitastorImage;
friend class NodeVitastorKV;
friend class NodeVitastorKVListing;
};
// JS-visible wrapper around a named image. Requests issued before the inode
// watch is ready are queued in on_init and executed from on_watch_start().
class NodeVitastorImage: public Nan::ObjectWrap
{
public:
// constructor(node_vitastor, name)
static NAN_METHOD(Create);
// read(offset, len, callback(err, buffer, version))
static NAN_METHOD(Read);
// write(offset, buf: Buffer | Buffer[], { version }?, callback(err))
static NAN_METHOD(Write);
// sync(callback(err))
static NAN_METHOD(Sync);
// read_bitmap(offset, len, with_parents, callback(err, bitmap_buffer))
static NAN_METHOD(ReadBitmap);
// get_info(callback({ pool_id, inode_num, size, parent_pool_id?, parent_inode_num?, readonly, meta?,
//   mod_revision, block_size, bitmap_granularity, immediate_commit }))
static NAN_METHOD(GetInfo);
~NodeVitastorImage();
private:
// Client this image belongs to
NodeVitastor *cli = NULL;
std::string name;
// Inode watch handle; NULL until on_watch_start() has been called
void *watch = NULL;
// Requests waiting for the watch to become available
std::vector<NodeVitastorRequest*> on_init;
Nan::Persistent<v8::Object> cliObj;
// Watch start callback: stores the watch and replays on_init
static void on_watch_start(void *opaque, long retval);
// Execute a request immediately (requires watch to be set)
void exec_request(NodeVitastorRequest *req);
// Execute a request now if watch is set, otherwise queue it in on_init
void exec_or_wait(NodeVitastorRequest *req);
};
// JS-visible wrapper around a Vitastor key-value database (vitastorkv_dbw_t)
// that works through an existing NodeVitastor client.
class NodeVitastorKV: public Nan::ObjectWrap
{
public:
// constructor(node_vitastor)
static NAN_METHOD(Create);
// open(pool_id, inode_num, { ...config }, callback(err))
static NAN_METHOD(Open);
// set_config({ ...config })
static NAN_METHOD(SetConfig);
// close(callback())
static NAN_METHOD(Close);
// get_size()
static NAN_METHOD(GetSize);
// get(key, callback(err, value))
static NAN_METHOD(Get);
// get_cached(key, callback(err, value))
static NAN_METHOD(GetCached);
// set(key, value, callback(err), cas_compare(old_value)?)
static NAN_METHOD(Set);
// del(key, callback(err), cas_compare(old_value)?)
static NAN_METHOD(Del);
// list(start_key?)
static NAN_METHOD(List);
~NodeVitastorKV();
// Constructor function of the listing class, used by list()
static Nan::Persistent<v8::Function> listing_class;
private:
// Client this database works through
NodeVitastor *cli = NULL;
// Underlying database object
vitastorkv_dbw_t *dbw = NULL;
// Common implementation of get() and get_cached()
static void get_impl(const Nan::FunctionCallbackInfo<v8::Value> & info, bool allow_cache);
friend class NodeVitastorKVListing;
};
// JS-visible iterator over the keys of a NodeVitastorKV database.
class NodeVitastorKVListing: public Nan::ObjectWrap
{
public:
// constructor(node_vitastor_kv, start_key?)
static NAN_METHOD(Create);
// next(callback(err, key, value)?)
static NAN_METHOD(Next);
// close()
static NAN_METHOD(Close);
~NodeVitastorKVListing();
private:
// Parent database
NodeVitastorKV *kv = NULL;
// Listing handle returned by list_start(); NULL once closed
void *handle = NULL;
// Request holding the JS callback used by next()
NodeVitastorRequest *iter = NULL;
};
#endif

View File

@ -1 +0,0 @@
// Load the compiled native addon through the 'bindings' package and re-export it as this module
module.exports = require('bindings')('addon.node');

View File

@ -1,24 +0,0 @@
{
"name": "vitastor",
"version": "1.7.0",
"description": "Low-level native bindings to Vitastor client library",
"main": "index.js",
"keywords": [
"storage",
"sds",
"vitastor"
],
"repository": {
"type": "git",
"url": "git://git.yourcmc.ru/vitalif/vitastor.git"
},
"scripts": {
"build": "node-gyp rebuild"
},
"author": "Vitaliy Filippov",
"license": "VNPL-2.0",
"dependencies": {
"bindings": "1.5.0",
"nan": "^2.19.0"
}
}

View File

@ -1 +0,0 @@
See [../docs/installation/opennebula.en.md](../docs/installation/opennebula.en.md).

View File

@ -1,29 +0,0 @@
#!/bin/bash
set -e
# Apply patch file $2 to file $1 unless it is already applied.
# A reverse dry run tells whether the patch is already in place; if it is not,
# the patch is applied for real.
# Side effects: sets the global already_applied=0 when the patch was missing and
# applied_ok=0 when applying it failed.
reapply_patch() {
    local target="$1"
    local patch_file="$2"
    # Paths are quoted so that whitespace or glob characters cannot split them
    if ! patch -f --dry-run -F 0 -R "$target" < "$patch_file" >/dev/null; then
        already_applied=0
        if ! patch --no-backup-if-mismatch -r - -F 0 -f "$target" < "$patch_file"; then
            applied_ok=0
            echo "ERROR: Failed to patch file $target, please apply the patch $patch_file manually"
        fi
    fi
}
# Main flow: re-apply the three Vitastor patches and report the overall result.
echo "Reapplying Vitastor patches to OpenNebula's oned.conf, vmm_execrc and downloader.sh"
# Flags updated by reapply_patch(): both stay 1 only if nothing had to be done / nothing failed
already_applied=1
applied_ok=1
reapply_patch /var/lib/one/remotes/datastore/downloader.sh /var/lib/one/remotes/datastore/vitastor/downloader-vitastor.sh.diff
reapply_patch /etc/one/oned.conf /var/lib/one/remotes/datastore/vitastor/oned.conf.diff
reapply_patch /etc/one/vmm_exec/vmm_execrc /var/lib/one/remotes/datastore/vitastor/vmm_execrc.diff
# Nothing is printed here when a patch failed: reapply_patch already reported the error
if [[ "$already_applied" = 1 ]]; then
echo "OK: Vitastor OpenNebula patches are already applied"
elif [[ "$applied_ok" = 1 ]]; then
echo "OK: Vitastor OpenNebula patches successfully applied"
fi
# If the local libvirt-qemu AppArmor abstraction exists, make sure it allows
# reading /etc/vitastor/vitastor.conf (the rule is appended only once)
if [[ -f /etc/apparmor.d/local/abstractions/libvirt-qemu ]]; then
if ! grep -q /etc/vitastor/vitastor.conf /etc/apparmor.d/local/abstractions/libvirt-qemu; then
echo ' "/etc/vitastor/vitastor.conf" r,' >> /etc/apparmor.d/local/abstractions/libvirt-qemu
fi
fi

View File

@ -1,76 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to copy a VM image (SRC) to the image repository as DST.
# Arguments: $1 = image ID; the driver action data is read from stdin.
# Output: "<image name> raw" on stdout.

# -------- Set up the environment to source common tools & conf ------------
if [ -z "${ONE_LOCATION}" ]; then
    LIB_LOCATION=/usr/lib/one
else
    LIB_LOCATION=$ONE_LOCATION/lib
fi

. $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get cp and datastore arguments from OpenNebula core ------------
DRV_ACTION=`cat -`
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb prints the requested values separated by NUL bytes;
# collect them into XPATH_ELEMENTS in request order
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS[i++]="$element"
done < <($XPATH \
/DS_DRIVER_ACTION_DATA/DATASTORE/BASE_PATH \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/IMAGE_PREFIX \
/DS_DRIVER_ACTION_DATA/IMAGE/PATH \
/DS_DRIVER_ACTION_DATA/IMAGE/SIZE \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

# The order below must match the order of the XPath expressions above
unset i
BASE_PATH="${XPATH_ELEMENTS[i++]}"
BRIDGE_LIST="${XPATH_ELEMENTS[i++]}"
POOL_NAME="${XPATH_ELEMENTS[i++]}"
IMAGE_PREFIX="${XPATH_ELEMENTS[i++]:-one}"
SRC="${XPATH_ELEMENTS[i++]}"
SIZE="${XPATH_ELEMENTS[i++]}"
VITASTOR_CONF="${XPATH_ELEMENTS[i++]}"

DST_HOST=`get_destination_host $ID`
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Build the vitastor-cli command line with the optional config path and pool
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi
if [ -n "$POOL_NAME" ]; then
    CLI="$CLI --pool ${POOL_NAME}"
fi

SAFE_DIRS=""

DST="${IMAGE_PREFIX}-${ID}"

# Create DST as a child of SRC and then flatten it
ssh_exec_and_log "$DST_HOST" "$CLI create --parent $SRC $DST" \
"Error during $CLI create --parent $SRC $DST in $DST_HOST"
ssh_exec_and_log "$DST_HOST" "$CLI flatten $DST" \
"Error during $CLI flatten $DST in $DST_HOST"

echo "$DST raw"

View File

@ -1,135 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to copy a local image SRC to the image repository as DST.
# Arguments: $1 = image ID; the driver action data is read from stdin.
# Output: "<image name> raw" on stdout.

# -------- Set up the environment to source common tools & conf ------------
if [ -z "${ONE_LOCATION}" ]; then
    LIB_LOCATION=/usr/lib/one
else
    LIB_LOCATION=$ONE_LOCATION/lib
fi

. $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get cp and datastore arguments from OpenNebula core ------------
DRV_ACTION=`cat -`
ID=$1

export DRV_ACTION

UTILS_PATH="${DRIVER_PATH}/.."

XPATH="$UTILS_PATH/xpath.rb -b $DRV_ACTION"

# xpath.rb prints the requested values separated by NUL bytes;
# collect them into XPATH_ELEMENTS in request order
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS[i++]="$element"
done < <($XPATH \
/DS_DRIVER_ACTION_DATA/DATASTORE/BASE_PATH \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/RESTRICTED_DIRS \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SAFE_DIRS \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/IMAGE_PREFIX \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/STAGING_DIR \
/DS_DRIVER_ACTION_DATA/IMAGE/PATH \
/DS_DRIVER_ACTION_DATA/IMAGE/SIZE \
/DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/MD5 \
/DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/SHA1 \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/NO_DECOMPRESS \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/LIMIT_TRANSFER_BW \
/DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

# The order below must match the order of the XPath expressions above
unset i
BASE_PATH="${XPATH_ELEMENTS[i++]}"
RESTRICTED_DIRS="${XPATH_ELEMENTS[i++]}"
SAFE_DIRS="${XPATH_ELEMENTS[i++]}"
BRIDGE_LIST="${XPATH_ELEMENTS[i++]}"
POOL_NAME="${XPATH_ELEMENTS[i++]}"
IMAGE_PREFIX="${XPATH_ELEMENTS[i++]:-one}"
STAGING_DIR="${XPATH_ELEMENTS[i++]:-/var/tmp}"
SRC="${XPATH_ELEMENTS[i++]}"
SIZE="${XPATH_ELEMENTS[i++]}"
MD5="${XPATH_ELEMENTS[i++]}"
SHA1="${XPATH_ELEMENTS[i++]}"
NO_DECOMPRESS="${XPATH_ELEMENTS[i++]}"
LIMIT_TRANSFER_BW="${XPATH_ELEMENTS[i++]}"
VITASTOR_CONF="${XPATH_ELEMENTS[i++]}"

DST_HOST=`get_destination_host $ID`
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Build the vitastor-cli command line and the matching qemu-img URL suffix
CLI=vitastor-cli
QEMU_ARG=""
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
    QEMU_ARG=":config_path=${VITASTOR_CONF}"
fi
if [ -n "$POOL_NAME" ]; then
    CLI="$CLI --pool ${POOL_NAME}"
fi

set_up_datastore "$BASE_PATH" "$RESTRICTED_DIRS" "$SAFE_DIRS"

IMAGE_HASH=`generate_image_hash`
TMP_DST="$STAGING_DIR/$IMAGE_HASH"
DST="${IMAGE_PREFIX}-${ID}"

DOWNLOADER_ARGS=`set_downloader_args "$MD5" "$SHA1" "$NO_DECOMPRESS" "$LIMIT_TRANSFER_BW" "$SRC" -`

COPY_COMMAND="$UTILS_PATH/downloader.sh $DOWNLOADER_ARGS"

case $SRC in
http://*|https://*)
    log "Downloading $SRC to the image repository"
    DUMP="$COPY_COMMAND"
    ;;
*)
    # SRC is quoted so that a path containing whitespace or glob characters
    # is checked as a whole instead of being split into several words
    if [ `check_restricted "$SRC"` -eq 1 ]; then
        log_error "Not allowed to copy images from $RESTRICTED_DIRS"
        error_message "Not allowed to copy image file $SRC"
        exit -1
    fi
    log "Copying local image $SRC to the image repository"
    DUMP="$COPY_COMMAND"
    ;;
esac

# Stream the image to a temporary file on the bridge host
multiline_exec_and_log "set -e -o pipefail; $DUMP | $SSH $DST_HOST $DD of=$TMP_DST bs=1M" \
"Error copying $SRC to $DST_HOST:$TMP_DST"

# On the bridge host: create a Vitastor image of the source's virtual size,
# convert the temporary file into it and remove the temporary file
REGISTER_CMD=$(cat <<EOF
set -e -o pipefail
SIZE=\$($QEMU_IMG info --output json "$TMP_DST" | jq -r '.["virtual-size"]')
$CLI create -s \$SIZE "$DST"
$QEMU_IMG convert -O raw "$TMP_DST" "vitastor:image=$DST$QEMU_ARG"
# remove original
$RM -f "$TMP_DST"
EOF
)

ssh_exec_and_log "$DST_HOST" "$REGISTER_CMD" "Error registering $DST in $DST_HOST"

echo "$DST raw"

View File

@ -1,555 +0,0 @@
#!/bin/bash
# -------------------------------------------------------------------------- #
# Copyright 2002-2023, OpenNebula Project, OpenNebula Systems #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
# not use this file except in compliance with the License. You may obtain #
# a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#--------------------------------------------------------------------------- #
# Locate the OpenNebula lib and var directories: system-wide paths when
# ONE_LOCATION is unset, paths relative to ONE_LOCATION otherwise
if [ -z "${ONE_LOCATION}" ]; then
LIB_LOCATION=/usr/lib/one
VAR_LOCATION=/var/lib/one
else
LIB_LOCATION=$ONE_LOCATION/lib
VAR_LOCATION=$ONE_LOCATION/var
fi
# Load the shared OpenNebula shell helpers
. $LIB_LOCATION/sh/scripts_common.sh
# Directory containing this script; used to find xpath.rb and url.rb
DRIVER_PATH=$(dirname $0)
# Escape single quotes: every ' in $1 becomes '\'' so that the result
# can be embedded inside a single-quoted shell string
function esc_sq
{
    local escaped=${1//\'/\'\\\'\'}
    echo "$escaped"
}
# Execute a command (first parameter) and feed the first 1024 lines of its
# stdout to `file` to determine the MIME type. When NO_DECOMPRESS is "yes"
# the command is not run and "application/octet-stream" is reported.
function get_type
{
    if [ "$NO_DECOMPRESS" != "yes" ]; then
        command=$1
        ( eval "$command" | head -n 1024 | file -b --mime-type - ) 2>/dev/null
        return
    fi

    echo "application/octet-stream"
}
# Prints the command needed to decompress a stream of the given MIME type
# ($1); unknown types are passed through with "cat".
function get_decompressor
{
    type=$1

    if [ "$type" = "application/x-gzip" ] || [ "$type" = "application/gzip" ]; then
        echo "gunzip -c -"
    elif [ "$type" = "application/x-bzip2" ]; then
        echo "bunzip2 -qc -"
    elif [ "$type" = "application/x-xz" ]; then
        echo "unxz -c -"
    else
        echo "cat"
    fi
}
# Runs the decompression command given as the first parameter on stdin.
# The second parameter is the output file, or - to write to stdout.
function decompress
{
    command="$1"
    to="$2"

    case "$to" in
        -)
            $command
            ;;
        *)
            $command > "$to"
            ;;
    esac
}
# Hashes stdin with the openssl digest algorithm named by the first parameter
# and stores the digest in $HASH_FILE. Without an algorithm stdin is discarded.
function hasher
{
    if [ -z "$1" ]; then
        # Needs something consuming stdin or the pipe will break
        cat >/dev/null
        return
    fi

    openssl dgst -$1 | awk '{print $NF}' > $HASH_FILE
}
# Unarchives a tar or a zip file into a directory with the same name.
# $1 is the path of the downloaded file. If its detected type is tar or zip,
# the file is renamed to "<path>.tmp", a directory is created at the original
# path and the archive is extracted into it; other types are left untouched.
# Exits the script with an error if extraction fails.
function unarchive
{
TO="$1"
file_type=$(get_type "cat $TO")
tmp="$TO"
# Add full path if it is relative
if [ ${tmp:0:1} != "/" ]; then
tmp="$PWD/$tmp"
fi
# IN is where the archive is moved to, OUT is the directory that replaces it
IN="$tmp.tmp"
OUT="$tmp"
case "$file_type" in
"application/x-tar")
command="tar -xf $IN -C $OUT"
;;
"application/zip")
command="unzip -d $OUT $IN"
;;
*)
command=""
;;
esac
if [ -n "$command" ]; then
mv "$OUT" "$IN"
mkdir "$OUT"
# $command is intentionally unquoted so that it is split into program and arguments
$command
if [ "$?" != "0" ]; then
echo "Error uncompressing archive" >&2
exit -1
fi
rm "$IN"
fi
}
# Reads the S3 marketplace settings from $DRV_ACTION via xpath.rb and stores
# them in the globals S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_AWS
# and S3_ENDPOINT. Also sets CURRENT_DATE_DAY (UTC, YYYYMMDD) and
# CURRENT_DATE_ISO8601 (UTC, YYYYMMDDTHHMMSSZ).
function s3_env
{
XPATH="$DRIVER_PATH/xpath.rb -b $DRV_ACTION"
unset i j XPATH_ELEMENTS
# Values are read NUL-separated, in the order of the XPath expressions below
while IFS= read -r -d '' element; do
XPATH_ELEMENTS[i++]="$element"
done < <($XPATH /DS_DRIVER_ACTION_DATA/MARKETPLACE/TEMPLATE/ACCESS_KEY_ID \
/DS_DRIVER_ACTION_DATA/MARKETPLACE/TEMPLATE/SECRET_ACCESS_KEY \
/DS_DRIVER_ACTION_DATA/MARKETPLACE/TEMPLATE/REGION \
/DS_DRIVER_ACTION_DATA/MARKETPLACE/TEMPLATE/AWS \
/DS_DRIVER_ACTION_DATA/MARKETPLACE/TEMPLATE/ENDPOINT)
# j indexes the collected values in the same order
S3_ACCESS_KEY_ID="${XPATH_ELEMENTS[j++]}"
S3_SECRET_ACCESS_KEY="${XPATH_ELEMENTS[j++]}"
S3_REGION="${XPATH_ELEMENTS[j++]}"
S3_AWS="${XPATH_ELEMENTS[j++]}"
S3_ENDPOINT="${XPATH_ELEMENTS[j++]}"
CURRENT_DATE_DAY="$(date -u '+%Y%m%d')"
CURRENT_DATE_ISO8601="${CURRENT_DATE_DAY}T$(date -u '+%H%M%S')Z"
}
# Create an SHA-256 hash in hexadecimal.
# The argument is used as a printf format, so escapes such as \n are expanded.
# Usage:
# hash_sha256 <string>
function hash_sha256 {
    local data="${1}"
    printf "${data}" | openssl dgst -sha256 | sed 's/^.* //'
}
# Create an SHA-256 hmac in hexadecimal.
# <key> is an openssl -macopt value (e.g. key:... or hexkey:...);
# <data> is used as a printf format, so escapes such as \n are expanded.
# Usage:
# hmac_sha256 <key> <data>
function hmac_sha256 {
    local mac_key="${1}"
    local data="${2}"
    printf "${data}" | openssl dgst -sha256 -mac HMAC -macopt "${mac_key}" | sed 's/^.* //'
}
# Create the signature.
# Builds the "AWS4-HMAC-SHA256" string to sign from CURRENT_DATE_ISO8601,
# CURRENT_DATE_DAY, S3_REGION and the hash of HTTP_CANONICAL_REQUEST, derives
# the signing key by chaining HMACs (date -> region -> "s3" -> "aws4_request")
# starting from "AWS4${S3_SECRET_ACCESS_KEY}", and prints the hex signature.
# Usage:
# create_signature
function create_signature {
stringToSign="AWS4-HMAC-SHA256\n${CURRENT_DATE_ISO8601}\n${CURRENT_DATE_DAY}/${S3_REGION}/s3/aws4_request\n$(hash_sha256 "${HTTP_CANONICAL_REQUEST}")"
dateKey=$(hmac_sha256 key:"AWS4${S3_SECRET_ACCESS_KEY}" "${CURRENT_DATE_DAY}")
regionKey=$(hmac_sha256 hexkey:"${dateKey}" "${S3_REGION}")
serviceKey=$(hmac_sha256 hexkey:"${regionKey}" "s3")
signingKey=$(hmac_sha256 hexkey:"${serviceKey}" "aws4_request")
# printf expands the literal \n sequences in stringToSign into newlines
printf "${stringToSign}" | openssl dgst -sha256 -mac HMAC -macopt hexkey:"${signingKey}" | sed 's/.*(stdin)= //'
}
# Prints the curl arguments (Date and Authorization headers plus the quoted URL)
# for downloading the object $1 from the S3-compatible endpoint $S3_ENDPOINT.
# The object and bucket names are the last two path components of $1.
# The Authorization value is a base64-encoded HMAC-SHA1 of
# "GET\n\n\n<date>\n/<bucket>/<object>" keyed with S3_SECRET_ACCESS_KEY.
function s3_curl_args
{
FROM="$1"
ENDPOINT="$S3_ENDPOINT"
OBJECT=$(basename "$FROM")
BUCKET=$(basename $(dirname "$FROM"))
DATE="`date -u +'%a, %d %b %Y %H:%M:%S GMT'`"
AUTH_STRING="GET\n\n\n${DATE}\n/${BUCKET}/${OBJECT}"
# echo -en expands the \n sequences and adds no trailing newline
SIGNED_AUTH_STRING=`echo -en "$AUTH_STRING" | \
openssl sha1 -hmac ${S3_SECRET_ACCESS_KEY} -binary | \
base64`
echo " -H \"Date: ${DATE}\"" \
" -H \"Authorization: AWS ${S3_ACCESS_KEY_ID}:${SIGNED_AUTH_STRING}\"" \
" '$(esc_sq "${ENDPOINT}/${BUCKET}/${OBJECT}")'"
}
# Prints the curl arguments (Authorization, content-type, x-amz-content-sha256
# and x-amz-date headers plus the URL) for downloading the object $1 from
# "<bucket>.s3.amazonaws.com" using an AWS4-HMAC-SHA256 signature.
# Relies on the globals set by s3_env and on create_signature.
function s3_curl_args_aws
{
FROM="$1"
OBJECT=$(basename "$FROM")
BUCKET=$(basename "$(dirname "$FROM")")
ENDPOINT="$BUCKET.s3.amazonaws.com"
# Prefix the object name with / if it does not start with one
AWS_S3_PATH="$(echo $OBJECT | sed 's;^\([^/]\);/\1;')"
# NOTE(review): this hashes the output of `echo ""` (a single newline),
# not an empty payload - confirm this is what the service expects
HTTP_REQUEST_PAYLOAD_HASH="$(echo "" | openssl dgst -sha256 | sed 's/^.* //')"
HTTP_CANONICAL_REQUEST_URI="${AWS_S3_PATH}"
HTTP_REQUEST_CONTENT_TYPE='application/octet-stream'
HTTP_CANONICAL_REQUEST_HEADERS="content-type:${HTTP_REQUEST_CONTENT_TYPE}
host:${ENDPOINT}
x-amz-content-sha256:${HTTP_REQUEST_PAYLOAD_HASH}
x-amz-date:${CURRENT_DATE_ISO8601}"
HTTP_REQUEST_SIGNED_HEADERS="content-type;host;x-amz-content-sha256;x-amz-date"
# The literal \n sequences are expanded later by printf in hash_sha256
HTTP_CANONICAL_REQUEST="GET
${HTTP_CANONICAL_REQUEST_URI}\n
${HTTP_CANONICAL_REQUEST_HEADERS}\n
${HTTP_REQUEST_SIGNED_HEADERS}
${HTTP_REQUEST_PAYLOAD_HASH}"
SIGNATURE="$(create_signature)"
HTTP_REQUEST_AUTHORIZATION_HEADER="AWS4-HMAC-SHA256 Credential=${S3_ACCESS_KEY_ID}/${CURRENT_DATE_DAY}/${S3_REGION}/s3/aws4_request, SignedHeaders=${HTTP_REQUEST_SIGNED_HEADERS}, Signature=${SIGNATURE}"
echo " -H \"Authorization: ${HTTP_REQUEST_AUTHORIZATION_HEADER}\"" \
" -H \"content-type: ${HTTP_REQUEST_CONTENT_TYPE}\"" \
" -H \"x-amz-content-sha256: ${HTTP_REQUEST_PAYLOAD_HASH}\"" \
" -H \"x-amz-date: ${CURRENT_DATE_ISO8601}\"" \
" \"https://${ENDPOINT}${HTTP_CANONICAL_REQUEST_URI}\""
}
# Build the command that streams a Ceph RBD image to stdout over ssh.
#   $1 - source URL; url.rb splits it into user, host, image source and the
#        DS / CEPH_USER / CEPH_KEY / CEPH_CONF parameters
# Appends the Ceph options found in the URL to the global RBD command and
# prints the resulting ssh command line.
function get_rbd_cmd
{
    local URL_ELEMENTS=()

    FROM="$1"
    URL_RB="$DRIVER_PATH/url.rb"

    # The requested parts are read back NUL-delimited, one array slot each,
    # in the order they are listed here
    while IFS= read -r -d '' element; do
        URL_ELEMENTS+=("$element")
    done < <($URL_RB "$FROM" USER HOST SOURCE PARAM_DS \
                PARAM_CEPH_USER PARAM_CEPH_KEY PARAM_CEPH_CONF)

    USER="${URL_ELEMENTS[0]}"
    DST_HOST="${URL_ELEMENTS[1]}"
    SOURCE="${URL_ELEMENTS[2]}"
    DS="${URL_ELEMENTS[3]}"
    CEPH_USER="${URL_ELEMENTS[4]}"
    CEPH_KEY="${URL_ELEMENTS[5]}"
    CEPH_CONF="${URL_ELEMENTS[6]}"

    # The image name is the URL path without its leading '/'
    SOURCE="${SOURCE#/}"

    # Log in as the URL user when one was given
    if [[ -n $USER ]]; then
        DST_HOST="$USER@$DST_HOST"
    fi

    # Optional Ceph credentials / configuration
    if [[ -n $CEPH_USER ]]; then
        RBD="$RBD --id '$(esc_sq "${CEPH_USER}")'"
    fi

    if [[ -n $CEPH_KEY ]]; then
        RBD="$RBD --keyfile '$(esc_sq "${CEPH_KEY}")'"
    fi

    if [[ -n $CEPH_CONF ]]; then
        RBD="$RBD --conf '$(esc_sq "${CEPH_CONF}")'"
    fi

    echo "ssh '$(esc_sq "$DST_HOST")' \"$RBD export '$(esc_sq "$SOURCE")' -\""
}
# Build the command that streams a Vitastor image to stdout over ssh.
#   $1 - source URL; url.rb splits it into user, host, image source and the
#        DS / VITASTOR_CONF parameters
# Prints an ssh command line that runs "vitastor-cli dd iimg=<image>" on the
# remote host, adding --config_path when the URL carries VITASTOR_CONF.
function get_vitastor_cmd
{
    local URL_ELEMENTS=()
    local CLI="vitastor-cli"

    FROM="$1"
    URL_RB="$DRIVER_PATH/url.rb"

    # The requested parts are read back NUL-delimited, one array slot each,
    # in the order they are listed here
    while IFS= read -r -d '' element; do
        URL_ELEMENTS+=("$element")
    done < <($URL_RB "$FROM" USER HOST SOURCE PARAM_DS PARAM_VITASTOR_CONF)

    USER="${URL_ELEMENTS[0]}"
    DST_HOST="${URL_ELEMENTS[1]}"
    SOURCE="${URL_ELEMENTS[2]}"
    DS="${URL_ELEMENTS[3]}"
    VITASTOR_CONF="${URL_ELEMENTS[4]}"

    # The image name is the URL path without its leading '/'
    SOURCE="${SOURCE#/}"

    # Log in as the URL user when one was given
    if [[ -n $USER ]]; then
        DST_HOST="$USER@$DST_HOST"
    fi

    # Optional non-default Vitastor configuration file
    if [[ -n $VITASTOR_CONF ]]; then
        CLI="$CLI --config_path '$(esc_sq "${VITASTOR_CONF}")'"
    fi

    echo "ssh '$(esc_sq "$DST_HOST")' \"$CLI dd iimg='$(esc_sq "$SOURCE")'\""
}
# Version comparison based on "sort -V".
# Succeeds (exit status 0) when the first version sorts before, or is equal
# to, the second one.
# Usage:
#   verlte "3.2.9" "3.4.0"
function verlte() {
    local lowest

    # The lower of the two versions is the first line of the sorted pair
    lowest=$(echo -e "$1\n$2" | sort -V | head -n1)

    [ "$1" = "$lowest" ]
}
# Returns curl retry options based on its version.
# Prints nothing when NO_RETRY is "yes". Otherwise prints
# "--retry 3 --retry-delay 3", plus "--retry-all-errors" when the installed
# curl is at least 7.71.0 and no MAX_SIZE (partial download) was requested.
function curl_retry_args {
    [ "$NO_RETRY" = "yes" ] && return

    RETRY_ARGS="--retry 3 --retry-delay 3"

    CURL_VER=`curl --version | grep -o 'curl [0-9\.]*' | awk '{print $2}'`

    # To retry also on conn-reset-by-peer fresh curl is needed.
    # MAX_SIZE is quoted so that a value containing whitespace is tested as a
    # single string instead of making the test itself fail
    if verlte "7.71.0" "$CURL_VER" && [ -z "${MAX_SIZE}" ] ; then
        RETRY_ARGS+=" --retry-all-errors"
    fi

    echo $RETRY_ARGS
}
# Parse the command line. getopt rewrites it so that the options come first,
# followed by "--" and the positional arguments (source and destination).
if ! TEMP=$(getopt -o m:s:l:c:no -l md5:,sha1:,limit:,max-size:,nodecomp,noretry -- "$@"); then
    echo "Arguments error" >&2
    exit -1
fi

eval set -- "$TEMP"

while :; do
    case "$1" in
        -m|--md5)
            # Expected MD5 digest of the downloaded data
            HASH_TYPE=md5
            HASH=$2
            shift 2
            ;;
        -s|--sha1)
            # Expected SHA1 digest of the downloaded data
            HASH_TYPE=sha1
            HASH=$2
            shift 2
            ;;
        -l|--limit)
            export LIMIT_RATE="$2"
            shift 2
            ;;
        -c|--max-size)
            export MAX_SIZE="$2"
            shift 2
            ;;
        -n|--nodecomp)
            export NO_DECOMPRESS="yes"
            shift
            ;;
        -o|--noretry)
            export NO_RETRY="yes"
            shift
            ;;
        --)
            # End of options, what follows are the positional arguments
            shift
            break
            ;;
        *)
            shift
            ;;
    esac
done

FROM="$1"
TO="$2"

# A partial download cannot be verified against a digest of the whole file
if [ -n "${HASH_TYPE}" ] && [ -n "${MAX_SIZE}" ]; then
    echo "Hash check not supported for partial downloads" >&2
    exit -1
fi

# File used by the hasher function to store the resulting hash
export HASH_FILE="/tmp/downloader.hash.$$"

GLOBAL_CURL_ARGS="--fail -sS -k -L $(curl_retry_args)"
# Translate the source location into the shell command that writes the image
# to stdout. The result is stored in $command and eval'ed later, so anything
# taken from $FROM is single-quoted through esc_sq before being embedded.
# Some branches also set $file_type or $clean_command.
case "$FROM" in
http://*|https://*)
    # -k so it does not check the certificate
    # -L to follow redirects
    # -sS to hide output except on failure
    # --limit-rate to limit the bw
    curl_args="$GLOBAL_CURL_ARGS '$(esc_sq "${FROM}")'"

    if [ -n "$LIMIT_RATE" ]; then
        curl_args="--limit-rate $LIMIT_RATE $curl_args"
    fi

    command="curl $curl_args"
    ;;
ssh://*)
    # pseudo-url for ssh transfers ssh://user@host:path
    # NOTE(review): the host/path split relies on word splitting, so a path
    # containing whitespace is truncated at the first blank
    ssh_src=${FROM#ssh://}
    ssh_arg=(${ssh_src/:/ })

    rmt_cmd="\"cat '$(esc_sq "${ssh_arg[1]}")'\""

    command="ssh ${ssh_arg[0]} $rmt_cmd"
    ;;
s3://*)
    # Read s3 environment
    s3_env

    if [ -z "$S3_ACCESS_KEY_ID" -o -z "$S3_SECRET_ACCESS_KEY" ]; then
        echo "S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY are required" >&2
        exit -1
    fi

    # S3_AWS containing "no"/"NO" selects the generic S3 request signing,
    # anything else the AWS one
    curl_args=""

    if [[ "$S3_AWS" =~ (no|NO) ]]; then
        curl_args="$(s3_curl_args "$FROM")"
    else
        curl_args="$(s3_curl_args_aws "$FROM")"
    fi

    command="curl $GLOBAL_CURL_ARGS $curl_args"
    ;;
rbd://*)
    command="$(get_rbd_cmd "$FROM")"
    ;;
vitastor://*)
    command="$(get_vitastor_cmd "$FROM")"
    ;;
vcenter://*)
    command="$VAR_LOCATION/remotes/datastore/vcenter_downloader.rb '$(esc_sq "$FROM")'"
    ;;
lxd://*)
    file_type="application/octet-stream"
    # Single-quoted like the other branches: inside double quotes the later
    # eval would expand any $ or backquote contained in the URL
    command="$VAR_LOCATION/remotes/datastore/lxd_downloader.sh '$(esc_sq "$FROM")'"
    ;;
restic://*)
    # NOTE(review): the helper's output is eval'ed as shell code; only its
    # "command=" and "clean_command=" lines are let through
    eval `$VAR_LOCATION/remotes/datastore/restic_downloader.rb "$FROM" | grep -e '^command=' -e '^clean_command='`
    ;;
rsync://*)
    # NOTE(review): same eval of helper output as for restic://
    eval `$VAR_LOCATION/remotes/datastore/rsync_downloader.rb "$FROM" | grep -e '^command=' -e '^clean_command='`
    ;;
*)
    # Anything else is a local path. It is quoted so that a path with
    # whitespace, or an empty one, is tested as given
    if [ ! -r "$FROM" ]; then
        echo "Cannot read from $FROM" >&2
        exit -1
    fi

    command="cat '$(esc_sq "$FROM")'"
    ;;
esac
# Run the selected command and write its (optionally decompressed) output to
# the destination, then verify the hash, unarchive and clean up.

# The content type is probed only when the branch that selected the command
# did not preset it
[ -z "$file_type" ] && file_type=$(get_type "$command")
decompressor=$(get_decompressor "$file_type")
if [ -z "${MAX_SIZE}" ]; then
# Full download: the stream is duplicated into the hasher while it is being
# decompressed into the destination
eval "$command" | \
tee >( hasher $HASH_TYPE) | \
decompress "$decompressor" "$TO"
# $? is the status of the last pipeline stage (decompress) and $PIPESTATUS,
# i.e. its element 0, the status of the eval'ed command. Both are expanded
# in this same test, before anything can overwrite them.
if [ "$?" != "0" -o "$PIPESTATUS" != "0" ]; then
echo "Error copying" >&2
exit -1
fi
else
# Order of the 'head' command is here on purpose:
# 1. We want to download more bytes than needed to get a requested
# number of bytes on the output. Decompressor may need more
# data to decompress the stream.
# 2. Decompressor command is also misused to detect SIGPIPE error.
eval "$command" | \
decompress "$decompressor" "$TO" 2>/dev/null | \
head -c "${MAX_SIZE}"
# Following table shows exit codes of each command
# in the pipe for various scenarios:
#
# ----------------------------------------------------
# | $COMMAND | TYPE | PIPESTATUS | BEHAVIOUR
# ----------------------------------------------------
# | cat | partial | 141 141 0 | OK
# | cat | full | 0 0 0 | OK
# | cat | error | 1 0 0 | fail
# | curl | partial | 23 141 0 | OK
# | curl | full | 0 0 0 | OK
# | curl | error | 22 0 0 | fail
# | ssh | partial | 255 141 0 | OK
# | ssh | full | 0 0 0 | OK
# | ssh | error ssh | 255 0 0 | fail
# | ssh | error ssh cat | 1 0 0 | fail
# Fail when: the command failed while the decompressor ended normally, or
# the decompressor failed with anything other than 141 (SIGPIPE), or head
# itself failed. All PIPESTATUS elements must be read in this one test.
if [ \( "${PIPESTATUS[0]}" != '0' -a "${PIPESTATUS[1]}" = '0' \) \
-o \( "${PIPESTATUS[1]}" != '0' -a "${PIPESTATUS[1]}" != '141' \) \
-o \( "${PIPESTATUS[2]}" != "0" \) ];
then
echo "Error copying" >&2
exit -1
fi
fi
# Compare the digest left in HASH_FILE with the expected one
if [ -n "$HASH_TYPE" ]; then
HASH_RESULT=$( cat $HASH_FILE)
rm $HASH_FILE
if [ "$HASH_RESULT" != "$HASH" ]; then
echo "Hash does not match" >&2
exit -1
fi
fi
# Unarchive only if the destination is filesystem
if [ "$TO" != "-" ]; then
unarchive "$TO"
fi
# Perform any clean operation
if [ -n "${clean_command}" ]; then
eval "$clean_command"
fi

View File

@ -1,60 +0,0 @@
diff --git /var/lib/one/remotes/datastore/downloader.sh /var/lib/one/remotes/datastore/downloader.sh
index 9b75d8ee4b..09d2a5d41d 100755
--- /var/lib/one/remotes/datastore/downloader.sh
+++ /var/lib/one/remotes/datastore/downloader.sh
@@ -295,6 +295,45 @@ function get_rbd_cmd
echo "ssh '$(esc_sq "$DST_HOST")' \"$RBD export '$(esc_sq "$SOURCE")' -\""
}
+function get_vitastor_cmd
+{
+ local i j URL_ELEMENTS
+
+ FROM="$1"
+
+ URL_RB="$DRIVER_PATH/url.rb"
+
+ while IFS= read -r -d '' element; do
+ URL_ELEMENTS[i++]="$element"
+ done < <($URL_RB "$FROM" \
+ USER \
+ HOST \
+ SOURCE \
+ PARAM_DS \
+ PARAM_VITASTOR_CONF)
+
+ USER="${URL_ELEMENTS[j++]}"
+ DST_HOST="${URL_ELEMENTS[j++]}"
+ SOURCE="${URL_ELEMENTS[j++]}"
+ DS="${URL_ELEMENTS[j++]}"
+ VITASTOR_CONF="${URL_ELEMENTS[j++]}"
+
+ # Remove leading '/'
+ SOURCE="${SOURCE#/}"
+
+ if [ -n "$USER" ]; then
+ DST_HOST="$USER@$DST_HOST"
+ fi
+
+ local CLI
+ CLI="vitastor-cli"
+ if [ -n "$VITASTOR_CONF" ]; then
+ CLI="$CLI --config_path '$(esc_sq "${VITASTOR_CONF}")'"
+ fi
+
+ echo "ssh '$(esc_sq "$DST_HOST")' \"$CLI dd iimg='$(esc_sq "$SOURCE")'\""
+}
+
# Compare 2 version strings using sort -V
# Usage:
# verlte "3.2.9" "3.4.0"
@@ -424,6 +463,9 @@ s3://*)
rbd://*)
command="$(get_rbd_cmd "$FROM")"
;;
+vitastor://*)
+ command="$(get_vitastor_cmd "$FROM")"
+ ;;
vcenter://*)
command="$VAR_LOCATION/remotes/datastore/vcenter_downloader.rb '$(esc_sq "$FROM")'"
;;

View File

@ -1,114 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to export an image to qcow2 file
# ------------ Set up the environment to source common tools ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get image and datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the image ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/IMAGE/SOURCE \
    /DS_DRIVER_ACTION_DATA/IMAGE/SIZE \
    /DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/MD5 \
    /DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/SHA1 \
    /DS_DRIVER_ACTION_DATA/IMAGE/TEMPLATE/FORMAT \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

SRC="${XPATH_ELEMENTS[0]}"
SIZE="${XPATH_ELEMENTS[1]}"
MD5="${XPATH_ELEMENTS[2]}"
SHA1="${XPATH_ELEMENTS[3]}"
FORMAT="${XPATH_ELEMENTS[4]:-raw}"
BRIDGE_LIST="${XPATH_ELEMENTS[5]}"
VITASTOR_CONF="${XPATH_ELEMENTS[6]}"

DST_HOST=$(get_destination_host $ID)
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# The image is exported as a vitastor:// URL; the configuration file, when
# set, travels as a URL parameter
IS_JOIN="?"
IMPORT_SOURCE="vitastor://$DST_HOST/$SRC"
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    IMPORT_SOURCE="${IMPORT_SOURCE}${IS_JOIN}VITASTOR_CONF=${VITASTOR_CONF}"
    CLI="$CLI --config_path $VITASTOR_CONF"
fi

# Remote script: compute the MD5 of the image unless the image template
# already provides one, then print the MD5/SIZE/FORMAT elements.
# FIXME: this is inefficient - it pipes the image twice...
INFO_SCRIPT=$(cat <<EOF
if [ -z "$MD5" ]; then
CHECKSUM=\$(
$CLI dd iimg=${SRC} | ${MD5SUM} | cut -f1 -d' '
ps=\$PIPESTATUS
if [ "\$ps" != "0" ]; then
exit \$ps
fi
)
status=\$?
[ "\$status" != "0" ] && exit \$status
else
CHECKSUM="$MD5"
fi
if [ -z "\$CHECKSUM" ]; then
exit 1
fi
cat <<EOT
<MD5><![CDATA[\$CHECKSUM]]></MD5>
<SIZE><![CDATA[$SIZE]]></SIZE>
<FORMAT><![CDATA[${FORMAT}]]></FORMAT>
EOT
EOF
)

INFO=$(ssh_monitor_and_log "$DST_HOST" "$INFO_SCRIPT" "Image info script" 2>&1)
INFO_STATUS=$?

# On failure the captured output is printed and the remote status propagated
if [ "$INFO_STATUS" != "0" ]; then
    echo "$INFO"
    exit $INFO_STATUS
fi
# Print the import description: where the image can be fetched from plus the
# MD5/SIZE/FORMAT elements returned by the info script. The closing tag must
# be the last thing printed, with no trailing characters after it.
cat <<EOF
<IMPORT_INFO>
<IMPORT_SOURCE><![CDATA[$IMPORT_SOURCE]]></IMPORT_SOURCE>
$INFO
<DISPOSE>NO</DISPOSE>
</IMPORT_INFO>
EOF

View File

@ -1,124 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to create a VM image (SRC) of size (SIZE) and formatted as (FS)
# -------- Set up the environment to source common tools & conf ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh
source ${DRIVER_PATH}/../../etc/datastore/datastore.conf

# -------- Get mkfs and datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the image ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/DATASTORE/BASE_PATH \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/RESTRICTED_DIRS \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SAFE_DIRS \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/IMAGE_PREFIX \
    /DS_DRIVER_ACTION_DATA/IMAGE/FORMAT \
    /DS_DRIVER_ACTION_DATA/IMAGE/SIZE \
    /DS_DRIVER_ACTION_DATA/IMAGE/FS \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

BASE_PATH="${XPATH_ELEMENTS[0]}"
RESTRICTED_DIRS="${XPATH_ELEMENTS[1]}"
SAFE_DIRS="${XPATH_ELEMENTS[2]}"
BRIDGE_LIST="${XPATH_ELEMENTS[3]}"
POOL_NAME="${XPATH_ELEMENTS[4]}"
IMAGE_PREFIX="${XPATH_ELEMENTS[5]:-one}"
FORMAT="${XPATH_ELEMENTS[6]}"
SIZE="${XPATH_ELEMENTS[7]}"
FS="${XPATH_ELEMENTS[8]}"
VITASTOR_CONF="${XPATH_ELEMENTS[9]}"

DST_HOST=$(get_destination_host $ID)
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Here CLI only holds the extra options, the program name is added where the
# remote commands are assembled
CLI=
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi
if [ -n "$POOL_NAME" ]; then
    CLI="$CLI --pool ${POOL_NAME}"
fi

set_up_datastore "$BASE_PATH" "$RESTRICTED_DIRS" "$SAFE_DIRS"

IMAGE_NAME="${IMAGE_PREFIX}-${ID}"

# ------------ Image to save_as disk, no need to create a new image ------------
if [ "$FORMAT" = "save_as" ]; then
    echo "$IMAGE_NAME"
    exit 0
fi
# ------------ Create the image in the repository ------------
# Builds the script that is run on the bridge host: it creates the image
# and, when a filesystem was requested, maps it through vitastor-nbd and
# formats it. The image name is printed on success.
# FIXME: Duplicate code with tm/vitastor/mkimage

# NOTE(review): this MKFS_CMD is recomputed below whenever FS is set and is
# not used otherwise, so a "swap" image without FS is created but left
# unformatted - confirm whether that is intended.
if [ -n "$FS" -o "$FORMAT" = "swap" ]; then
    MKFS_CMD=`mkfs_command '$NBD' raw "$SIZE" "$SUPPORTED_FS" "$FS" "$FS_OPTS" | grep -v $QEMU_IMG`
fi

# Remote script that creates the empty image
MKIMAGE_CMD=$(cat <<EOF
set -e -o pipefail
export PATH=/usr/sbin:/sbin:\$PATH
vitastor-cli $CLI create --pool "${POOL_NAME}" "$IMAGE_NAME" --size "${SIZE}M"
EOF
)

# FS is quoted so the test sees a single string whatever the value contains
if [ -n "$FS" ]; then
    set -e -o pipefail

    IMAGE_HASH=`generate_image_hash`
    # Per-filesystem options come from the variable named FS_OPTS_<fs>
    FS_OPTS=$(eval $(echo "echo \$FS_OPTS_$FS"))

    # '$NBD' is passed literally so it is expanded on the remote side; lines
    # mentioning $QEMU_IMG are dropped from the generated commands
    MKFS_CMD=`mkfs_command '$NBD' raw "$SIZE" "$SUPPORTED_FS" "$FS" "$FS_OPTS" | grep -v $QEMU_IMG`

    # Extend the remote script: map the image, format it, and unmap it when
    # the script exits
    MKIMAGE_CMD=$(cat <<EOF
$MKIMAGE_CMD
NBD=\$(sudo vitastor-nbd $CLI map --image "$IMAGE_NAME")
trap "sudo vitastor-nbd $CLI unmap \$NBD" EXIT
$MKFS_CMD
EOF
)
fi

ssh_exec_and_log "$DST_HOST" "$MKIMAGE_CMD" "Error registering $IMAGE_NAME in $DST_HOST"

echo "$IMAGE_NAME"

View File

@ -1,64 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to monitor the free and used space of a datastore
# -------- Set up the environment to source common tools & conf ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../../datastore/libfs.sh

# -------- Get datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the datastore ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../../datastore/xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

BRIDGE_LIST="${XPATH_ELEMENTS[0]}"
POOL_NAME="${XPATH_ELEMENTS[1]}"
VITASTOR_CONF="${XPATH_ELEMENTS[2]}"

HOST=$(get_destination_host)
if [ -z "$HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi
# Command used on the bridge host; honours a non-default configuration file
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi

# ------------ Compute datastore usage -------------
# The remote script selects the pool named POOL_NAME from the "df" JSON
# output and prints TOTAL_MB / USED_MB / FREE_MB lines. It runs $CLI, not a
# bare vitastor-cli, so that VITASTOR_CONF is taken into account.
# The \n sequences reach jq unchanged and become line breaks in its output.
MONITOR_SCRIPT=$(cat <<EOF
$CLI df --json | jq -r '.[] | select(.name == "${POOL_NAME}") |
"TOTAL_MB="+(.total_raw/.raw_to_usable/1024/1024 | tostring)+
"\nUSED_MB="+(.used_raw/.raw_to_usable/1024/1024 | tostring)+
"\nFREE_MB="+(.max_available/1024/1024 | tostring)'
EOF
)

ssh_monitor_and_log $HOST "$MONITOR_SCRIPT 2>&1" "Error monitoring ${POOL_NAME} in $HOST"

View File

@ -1,73 +0,0 @@
diff --git /etc/one/oned.conf /etc/one/oned.conf
index be02d646a8..27f876ec36 100644
--- /etc/one/oned.conf
+++ /etc/one/oned.conf
@@ -481,7 +481,7 @@ VM_MAD = [
NAME = "kvm",
SUNSTONE_NAME = "KVM",
EXECUTABLE = "one_vmm_exec",
- ARGUMENTS = "-t 15 -r 0 kvm -p",
+ ARGUMENTS = "-t 15 -r 0 kvm -p -l deploy=deploy.vitastor",
DEFAULT = "vmm_exec/vmm_exec_kvm.conf",
TYPE = "kvm",
KEEP_SNAPSHOTS = "yes",
@@ -592,7 +592,7 @@ VM_MAD = [
TM_MAD = [
EXECUTABLE = "one_tm",
- ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,dev,vcenter,iscsi_libvirt"
+ ARGUMENTS = "-t 15 -d dummy,lvm,shared,fs_lvm,fs_lvm_ssh,qcow2,ssh,ceph,vitastor,dev,vcenter,iscsi_libvirt"
]
#*******************************************************************************
@@ -612,7 +612,7 @@ TM_MAD = [
DATASTORE_MAD = [
EXECUTABLE = "one_datastore",
- ARGUMENTS = "-t 15 -d dummy,fs,lvm,ceph,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
+ ARGUMENTS = "-t 15 -d dummy,fs,lvm,ceph,vitastor,dev,iscsi_libvirt,vcenter,restic,rsync -s shared,ssh,ceph,vitastor,fs_lvm,fs_lvm_ssh,qcow2,vcenter"
]
#*******************************************************************************
@@ -1050,6 +1050,9 @@ INHERIT_DATASTORE_ATTR = "VCENTER_DS_IMAGE_DIR"
INHERIT_DATASTORE_ATTR = "VCENTER_DS_VOLATILE_DIR"
INHERIT_DATASTORE_ATTR = "VCENTER_INSTANCE_ID"
+INHERIT_DATASTORE_ATTR = "VITASTOR_CONF"
+INHERIT_DATASTORE_ATTR = "IMAGE_PREFIX"
+
INHERIT_IMAGE_ATTR = "DISK_TYPE"
INHERIT_IMAGE_ATTR = "VCENTER_ADAPTER_TYPE"
INHERIT_IMAGE_ATTR = "VCENTER_DISK_TYPE"
@@ -1180,6 +1183,14 @@ TM_MAD_CONF = [
CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "RBD"
]
+TM_MAD_CONF = [
+ NAME = "vitastor", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
+ DS_MIGRATE = "NO", DRIVER = "raw", ALLOW_ORPHANS="format",
+ TM_MAD_SYSTEM = "ssh,shared", LN_TARGET_SSH = "SYSTEM", CLONE_TARGET_SSH = "SYSTEM",
+ DISK_TYPE_SSH = "FILE", LN_TARGET_SHARED = "NONE",
+ CLONE_TARGET_SHARED = "SELF", DISK_TYPE_SHARED = "FILE"
+]
+
TM_MAD_CONF = [
NAME = "iscsi_libvirt", LN_TARGET = "NONE", CLONE_TARGET = "SELF", SHARED = "YES",
DS_MIGRATE = "NO", DRIVER = "raw"
@@ -1263,9 +1274,16 @@ DS_MAD_CONF = [
NAME = "ceph",
REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
PERSISTENT_ONLY = "NO",
MARKETPLACE_ACTIONS = "export"
+]
+
+DS_MAD_CONF = [
+ NAME = "vitastor",
+ REQUIRED_ATTRS = "DISK_TYPE,BRIDGE_LIST",
+ PERSISTENT_ONLY = "NO",
+ MARKETPLACE_ACTIONS = "export"
]
DS_MAD_CONF = [
NAME = "dev", REQUIRED_ATTRS = "DISK_TYPE", PERSISTENT_ONLY = "YES"
]

View File

@ -1,63 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to remove a VM image from the image repository
# ------------ Set up the environment to source common tools ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get rm and datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the image ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/IMAGE/SOURCE \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

IMAGE_NAME="${XPATH_ELEMENTS[0]}"
BRIDGE_LIST="${XPATH_ELEMENTS[1]}"
VITASTOR_CONF="${XPATH_ELEMENTS[2]}"

DST_HOST=$(get_destination_host $ID)
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Command used on the bridge host; honours a non-default configuration file
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi

# -------- Remove Image from Datastore ------------
log "Removing $IMAGE_NAME from the image repository in $DST_HOST"

DELETE_CMD=$(cat <<EOF
$CLI rm $IMAGE_NAME
EOF
)

ssh_exec_and_log "$DST_HOST" "$DELETE_CMD" "Error deleting $IMAGE_NAME in $DST_HOST"

View File

@ -1,64 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to delete a snapshot of an image
# -------- Set up the environment to source common tools & conf ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get image and datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the image ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/IMAGE/SOURCE \
    /DS_DRIVER_ACTION_DATA/IMAGE/TARGET_SNAPSHOT \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

BRIDGE_LIST="${XPATH_ELEMENTS[0]}"
IMAGE_NAME="${XPATH_ELEMENTS[1]}"
SNAP_ID="${XPATH_ELEMENTS[2]}"
VITASTOR_CONF="${XPATH_ELEMENTS[3]}"

DST_HOST=$(get_destination_host $ID)
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Command used on the bridge host; honours a non-default configuration file
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi
# Remove the snapshot on the bridge host. The snapshot is addressed as
# <image>@<snapshot id>, and the error message reports that same name.
SNAP_DELETE_CMD=$(cat <<EOF
$CLI rm ${IMAGE_NAME}@${SNAP_ID}
EOF
)

ssh_exec_and_log "$DST_HOST" "$SNAP_DELETE_CMD" "Error deleting snapshot $IMAGE_NAME@$SNAP_ID"

View File

@ -1,69 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to flatten a snapshot of a persistent image
# -------- Set up the environment to source common tools & conf ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get image and datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the image ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \
    /DS_DRIVER_ACTION_DATA/IMAGE/SOURCE \
    /DS_DRIVER_ACTION_DATA/IMAGE/TARGET_SNAPSHOT \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

BRIDGE_LIST="${XPATH_ELEMENTS[0]}"
POOL_NAME="${XPATH_ELEMENTS[1]}"
IMAGE_NAME="${XPATH_ELEMENTS[2]}"
SNAP_ID="${XPATH_ELEMENTS[3]}"
VITASTOR_CONF="${XPATH_ELEMENTS[4]}"

DST_HOST=$(get_destination_host $ID)
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Command used on the bridge host; honours a non-default configuration file
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi

# Remote script, aborted at the first failing step: flatten the snapshot,
# rename it to the image name, then remove what still matches <image>@*
SNAP_FLATTEN_CMD=$(cat <<EOF
set -e
$CLI flatten "$IMAGE_NAME@$SNAP_ID"
$CLI modify "$IMAGE_NAME@$SNAP_ID" --rename "$IMAGE_NAME"
$CLI rm --matching "$IMAGE_NAME@*"
EOF
)

ssh_exec_and_log "$DST_HOST" "$SNAP_FLATTEN_CMD" "Error flattening snapshot $SNAP_ID for $IMAGE_NAME"

View File

@ -1,72 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# This script is used to revert a snapshot of an image
# -------- Set up the environment to source common tools & conf ------------
if [ -n "${ONE_LOCATION}" ]; then
    LIB_LOCATION=$ONE_LOCATION/lib
else
    LIB_LOCATION=/usr/lib/one
fi

source $LIB_LOCATION/sh/scripts_common.sh

DRIVER_PATH=$(dirname $0)
source ${DRIVER_PATH}/../libfs.sh

# -------- Get image and datastore arguments from OpenNebula core ------------
# The driver action data arrives on stdin, the image ID as first argument
DRV_ACTION=$(cat -)
ID=$1

XPATH="${DRIVER_PATH}/../xpath.rb -b $DRV_ACTION"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <($XPATH \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/BRIDGE_LIST \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/POOL_NAME \
    /DS_DRIVER_ACTION_DATA/IMAGE/SOURCE \
    /DS_DRIVER_ACTION_DATA/IMAGE/TARGET_SNAPSHOT \
    /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/VITASTOR_CONF)

BRIDGE_LIST="${XPATH_ELEMENTS[0]}"
POOL_NAME="${XPATH_ELEMENTS[1]}"
IMAGE_NAME="${XPATH_ELEMENTS[2]}"
SNAP_ID="${XPATH_ELEMENTS[3]}"
VITASTOR_CONF="${XPATH_ELEMENTS[4]}"

DST_HOST=$(get_destination_host $ID)
if [ -z "$DST_HOST" ]; then
    error_message "Datastore template missing 'BRIDGE_LIST' attribute."
    exit -1
fi

# Command used on the bridge host, with the optional configuration file and
# pool options appended
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi
if [ -n "$POOL_NAME" ]; then
    CLI="$CLI --pool ${POOL_NAME}"
fi
# Remote script that reverts the image to a snapshot: a new child of the
# snapshot is created under a temporary "<image>.flatten" name, the current
# image is removed and the child is renamed to take its place.
# "set -e" makes the script stop at the first failing step, so the current
# image is never removed when the replacement could not be created; the two
# "rm" steps are allowed to fail through "|| true".
SNAP_REVERT_CMD=$(cat <<EOF
set -e
$CLI rm ${IMAGE_NAME}.flatten || true
$CLI create --pool "${POOL_NAME}" --parent ${IMAGE_NAME}@${SNAP_ID} ${IMAGE_NAME}.flatten
$CLI rm ${IMAGE_NAME} || true
$CLI modify ${IMAGE_NAME}.flatten --rename ${IMAGE_NAME}
EOF
)

ssh_exec_and_log "$DST_HOST" "$SNAP_REVERT_CMD" "Error reverting snapshot $SNAP_ID for $IMAGE_NAME"

View File

@ -1 +0,0 @@
../ceph/stat

View File

@ -1,12 +0,0 @@
diff --git /etc/one/vmm_exec/vmm_execrc /etc/one/vmm_exec/vmm_execrc
index e210526e63..cb51d3b5e8 100644
--- /etc/one/vmm_exec/vmm_execrc
+++ /etc/one/vmm_exec/vmm_execrc
@@ -1,6 +1,6 @@
# Space separated list of VMM-TM pairs that support live disk snapshots. VMM
# and TM must be separated by '-'
-LIVE_DISK_SNAPSHOTS="kvm-qcow2 kvm-shared kvm-ceph kvm-ssh qemu-qcow2 qemu-shared qemu-ceph qemu-ssh"
+LIVE_DISK_SNAPSHOTS="kvm-qcow2 kvm-shared kvm-ceph kvm-vitastor kvm-ssh qemu-qcow2 qemu-shared qemu-ceph qemu-ssh"
# Space separated list VNM_MAD-ACTION pairs that run locally
VNMAD_LOCAL_ACTIONS="elastic-post elastic-clean"

View File

@ -1,97 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# clone fe:SOURCE host:remote_system_ds/disk.i size
# - fe is the front-end hostname
# - SOURCE is the path of the disk image in the form DS_BASE_PATH/disk
# - host is the target host to deploy the VM
# - remote_system_ds is the path for the system datastore in the host
# Positional arguments
SRC=$1
DST=$2
VM_ID=$3
DS_ID=$4

#--------------------------------------------------------------------------------
# Locate and load the common TM helpers
#--------------------------------------------------------------------------------

if [ -n "${ONE_LOCATION}" ]; then
    TMCOMMON=$ONE_LOCATION/var/remotes/tm/tm_common.sh
    LIB_LOCATION=$ONE_LOCATION/lib
else
    TMCOMMON=/var/lib/one/remotes/tm/tm_common.sh
    LIB_LOCATION=/usr/lib/one
fi

DRIVER_PATH=$(dirname $0)

source $TMCOMMON

#-------------------------------------------------------------------------------
# Compute the destination image name
#-------------------------------------------------------------------------------

DST_HOST=$(arg_host $DST)

SRC_PATH=$(arg_path $SRC)
DST_PATH=$(arg_path $DST)
DST_DIR=$(dirname $DST_PATH)

# The disk ID is whatever follows the last '.' of the destination
DISK_ID=$(echo $DST|awk -F. '{print $NF}')
# The clone is named <source image>-<vm id>-<disk id>
VM_DST="${SRC_PATH}-${VM_ID}-${DISK_ID}"
# The system datastore ID is the third path component from the end
DST_DS_ID=$(echo $DST | sed s#//*#/#g | awk -F/ '{print $(NF-2)}')

#-------------------------------------------------------------------------------
# Get Image information
#-------------------------------------------------------------------------------

XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <(onevm show -x $VM_ID | $XPATH \
    /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/VITASTOR_CONF \
    /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/SIZE)

VITASTOR_CONF="${XPATH_ELEMENTS[0]}"
SIZE="${XPATH_ELEMENTS[1]}"

#-------------------------------------------------------------------------------
# Get Datastore information
#-------------------------------------------------------------------------------

unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <(onedatastore show -x $DST_DS_ID | $XPATH \
    /DATASTORE/TEMPLATE/POOL_NAME)

POOL_NAME="${XPATH_ELEMENTS[0]}"

disable_local_monitoring $DST_HOST $DST_DIR

#-------------------------------------------------------------------------------
# Clone the image
#-------------------------------------------------------------------------------

# Command used on the host, with the optional configuration file and pool
# options appended
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi
if [ -n "$POOL_NAME" ]; then
    CLI="$CLI --pool ${POOL_NAME}"
fi

# The clone is created as a child of the source image, SIZE megabytes big
CLONE_CMD=$(cat <<EOF
$CLI create --parent $SRC_PATH --size ${SIZE}M $VM_DST
EOF
)

ssh_exec_and_log "$DST_HOST" "$CLONE_CMD" "Error cloning $SRC_PATH to $VM_DST in $DST_HOST"

exit 0

View File

@ -1 +0,0 @@
../ceph/context

View File

@ -1,113 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# cpds host:remote_system_ds/disk.i fe:SOURCE snapid vmid dsid
# - fe is the front-end hostname
# - SOURCE is the path of the disk image in the form DS_BASE_PATH/disk
# - host is the target host to deploy the VM
# - remote_system_ds is the path for the system datastore in the host
# - snapid is the snapshot id. "-1" for none
# Positional arguments
SRC=$1
DST=$2
SNAP_ID=$3
VM_ID=$4
DS_ID=$5

#--------------------------------------------------------------------------------
# Locate and load the common TM helpers, datastore library and KVM settings
#--------------------------------------------------------------------------------

if [ -n "${ONE_LOCATION}" ]; then
    TMCOMMON=$ONE_LOCATION/var/remotes/tm/tm_common.sh
    LIB_LOCATION=$ONE_LOCATION/lib
else
    TMCOMMON=/var/lib/one/remotes/tm/tm_common.sh
    LIB_LOCATION=/usr/lib/one
fi

DRIVER_PATH=$(dirname $0)

source $TMCOMMON
source ${DRIVER_PATH}/../../datastore/libfs.sh
source ${DRIVER_PATH}/../../etc/vmm/kvm/kvmrc

#-------------------------------------------------------------------------------
# Split the source into host and path
#-------------------------------------------------------------------------------

SRC_HOST=$(arg_host $SRC)
SRC_PATH=$(arg_path $SRC)

#-------------------------------------------------------------------------------
# Get Image information
#-------------------------------------------------------------------------------

# The disk ID is whatever follows the last '.' of the source path
DISK_ID=$(echo "$SRC_PATH" | $AWK -F. '{print $NF}')

XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin"

# xpath.rb output is read back NUL-delimited, one array slot per requested
# path, in the order the paths are listed
unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <(onevm show -x $VM_ID | $XPATH \
    /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/SOURCE \
    /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/CLONE \
    /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/VITASTOR_CONF \
    /VM/LCM_STATE)

SRC_IMAGE="${XPATH_ELEMENTS[0]}"
CLONE="${XPATH_ELEMENTS[1]}"
VITASTOR_CONF="${XPATH_ELEMENTS[2]}"
LCM_STATE="${XPATH_ELEMENTS[3]}"

#-------------------------------------------------------------------------------
# Get Datastore information
#-------------------------------------------------------------------------------

unset i j XPATH_ELEMENTS
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS+=("$element")
done < <(onedatastore show -x $DS_ID | $XPATH \
    /DATASTORE/TEMPLATE/POOL_NAME \
    /DATASTORE/TEMPLATE/BRIDGE_LIST)

POOL_NAME="${XPATH_ELEMENTS[0]}"
BRIDGE_LIST="${XPATH_ELEMENTS[1]}"

#-------------------------------------------------------------------------------
# Copy Image back to the datastore
#-------------------------------------------------------------------------------

# A cloned disk lives in its own image named <source>-<vm id>-<disk id>
if [ "$CLONE" = "YES" ]; then
    SRC_IMAGE="${SRC_IMAGE}-${VM_ID}-${DISK_ID}"
fi

# Command used on the host, with the optional configuration file and pool
# options appended
CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi
if [ -n "$POOL_NAME" ]; then
    CLI="$CLI --pool ${POOL_NAME}"
fi

# Undeployed VM state, do not use front-end, choose host from bridge_list
case "$LCM_STATE" in
    67|68)
        if [ -n "$BRIDGE_LIST" ]; then
            SRC_HOST=$(get_destination_host)
        fi
        ;;
esac

# Copy from the given snapshot unless the snapshot ID is -1
if [ "$SNAP_ID" != "-1" ]; then
    SRC_IMAGE=$SRC_IMAGE@$SNAP_ID
fi

COPY_CMD=$(cat <<EOF
$CLI dd iimg=$SRC_IMAGE oimg=$DST
EOF
)

ssh_exec_and_log "$SRC_HOST" "$COPY_CMD" "Error cloning $SRC_IMAGE to $DST in $SRC_HOST"

View File

@ -1,139 +0,0 @@
#!/bin/bash

# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0

# DELETE <host:remote_system_ds/disk.i|host:remote_system_ds/>
#   - host is the target host to deploy the VM
#   - remote_system_ds is the path for the system datastore in the host
#
# When the argument is a directory, the VM checkpoint image and the directory
# itself are removed. When it is a disk, the matching Vitastor image is removed
# unless the disk is a non-cloned image (CLONE=NO).

DST=$1
VM_ID=$2
DS_ID=$3

#--------------------------------------------------------------------------------

if [ -z "${ONE_LOCATION}" ]; then
    TMCOMMON=/var/lib/one/remotes/tm/tm_common.sh
    LIB_LOCATION=/usr/lib/one
else
    TMCOMMON=$ONE_LOCATION/var/remotes/tm/tm_common.sh
    LIB_LOCATION=$ONE_LOCATION/lib
fi

DRIVER_PATH=$(dirname "$0")

source "$TMCOMMON"
source "${DRIVER_PATH}/../../datastore/libfs.sh"

#-------------------------------------------------------------------------------
# Process destination
#-------------------------------------------------------------------------------

DST_PATH=$(arg_path "$DST")
DST_HOST=$(arg_host "$DST")

# Expanded unquoted on purpose: this is a command followed by its option.
XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin"

#-------------------------------------------------------------------------------
# Delete and exit if directory
#-------------------------------------------------------------------------------

if [ "$(is_disk "$DST_PATH")" -eq 0 ]; then
    # Directory: delete checkpoint and directory
    unset i j XPATH_ELEMENTS

    while IFS= read -r -d '' element; do
        XPATH_ELEMENTS[i++]="$element"
    done < <(onedatastore show -x "$DS_ID" | $XPATH \
                        /DATASTORE/TEMPLATE/SOURCE \
                        /DATASTORE/TEMPLATE/CLONE \
                        /DATASTORE/TEMPLATE/VITASTOR_CONF \
                        /DATASTORE/TEMPLATE/IMAGE_PREFIX \
                        /DATASTORE/TEMPLATE/POOL_NAME)

    SRC="${XPATH_ELEMENTS[j++]}"
    CLONE="${XPATH_ELEMENTS[j++]}"
    VITASTOR_CONF="${XPATH_ELEMENTS[j++]}"
    IMAGE_PREFIX="${XPATH_ELEMENTS[j++]:-one}"
    POOL_NAME="${XPATH_ELEMENTS[j++]}"

    CLI=vitastor-cli
    if [ -n "$VITASTOR_CONF" ]; then
        CLI="$CLI --config_path ${VITASTOR_CONF}"
    fi

    SRC_CHECKPOINT="${IMAGE_PREFIX}-sys-${VM_ID}-checkpoint"

    # A missing checkpoint is not an error, hence the "|| exit 0".
    ssh_exec_and_log "$DST_HOST" "$CLI rm $SRC_CHECKPOINT 2>/dev/null || exit 0" \
        "Error deleting $SRC_CHECKPOINT in $DST_HOST"

    log "Deleting $DST_PATH"
    ssh_exec_and_log "$DST_HOST" "rm -rf -- \"$DST_PATH\"" "Error deleting $DST_PATH"
    exit 0
fi

#-------------------------------------------------------------------------------
# Get Image information
#-------------------------------------------------------------------------------

# The disk id is the suffix after the last dot of the path (disk.<id>).
DISK_ID=$(echo "$DST_PATH" | $AWK -F. '{print $NF}')

# Reads the disk parameters -- taken from image datastore.
# The XPath expressions are quoted so that "[...]" is never treated as a glob.
unset i j XPATH_ELEMENTS

while IFS= read -r -d '' element; do
    XPATH_ELEMENTS[i++]="$element"
done < <(onevm show -x "$VM_ID" | $XPATH \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/SOURCE" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/CLONE" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/VITASTOR_CONF" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/IMAGE_PREFIX" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/POOL_NAME")

SRC="${XPATH_ELEMENTS[j++]}"
CLONE="${XPATH_ELEMENTS[j++]}"
VITASTOR_CONF="${XPATH_ELEMENTS[j++]}"
IMAGE_PREFIX="${XPATH_ELEMENTS[j++]:-one}"
POOL_NAME="${XPATH_ELEMENTS[j++]}"

if is_undeployed "$VM_ID" "$DST_HOST"; then
    # get BRIDGE_LIST from datastore
    IFS= read -r -d '' BRIDGE_LIST < <(onedatastore show -x "$DS_ID" \
        | $XPATH /DATASTORE/TEMPLATE/BRIDGE_LIST)

    if [ -n "$BRIDGE_LIST" ]; then
        DST_HOST=$(get_destination_host)
    fi
fi

# No need to delete not cloned images
if [ "$CLONE" = "NO" ]; then
    exit 0
fi

CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi

if [ -n "$SRC" ]; then
    # cloned, so the name will be "one-<imageid>-<vmid>-<diskid>"
    SRC_IMAGE="${SRC}-${VM_ID}-${DISK_ID}"
else
    # volatile
    SRC_IMAGE="${IMAGE_PREFIX}-sys-${VM_ID}-${DISK_ID}"
fi

# Delete the image
log "Deleting $DST_PATH"

DELETE_CMD=$(cat <<EOF
    $CLI rm $SRC_IMAGE
EOF
)

ssh_exec_and_log "$DST_HOST" "$DELETE_CMD" "Error deleting $SRC_IMAGE in $DST_HOST"

View File

@ -1 +0,0 @@
../ceph/failmigrate

View File

@ -1,16 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# <CLONE|LN>(.tm_mad_system) tm_mad fe:SOURCE host:remote_system_ds/disk.i vmid dsid
# LN = Attach disk to a VM (Vitastor doesn't need to do anything in this case)
# The positional arguments are captured for reference only; none of them is
# used, and the script always reports success.
SRC=$1
DST=$2
VM_ID=$3
DS_ID=$4
exit 0

View File

@ -1,120 +0,0 @@
#!/bin/bash

# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0

# mkimage size format host:remote_system_ds/disk.i vmid dsid
#   - size in MB of the image
#   - format for the image
#   - host is the target host to deploy the VM
#   - remote_system_ds is the path for the system datastore in the host
#   - vmid is the id of the VM
#   - dsid is the target datastore (0 is the system datastore)
#
# Creates the volatile image "<prefix>-sys-<vmid>-<diskid>" on the target host
# and, when a filesystem or swap is requested, formats it through vitastor-nbd.
# If creation fails the (possibly partially created) image is removed again.

SIZE=$1
FORMAT=$2
DST=$3

VMID=$4
DSID=$5

#-------------------------------------------------------------------------------

if [ -z "${ONE_LOCATION}" ]; then
    TMCOMMON=/var/lib/one/remotes/tm/tm_common.sh
    LIB_LOCATION=/usr/lib/one
else
    TMCOMMON=$ONE_LOCATION/var/remotes/tm/tm_common.sh
    LIB_LOCATION=$ONE_LOCATION/lib
fi

DRIVER_PATH=$(dirname "$0")

source "$TMCOMMON"

source "${DRIVER_PATH}/../../etc/datastore/datastore.conf"
source "${DRIVER_PATH}/../../datastore/libfs.sh"

#-------------------------------------------------------------------------------
# Set dst path and dir
#-------------------------------------------------------------------------------

DST_PATH=$(arg_path "$DST")
DST_HOST=$(arg_host "$DST")
DST_DIR=$(dirname "$DST_PATH")

# The disk id is the suffix after the last dot (disk.<id>).
DISK_ID=$(echo "$DST" | awk -F. '{print $NF}')

#-------------------------------------------------------------------------------
# Get Image information
#-------------------------------------------------------------------------------

# Expanded unquoted on purpose: this is a command followed by its option.
XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin"

unset i j XPATH_ELEMENTS

# The XPath expressions are quoted so that "[...]" is never treated as a glob.
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS[i++]="$element"
done < <(onevm show -x "$VMID" | $XPATH \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/VITASTOR_CONF" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/POOL_NAME" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/IMAGE_PREFIX" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/FS")

VITASTOR_CONF="${XPATH_ELEMENTS[j++]}"
POOL_NAME="${XPATH_ELEMENTS[j++]}"
IMAGE_PREFIX="${XPATH_ELEMENTS[j++]:-one}"
FS="${XPATH_ELEMENTS[j++]}"

# Extra options shared by vitastor-cli and vitastor-nbd.
CLI=
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi

IMAGE_NAME="${IMAGE_PREFIX}-sys-${VMID}-${DISK_ID}"

ssh_make_path "$DST_HOST" "$DST_DIR"

set -e -o pipefail

# if user requested a swap or specifies a FS, we need to create a local
# formatted image and upload into existing Vitastor image

# Look up FS_OPTS_<fs> (from datastore.conf) with indirect expansion instead of
# eval: FS comes from the VM template and must never be executed as shell code.
FS_OPTS=
if [[ "$FS" =~ ^[A-Za-z0-9_]+$ ]]; then
    FS_OPTS_VAR="FS_OPTS_${FS}"
    FS_OPTS="${!FS_OPTS_VAR}"
fi

MKIMAGE_CMD=$(cat <<EOF
    set -e -o pipefail
    export PATH=/usr/sbin:/sbin:\$PATH
    vitastor-cli $CLI create --pool "${POOL_NAME}" "$IMAGE_NAME" --size "${SIZE}M"
EOF
)

if [ -n "$FS" -o "$FORMAT" = "swap" ]; then
    # '$NBD' stays literal here; it is expanded on the remote host where the
    # device is mapped. The qemu-img line emitted by mkfs_command is dropped.
    MKFS_CMD=$(mkfs_command '$NBD' raw "$SIZE" "$SUPPORTED_FS" "$FS" "$FS_OPTS" | grep -v -- "$QEMU_IMG")

    MKIMAGE_CMD=$(cat <<EOF
    $MKIMAGE_CMD
    NBD=\$(sudo vitastor-nbd $CLI map --image "$IMAGE_NAME")
    trap "sudo vitastor-nbd $CLI unmap \$NBD" EXIT
    $MKFS_CMD
EOF
)
fi

DELIMAGE_CMD=$(cat <<EOF
    vitastor-cli $CLI rm "$IMAGE_NAME"
EOF
)

log "Making volatile disk of ${SIZE}M at $DST"

# "set -e" is active: capture the status through "||" so that a failure does
# not abort the script before the cleanup below gets a chance to run.
rc=0
ssh_exec_and_log_no_error "$DST_HOST" "$MKIMAGE_CMD" \
    "Error creating volatile disk.$DISK_ID ($IMAGE_NAME) in $DST_HOST in pool $POOL_NAME." || rc=$?

if [ "$rc" -ne 0 ]; then
    # Best-effort cleanup; its own failure must not mask the original status.
    ssh_exec_and_log_no_error "$DST_HOST" "$DELIMAGE_CMD" "Error removing image" || true
fi

exit $rc

View File

@ -1 +0,0 @@
../ceph/mkswap

View File

@ -1 +0,0 @@
../../datastore/vitastor/monitor

View File

@ -1 +0,0 @@
../ceph/mv

View File

@ -1,15 +0,0 @@
#!/bin/bash
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# mvds host:remote_system_ds/disk.i fe:SOURCE vmid dsid
# - fe is the front-end hostname
# - SOURCE is the path of the disk image in the form DS_BASE_PATH/disk
# - host is the target host to deploy the VM
# - remote_system_ds is the path for the system datastore in the host
# - vmid is the id of the VM
# - dsid is the target datastore (0 is the system datastore)
# This script performs no work: it ignores its arguments and reports success.
exit 0

View File

@ -1 +0,0 @@
postbackup_live

View File

@ -1 +0,0 @@
../ceph/postbackup_live

View File

@ -1 +0,0 @@
../ceph/postmigrate

View File

@ -1,152 +0,0 @@
#!/usr/bin/env ruby
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# Installation layout: when ONE_LOCATION is unset the system-wide paths are
# used, otherwise every path is derived from ONE_LOCATION.
ONE_LOCATION = ENV['ONE_LOCATION']
# Any value of the LIVE environment variable switches on the live-backup path
# (KVM helpers are mixed in below and fsfreeze is called later in the script).
LIVE = ENV['LIVE']
if !ONE_LOCATION
RUBY_LIB_LOCATION = '/usr/lib/one/ruby'
GEMS_LOCATION = '/usr/share/one/gems'
VMDIR = '/var/lib/one'
CONFIG_FILE = '/var/lib/one/config'
else
RUBY_LIB_LOCATION = ONE_LOCATION + '/lib/ruby'
GEMS_LOCATION = ONE_LOCATION + '/share/gems'
VMDIR = ONE_LOCATION + '/var'
CONFIG_FILE = ONE_LOCATION + '/var/config'
end
# %%RUBYGEMS_SETUP_BEGIN%%
# If a gems directory exists at GEMS_LOCATION, make it the only gem path.
if File.directory?(GEMS_LOCATION)
real_gems_path = File.realpath(GEMS_LOCATION)
if !defined?(Gem) || Gem.path != [real_gems_path]
# Drop vendor_ruby entries from the load path before switching gem paths.
$LOAD_PATH.reject! {|l| l =~ /vendor_ruby/ }
# Suppress warnings from Rubygems
# https://github.com/OpenNebula/one/issues/5379
begin
verb = $VERBOSE
$VERBOSE = nil
require 'rubygems'
Gem.use_paths(real_gems_path)
ensure
# Restore the previous verbosity even if loading rubygems failed.
$VERBOSE = verb
end
end
end
# %%RUBYGEMS_SETUP_END%%
$LOAD_PATH << RUBY_LIB_LOCATION
require 'rexml/document'
require_relative '../lib/tm_action'
require_relative '../lib/kvm'
require_relative '../lib/datastore'
if LIVE
# TODO: fsfreeze for each hypervisor based on VM_MAD
include TransferManager::KVM
end
#-------------------------------------------------------------------------------
# BACKUP tm_mad host:remote_dir DISK_ID:...:DISK_ID deploy_id bjid vmid dsid
#
# Reads the VM XML from STDIN and exports every requested Vitastor disk of the
# VM to a qcow2 file inside the backup directory on the remote host.
#-------------------------------------------------------------------------------
TransferManager::Datastore.load_env

vm_xml = STDIN.read

dir       = ARGV[0].split ':'
disks     = ARGV[1].split ':'
deploy_id = ARGV[2]
_bjid     = ARGV[3]
vmid      = ARGV[4]
_dsid     = ARGV[5]

rhost = dir[0]
rdir  = dir[1]

xml_doc = REXML::Document.new(vm_xml)
vm      = xml_doc.root

ds = TransferManager::Datastore.from_vm_backup_ds(:vm_xml => vm_xml)

# BACKUP_BASE_PATH, when set, overrides the per-VM remote directory.
base_path = ENV['BACKUP_BASE_PATH']
bck_dir   = base_path ? "#{base_path}/#{vmid}/backup" : "#{rdir}/backup"

snap_cmd = ''
expo_cmd = ''
clup_cmd = ''

vm.elements.each 'TEMPLATE/DISK' do |d|
    did = d.elements['DISK_ID'].text
    next unless disks.include? did

    src  = d.elements['SOURCE'].text
    clon = d.elements['CLONE'].text

    # Non-cloned disks use the image itself, clones are "<src>-<vmid>-<diskid>".
    src_image = clon == 'NO' ? src : "#{src}-#{vmid}-#{did}"

    # VITASTOR_CONF is an XML element: its text must be used, concatenating the
    # element itself with a String raises TypeError.
    conf_path = d.elements['VITASTOR_CONF']&.text.to_s.strip
    qemu_arg  = conf_path.empty? ? '' : "config_path=#{conf_path}:"

    draw = "#{bck_dir}/disk.#{did}.raw"
    ddst = "#{bck_dir}/disk.#{did}.0"

    expo_cmd << ds.cmd_confinement("qemu-img convert -m 4 -O qcow2 'vitastor:#{qemu_arg}image=#{src_image}' #{ddst}\n", rdir)
    clup_cmd << "rm -f #{draw}\n"
rescue StandardError => e
    STDERR.puts "Missing configuration attributes in DISK: #{e.message}"
    exit(1)
end

# freeze/thaw stay nil (and interpolate to empty lines) for non-live backups.
if LIVE
    freeze, thaw = fsfreeze(vm, deploy_id)
end

script = <<~EOS
    set -ex -o pipefail
    # Prepare backup folder
    [ -d #{bck_dir} ] && rm -rf #{bck_dir}
    mkdir -p #{bck_dir}
    echo "#{Base64.encode64(vm_xml)}" > #{bck_dir}/vm.xml
    #{freeze}
    #{snap_cmd}
    #{thaw}
    #{expo_cmd}
    #{clup_cmd}
EOS

rc = TransferManager::Action.ssh('prebackup_live',
                                 :host => rhost,
                                 :cmds => script,
                                 :nostdout => false,
                                 :nostderr => false)

if rc.code != 0
    STDERR.puts "Error preparing disk files: #{rc.stdout} #{rc.stderr}"
end

exit(rc.code)

View File

@ -1,8 +0,0 @@
#!/bin/bash

# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0

# Live variant of prebackup: runs the sibling "prebackup" script with LIVE set.
# Arguments are forwarded verbatim ("$@" keeps each one intact even if it
# contains whitespace); exec hands over stdin and the exit status unchanged.

export LIVE=1
exec "$(dirname "$0")/prebackup" "$@"

View File

@ -1 +0,0 @@
../ceph/premigrate

View File

@ -1,81 +0,0 @@
#!/bin/bash

# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0

# resize image size vmid
#   - image is host:remote_system_ds/disk.i
#   - size is the new size, passed to vitastor-cli with an "M" suffix
#   - vmid is the id of the VM

SRC=$1
SIZE=$2
VM_ID=$3

#--------------------------------------------------------------------------------

if [ -z "${ONE_LOCATION}" ]; then
    TMCOMMON=/var/lib/one/remotes/tm/tm_common.sh
    LIB_LOCATION=/usr/lib/one
else
    TMCOMMON=$ONE_LOCATION/var/remotes/tm/tm_common.sh
    LIB_LOCATION=$ONE_LOCATION/lib
fi

DRIVER_PATH=$(dirname "$0")

source "$TMCOMMON"

#-------------------------------------------------------------------------------
# Set src host and path
#-------------------------------------------------------------------------------

SRC_HOST=$(arg_host "$SRC")
SRC_PATH=$(arg_path "$SRC")

#-------------------------------------------------------------------------------
# Get Image information
#-------------------------------------------------------------------------------

# The disk id is the suffix after the last dot of the path (disk.<id>).
DISK_ID=$(echo "$SRC_PATH" | $AWK -F. '{print $NF}')

# Expanded unquoted on purpose: this is a command followed by its option.
XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin"

unset i j XPATH_ELEMENTS

# The XPath expressions are quoted so that "[...]" is never treated as a glob.
while IFS= read -r -d '' element; do
    XPATH_ELEMENTS[i++]="$element"
done < <(onevm show -x "$VM_ID" | $XPATH \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/SOURCE" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/VITASTOR_CONF" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/IMAGE_PREFIX" \
                    "/VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/PERSISTENT")

SRC_IMAGE="${XPATH_ELEMENTS[j++]}"
VITASTOR_CONF="${XPATH_ELEMENTS[j++]}"
IMAGE_PREFIX="${XPATH_ELEMENTS[j++]:-one}"
PERSISTENT="${XPATH_ELEMENTS[j++]}"

if [ -n "${SRC_IMAGE}" ]; then
    # Non-persistent disks are named "<source>-<vmid>-<diskid>".
    if [ "${PERSISTENT}" != 'YES' ]; then
        SRC_IMAGE="${SRC_IMAGE}-${VM_ID}-${DISK_ID}"
    fi
else
    # No SOURCE: volatile disk named "<prefix>-sys-<vmid>-<diskid>".
    SRC_IMAGE="${IMAGE_PREFIX}-sys-${VM_ID}-${DISK_ID}"
fi

#-------------------------------------------------------------------------------
# Resize disk
#-------------------------------------------------------------------------------

CLI=vitastor-cli
if [ -n "$VITASTOR_CONF" ]; then
    CLI="$CLI --config_path ${VITASTOR_CONF}"
fi

RESIZE_CMD=$(cat <<EOF
    $CLI modify --resize ${SIZE}M "$SRC_IMAGE"
EOF
)

ssh_exec_and_log "$SRC_HOST" "$RESIZE_CMD" "Error resizing disk $SRC_IMAGE"

exit 0

View File

@ -1,201 +0,0 @@
#!/usr/bin/env ruby
# Vitastor OpenNebula driver
# Copyright (c) Vitaliy Filippov, 2024+
# License: Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0
# Installation layout: when ONE_LOCATION is unset the system-wide paths are
# used, otherwise every path is derived from ONE_LOCATION.
ONE_LOCATION = ENV['ONE_LOCATION']
if !ONE_LOCATION
RUBY_LIB_LOCATION = '/usr/lib/one/ruby'
GEMS_LOCATION = '/usr/share/one/gems'
VMDIR = '/var/lib/one'
CONFIG_FILE = '/var/lib/one/config'
else
RUBY_LIB_LOCATION = ONE_LOCATION + '/lib/ruby'
GEMS_LOCATION = ONE_LOCATION + '/share/gems'
VMDIR = ONE_LOCATION + '/var'
CONFIG_FILE = ONE_LOCATION + '/var/config'
end
# %%RUBYGEMS_SETUP_BEGIN%%
# If a gems directory exists at GEMS_LOCATION, make it the only gem path.
if File.directory?(GEMS_LOCATION)
real_gems_path = File.realpath(GEMS_LOCATION)
if !defined?(Gem) || Gem.path != [real_gems_path]
# Drop vendor_ruby entries from the load path before switching gem paths.
$LOAD_PATH.reject! {|l| l =~ /vendor_ruby/ }
# Suppress warnings from Rubygems
# https://github.com/OpenNebula/one/issues/5379
begin
verb = $VERBOSE
$VERBOSE = nil
require 'rubygems'
Gem.use_paths(real_gems_path)
ensure
# Restore the previous verbosity even if loading rubygems failed.
$VERBOSE = verb
end
end
end
# %%RUBYGEMS_SETUP_END%%
$LOAD_PATH << RUBY_LIB_LOCATION
require 'rexml/document'
require 'json'
# SecureRandom provides the random suffix for temporary image names below.
require 'securerandom'
require_relative '../lib/tm_action'
require_relative '../lib/datastore'
# Collects the Vitastor-backed disks of a VM template, indexed by disk id.
#
# @param vm_xml [String, REXML::Document, REXML::Element] template XML whose
#   root element holds the DISK children
# @return [Array<REXML::Element, nil>] sparse array where position N holds the
#   DISK element with DISK_ID N and TM_MAD "vitastor"; other slots are nil
def get_vitastor_disks(vm_xml)
    vm_xml = REXML::Document.new(vm_xml) if vm_xml.is_a?(String)
    vm     = vm_xml.root

    indexed_disks = []

    # Callers read disk attributes through REXML (disk.elements[...]), so the
    # raw elements are stored; there is no disk wrapper class in this script.
    vm.elements.each('DISK[TM_MAD="vitastor"]') do |d|
        indexed_disks[d.elements['DISK_ID'].text.to_i] = d
    end

    indexed_disks
end
#-------------------------------------------------------------------------------
# RESTORE vm_id img_id inc_id disk_id
#
# Downloads the selected backup disks, uploads each one into a temporary
# Vitastor image and, once all uploads succeeded, swaps the temporary images in
# place of the VM disks. A disk_id of -1 restores every disk of the backup.
#-------------------------------------------------------------------------------
_dir      = ARGV[0].split ':'
vm_id     = ARGV[1]
bk_img_id = ARGV[2].to_i
inc_id    = ARGV[3]
disk_id   = ARGV[4].to_i

begin
    action = TransferManager::Action.new(:action_name => 'restore',
                                         :vm_id => vm_id)

    # --------------------------------------------------------------------------
    # Image & Datastore information
    # --------------------------------------------------------------------------
    bk_img = OpenNebula::Image.new_with_id(bk_img_id, action.one)

    rc = bk_img.info
    raise rc.message.to_s if OpenNebula.is_error?(rc)

    bk_ds = TransferManager::Datastore.from_image_ds(:image => bk_img,
                                                     :client => action.one)

    # --------------------------------------------------------------------------
    # Backup information
    # sample output: {"0":"rsync://100//0:3ffce7/var/lib/one/datastores/100/1/3ffce7/disk.0.0"}
    # --------------------------------------------------------------------------
    xml_data = <<~EOS
        #{action.vm.to_xml}
        #{bk_img.to_xml}
    EOS

    rc = bk_ds.action("ls -i #{inc_id}", xml_data)
    raise 'cannot list backup contents' unless rc.code == 0

    disk_urls = JSON.parse(rc.stdout)
    disk_urls = disk_urls.select {|id, _url| id.to_i == disk_id } if disk_id != -1

    # --------------------------------------------------------------------------
    # Restore disk_urls in Host VM folder
    # --------------------------------------------------------------------------
    vitastor_disks = get_vitastor_disks(action.vm.template_xml)
    success_disks  = []
    info           = {}

    disk_urls.each do |id, url|
        vitastor_disk = vitastor_disks[id.to_i]
        raise "disk #{id} is not a Vitastor disk of VM #{vm_id}" unless vitastor_disk

        randsuffix = SecureRandom.hex(5)

        vitastor_one_ds = OpenNebula::Datastore.new_with_id(
            action.vm["/VM/TEMPLATE/DISK[DISK_ID = #{id}]/DATASTORE_ID"].to_i, action.one
        )
        vitastor_ds = TransferManager::Datastore.new(:ds => vitastor_one_ds, :client => action.one)

        src_image = vitastor_disk.elements['SOURCE'].text
        # Named "did" so the disk_id taken from the command line is not clobbered.
        did = vitastor_disk.elements['DISK_ID'].text

        # Cloned disks are named "<source>-<vmid>-<diskid>".
        src_image += "-#{vm_id}-#{did}" if vitastor_disk.elements['CLONE'].text == 'YES'

        cli       = 'vitastor-cli'
        qemu_args = ''

        conf_path = vitastor_disk.elements['VITASTOR_CONF']&.text.to_s.strip
        unless conf_path.empty?
            cli       += " --config_path \"#{conf_path}\""
            qemu_args += ":config_path=#{conf_path}"
        end

        info[vitastor_disk] = {
            :br  => vitastor_ds.pick_bridge,
            :bak => "#{src_image}.backup.#{randsuffix}",
            :old => "#{src_image}.old.#{randsuffix}",
            :cli => cli,
            :img => src_image
        }

        upload_vitastor = <<~EOS
            set -e
            tmpimg="$(mktemp -t disk#{id}.XXXX)"
            #{__dir__}/../../datastore/downloader.sh --nodecomp #{url} $tmpimg
            #{cli} create -s $(qemu-img info --output json $tmpimg | jq -r '.["virtual-size"]') #{info[vitastor_disk][:bak]}
            qemu-img convert -m 4 -O raw $tmpimg "vitastor:image=#{info[vitastor_disk][:bak]}#{qemu_args}"
            rm -f $tmpimg
        EOS

        rc = action.ssh(:host => info[vitastor_disk][:br],
                        :cmds => upload_vitastor,
                        :forward => false,
                        :nostdout => false,
                        :nostderr => false)

        # Stop at the first failure; the uploads done so far are rolled back below.
        break if rc.code != 0

        success_disks << vitastor_disk
    end

    # Rollback and raise error if it was unable to backup all disks
    if success_disks.length != disk_urls.length
        success_disks.each do |vitastor_disk|
            cleanup = <<~EOS
                #{info[vitastor_disk][:cli]} rm #{info[vitastor_disk][:bak]}
            EOS

            action.ssh(:host => info[vitastor_disk][:br],
                       :cmds => cleanup,
                       :forward => false,
                       :nostdout => false,
                       :nostderr => false)
        end

        raise "error uploading backup disk to Vitastor (#{success_disks.length}/#{disk_urls.length})"
    end

    # --------------------------------------------------------------------------
    # Replace VM disk_urls with backup copies (~prolog)
    # --------------------------------------------------------------------------
    success_disks.each do |vitastor_disk|
        # Park the current image under the ".old" name, promote the restored
        # copy, then drop the snapshots of the image and the parked original.
        move = <<~EOS
            set -e
            #{info[vitastor_disk][:cli]} mv #{info[vitastor_disk][:img]} #{info[vitastor_disk][:old]}
            #{info[vitastor_disk][:cli]} mv #{info[vitastor_disk][:bak]} #{info[vitastor_disk][:img]}
            #{info[vitastor_disk][:cli]} rm --matching "#{info[vitastor_disk][:img]}@*"
            #{info[vitastor_disk][:cli]} rm #{info[vitastor_disk][:old]}
        EOS

        rc = action.ssh(:host => info[vitastor_disk][:br],
                        :cmds => move,
                        :forward => false,
                        :nostdout => false,
                        :nostderr => false)

        warn 'cannot restore disk backup' if rc.code != 0
    end
rescue StandardError => e
    STDERR.puts "Error restoring VM disks: #{e.message}"
    exit(1)
end

Some files were not shown because too many files have changed in this diff Show More