Compare commits

..

1 Commits

Author SHA1 Message Date
0d1b6d0760 OpenSSL support in http_client.cpp
All checks were successful
Test / test_change_pg_count_ec (push) Has been skipped
Test / test_change_pg_size (push) Has been skipped
Test / test_create_nomaxid (push) Has been skipped
Test / test_etcd_fail (push) Has been skipped
Test / test_failure_domain (push) Has been skipped
Test / test_interrupted_rebalance (push) Has been skipped
Test / test_interrupted_rebalance_imm (push) Has been skipped
Test / test_interrupted_rebalance_ec (push) Has been skipped
Test / test_interrupted_rebalance_ec_imm (push) Has been skipped
Test / test_minsize_1 (push) Has been skipped
Test / test_move_reappear (push) Has been skipped
Test / test_rebalance_verify (push) Has been skipped
Test / test_rebalance_verify_imm (push) Has been skipped
Test / test_rebalance_verify_ec (push) Has been skipped
Test / test_rebalance_verify_ec_imm (push) Has been skipped
Test / test_rm (push) Has been skipped
Test / test_snapshot (push) Has been skipped
Test / test_snapshot_ec (push) Has been skipped
Test / test_splitbrain (push) Has been skipped
Test / test_write (push) Has been skipped
Test / test_write_xor (push) Has been skipped
Test / test_write_no_same (push) Has been skipped
Test / test_heal_pg_size_2 (push) Has been skipped
Test / test_heal_ec (push) Has been skipped
Test / test_scrub (push) Has been skipped
Test / test_scrub_zero_osd_2 (push) Has been skipped
Test / test_scrub_xor (push) Has been skipped
Test / test_scrub_pg_size_3 (push) Has been skipped
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Has been skipped
Test / test_scrub_ec (push) Has been skipped
2023-06-19 02:17:45 +03:00
92 changed files with 512 additions and 2115 deletions

View File

@@ -10,9 +10,6 @@ RUN set -e -x; \
ln -s /root/fio-build/fio-*/ ./fio; \ ln -s /root/fio-build/fio-*/ ./fio; \
ln -s /root/qemu-build/qemu-*/ ./qemu; \ ln -s /root/qemu-build/qemu-*/ ./qemu; \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \ ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
cd mon; \
npm install; \
cd ..; \
mkdir build; \ mkdir build; \
cd build; \ cd build; \
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \ cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \

View File

@@ -190,6 +190,24 @@ jobs:
echo "" echo ""
done done
test_failure_domain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_failure_domain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance: test_interrupted_rebalance:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build needs: build
@@ -262,60 +280,6 @@ jobs:
echo "" echo ""
done done
test_failure_domain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_failure_domain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_minsize_1: test_minsize_1:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build needs: build
@@ -352,114 +316,6 @@ jobs:
echo "" echo ""
done done
test_rm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_splitbrain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_splitbrain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify: test_rebalance_verify:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build needs: build
@@ -532,6 +388,78 @@ jobs:
echo "" echo ""
done done
test_rm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_splitbrain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_splitbrain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write: test_write:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build needs: build

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor) project(vitastor)
set(VERSION "0.9.5") set(VERSION "0.9.2")
add_subdirectory(src) add_subdirectory(src)

View File

@@ -15,7 +15,7 @@ Vitastor архитектурно похож на Ceph, что означает
и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
избыточности - репликацией или с произвольными кодами коррекции ошибок. избыточности - репликацией или с произвольными кодами коррекции ошибок.
Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс, TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД. что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.

View File

@@ -14,8 +14,8 @@ Vitastor is architecturally similar to Ceph which means strong consistency,
primary-replication, symmetric clustering and automatic data distribution over any primary-replication, symmetric clustering and automatic data distribution over any
number of drives of any size with configurable redundancy (replication or erasure codes/XOR). number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network, Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
with proper hardware which is ~10 times faster than other popular SDS's like Ceph with proper hardware which is ~10 times faster than other popular SDS's like Ceph
or internal systems of public clouds. or internal systems of public clouds.

View File

@@ -1,4 +1,4 @@
VERSION ?= v0.9.5 VERSION ?= v0.9.2
all: build push all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities: capabilities:
add: ["SYS_ADMIN"] add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v0.9.5 image: vitalif/vitastor-csi:v0.9.2
args: args:
- "--node=$(NODE_ID)" - "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)" - "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -116,7 +116,7 @@ spec:
privileged: true privileged: true
capabilities: capabilities:
add: ["SYS_ADMIN"] add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v0.9.5 image: vitalif/vitastor-csi:v0.9.2
args: args:
- "--node=$(NODE_ID)" - "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)" - "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -5,7 +5,7 @@ package vitastor
const ( const (
vitastorCSIDriverName = "csi.vitastor.io" vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "0.9.5" vitastorCSIDriverVersion = "0.9.2"
) )
// Config struct fills the parameters of request or user input // Config struct fills the parameters of request or user input

View File

@@ -1,58 +0,0 @@
# Notes for preparing Docker build environments for the pve-qemu package.
# The leading `exit` stops execution immediately if this file is run as a script;
# presumably the commands below are meant to be copied by hand - TODO confirm.
exit
git clone https://git.yourcmc.ru/vitalif/pve-qemu .
# bookworm
# Start an interactive Debian bookworm container with the pve-qemu checkout mounted.
# (The image and container name previously said "bullseye", which contradicted this
# section and clashed with the container name used in the bullseye section below.)
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bookworm debian:bookworm bash
# Presumably the remaining commands are typed inside the container shell - TODO confirm.
# Enable deb-src entries in the deb822-style sources file
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
# Add the Proxmox and Vitastor package repositories
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
# Do not pull in recommended packages
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
# Locale entries appended to /etc/locale.gen
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
# wget and CA certificates are needed to fetch the repository keys below
apt-get install wget ca-certificates
wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
# Refresh package lists now that the repository keys are installed
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
# Install the build dependencies listed in ./control
mk-build-deps --install ./control
# bullseye
# Start an interactive Debian bullseye container with the pve-qemu checkout mounted.
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
# Presumably the remaining commands are typed inside the container shell - TODO confirm.
# Duplicate every "deb" line of sources.list as a "deb-src" line
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
# Add the Proxmox and Vitastor package repositories
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
# Do not pull in recommended packages
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
# Locale entries appended to /etc/locale.gen
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
# NOTE(review): unlike the other sections, ca-certificates is not installed here - confirm this is intentional
apt-get install wget
# Fetch the repository signing keys
wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
# Refresh package lists now that the repository keys are installed
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
# Install the build dependencies listed in ./control
mk-build-deps --install ./control
# buster
# Start an interactive Debian buster container with the pve-qemu checkout mounted.
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash
# Presumably the remaining commands are typed inside the container shell - TODO confirm.
# Duplicate every "deb" line of sources.list as a "deb-src" line
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
# Add the Proxmox, Vitastor and buster-backports package repositories
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
# Do not pull in recommended packages
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
# Locale entries appended to /etc/locale.gen
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
# Fetch the repository signing keys
# NOTE(review): the Proxmox key is downloaded over plain http here - confirm this is acceptable
wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
# Refresh package lists now that the repository keys are installed
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
# Install the build dependencies listed in ./control
mk-build-deps --install ./control

4
debian/changelog vendored
View File

@@ -1,10 +1,10 @@
vitastor (0.9.5-1) unstable; urgency=medium vitastor (0.9.2-1) unstable; urgency=medium
* Bugfixes * Bugfixes
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300 -- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
vitastor (0.9.5-1) unstable; urgency=medium vitastor (0.9.2-1) unstable; urgency=medium
* Implement NFS proxy * Implement NFS proxy
* Add documentation * Add documentation

View File

@@ -28,19 +28,13 @@ RUN apt-get --download-only source qemu
ADD patches /root/vitastor/patches ADD patches /root/vitastor/patches
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
#RUN set -e; \
# apt-get install -y wget; \
# wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
# (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
# (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
# apt-get update; \
# apt-get install -y vitastor-client vitastor-client-dev quilt
RUN set -e; \ RUN set -e; \
dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \ apt-get install -y wget; \
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
(echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
(echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
apt-get update; \ apt-get update; \
apt-get install -y quilt; \ apt-get install -y vitastor-client vitastor-client-dev quilt; \
mkdir -p /root/packages/qemu-$REL; \ mkdir -p /root/packages/qemu-$REL; \
rm -rf /root/packages/qemu-$REL/*; \ rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \ cd /root/packages/qemu-$REL; \
@@ -54,7 +48,7 @@ RUN set -e; \
quilt add block/vitastor.c; \ quilt add block/vitastor.c; \
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \ cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
quilt refresh; \ quilt refresh; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor3; \ V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \ DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \ DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
rm -rf /root/packages/qemu-$REL/qemu-*/ rm -rf /root/packages/qemu-$REL/qemu-*/

View File

@@ -35,8 +35,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \ mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \ rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \ cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.9.5; \ cp -r /root/vitastor vitastor-0.9.2; \
cd vitastor-0.9.5; \ cd vitastor-0.9.2; \
ln -s /root/fio-build/fio-*/ ./fio; \ ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \ ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -49,8 +49,8 @@ RUN set -e -x; \
rm -rf a b; \ rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \ echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \ cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.5.orig.tar.xz vitastor-0.9.5; \ tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
cd vitastor-0.9.5; \ cd vitastor-0.9.2; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \ V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \ DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \ DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@@ -21,7 +21,7 @@ Configuration parameters can be set in 3 places:
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
doesn't allow to set all variables directly, but it allows to override the doesn't allow to set all variables directly, but it allows to override the
configuration file and set everything you need inside it. configuration file and set everything you need inside it.
- OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain - OSD superblocks created by [vitastor-disk](../usage/disk.en.md) contain
primarily disk layout parameters of specific OSDs. In fact, these parameters primarily disk layout parameters of specific OSDs. In fact, these parameters
are automatically passed into the command line of vitastor-osd process, so are automatically passed into the command line of vitastor-osd process, so
they have the same "status" as command-line parameters. they have the same "status" as command-line parameters.

View File

@@ -23,7 +23,7 @@
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние, монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
как правило, не включают полный набор параметров напрямую, но позволяют как правило, не включают полный набор параметров напрямую, но позволяют
определить путь к файлу конфигурации и задать любые параметры в нём. определить путь к файлу конфигурации и задать любые параметры в нём.
- В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры, - В суперблоке OSD, записываемом [vitastor-disk](../usage/disk.ru.md) - параметры,
связанные с дисковым форматом и с этим конкретным OSD. На самом деле, связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
при запуске OSD эти параметры автоматически передаются в командную строку при запуске OSD эти параметры автоматически передаются в командную строку
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам

View File

@@ -33,13 +33,12 @@ Size of objects (data blocks) into which all physical and virtual drives
in Vitastor, affects memory usage, write amplification and I/O load in Vitastor, affects memory usage, write amplification and I/O load
distribution effectiveness. distribution effectiveness.
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact, Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
it's possible to use 1 MB for SSD too - it will lower memory usage, but it's possible to use 4 MB for SSD too - it will lower memory usage, but
may increase average WA and reduce linear performance. may increase average WA and reduce linear performance.
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
544 MB per 1 TB of used disk space with the default 128 KB block size. 544 MB per 1 TB of used disk space with the default 128 KB block size.
With 1 MB it's 8 times lower.
## bitmap_granularity ## bitmap_granularity

View File

@@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
настроек, влияет на потребление памяти, объём избыточной записи (write настроек, влияет на потребление памяти, объём избыточной записи (write
amplification) и эффективность распределения нагрузки по OSD. amplification) и эффективность распределения нагрузки по OSD.
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт, мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
это понизит использование памяти, но ухудшит распределение нагрузки и в это понизит использование памяти, но ухудшит распределение нагрузки и в
среднем увеличит WA. среднем увеличит WA.
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт), Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше. стандартном 128 КБ блоке.
## bitmap_granularity ## bitmap_granularity

View File

@@ -1,145 +0,0 @@
#!/usr/bin/nodejs
const fsp = require('fs').promises;
run(process.argv).catch(console.error);
// Markdown preprocessor entry point.
// Reads the file named by argv[2], replaces every {{file}}, {{file#Section}} and
// {{file|indent=N}} inclusion with the (processed) content of the referenced file,
// rewrites links that point to included files, and prints the result with console.log.
// Prints a usage message and returns when no file argument is given.
async function run(argv)
{
if (argv.length < 3)
{
console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
return;
}
const index_file = await fsp.realpath(argv[2]);
// Tokenizer: {{...}} inclusions, [text](target) links and heading lines.
// The capture group makes split() keep these tokens in the resulting array.
const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
text = text.split(re);
// Maps "included file path[#section]" to the "#anchor" generated for it
let included = {};
// Level and title of the most recent heading seen in the index file
let heading = 0, heading_name = '', m;
for (let i = 0; i < text.length; i++)
{
if (text[i].substr(0, 2) == '{{')
{
// Inclusion
// Strip the surrounding {{ and }}
let incfile = text[i].substr(2, text[i].length-4);
let section = null;
// By default included headings are shifted by the current heading level
let indent = heading;
// Optional trailing "|indent=N" overrides the heading shift
incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
// Optional trailing "#Section" restricts the inclusion to one section
incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
let inc_heading = section;
// The include path is relative to the index file
incfile = rel2abs(index_file, incfile);
let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
inc = inc.trim().replace(/^[\s\S]+?\n#/, '#'); // remove until the first header
inc = inc.split(re);
// String of `indent` '#' characters prepended to every included heading
const indent_str = new Array(indent+1).join('#');
let section_start = -1, section_end = -1;
for (let j = 0; j < inc.length; j++)
{
if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
{
// Heading token of the included file
if (!inc_heading)
{
// Without an explicit section, the first heading names the inclusion
inc_heading = m[3].trim();
}
if (section)
{
// The requested section starts at the heading with a matching title
// and ends at the next heading token, whatever its level
if (m[3].trim() == section)
section_start = j;
else if (section_start >= 0)
{
section_end = j;
break;
}
}
inc[j] = m[1] + indent_str + m[2] + m[3];
}
else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
{
// Link token whose target is neither an http(s) URL nor a pure #anchor:
// resolve the target against the included file
const abs_m2 = rel2abs(incfile, m[2]);
// Same target expressed relative to this script's location
const rel_m = abs2rel(__filename, abs_m2);
if (rel_m.substr(0, 9) == '../../../') // outside docs
inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
else
inc[j] = m[1] + abs_m2 + m[3];
}
}
if (section)
{
// Keep only the requested section; nothing is included when it was not found
inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
}
if (inc.length)
{
if (!inc_heading)
inc_heading = heading_name||'';
// Anchor for the inclusion: lowercased title with each run of non-letter
// characters replaced by '-' and a leading/trailing '-' removed
included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
// Trim whitespace at the edges of the included fragment
inc[0] = inc[0].replace(/^\s+/, '');
inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
}
// Replace the {{...}} token with the included tokens and continue after them
text.splice(i, 1, ...inc);
i = i + inc.length - 1;
}
else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
{
// Heading
heading = m[1].length;
heading_name = m[2].trim();
}
}
// Second pass: point links to included files at their in-document anchors
for (let i = 0; i < text.length; i++)
{
if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
{
const p = m[2].indexOf('#');
if (included[m[2]])
{
// The exact target (file or file#section) was included: use its anchor
text[i] = m[1]+included[m[2]]+m[3];
}
else if (p >= 0 && included[m[2].substr(0, p)])
{
// The file part was included as a whole: keep only the "#fragment" part
text[i] = m[1]+m[2].substr(p)+m[3];
}
}
}
console.log(text.join(''));
}
// Resolve the path `rel` against the directory that contains the file `ref`.
// Returns the combined path with "." and "name/.." components collapsed by killdots().
function rel2abs(ref, rel)
{
    // Drop the last path component (the file name) of `ref`
    const base_dir = ref.replace(/^(.*)\/[^\/]+$/, '$1');
    const parts = base_dir.split(/\/+/).concat(rel.split(/\/+/));
    return killdots(parts).join('/');
}
// Express the path `abs` relative to the directory that contains the file `ref`.
// Both paths are split on slashes; their common leading components are dropped and
// one ".." is emitted for every remaining directory component of `ref`.
function abs2rel(ref, abs)
{
    const from = ref.split(/\/+/);
    const to = abs.split(/\/+/);
    // Count leading components shared by both paths; the last component of `ref`
    // is never consumed
    let shared = 0;
    while (from.length - shared > 1 && from[shared] == to[shared])
    {
        shared++;
    }
    const result = [];
    // One ".." per remaining component of `ref` except its last one
    for (let up = from.length - shared - 1; up > 0; up--)
    {
        result.push('..');
    }
    result.push(...to.slice(shared));
    return killdots(result).join('/');
}
// Normalize an array of path components in place and return the same array:
// every "." is removed, and every ".." that follows a component other than ".."
// is removed together with that preceding component.
function killdots(rel)
{
    let pos = 0;
    while (pos < rel.length)
    {
        const part = rel[pos];
        if (part == '.')
        {
            // Drop the "." and re-examine whatever moved into this position
            rel.splice(pos, 1);
            continue;
        }
        if (pos >= 1 && part == '..' && rel[pos-1] != '..')
        {
            // Drop "name/.." and step back to the component that now sits at pos-1
            rel.splice(pos-1, 2);
            pos--;
            continue;
        }
        pos++;
    }
    return rel;
}

View File

@@ -1,65 +0,0 @@
# Vitastor
{{../../../README.md#The Idea}}
{{../../../README.md#Talks and presentations}}
{{../../intro/features.en.md}}
{{../../intro/quickstart.en.md}}
{{../../intro/architecture.en.md}}
## Installation
{{../../installation/packages.en.md}}
{{../../installation/proxmox.en.md}}
{{../../installation/openstack.en.md}}
{{../../installation/kubernetes.en.md}}
{{../../installation/source.en.md}}
{{../../config.en.md|indent=1}}
{{../../config/common.en.md|indent=2}}
{{../../config/network.en.md|indent=2}}
{{../../config/layout-cluster.en.md|indent=2}}
{{../../config/layout-osd.en.md|indent=2}}
{{../../config/osd.en.md|indent=2}}
{{../../config/monitor.en.md|indent=2}}
{{../../config/pool.en.md|indent=2}}
{{../../config/inode.en.md|indent=2}}
## Usage
{{../../usage/cli.en.md}}
{{../../usage/disk.en.md}}
{{../../usage/fio.en.md}}
{{../../usage/nbd.en.md}}
{{../../usage/qemu.en.md}}
{{../../usage/nfs.en.md}}
## Performance
{{../../performance/understanding.en.md}}
{{../../performance/theoretical.en.md}}
{{../../performance/comparison1.en.md}}
{{../../intro/author.en.md|indent=1}}

View File

@@ -1,65 +0,0 @@
# Vitastor
{{../../../README-ru.md#Идея|indent=0}}
{{../../../README-ru.md#Презентации и записи докладов|indent=0}}
{{../../intro/features.ru.md}}
{{../../intro/quickstart.ru.md}}
{{../../intro/architecture.ru.md}}
## Установка
{{../../installation/packages.ru.md}}
{{../../installation/proxmox.ru.md}}
{{../../installation/openstack.ru.md}}
{{../../installation/kubernetes.ru.md}}
{{../../installation/source.ru.md}}
{{../../config.ru.md|indent=1}}
{{../../config/common.ru.md|indent=2}}
{{../../config/network.ru.md|indent=2}}
{{../../config/layout-cluster.ru.md|indent=2}}
{{../../config/layout-osd.ru.md|indent=2}}
{{../../config/osd.ru.md|indent=2}}
{{../../config/monitor.ru.md|indent=2}}
{{../../config/pool.ru.md|indent=2}}
{{../../config/inode.ru.md|indent=2}}
## Использование
{{../../usage/cli.ru.md}}
{{../../usage/disk.ru.md}}
{{../../usage/fio.ru.md}}
{{../../usage/nbd.ru.md}}
{{../../usage/qemu.ru.md}}
{{../../usage/nfs.ru.md}}
## Производительность
{{../../performance/understanding.ru.md}}
{{../../performance/theoretical.ru.md}}
{{../../performance/comparison1.ru.md}}
{{../../intro/author.ru.md|indent=1}}

View File

@@ -7,27 +7,26 @@
in Vitastor, affects memory usage, write amplification and I/O load in Vitastor, affects memory usage, write amplification and I/O load
distribution effectiveness. distribution effectiveness.
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact, Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
it's possible to use 1 MB for SSD too - it will lower memory usage, but it's possible to use 4 MB for SSD too - it will lower memory usage, but
may increase average WA and reduce linear performance. may increase average WA and reduce linear performance.
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
544 MB per 1 TB of used disk space with the default 128 KB block size. 544 MB per 1 TB of used disk space with the default 128 KB block size.
With 1 MB it's 8 times lower.
info_ru: | info_ru: |
Размер объектов (блоков данных), на которые делятся физические и виртуальные Размер объектов (блоков данных), на которые делятся физические и виртуальные
диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
настроек, влияет на потребление памяти, объём избыточной записи (write настроек, влияет на потребление памяти, объём избыточной записи (write
amplification) и эффективность распределения нагрузки по OSD. amplification) и эффективность распределения нагрузки по OSD.
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт, мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
это понизит использование памяти, но ухудшит распределение нагрузки и в это понизит использование памяти, но ухудшит распределение нагрузки и в
среднем увеличит WA. среднем увеличит WA.
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт), Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше. стандартном 128 КБ блоке.
- name: bitmap_granularity - name: bitmap_granularity
type: int type: int
default: 4096 default: 4096

View File

@@ -8,13 +8,13 @@
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома. У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите Для установки возьмите манифесты из директории [csi/deploy/](../csi/deploy/), поместите
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml), вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../csi/deploy/001-csi-config-map.yaml),
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml) настройте StorageClass в [csi/deploy/009-storage-class.yaml](../csi/deploy/009-storage-class.yaml)
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes. и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
``` ```
for i in ./???-*.yaml; do kubectl apply -f $i; done for i in ./???-*.yaml; do kubectl apply -f $i; done
``` ```
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../../csi/deploy/example-pvc.yaml). После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../csi/deploy/example-pvc.yaml).

View File

@@ -36,5 +36,5 @@ vitastor_pool_id = 1
image_upload_use_cinder_backend = True image_upload_use_cinder_backend = True
``` ```
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html), To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
although the support has not been verified yet. although the support has not been verified yet.

View File

@@ -36,5 +36,5 @@ image_upload_use_cinder_backend = True
``` ```
Чтобы помещать в Vitastor Glance-образы, нужно использовать Чтобы помещать в Vitastor Glance-образы, нужно использовать
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html), [образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
однако, поддержка этой функции ещё не проверялась. однако, поддержка этой функции ещё не проверялась.

View File

@@ -6,10 +6,10 @@
# Proxmox VE # Proxmox VE
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.0 are supported): To enable Vitastor support in Proxmox Virtual Environment (6.4-7.4 are supported):
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts: - Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
bookworm for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4 buster for 6.4, bullseye for 7.4, pve7.1 for 7.1, pve7.2 for 7.2, pve7.3 for 7.3
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository - Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
- Define storage in `/etc/pve/storage.cfg` (see below) - Define storage in `/etc/pve/storage.cfg` (see below)
- Block network access from VMs to Vitastor network (to OSDs and etcd), - Block network access from VMs to Vitastor network (to OSDs and etcd),
@@ -35,5 +35,5 @@ vitastor: vitastor
vitastor_nbd 0 vitastor_nbd 0
``` ```
\* Note: you can also manually copy [patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) to Proxmox hosts \* Note: you can also manually copy [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to Proxmox hosts
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor. as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.

View File

@@ -1,15 +1,15 @@
[Документация](../../README-ru.md#документация) → Установка → Proxmox VE [Документация](../../README-ru.md#документация) → Установка → Proxmox
----- -----
[Read in English](proxmox.en.md) [Read in English](proxmox.en.md)
# Proxmox VE # Proxmox
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.0): Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.4):
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox: - Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
bookworm для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4 buster для 6.4, bullseye для 7.4, pve7.1 для 7.1, pve7.2 для 7.2, pve7.3 для 7.3
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor - Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже) - Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию - Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
@@ -35,5 +35,5 @@ vitastor: vitastor
``` ```
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл \* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
[patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) на хосты Proxmox как [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) на хосты Proxmox как
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`. `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.

View File

@@ -21,7 +21,7 @@
## Basic instructions ## Basic instructions
Download source, for example using git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/` Download source, for example using git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine, Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine,
you can disable it by passing `-DWITH_FIO=no` to cmake. you can disable it by passing `-DWITH_FIO=no` to cmake.
@@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
QEMU build process. To do that: QEMU build process. To do that:
- Install vitastor client library headers (from source or from vitastor-client-dev package) - Install vitastor client library headers (from source or from vitastor-client-dev package)
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source - Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c` - Copy `src/qemu_driver.c` to QEMU source directory as `block/block-vitastor.c`
- Build QEMU as usual - Build QEMU as usual
But it is also possible to build it out-of-tree. To do that: But it is also possible to build it out-of-tree. To do that:

View File

@@ -21,7 +21,7 @@
## Базовая инструкция ## Базовая инструкция
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/` Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них
в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio, в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio,
@@ -41,7 +41,7 @@ cmake .. && make -j8 install
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого: Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev) - Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU - Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c` - Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/block-vitastor.c`
- Соберите QEMU как обычно - Соберите QEMU как обычно
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого: Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
@@ -60,7 +60,7 @@ cmake .. && make -j8 install
* Для QEMU 2.0+: `<qemu>/qapi-types.h` &rarr; `<vitastor>/qemu/b/qemu/qapi-types.h` * Для QEMU 2.0+: `<qemu>/qapi-types.h` &rarr; `<vitastor>/qemu/b/qemu/qapi-types.h`
- `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки - `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки
- Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы - Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы
используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`. используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
- После этого в процессе сборки Vitastor также будет собираться подходящий для вашей - После этого в процессе сборки Vitastor также будет собираться подходящий для вашей
версии QEMU `block-vitastor.so`. версии QEMU `block-vitastor.so`.
- Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае - Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае

View File

@@ -44,7 +44,7 @@
depends linearly on drive capacity and data store block size which is 128 KB by default. depends linearly on drive capacity and data store block size which is 128 KB by default.
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants). With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled. clusters, where it's beneficial to increase it, [inmemory_journal](docs/config/osd.en.md#inmemory_journal) can be disabled.
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe - Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
it's possible to create a good copy-on-write storage, but it's much harder and makes performance it's possible to create a good copy-on-write storage, but it's much harder and makes performance
less deterministic, so CoW isn't used in Vitastor. less deterministic, so CoW isn't used in Vitastor.

View File

@@ -156,7 +156,7 @@
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти, составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD) но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal). кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../docs/config/osd.ru.md#inmemory_journal).
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее, - В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет. я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.

View File

@@ -35,7 +35,7 @@
- [Debian and CentOS packages](../installation/packages.en.md) - [Debian and CentOS packages](../installation/packages.en.md)
- [Image management CLI (vitastor-cli)](../usage/cli.en.md) - [Image management CLI (vitastor-cli)](../usage/cli.en.md)
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md) - [Disk management CLI (vitastor-disk)](docs/usage/disk.en.md)
- Generic user-space client library - Generic user-space client library
- [Native QEMU driver](../usage/qemu.en.md) - [Native QEMU driver](../usage/qemu.en.md)
- [Loadable fio engine for benchmarks](../usage/fio.en.md) - [Loadable fio engine for benchmarks](../usage/fio.en.md)

View File

@@ -13,7 +13,7 @@
## Серверные функции ## Серверные функции
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа - Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
- [Производительность](../performance/comparison1.ru.md) ;-D - [Производительность](../comparison1.ru.md) ;-D
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок - [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
- Конфигурация через простые человекочитаемые JSON-структуры в etcd - Конфигурация через простые человекочитаемые JSON-структуры в etcd
@@ -37,7 +37,7 @@
- [Пакеты для Debian и CentOS](../installation/packages.ru.md) - [Пакеты для Debian и CentOS](../installation/packages.ru.md)
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md) - [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md) - [Инструмент управления дисками (vitastor-disk)](docs/usage/disk.ru.md)
- Общая пользовательская клиентская библиотека для работы с кластером - Общая пользовательская клиентская библиотека для работы с кластером
- [Драйвер диска для QEMU](../usage/qemu.ru.md) - [Драйвер диска для QEMU](../usage/qemu.ru.md)
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md) - [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)

View File

@@ -7,7 +7,6 @@
# Quick Start # Quick Start
- [Preparation](#preparation) - [Preparation](#preparation)
- [Recommended drives](#recommended-drives)
- [Configure monitors](#configure-monitors) - [Configure monitors](#configure-monitors)
- [Configure OSDs](#configure-osds) - [Configure OSDs](#configure-osds)
- [Create a pool](#create-a-pool) - [Create a pool](#create-a-pool)
@@ -20,20 +19,10 @@
- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs - Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
[here](../config/layout-cluster.en.md#immediate_commit). [here](../config/layout-cluster.en.md#immediate_commit).
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal. - Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`. - Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Install Vitastor packages](../installation/packages.en.md). - [Install Vitastor packages](../installation/packages.en.md).
## Recommended drives
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
## Configure monitors ## Configure monitors
On the monitor hosts: On the monitor hosts:
@@ -56,10 +45,9 @@ On the monitor hosts:
} }
``` ```
- Initialize OSDs: - Initialize OSDs:
- SSD-only or HDD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. - SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
Add `--disable_data_fsync off` to leave disk write cache enabled if you use `--disable_data_fsync off` to leave disk cache enabled if you use desktop
desktop SSDs without capacitors. Do NOT add `--disable_data_fsync off` if you SSDs without capacitors.
use HDDs or SSD+HDD.
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`. - Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
Pass all your devices (HDD and SSD) to this script &mdash; it will partition disks and initialize journals on its own. Pass all your devices (HDD and SSD) to this script &mdash; it will partition disks and initialize journals on its own.
This script skips HDDs which are already partitioned so if you want to use non-empty disks for This script skips HDDs which are already partitioned so if you want to use non-empty disks for

View File

@@ -7,7 +7,6 @@
# Быстрый старт # Быстрый старт
- [Подготовка](#подготовка) - [Подготовка](#подготовка)
- [Рекомендуемые диски](#рекомендуемые-диски)
- [Настройте мониторы](#настройте-мониторы) - [Настройте мониторы](#настройте-мониторы)
- [Настройте OSD](#настройте-osd) - [Настройте OSD](#настройте-osd)
- [Создайте пул](#создайте-пул) - [Создайте пул](#создайте-пул)
@@ -20,20 +19,10 @@
- Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно - Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже. использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit). О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
- Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
- Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2. - Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`. - Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Установите пакеты Vitastor](../installation/packages.ru.md). - [Установите пакеты Vitastor](../installation/packages.ru.md).
## Рекомендуемые диски
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
## Настройте мониторы ## Настройте мониторы
На хостах, выделенных под мониторы: На хостах, выделенных под мониторы:
@@ -56,10 +45,9 @@
} }
``` ```
- Инициализируйте OSD: - Инициализируйте OSD:
- Только SSD или только HDD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. - SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
Если вы используете десктопные SSD без конденсаторов, добавьте опцию `--disable_data_fsync off`, десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
чтобы оставить кэш записи диска включённым. НЕ добавляйте эту опцию, если используете опцию `--disable_data_fsync off`.
жёсткие диски (HDD).
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`. - Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы

View File

@@ -13,8 +13,6 @@ remains decent (see an example [here](../performance/comparison1.en.md#vitastor-
Vitastor Kubernetes CSI driver is based on NBD. Vitastor Kubernetes CSI driver is based on NBD.
See also [VDUSE](qemu.en.md#vduse).
## Map image ## Map image
To create a local block device for a Vitastor image run: To create a local block device for a Vitastor image run:

View File

@@ -16,8 +16,6 @@ NBD немного снижает производительность из-за
CSI-драйвер Kubernetes Vitastor основан на NBD. CSI-драйвер Kubernetes Vitastor основан на NBD.
Смотрите также [VDUSE](qemu.ru.md#vduse).
## Подключить устройство ## Подключить устройство
Чтобы создать локальное блочное устройство для образа, выполните команду: Чтобы создать локальное блочное устройство для образа, выполните команду:

View File

@@ -29,7 +29,7 @@ vitastor-nfs [--etcd_address ADDR] [ДРУГИЕ ОПЦИИ]
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех) --bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /) --nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049) --port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
--pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один) --pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
--foreground 1 не уходить в фон после запуска --foreground 1 не уходить в фон после запуска
``` ```

View File

@@ -83,43 +83,3 @@ qemu-img rebase -u -b '' testimg.qcow2
This can be used for backups. Just note that exporting an image that is currently being written to This can be used for backups. Just note that exporting an image that is currently being written to
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
on a live VM. on a live VM.
## VDUSE
Linux kernel, starting with version 5.15, supports a new interface for attaching virtual disks
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
exporting QEMU block devices over this protocol using qemu-storage-daemon.
VDUSE has the same problem as other FUSE-like interfaces in Linux: if a userspace process hangs,
for example, if it loses connectivity with Vitastor cluster - active processes doing I/O may
hang in the D state (uninterruptible sleep) and you won't be able to kill them even with kill -9.
In this case reboot will be the only way to remove VDUSE devices from system.
On the other hand, VDUSE is faster than [NBD](nbd.en.md), so you may prefer to use it if
performance is important for you. Approximate performance numbers:
direct fio benchmark - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
To try VDUSE you need at least Linux 5.15, built with VDUSE support
(CONFIG_VIRTIO_VDPA=m and CONFIG_VDPA_USER=m). Debian Linux kernels currently have these options
disabled, so if you want to try it on Debian, use a kernel from Ubuntu
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) or Proxmox.
Commands to attach Vitastor image as a VDUSE device:
```
modprobe vduse virtio-vdpa
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
vdpa dev add name test1 mgmtdev vduse
```
After running these commands /dev/vda device will appear in the system and you'll be able to
use it as a normal disk.
To remove the device:
```
vdpa dev del test1
kill <qemu-storage-daemon_process_PID>
```

View File

@@ -87,43 +87,3 @@ qemu-img rebase -u -b '' testimg.qcow2
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ. с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
## VDUSE
В Linux, начиная с версии ядра 5.15, доступен новый интерфейс для подключения виртуальных дисков
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
VDUSE страдает общей проблемой FUSE-подобных интерфейсов в Linux: если пользовательский процесс
подвиснет, например, если будет потеряна связь с кластером Vitastor - читающие/пишущие в кластер
процессы могут "залипнуть" в состоянии D (непрерываемый сон) и их будет невозможно убить даже
через kill -9. В этом случае удалить из системы устройство можно только перезагрузившись.
С другой стороны, VDUSE быстрее по сравнению с [NBD](nbd.ru.md), поэтому его может
быть предпочтительно использовать там, где производительность важнее. Порядок показателей:
прямое тестирование через fio - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
Чтобы использовать VDUSE, вам нужно ядро Linux версии хотя бы 5.15, собранное с поддержкой
VDUSE (CONFIG_VIRTIO_VDPA=m и CONFIG_VDPA_USER=m). В ядрах в Debian Linux поддержка пока
отключена - если хотите попробовать эту функцию на Debian, поставьте ядро из Ubuntu
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) или из Proxmox.
Команды для подключения виртуального диска через VDUSE:
```
modprobe vduse virtio-vdpa
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
vdpa dev add name test1 mgmtdev vduse
```
После этого в системе появится устройство /dev/vda, которое можно будет использовать как
обычный диск.
Для удаления устройства из системы:
```
vdpa dev del test1
kill <PID_процесса_qemu-storage-daemon>
```

View File

@@ -63,9 +63,8 @@ Wants=network-online.target local-fs.target time-sync.target
[Service] [Service]
Restart=always Restart=always
Environment=GOGC=50 ExecStart=/usr/local/bin/etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\ --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
--snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\ --initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\ --initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\ --initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\

View File

@@ -1608,7 +1608,7 @@ class Mon
} }
} }
} }
return { inode_stats, seen_pools }; return inode_stats;
} }
serialize_bigints(obj) serialize_bigints(obj)
@@ -1634,7 +1634,7 @@ class Mon
const timestamp = Date.now(); const timestamp = Date.now();
const { object_counts, object_bytes } = this.sum_object_counts(); const { object_counts, object_bytes } = this.sum_object_counts();
let stats = this.sum_op_stats(timestamp, this.prev_stats); let stats = this.sum_op_stats(timestamp, this.prev_stats);
let { inode_stats, seen_pools } = this.sum_inode_stats( let inode_stats = this.sum_inode_stats(
this.prev_stats ? this.prev_stats.inode_stats : null, this.prev_stats ? this.prev_stats.inode_stats : null,
timestamp, this.prev_stats ? this.prev_stats.timestamp : null timestamp, this.prev_stats ? this.prev_stats.timestamp : null
); );
@@ -1669,22 +1669,12 @@ class Mon
} }
for (const pool_id in this.state.pool.stats) for (const pool_id in this.state.pool.stats)
{ {
if (!seen_pools[pool_id]) const pool_stats = { ...this.state.pool.stats[pool_id] };
{ this.serialize_bigints(pool_stats);
txn.push({ requestDeleteRange: { txn.push({ requestPut: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id), key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
} }); value: b64(JSON.stringify(pool_stats)),
delete this.state.pool.stats[pool_id]; } });
}
else
{
const pool_stats = { ...this.state.pool.stats[pool_id] };
this.serialize_bigints(pool_stats);
txn.push({ requestPut: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(pool_stats)),
} });
}
} }
if (txn.length) if (txn.length)
{ {

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver from cinder.volume import driver
from cinder.volume import volume_utils from cinder.volume import volume_utils
VERSION = '0.9.5' VERSION = '0.9.2'
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)

View File

@@ -1,190 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index 382bec0e7d..af6207dbce 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -114,6 +114,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index c44d05a13f..ebedb42843 100644
--- a/meson.build
+++ b/meson.build
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1882,6 +1902,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -4020,6 +4041,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..c4ac55c283 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c05ad0c07e..f5eb701604 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3308,7 +3308,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4338,6 +4338,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4787,6 +4809,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5187,6 +5210,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5385,6 +5419,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 6e8983f39c..1b0b9fcf3e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -32,7 +32,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..95914e6ebc 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -144,6 +144,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -392,6 +393,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -1,176 +0,0 @@
diff --git a/block/Makefile.objs b/block/Makefile.objs
index d644bac60a..e404236291 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -19,6 +19,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
block-obj-$(CONFIG_LIBNFS) += nfs.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_VITASTOR) += vitastor.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_VXHS) += vxhs.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
@@ -39,6 +40,8 @@ curl.o-cflags := $(CURL_CFLAGS)
curl.o-libs := $(CURL_LIBS)
rbd.o-cflags := $(RBD_CFLAGS)
rbd.o-libs := $(RBD_LIBS)
+vitastor.o-cflags := $(VITASTOR_CFLAGS)
+vitastor.o-libs := $(VITASTOR_LIBS)
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
gluster.o-libs := $(GLUSTERFS_LIBS)
vxhs.o-libs := $(VXHS_LIBS)
diff --git a/configure b/configure
index 0a19b033bc..58b7fbf24c 100755
--- a/configure
+++ b/configure
@@ -398,6 +398,7 @@ trace_backends="log"
trace_file="trace"
spice=""
rbd=""
+vitastor=""
smartcard=""
libusb=""
usb_redir=""
@@ -1213,6 +1214,10 @@ for opt do
;;
--enable-rbd) rbd="yes"
;;
+ --disable-vitastor) vitastor="no"
+ ;;
+ --enable-vitastor) vitastor="yes"
+ ;;
--disable-xfsctl) xfs="no"
;;
--enable-xfsctl) xfs="yes"
@@ -1601,6 +1606,7 @@ disabled with --disable-FEATURE, default is enabled if available:
vhost-crypto vhost-crypto acceleration support
spice spice
rbd rados block device (rbd)
+ vitastor vitastor block device
libiscsi iscsi support
libnfs nfs support
smartcard smartcard support (libcacard)
@@ -3594,6 +3600,27 @@ EOF
fi
fi
+##########################################
+# vitastor probe
+if test "$vitastor" != "no" ; then
+ cat > $TMPC <<EOF
+#include <vitastor_c.h>
+int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+}
+EOF
+ vitastor_libs="-lvitastor_client"
+ if compile_prog "" "$vitastor_libs" ; then
+ vitastor=yes
+ else
+ if test "$vitastor" = "yes" ; then
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
+ fi
+ vitastor=no
+ fi
+fi
+
##########################################
# libssh2 probe
min_libssh2_version=1.2.8
@@ -5837,6 +5864,7 @@ echo "Trace output file $trace_file-<pid>"
fi
echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
echo "rbd support $rbd"
+echo "vitastor support $vitastor"
echo "xfsctl support $xfs"
echo "smartcard support $smartcard"
echo "libusb $libusb"
@@ -6416,6 +6444,11 @@ if test "$rbd" = "yes" ; then
echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
fi
+if test "$vitastor" = "yes" ; then
+ echo "CONFIG_VITASTOR=m" >> $config_host_mak
+ echo "VITASTOR_CFLAGS=$vitastor_cflags" >> $config_host_mak
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
+fi
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
if test "$coroutine_pool" = "yes" ; then
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c50517bff3..c780bb2c1c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2514,7 +2514,7 @@
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
- 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
+ 'quorum', 'raw', 'rbd', 'vitastor', 'replication', 'sheepdog', 'ssh',
'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
##
@@ -3217,6 +3217,28 @@
'*snap-id': 'uint32',
'*tag': 'str' } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -3547,6 +3569,7 @@
'rbd': 'BlockdevOptionsRbd',
'replication':'BlockdevOptionsReplication',
'sheepdog': 'BlockdevOptionsSheepdog',
+ 'vitastor': 'BlockdevOptionsVitastor',
'ssh': 'BlockdevOptionsSsh',
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
@@ -3991,6 +4014,17 @@
'*subformat': 'BlockdevVhdxSubformat',
'*block-state-zero': 'bool' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVpcSubformat:
#
@@ -4074,6 +4108,7 @@
'rbd': 'BlockdevCreateOptionsRbd',
'replication': 'BlockdevCreateNotSupported',
'sheepdog': 'BlockdevCreateOptionsSheepdog',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'ssh': 'BlockdevCreateOptionsSsh',
'throttle': 'BlockdevCreateNotSupported',
'vdi': 'BlockdevCreateOptionsVdi',

View File

@@ -1,181 +0,0 @@
Index: qemu-5.2+dfsg/qapi/block-core.json
===================================================================
--- qemu-5.2+dfsg.orig/qapi/block-core.json
+++ qemu-5.2+dfsg/qapi/block-core.json
@@ -2831,7 +2831,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
- 'sheepdog',
+ 'sheepdog', 'vitastor',
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
##
@@ -3668,6 +3668,28 @@
'*tag': 'str' } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4015,6 +4037,7 @@
'replication': { 'type': 'BlockdevOptionsReplication',
'if': 'defined(CONFIG_REPLICATION)' },
'sheepdog': 'BlockdevOptionsSheepdog',
+ 'vitastor': 'BlockdevOptionsVitastor',
'ssh': 'BlockdevOptionsSsh',
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
@@ -4404,6 +4427,17 @@
'*cluster-size' : 'size' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -4665,6 +4699,7 @@
'qed': 'BlockdevCreateOptionsQed',
'rbd': 'BlockdevCreateOptionsRbd',
'sheepdog': 'BlockdevCreateOptionsSheepdog',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
Index: qemu-5.2+dfsg/block/meson.build
===================================================================
--- qemu-5.2+dfsg.orig/block/meson.build
+++ qemu-5.2+dfsg/block/meson.build
@@ -76,6 +76,7 @@ foreach m : [
['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'],
['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'],
['CONFIG_RBD', 'rbd', rbd, 'rbd.c'],
+ ['CONFIG_VITASTOR', 'vitastor', vitastor, 'vitastor.c'],
]
if config_host.has_key(m[0])
if enable_modules
Index: qemu-5.2+dfsg/configure
===================================================================
--- qemu-5.2+dfsg.orig/configure
+++ qemu-5.2+dfsg/configure
@@ -372,6 +372,7 @@ trace_backends="log"
trace_file="trace"
spice=""
rbd=""
+vitastor=""
smartcard=""
u2f="auto"
libusb=""
@@ -1263,6 +1264,10 @@ for opt do
;;
--enable-rbd) rbd="yes"
;;
+ --disable-vitastor) vitastor="no"
+ ;;
+ --enable-vitastor) vitastor="yes"
+ ;;
--disable-xfsctl) xfs="no"
;;
--enable-xfsctl) xfs="yes"
@@ -1827,6 +1832,7 @@ disabled with --disable-FEATURE, default
vhost-vdpa vhost-vdpa kernel backend support
spice spice
rbd rados block device (rbd)
+ vitastor vitastor block device
libiscsi iscsi support
libnfs nfs support
smartcard smartcard support (libcacard)
@@ -3719,6 +3725,27 @@ EOF
fi
##########################################
+# vitastor probe
+if test "$vitastor" != "no" ; then
+ cat > $TMPC <<EOF
+#include <vitastor_c.h>
+int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+}
+EOF
+ vitastor_libs="-lvitastor_client"
+ if compile_prog "" "$vitastor_libs" ; then
+ vitastor=yes
+ else
+ if test "$vitastor" = "yes" ; then
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
+ fi
+ vitastor=no
+ fi
+fi
+
+##########################################
# libssh probe
if test "$libssh" != "no" ; then
if $pkg_config --exists libssh; then
@@ -6456,6 +6483,10 @@ if test "$rbd" = "yes" ; then
echo "CONFIG_RBD=y" >> $config_host_mak
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
fi
+if test "$vitastor" = "yes" ; then
+ echo "CONFIG_VITASTOR=y" >> $config_host_mak
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
+fi
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
if test "$coroutine_pool" = "yes" ; then
Index: qemu-5.2+dfsg/meson.build
===================================================================
--- qemu-5.2+dfsg.orig/meson.build
+++ qemu-5.2+dfsg/meson.build
@@ -596,6 +596,10 @@ rbd = not_found
if 'CONFIG_RBD' in config_host
rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split())
endif
+vitastor = not_found
+if 'CONFIG_VITASTOR' in config_host
+ vitastor = declare_dependency(link_args: config_host['VITASTOR_LIBS'].split())
+endif
glusterfs = not_found
if 'CONFIG_GLUSTERFS' in config_host
glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(),
@@ -2145,6 +2149,7 @@ endif
# TODO: add back protocol and server version
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
summary_info += {'rbd support': config_host.has_key('CONFIG_RBD')}
+summary_info += {'vitastor support': config_host.has_key('CONFIG_VITASTOR')}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')}
summary_info += {'U2F support': u2f.found()}

View File

@@ -24,4 +24,4 @@ rm fio
mv fio-copy fio mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'` FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.9.5/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.5$(rpm --eval '%dist').tar.gz * tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *

View File

@@ -22,7 +22,7 @@
Name: qemu-kvm Name: qemu-kvm
Version: 4.2.0 Version: 4.2.0
-Release: 29.vitastor%{?dist}.6 -Release: 29.vitastor%{?dist}.6
+Release: 34.vitastor%{?dist}.6 +Release: 32.vitastor%{?dist}.6
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
Epoch: 15 Epoch: 15
License: GPLv2 and GPLv2+ and CC-BY License: GPLv2 and GPLv2+ and CC-BY

View File

@@ -13,7 +13,7 @@
Name: qemu-kvm Name: qemu-kvm
Version: 4.2.0 Version: 4.2.0
-Release: 29%{?dist}.6 -Release: 29%{?dist}.6
+Release: 33.vitastor%{?dist}.6 +Release: 32.vitastor%{?dist}.6
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
Epoch: 15 Epoch: 15
License: GPLv2 and GPLv2+ and CC-BY License: GPLv2 and GPLv2+ and CC-BY

View File

@@ -1,103 +0,0 @@
--- qemu-kvm-6.2.spec.orig 2023-07-18 13:52:57.636625440 +0000
+++ qemu-kvm-6.2.spec 2023-07-18 13:52:19.011683886 +0000
@@ -73,6 +73,7 @@ Requires: %{name}-hw-usbredir = %{epoch}
%endif \
Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release}
# Macro to properly setup RHEL/RHEV conflict handling
@@ -83,7 +84,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 6.2.0
-Release: 32%{?rcrel}%{?dist}
+Release: 32.vitastor%{?rcrel}%{?dist}
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
Epoch: 15
License: GPLv2 and GPLv2+ and CC-BY
@@ -122,6 +123,7 @@ Source37: tests_data_acpi_pc_SSDT.dimmpx
Source38: tests_data_acpi_q35_FACP.slic
Source39: tests_data_acpi_q35_SSDT.dimmpxm
Source40: tests_data_acpi_virt_SSDT.memhp
+Source41: qemu-vitastor.c
Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch
Patch0005: 0005-Initial-redhat-build.patch
@@ -652,6 +654,7 @@ Patch255: kvm-scsi-protect-req-aiocb-wit
Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]
Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
+Patch258: qemu-6.2-vitastor.patch
BuildRequires: wget
BuildRequires: rpm-build
@@ -689,6 +692,7 @@ BuildRequires: libcurl-devel
BuildRequires: libssh-devel
BuildRequires: librados-devel
BuildRequires: librbd-devel
+BuildRequires: vitastor-client-devel
%if %{have_gluster}
# For gluster block driver
BuildRequires: glusterfs-api-devel
@@ -926,6 +930,14 @@ Install this package if you want to acce
using the rbd protocol.
+%package block-vitastor
+Summary: QEMU Vitastor block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-vitastor
+This package provides the additional Vitastor block driver for QEMU.
+
+
%package block-ssh
Summary: QEMU SSH block driver
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
@@ -979,6 +991,7 @@ This package provides usbredir support.
rm -fr slirp
mkdir slirp
%autopatch -p1
+cp %{SOURCE41} ./block/vitastor.c
%global qemu_kvm_build qemu_kvm_build
mkdir -p %{qemu_kvm_build}
@@ -994,7 +1007,7 @@ cp -f %{SOURCE40} tests/data/acpi/virt/S
# --build-id option is used for giving info to the debug packages.
buildldflags="VL_LDFLAGS=-Wl,--build-id"
-%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle
+%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle
%if 0%{have_gluster}
%global block_drivers_list %{block_drivers_list},gluster
@@ -1149,9 +1162,7 @@ pushd %{qemu_kvm_build}
--firmwarepath=%{_prefix}/share/qemu-firmware \
--meson="git" \
--target-list="%{buildarch}" \
- --block-drv-rw-whitelist=%{block_drivers_list} \
--audio-drv-list= \
- --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \
--with-coroutine=ucontext \
--with-git=git \
--tls-priority=@QEMU,SYSTEM \
@@ -1197,6 +1208,7 @@ pushd %{qemu_kvm_build}
%endif
--enable-pie \
--enable-rbd \
+ --enable-vitastor \
%if 0%{have_librdma}
--enable-rdma \
%endif
@@ -1794,6 +1806,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.
%files block-rbd
%{_libdir}/qemu-kvm/block-rbd.so
+%files block-vitastor
+%{_libdir}/qemu-kvm/block-vitastor.so
+
%files block-ssh
%{_libdir}/qemu-kvm/block-ssh.so

View File

@@ -1,93 +0,0 @@
--- qemu-kvm-7.2.spec.orig 2023-06-22 13:56:19.000000000 +0000
+++ qemu-kvm-7.2.spec 2023-07-18 07:55:22.347090196 +0000
@@ -100,8 +100,6 @@
%endif
%global target_list %{kvm_target}-softmmu
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
%define qemudocdir %{_docdir}/%{name}
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
@@ -126,6 +124,7 @@ Requires: %{name}-device-usb-host = %{ep
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
%endif \
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
# Since SPICE is removed from RHEL-9, the following Obsoletes:
@@ -148,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 7.2.0
-Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.1
+Release: 14.vitastor%{?rcrel}%{?dist}%{?cc_suffix}.1
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -171,6 +170,7 @@ Source28: 95-kvm-memlock.conf
Source30: kvm-s390x.conf
Source31: kvm-x86.conf
Source36: README.tests
+Source37: qemu-vitastor.c
Patch0004: 0004-Initial-redhat-build.patch
@@ -418,6 +418,7 @@ Patch134: kvm-target-i386-Fix-BZHI-instr
Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z]
Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
+Patch137: qemu-7.2-vitastor.patch
%if %{have_clang}
BuildRequires: clang
@@ -449,6 +450,7 @@ BuildRequires: libcurl-devel
%if %{have_block_rbd}
BuildRequires: librbd-devel
%endif
+BuildRequires: vitastor-client-devel
# We need both because the 'stap' binary is probed for by configure
BuildRequires: systemtap
BuildRequires: systemtap-sdt-devel
@@ -642,6 +644,14 @@ using the rbd protocol.
%endif
+%package block-vitastor
+Summary: QEMU Vitastor block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-vitastor
+This package provides the additional Vitastor block driver for QEMU.
+
+
%package audio-pa
Summary: QEMU PulseAudio audio driver
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
@@ -719,6 +729,7 @@ This package provides usbredir support.
%prep
%setup -q -n qemu-%{version}%{?rcstr}
%autopatch -p1
+cp %{SOURCE37} ./block/vitastor.c
%global qemu_kvm_build qemu_kvm_build
mkdir -p %{qemu_kvm_build}
@@ -946,6 +957,7 @@ run_configure \
%if %{have_block_rbd}
--enable-rbd \
%endif
+ --enable-vitastor \
%if %{have_librdma}
--enable-rdma \
%endif
@@ -1426,6 +1438,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
%files block-rbd
%{_libdir}/%{name}/block-rbd.so
%endif
+%files block-vitastor
+%{_libdir}/%{name}/block-vitastor.so
+
%files audio-pa
%{_libdir}/%{name}/audio-pa.so

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \ RUN set -e; \
cd /root/vitastor/rpm; \ cd /root/vitastor/rpm; \
sh build-tarball.sh; \ sh build-tarball.sh; \
cp /root/vitastor-0.9.5.el7.tar.gz ~/rpmbuild/SOURCES; \ cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \ cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \ cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \ rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor Name: vitastor
Version: 0.9.5 Version: 0.9.2
Release: 1%{?dist} Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1 License: Vitastor Network Public License 1.1
URL: https://vitastor.io/ URL: https://vitastor.io/
Source0: vitastor-0.9.5.el7.tar.gz Source0: vitastor-0.9.2.el7.tar.gz
BuildRequires: liburing-devel >= 0.6 BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel BuildRequires: gperftools-devel

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \ RUN set -e; \
cd /root/vitastor/rpm; \ cd /root/vitastor/rpm; \
sh build-tarball.sh; \ sh build-tarball.sh; \
cp /root/vitastor-0.9.5.el8.tar.gz ~/rpmbuild/SOURCES; \ cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \ cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \ cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \ rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor Name: vitastor
Version: 0.9.5 Version: 0.9.2
Release: 1%{?dist} Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1 License: Vitastor Network Public License 1.1
URL: https://vitastor.io/ URL: https://vitastor.io/
Source0: vitastor-0.9.5.el8.tar.gz Source0: vitastor-0.9.2.el8.tar.gz
BuildRequires: liburing-devel >= 0.6 BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel BuildRequires: gperftools-devel

View File

@@ -18,7 +18,7 @@ ADD . /root/vitastor
RUN set -e; \ RUN set -e; \
cd /root/vitastor/rpm; \ cd /root/vitastor/rpm; \
sh build-tarball.sh; \ sh build-tarball.sh; \
cp /root/vitastor-0.9.5.el9.tar.gz ~/rpmbuild/SOURCES; \ cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \ cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \ cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \ rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor Name: vitastor
Version: 0.9.5 Version: 0.9.2
Release: 1%{?dist} Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1 License: Vitastor Network Public License 1.1
URL: https://vitastor.io/ URL: https://vitastor.io/
Source0: vitastor-0.9.5.el9.tar.gz Source0: vitastor-0.9.2.el9.tar.gz
BuildRequires: liburing-devel >= 0.6 BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel BuildRequires: gperftools-devel

View File

@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif() endif()
add_definitions(-DVERSION="0.9.5") add_definitions(-DVERSION="0.9.2")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src) add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
if (${WITH_ASAN}) if (${WITH_ASAN})
add_definitions(-fsanitize=address -fno-omit-frame-pointer) add_definitions(-fsanitize=address -fno-omit-frame-pointer)
@@ -56,6 +56,11 @@ if (ISAL_LIBRARIES)
add_definitions(-DWITH_ISAL) add_definitions(-DWITH_ISAL)
endif (ISAL_LIBRARIES) endif (ISAL_LIBRARIES)
find_package(OpenSSL)
if (OPENSSL_FOUND)
add_definitions(-DWITH_OPENSSL)
endif (OPENSSL_FOUND)
add_custom_target(build_tests) add_custom_target(build_tests)
add_custom_target(test add_custom_target(test
COMMAND COMMAND

View File

@@ -714,15 +714,9 @@ resume_1:
return false; return false;
} }
} }
if (new_trim_pos < bs->journal.used_start
? (bs->journal.dirty_start >= bs->journal.used_start || bs->journal.dirty_start < new_trim_pos)
: (bs->journal.dirty_start >= bs->journal.used_start && bs->journal.dirty_start < new_trim_pos))
{
bs->journal.dirty_start = new_trim_pos;
}
bs->journal.used_start = new_trim_pos; bs->journal.used_start = new_trim_pos;
#ifdef BLOCKSTORE_DEBUG #ifdef BLOCKSTORE_DEBUG
printf("Journal trimmed to %08lx (next_free=%08lx dirty_start=%08lx)\n", bs->journal.used_start, bs->journal.next_free, bs->journal.dirty_start); printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
#endif #endif
if (bs->journal.flush_journal && !flusher->flush_queue.size()) if (bs->journal.flush_journal && !flusher->flush_queue.size())
{ {

View File

@@ -103,7 +103,6 @@ public:
journal_flusher_t(blockstore_impl_t *bs); journal_flusher_t(blockstore_impl_t *bs);
~journal_flusher_t(); ~journal_flusher_t();
void loop(); void loop();
bool is_trim_wanted() { return trim_wanted; }
bool is_active(); bool is_active();
void mark_trim_possible(); void mark_trim_possible();
void request_trim(); void request_trim();

View File

@@ -218,7 +218,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
auto used = --journal.used_sectors[dirty_it->second.journal_sector]; auto used = --journal.used_sectors[dirty_it->second.journal_sector];
#ifdef BLOCKSTORE_DEBUG #ifdef BLOCKSTORE_DEBUG
printf( printf(
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector, "remove usage of journal offset %08lx by %lx:%lx v%lu (%d refs)\n", dirty_it->second.journal_sector,
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, used dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, used
); );
#endif #endif

View File

@@ -661,13 +661,8 @@ void blockstore_impl_t::release_journal_sectors(blockstore_op_t *op)
uint64_t s = PRIV(op)->min_flushed_journal_sector; uint64_t s = PRIV(op)->min_flushed_journal_sector;
while (1) while (1)
{ {
if (!journal.sector_info[s-1].dirty && journal.sector_info[s-1].flush_count == 0) if (s != (1+journal.cur_sector) && journal.sector_info[s-1].flush_count == 0)
{ {
if (s == (1+journal.cur_sector))
{
// Forcibly move to the next sector and move dirty position
journal.in_sector_pos = journal.block_size;
}
// We know for sure that we won't write into this sector anymore // We know for sure that we won't write into this sector anymore
uint64_t new_ds = journal.sector_info[s-1].offset + journal.block_size; uint64_t new_ds = journal.sector_info[s-1].offset + journal.block_size;
if (new_ds >= journal.len) if (new_ds >= journal.len)

View File

@@ -56,15 +56,14 @@ struct image_lister_t
{ {
continue; continue;
} }
auto pool_it = parent->cli->st_cli.pool_config.find(INODE_POOL(ic.second.num)); auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(ic.second.num));
bool good_pool = pool_it != parent->cli->st_cli.pool_config.end();
auto item = json11::Json::object { auto item = json11::Json::object {
{ "name", ic.second.name }, { "name", ic.second.name },
{ "size", ic.second.size }, { "size", ic.second.size },
{ "used_size", 0 }, { "used_size", 0 },
{ "readonly", ic.second.readonly }, { "readonly", ic.second.readonly },
{ "pool_id", (uint64_t)INODE_POOL(ic.second.num) }, { "pool_id", (uint64_t)INODE_POOL(ic.second.num) },
{ "pool_name", good_pool ? pool_it->second.name : "? (ID:"+std::to_string(INODE_POOL(ic.second.num))+")" }, { "pool_name", pool_cfg.name },
{ "inode_num", INODE_NO_POOL(ic.second.num) }, { "inode_num", INODE_NO_POOL(ic.second.num) },
{ "inode_id", ic.second.num }, { "inode_id", ic.second.num },
}; };
@@ -248,8 +247,6 @@ resume_1:
if (state == 1) if (state == 1)
goto resume_1; goto resume_1;
get_list(); get_list();
if (state == 100)
return;
if (show_stats) if (show_stats)
{ {
resume_1: resume_1:
@@ -272,7 +269,7 @@ resume_1:
{ "key", "name" }, { "key", "name" },
{ "title", "NAME" }, { "title", "NAME" },
}); });
if (list_pool_name == "") if (!list_pool_id)
{ {
cols.push_back(json11::Json::object{ cols.push_back(json11::Json::object{
{ "key", "pool_name" }, { "key", "pool_name" },

View File

@@ -41,7 +41,7 @@ struct snap_merger_t
int fsync_interval = 128; int fsync_interval = 128;
// -- STATE -- // -- STATE --
inode_t target, to_num; inode_t target;
int target_rank; int target_rank;
bool inside_continue = false; bool inside_continue = false;
int state = 0; int state = 0;
@@ -98,7 +98,6 @@ struct snap_merger_t
state = 100; state = 100;
return; return;
} }
to_num = to_cfg->num;
// Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them // Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them
std::vector<inode_t> chain_list; std::vector<inode_t> chain_list;
inode_config_t *cur = to_cfg; inode_config_t *cur = to_cfg;
@@ -452,7 +451,7 @@ struct snap_merger_t
{ {
cluster_op_t *op = &rwo->op; cluster_op_t *op = &rwo->op;
op->opcode = OSD_OP_READ; op->opcode = OSD_OP_READ;
op->inode = to_num; op->inode = target;
op->offset = rwo->offset; op->offset = rwo->offset;
op->len = target_block_size; op->len = target_block_size;
op->iov.push_back(rwo->buf, target_block_size); op->iov.push_back(rwo->buf, target_block_size);
@@ -484,7 +483,7 @@ struct snap_merger_t
{ {
// write start->end // write start->end
rwo->todo++; rwo->todo++;
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas && to_num == target ? 1+rwo->op.version : 0); write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
rwo->start = rwo->end; rwo->start = rwo->end;
if (use_cas) if (use_cas)
{ {
@@ -503,7 +502,7 @@ struct snap_merger_t
{ {
// write start->end // write start->end
rwo->todo++; rwo->todo++;
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas && to_num == target ? 1+rwo->op.version : 0); write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
rwo->start = rwo->end; rwo->start = rwo->end;
if (use_cas) if (use_cas)
{ {
@@ -533,7 +532,7 @@ struct snap_merger_t
if (use_cas && subop->retval == -EINTR) if (use_cas && subop->retval == -EINTR)
{ {
// CAS failure - reread and repeat optimistically // CAS failure - reread and repeat optimistically
rwo->start = rwo->end = 0; rwo->start = subop->offset - rwo->offset;
rwo_read(rwo); rwo_read(rwo);
delete subop; delete subop;
return; return;
@@ -543,7 +542,7 @@ struct snap_merger_t
rwo->error_read = false; rwo->error_read = false;
} }
// Increment CAS version // Increment CAS version
rwo->op.version = subop->version; rwo->op.version++;
if (use_cas) if (use_cas)
next_write(rwo); next_write(rwo);
else else

View File

@@ -65,9 +65,6 @@ struct snap_remover_t
int current_child = 0; int current_child = 0;
std::function<bool(cli_result_t &)> cb; std::function<bool(cli_result_t &)> cb;
std::vector<std::string> rebased_images, deleted_images;
std::vector<uint64_t> deleted_ids;
std::string inverse_child_name, inverse_parent_name;
cli_result_t result; cli_result_t result;
bool is_done() bool is_done()
@@ -125,7 +122,6 @@ resume_1:
{ {
if (merge_children[current_child] == inverse_child) if (merge_children[current_child] == inverse_child)
continue; continue;
rebased_images.push_back(parent->cli->st_cli.inode_config.at(merge_children[current_child]).name);
start_merge_child(merge_children[current_child], merge_children[current_child]); start_merge_child(merge_children[current_child], merge_children[current_child]);
if (state == 100) if (state == 100)
return; return;
@@ -138,12 +134,9 @@ resume_2:
cb = NULL; cb = NULL;
if (result.err) if (result.err)
{ {
result.data = my_result(result.data);
state = 100; state = 100;
return; return;
} }
else if (parent->progress)
printf("%s\n", result.text.c_str());
parent->change_parent(merge_children[current_child], new_parent, &result); parent->change_parent(merge_children[current_child], new_parent, &result);
state = 3; state = 3;
resume_3: resume_3:
@@ -151,7 +144,6 @@ resume_3:
return; return;
if (result.err) if (result.err)
{ {
result.data = my_result(result.data);
state = 100; state = 100;
return; return;
} }
@@ -173,12 +165,9 @@ resume_4:
cb = NULL; cb = NULL;
if (result.err) if (result.err)
{ {
result.data = my_result(result.data);
state = 100; state = 100;
return; return;
} }
else if (parent->progress)
printf("%s\n", result.text.c_str());
// Delete "inverse" child data // Delete "inverse" child data
start_delete_source(inverse_child); start_delete_source(inverse_child);
if (state == 100) if (state == 100)
@@ -192,12 +181,9 @@ resume_5:
cb = NULL; cb = NULL;
if (result.err) if (result.err)
{ {
result.data = my_result(result.data);
state = 100; state = 100;
return; return;
} }
else if (parent->progress)
printf("%s\n", result.text.c_str());
// Delete "inverse" child metadata, rename parent over it, // Delete "inverse" child metadata, rename parent over it,
// and also change parent links of the previous "inverse" child // and also change parent links of the previous "inverse" child
rename_inverse_parent(); rename_inverse_parent();
@@ -213,12 +199,6 @@ resume_6:
{ {
if (chain_list[current_child] == inverse_parent) if (chain_list[current_child] == inverse_parent)
continue; continue;
{
auto parent_it = parent->cli->st_cli.inode_config.find(chain_list[current_child]);
if (parent_it != parent->cli->st_cli.inode_config.end())
deleted_images.push_back(parent_it->second.name);
deleted_ids.push_back(chain_list[current_child]);
}
start_delete_source(chain_list[current_child]); start_delete_source(chain_list[current_child]);
resume_7: resume_7:
while (!cb(result)) while (!cb(result))
@@ -229,12 +209,9 @@ resume_7:
cb = NULL; cb = NULL;
if (result.err) if (result.err)
{ {
result.data = my_result(result.data);
state = 100; state = 100;
return; return;
} }
else if (parent->progress)
printf("%s\n", result.text.c_str());
delete_inode_config(chain_list[current_child]); delete_inode_config(chain_list[current_child]);
if (state == 100) if (state == 100)
return; return;
@@ -244,26 +221,11 @@ resume_8:
return; return;
} }
state = 100; state = 100;
result = (cli_result_t){
.text = "",
.data = my_result(result.data),
};
resume_100: resume_100:
// Done // Done
return; return;
} }
// Returns a copy of src's object fields extended with the bookkeeping
// members of this remover: the rename pair, the rebased image names and
// the deleted image names / inode ids.
json11::Json my_result(json11::Json src)
{
    json11::Json::object merged = src.object_items();
    merged["renamed_from"] = inverse_parent_name;
    merged["renamed_to"] = inverse_child_name;
    merged["rebased_images"] = rebased_images;
    merged["deleted_images"] = deleted_images;
    merged["deleted_ids"] = deleted_ids;
    return merged;
}
void get_merge_children() void get_merge_children()
{ {
// Get all children of from..to // Get all children of from..to
@@ -376,11 +338,7 @@ resume_100:
} }
for (auto inode_result: data["responses"].array_items()) for (auto inode_result: data["responses"].array_items())
{ {
if (inode_result["response_range"]["kvs"].array_items().size() == 0) auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["kvs"][0]);
{
continue;
}
auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["response_range"]["kvs"][0]);
pool_id_t pool_id = 0; pool_id_t pool_id = 0;
inode_t inode = 0; inode_t inode = 0;
char null_byte = 0; char null_byte = 0;
@@ -419,7 +377,7 @@ resume_100:
inode_t child = cp.first; inode_t child = cp.first;
uint64_t child_used = inode_used[child]; uint64_t child_used = inode_used[child];
int rank = cp.second; int rank = cp.second;
for (int i = chain_list.size()-1-rank; i < chain_list.size(); i++) for (int i = chain_list.size()-rank; i < chain_list.size(); i++)
{ {
inode_t parent = chain_list[i]; inode_t parent = chain_list[i];
uint64_t parent_used = inode_used[parent]; uint64_t parent_used = inode_used[parent];
@@ -455,8 +413,8 @@ resume_100:
} }
inode_config_t *child_cfg = &child_it->second; inode_config_t *child_cfg = &child_it->second;
inode_config_t *target_cfg = &target_it->second; inode_config_t *target_cfg = &target_it->second;
inverse_child_name = child_cfg->name; std::string child_name = child_cfg->name;
inverse_parent_name = target_cfg->name; std::string target_name = target_cfg->name;
std::string child_cfg_key = base64_encode( std::string child_cfg_key = base64_encode(
parent->cli->st_cli.etcd_prefix+ parent->cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(inverse_child))+ "/config/inode/"+std::to_string(INODE_POOL(inverse_child))+
@@ -467,9 +425,6 @@ resume_100:
"/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+ "/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+
"/"+std::to_string(INODE_NO_POOL(inverse_parent)) "/"+std::to_string(INODE_NO_POOL(inverse_parent))
); );
std::string target_idx_key = base64_encode(
parent->cli->st_cli.etcd_prefix+"/index/image/"+inverse_parent_name
);
// Fill new configuration // Fill new configuration
inode_config_t new_cfg = *child_cfg; inode_config_t new_cfg = *child_cfg;
new_cfg.num = target_cfg->num; new_cfg.num = target_cfg->num;
@@ -494,11 +449,6 @@ resume_100:
{ "key", child_cfg_key }, { "key", child_cfg_key },
} }, } },
}, },
json11::Json::object {
{ "request_delete_range", json11::Json::object {
{ "key", target_idx_key },
} },
},
json11::Json::object { json11::Json::object {
{ "request_put", json11::Json::object { { "request_put", json11::Json::object {
{ "key", target_cfg_key }, { "key", target_cfg_key },
@@ -545,12 +495,12 @@ resume_100:
parent->cli->st_cli.etcd_txn_slow(json11::Json::object { parent->cli->st_cli.etcd_txn_slow(json11::Json::object {
{ "compare", cmp }, { "compare", cmp },
{ "success", txn }, { "success", txn },
}, [this](std::string err, json11::Json res) }, [this, target_name, child_name](std::string err, json11::Json res)
{ {
parent->waiting--; parent->waiting--;
if (err != "") if (err != "")
{ {
result = (cli_result_t){ .err = EIO, .text = "Error renaming "+inverse_parent_name+" to "+inverse_child_name+": "+err }; result = (cli_result_t){ .err = EIO, .text = "Error renaming "+target_name+" to "+child_name+": "+err };
state = 100; state = 100;
return; return;
} }
@@ -558,14 +508,14 @@ resume_100:
{ {
result = (cli_result_t){ result = (cli_result_t){
.err = EAGAIN, .err = EAGAIN,
.text = "Parent ("+inverse_parent_name+"), child ("+inverse_child_name+"), or one of its children" .text = "Parent ("+target_name+"), child ("+child_name+"), or one of its children"
" configuration was modified during rename", " configuration was modified during rename",
}; };
state = 100; state = 100;
return; return;
} }
if (parent->progress) if (parent->progress)
printf("Layer %s renamed to %s\n", inverse_parent_name.c_str(), inverse_child_name.c_str()); printf("Layer %s renamed to %s\n", target_name.c_str(), child_name.c_str());
parent->ringloop->wakeup(); parent->ringloop->wakeup();
}); });
} }

View File

@@ -28,7 +28,6 @@ struct rm_inode_t
cli_tool_t *parent = NULL; cli_tool_t *parent = NULL;
inode_list_t *lister = NULL; inode_list_t *lister = NULL;
std::vector<rm_pg_t*> lists; std::vector<rm_pg_t*> lists;
std::vector<osd_num_t> inactive_osds;
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0; uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
uint64_t pgs_to_list = 0; uint64_t pgs_to_list = 0;
bool lists_done = false; bool lists_done = false;
@@ -87,16 +86,6 @@ struct rm_inode_t
state = 100; state = 100;
return; return;
} }
inactive_osds = parent->cli->list_inode_get_inactive_osds(lister);
if (inactive_osds.size() && !parent->json_output)
{
fprintf(stderr, "Some data may remain after delete on OSDs which are currently down: ");
for (int i = 0; i < inactive_osds.size(); i++)
{
fprintf(stderr, i > 0 ? ", %lu" : "%lu", inactive_osds[i]);
}
fprintf(stderr, "\n");
}
pgs_to_list = parent->cli->list_pg_count(lister); pgs_to_list = parent->cli->list_pg_count(lister);
parent->cli->list_inode_next(lister, parent->parallel_osds); parent->cli->list_inode_next(lister, parent->parallel_osds);
} }
@@ -178,33 +167,16 @@ struct rm_inode_t
} }
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct) if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
{ {
fprintf(stderr, "\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list); printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
total_prev_pct = total_done*1000/total_count; total_prev_pct = total_done*1000/total_count;
} }
if (lists_done && !lists.size()) if (lists_done && !lists.size())
{ {
if (parent->progress && total_count > 0)
{
fprintf(stderr, "\n");
}
if (parent->progress && (total_done < total_count || inactive_osds.size() > 0))
{
fprintf(
stderr, "Warning: Pool:%u,ID:%lu inode data may not have been fully removed.\n"
" Use `vitastor-cli rm-data --pool %u --inode %lu` if you encounter it in listings.\n",
pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode)
);
}
result = (cli_result_t){ result = (cli_result_t){
.err = error_count > 0 ? EIO : 0, .err = error_count > 0 ? EIO : 0,
.text = error_count > 0 ? "Some blocks were not removed" : ( .text = error_count > 0 ? "Some blocks were not removed" : (
"Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+ "Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
std::to_string(pool_id)+" removed"), std::to_string(pool_id)+" removed"),
.data = json11::Json::object {
{ "removed_objects", total_done },
{ "total_objects", total_count },
{ "inactive_osds", inactive_osds },
},
}; };
state = 100; state = 100;
} }

View File

@@ -1209,10 +1209,6 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
copy_part_bitmap(op, part); copy_part_bitmap(op, part);
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0; op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
} }
else if (op->opcode == OSD_OP_WRITE)
{
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
}
if (op->inflight_count == 0) if (op->inflight_count == 0)
{ {
if (op->opcode == OSD_OP_SYNC) if (op->opcode == OSD_OP_SYNC)

View File

@@ -130,7 +130,6 @@ public:
inode_list_t *list_inode_start(inode_t inode, inode_list_t *list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback); std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
int list_pg_count(inode_list_t *lst); int list_pg_count(inode_list_t *lst);
const std::vector<osd_num_t> & list_inode_get_inactive_osds(inode_list_t *lst);
void list_inode_next(inode_list_t *lst, int next_pgs); void list_inode_next(inode_list_t *lst, int next_pgs);
//inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; } //inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; }
//inline uint64_t get_bs_block_size() { return st_cli.global_block_size; } //inline uint64_t get_bs_block_size() { return st_cli.global_block_size; }

View File

@@ -36,7 +36,6 @@ struct inode_list_t
inode_t inode = 0; inode_t inode = 0;
int done_pgs = 0; int done_pgs = 0;
int want = 0; int want = 0;
std::vector<osd_num_t> inactive_osds;
std::vector<inode_list_pg_t*> pgs; std::vector<inode_list_pg_t*> pgs;
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback; std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
}; };
@@ -61,7 +60,6 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
lst->inode = inode; lst->inode = inode;
lst->callback = callback; lst->callback = callback;
auto pool_cfg = st_cli.pool_config[pool_id]; auto pool_cfg = st_cli.pool_config[pool_id];
std::set<osd_num_t> inactive_osd_set;
for (auto & pg_item: pool_cfg.pg_config) for (auto & pg_item: pool_cfg.pg_config)
{ {
auto & pg = pg_item.second; auto & pg = pg_item.second;
@@ -108,18 +106,11 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
} }
for (osd_num_t peer_osd: all_peers) for (osd_num_t peer_osd: all_peers)
{ {
if (st_cli.peer_states.find(peer_osd) != st_cli.peer_states.end()) r->list_osds.push_back((inode_list_osd_t){
{ .pg = r,
r->list_osds.push_back((inode_list_osd_t){ .osd_num = peer_osd,
.pg = r, .sent = false,
.osd_num = peer_osd, });
.sent = false,
});
}
else
{
inactive_osd_set.insert(peer_osd);
}
} }
} }
else else
@@ -141,7 +132,6 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
{ {
lst->pgs[i]->pos = i; lst->pgs[i]->pos = i;
} }
lst->inactive_osds.insert(lst->inactive_osds.end(), inactive_osd_set.begin(), inactive_osd_set.end());
lists.push_back(lst); lists.push_back(lst);
return lst; return lst;
} }
@@ -151,11 +141,6 @@ int cluster_client_t::list_pg_count(inode_list_t *lst)
return lst->pgs.size(); return lst->pgs.size();
} }
// Accessor: exposes the inactive_osds vector stored in the given listing
// object by reference (no copy is made).
const std::vector<osd_num_t> & cluster_client_t::list_inode_get_inactive_osds(inode_list_t *lst)
{
    const std::vector<osd_num_t> & inactive = lst->inactive_osds;
    return inactive;
}
void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs) void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
{ {
if (next_pgs >= 0) if (next_pgs >= 0)

View File

@@ -99,16 +99,15 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
if (options["journal_size"] == "") if (options["journal_size"] == "")
{ {
if (options["journal_device"] == "") if (options["journal_device"] == "")
options["journal_size"] = is_hdd ? "128M" : "32M"; options["journal_size"] = "32M";
else if (is_hdd) else if (is_hdd)
options["journal_size"] = DEFAULT_HYBRID_JOURNAL; options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
} }
bool is_hybrid = is_hdd && options["journal_device"] != "" && options["journal_device"] != options["data_device"];
if (is_hdd) if (is_hdd)
{ {
if (options["block_size"] == "") if (options["block_size"] == "")
options["block_size"] = "1M"; options["block_size"] = "1M";
if (is_hybrid && options["throttle_small_writes"] == "") if (options["throttle_small_writes"] == "")
options["throttle_small_writes"] = "1"; options["throttle_small_writes"] = "1";
} }
json11::Json::object sb; json11::Json::object sb;
@@ -135,7 +134,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
{ "meta_offset", 4096 + (dsk.meta_device == dsk.journal_device ? dsk.journal_len : 0) }, { "meta_offset", 4096 + (dsk.meta_device == dsk.journal_device ? dsk.journal_len : 0) },
{ "data_offset", 4096 + (dsk.data_device == dsk.meta_device ? dsk.meta_len : 0) + { "data_offset", 4096 + (dsk.data_device == dsk.meta_device ? dsk.meta_len : 0) +
(dsk.data_device == dsk.journal_device ? dsk.journal_len : 0) }, (dsk.data_device == dsk.journal_device ? dsk.journal_len : 0) },
{ "journal_no_same_sector_overwrites", !is_hdd || is_hybrid }, { "journal_no_same_sector_overwrites", true },
{ "journal_sector_buffer_count", 1024 }, { "journal_sector_buffer_count", 1024 },
{ "disable_data_fsync", json_is_true(options["disable_data_fsync"]) }, { "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
{ "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) }, { "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },

View File

@@ -187,30 +187,22 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
check_addr = addr; check_addr = addr;
if (pos == std::string::npos) if (pos == std::string::npos)
addr += "/v3"; addr += "/v3";
bool local = false;
int i; int i;
for (i = 0; i < local_ips.size(); i++) for (i = 0; i < local_ips.size(); i++)
{ {
if (local_ips[i] == check_addr) if (local_ips[i] == check_addr)
{ {
local = true; this->etcd_local.push_back(addr);
break; break;
} }
} }
auto & to = local ? this->etcd_local : this->etcd_addresses; if (i >= local_ips.size())
for (i = 0; i < to.size(); i++) this->etcd_addresses.push_back(addr);
{
if (to[i] == addr)
break;
}
if (i >= to.size())
to.push_back(addr);
} }
} }
void etcd_state_client_t::parse_config(const json11::Json & config) void etcd_state_client_t::parse_config(const json11::Json & config)
{ {
this->etcd_local.clear();
this->etcd_addresses.clear(); this->etcd_addresses.clear();
if (config["etcd_address"].is_string()) if (config["etcd_address"].is_string())
{ {
@@ -357,7 +349,7 @@ void etcd_state_client_t::start_etcd_watcher()
watch_id == ETCD_OSD_STATE_WATCH_ID) watch_id == ETCD_OSD_STATE_WATCH_ID)
etcd_watches_initialised++; etcd_watches_initialised++;
if (etcd_watches_initialised == 4 && this->log_level > 0) if (etcd_watches_initialised == 4 && this->log_level > 0)
fprintf(stderr, "Successfully subscribed to etcd at %s\n", cur_addr.c_str()); fprintf(stderr, "Successfully subscribed to etcd at %s\n", selected_etcd_address.c_str());
} }
if (data["result"]["canceled"].bool_value()) if (data["result"]["canceled"].bool_value())
{ {
@@ -368,17 +360,15 @@ void etcd_state_client_t::start_etcd_watcher()
// so we should restart from the beginning if we can // so we should restart from the beginning if we can
if (on_reload_hook != NULL) if (on_reload_hook != NULL)
{ {
// check to not trigger on_reload_hook multiple times fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
if (etcd_watch_ws != NULL) data["result"]["compact_revision"].uint64_value());
if (etcd_watch_ws)
{ {
fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
data["result"]["compact_revision"].uint64_value());
http_close(etcd_watch_ws); http_close(etcd_watch_ws);
etcd_watch_ws = NULL; etcd_watch_ws = NULL;
etcd_watch_revision = 0;
on_reload_hook();
} }
return; etcd_watch_revision = 0;
on_reload_hook();
} }
else else
{ {
@@ -425,9 +415,13 @@ void etcd_state_client_t::start_etcd_watcher()
} }
if (msg->eof) if (msg->eof)
{ {
fprintf(stderr, "Disconnected from etcd %s\n", cur_addr.c_str());
if (cur_addr == selected_etcd_address) if (cur_addr == selected_etcd_address)
{
fprintf(stderr, "Disconnected from etcd %s\n", selected_etcd_address.c_str());
selected_etcd_address = ""; selected_etcd_address = "";
}
else
fprintf(stderr, "Disconnected from etcd\n");
if (etcd_watch_ws) if (etcd_watch_ws)
{ {
http_close(etcd_watch_ws); http_close(etcd_watch_ws);
@@ -444,7 +438,6 @@ void etcd_state_client_t::start_etcd_watcher()
else if (etcd_watches_initialised > 0) else if (etcd_watches_initialised > 0)
{ {
// Connection was live, retry immediately // Connection was live, retry immediately
etcd_watches_initialised = 0;
start_etcd_watcher(); start_etcd_watcher();
} }
} }

View File

@@ -27,10 +27,19 @@ static void parse_http_headers(std::string & res, http_response_t *parsed);
struct http_co_t struct http_co_t
{ {
#ifdef WITH_OPENSSL
static SSL_CTX *ssl_ctx = NULL;
SSL *ssl_cli = NULL;
BIO *ssl_rbio = NULL;
BIO *ssl_wbio = NULL;
std::vector<uint8_t> encrypted_out;
#endif
timerfd_manager_t *tfd; timerfd_manager_t *tfd;
std::function<void(const http_response_t*)> response_callback; std::function<void(const http_response_t*)> response_callback;
int request_timeout = 0; int request_timeout = 0;
bool ssl = false;
std::string host; std::string host;
std::string request; std::string request;
std::string ws_outbox; std::string ws_outbox;
@@ -46,7 +55,7 @@ struct http_co_t
int timeout_id = -1; int timeout_id = -1;
int epoll_events = 0; int epoll_events = 0;
int sent = 0; int sent = 0;
std::vector<char> rbuf; std::vector<uint8_t> rbuf;
iovec read_iov, send_iov; iovec read_iov, send_iov;
msghdr read_msg = { 0 }, send_msg = { 0 }; msghdr read_msg = { 0 }, send_msg = { 0 };
http_response_t parsed; http_response_t parsed;
@@ -259,6 +268,12 @@ void http_response_t::parse_json_response(std::string & error, json11::Json & r)
http_co_t::~http_co_t() http_co_t::~http_co_t()
{ {
#ifdef WITH_OPENSSL
if (ssl_cli)
{
SSL_free(ssl_cli);
}
#endif
close_connection(); close_connection();
} }
@@ -275,6 +290,16 @@ void http_co_t::close_connection()
close(peer_fd); close(peer_fd);
peer_fd = -1; peer_fd = -1;
} }
#ifdef WITH_OPENSSL
if (ssl_ctx)
{
// Frees context, client and bios at once
SSL_free(ssl_ctx);
ssl_rbio = NULL;
ssl_wbio = NULL;
ssl_cli = NULL;
}
#endif
state = HTTP_CO_CLOSED; state = HTTP_CO_CLOSED;
connected_host = ""; connected_host = "";
response = ""; response = "";
@@ -304,6 +329,27 @@ void http_co_t::start_connection()
} }
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK); fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
epoll_events = 0; epoll_events = 0;
#ifdef WITH_OPENSSL
// https://wiki.openssl.org/index.php/Hostname_validation
if (ssl)
{
if (!ssl_ctx)
ssl_ctx = SSL_CTX_new(TLS_method());
ssl_rbio = BIO_new(BIO_s_mem());
ssl_wbio = BIO_new(BIO_s_mem());
ssl_cli = SSL_new(ssl_ctx);
if (!ssl_ctx || !ssl_cli || !ssl_rbio || !ssl_wbio)
{
parsed = { .error = std::string("openssl initialization failed: ")+ERR_get_error(NULL) };
response_callback(&parsed);
response_callback = NULL;
stackout();
return;
}
SSL_set_connect_state(ssl_cli);
SSL_set_bio(ssl_cli, ssl_rbio, ssl_wbio);
}
#endif
// Finally call connect // Finally call connect
int r = ::connect(peer_fd, (sockaddr*)&addr, sizeof(addr)); int r = ::connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
if (r < 0 && errno != EINPROGRESS) if (r < 0 && errno != EINPROGRESS)
@@ -432,11 +478,11 @@ void http_co_t::submit_read(bool check_timeout)
stackin(); stackin();
int res; int res;
again: again:
if (rbuf.size() != READ_BUFFER_SIZE) if (rbuf.capacity()-rbuf.size() < READ_BUFFER_SIZE)
{ {
rbuf.resize(READ_BUFFER_SIZE); rbuf.reserve(rbuf.size() + READ_BUFFER_SIZE);
} }
read_iov = { .iov_base = rbuf.data(), .iov_len = READ_BUFFER_SIZE }; read_iov = { .iov_base = rbuf.data()+rbuf.size(), .iov_len = READ_BUFFER_SIZE };
read_msg.msg_iov = &read_iov; read_msg.msg_iov = &read_iov;
read_msg.msg_iovlen = 1; read_msg.msg_iovlen = 1;
res = recvmsg(peer_fd, &read_msg, 0); res = recvmsg(peer_fd, &read_msg, 0);
@@ -466,22 +512,177 @@ again:
else if (res <= 0) else if (res <= 0)
{ {
// < 0 means error, 0 means EOF // < 0 means error, 0 means EOF
epoll_events = epoll_events & ~EPOLLIN; on_read_error(res);
if (state == HTTP_CO_HEADERS_RECEIVED)
std::swap(parsed.body, response);
close_connection();
if (res < 0)
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
run_cb_and_clear();
} }
else else
{ {
response += std::string(rbuf.data(), res); if (ssl)
handle_ssl_read(rbuf);
else
response += std::string((char*)rbuf.data(), res);
rbuf.resize(0);
handle_read(); handle_read();
} }
stackout(); stackout();
} }
// Shared error/EOF path for reads.
// res < 0 is a negated errno value, res == 0 means EOF.
// Clears EPOLLIN from the tracked event mask, closes the connection and
// runs the response callback via run_cb_and_clear().
void http_co_t::on_read_error(int res)
{
    epoll_events &= ~EPOLLIN;
    if (state == HTTP_CO_HEADERS_RECEIVED)
    {
        // Hand whatever has been accumulated in `response` over as the body
        std::swap(parsed.body, response);
    }
    close_connection();
    if (res < 0)
    {
        // Note: this replaces the whole `parsed` object, including the body swapped in above
        parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
    }
    run_cb_and_clear();
}
int http_co_t::do_ssl_handshake()
{
stackin();
int r;
while (1)
{
r = SSL_do_handshake(ssl_cli);
if (r == SSL_ERROR_WANT_WRITE)
{
r = ssl_encrypt();
if (r >= 0)
submit_send();
else
{
r = -r;
break;
}
}
else
{
if (r == SSL_ERROR_WANT_READ || r == SSL_ERROR_NONE)
{
// OK or wait until we have more incoming data
r = 0;
}
break;
}
}
stackout();
return r;
}
// Enqueue outbound encrypted TLS data
//
// Drains ssl_wbio and appends everything read from it to encrypted_out.
// Returns the number of bytes appended by this call, or the (<= 0) result
// of BIO_read() if the BIO reports a non-retryable condition.
int http_co_t::ssl_encrypt()
{
    stackin();
    int queued = 0;
    while (true)
    {
        // Grow the vector BEFORE reading into it: writing into reserved but
        // unsized capacity is undefined behaviour, and a later resize()
        // value-initializes (zeroes) exactly the bytes that were just read
        size_t used = encrypted_out.size();
        encrypted_out.resize(used + READ_BUFFER_SIZE);
        int r = BIO_read(ssl_wbio, encrypted_out.data()+used, READ_BUFFER_SIZE);
        // Shrink back to what was actually received
        encrypted_out.resize(used + (r > 0 ? r : 0));
        if (r > 0)
        {
            queued += r;
        }
        else
        {
            // Retryable result means the BIO is simply empty for now
            if (!BIO_should_retry(ssl_wbio))
                queued = r;
            break;
        }
    }
    stackout();
    return queued;
}
// Push the not yet sent part of `request` (from offset `sent`) through the TLS session.
// Each loop iteration either advances the handshake or passes plaintext to SSL_write(),
// then moves the produced TLS records to the outbound queue with ssl_encrypt() and
// calls submit_send(). Every failure is reported through on_read_error(-EIO).
void http_co_t::handle_ssl_write()
{
stackin();
int r = 0;
while (sent < request.size())
{
if (!SSL_is_init_finished(ssl_cli))
{
// Handshake is not complete yet - application data can't be written
if (do_ssl_handshake() != 0)
{
on_read_error(-EIO);
break;
}
// NOTE(review): this break skips the ssl_encrypt()/submit_send() calls below,
// so flushing of pending handshake records depends on do_ssl_handshake() - confirm
if (!SSL_is_init_finished(ssl_cli))
break;
}
else
{
int n = SSL_write(ssl_cli, request.data()+sent, request.size()-sent);
if (n > 0)
sent += n;
// get_sslstatus()/SSLSTATUS_FAIL are defined outside of this block;
// presumably they classify the SSL error for n - TODO confirm
else if (get_sslstatus(ssl_cli, n) == SSLSTATUS_FAIL)
{
on_read_error(-EIO);
break;
}
else
break;
}
// Queue whatever the TLS layer produced and try to send it
r = ssl_encrypt();
if (r >= 0)
submit_send();
else
{
on_read_error(-EIO);
break;
}
}
stackout();
}
// Process incoming encrypted TLS data
void http_co_t::handle_ssl_read()
{
stackin();
int size = rbuf.size();
int done = 0;
while (done < size)
{
int n = BIO_write(ssl_rbio, rbuf.data()+done, size-done);
if (n > 0)
{
done += n;
}
if (n <= 0)
{
on_read_error(-EIO);
break;
}
if (!SSL_is_init_finished(ssl_cli))
{
if (do_ssl_handshake() != 0)
{
on_read_error(-EIO);
break;
}
if (!SSL_is_init_finished(ssl_cli))
break;
}
do
{
if (response.capacity() - response.size() < READ_BUFFER_SIZE)
response.reserve(2*response.size() < response.size() + READ_BUFFER_SIZE ? response.size() + READ_BUFFER_SIZE : 2*response.size());
n = SSL_read(ssl_cli, response.data() + response.size(), READ_BUFFER_SIZE);
if (n <= 0)
{
n = SSL_get_error(ssl_cli, n);
if (n == SSL_ERROR_WANT_READ)
break;
}
} while (n > 0);
}
if (done < size)
memmove(rbuf.data(), rbuf.data()+done, size-done);
rbuf.resize(size-done);
stackout();
}
bool http_co_t::handle_read() bool http_co_t::handle_read()
{ {
stackin(); stackin();

View File

@@ -9,10 +9,6 @@ osd_op_t::~osd_op_t()
{ {
assert(!bs_op); assert(!bs_op);
assert(!op_data); assert(!op_data);
if (bitmap_buf)
{
free(bitmap_buf);
}
if (rmw_buf) if (rmw_buf)
{ {
free(rmw_buf); free(rmw_buf);

View File

@@ -165,7 +165,6 @@ struct osd_op_t
void *bitmap = NULL; void *bitmap = NULL;
unsigned bitmap_len = 0; unsigned bitmap_len = 0;
unsigned bmp_data = 0; unsigned bmp_data = 0;
void *bitmap_buf = NULL;
void *rmw_buf = NULL; void *rmw_buf = NULL;
osd_primary_op_data_t* op_data = NULL; osd_primary_op_data_t* op_data = NULL;
std::function<void(osd_op_t*)> callback; std::function<void(osd_op_t*)> callback;

View File

@@ -369,7 +369,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
op->buf = malloc_or_die(op->reply.hdr.retval); op->buf = malloc_or_die(op->reply.hdr.retval);
cl->recv_list.push_back(op->buf, op->reply.hdr.retval); cl->recv_list.push_back(op->buf, op->reply.hdr.retval);
} }
else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.describe.result_bytes > 0) else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.hdr.retval > 0)
{ {
delete cl->read_op; delete cl->read_op;
cl->read_op = op; cl->read_op = op;

View File

@@ -84,12 +84,9 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
{ {
for (int i = 0; i < cur_op->iov.count; i++) for (int i = 0; i < cur_op->iov.count; i++)
{ {
if (cur_op->iov.buf[i].iov_len > 0) assert(cur_op->iov.buf[i].iov_base);
{ to_send_list.push_back(cur_op->iov.buf[i]);
assert(cur_op->iov.buf[i].iov_base); to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
to_send_list.push_back(cur_op->iov.buf[i]);
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
}
} }
} }
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP) if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)

View File

@@ -149,7 +149,7 @@ public:
" --dev_num N\n" " --dev_num N\n"
" Use the specified device /dev/nbdN instead of automatic selection.\n" " Use the specified device /dev/nbdN instead of automatic selection.\n"
" --foreground 1\n" " --foreground 1\n"
" Stay in foreground, do not daemonize.\n", " Stay in foreground, do not daemonize.n",
exe_name, exe_name, exe_name exe_name, exe_name, exe_name
); );
exit(0); exit(0);

View File

@@ -198,14 +198,13 @@ class osd_t
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num); void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes); void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
void on_load_config_hook(json11::Json::object & changes); void on_load_config_hook(json11::Json::object & changes);
void on_reload_config_hook(json11::Json::object & changes);
json11::Json on_load_pgs_checks_hook(); json11::Json on_load_pgs_checks_hook();
void on_load_pgs_hook(bool success); void on_load_pgs_hook(bool success);
void bind_socket(); void bind_socket();
void acquire_lease(); void acquire_lease();
json11::Json get_osd_state(); json11::Json get_osd_state();
void create_osd_state(); void create_osd_state();
void renew_lease(bool reload); void renew_lease();
void print_stats(); void print_stats();
void print_slow(); void print_slow();
void reset_stats(); void reset_stats();

View File

@@ -70,7 +70,6 @@ void osd_t::init_cluster()
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); }; st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
st_cli.load_pgs_checks_hook = [this]() { return on_load_pgs_checks_hook(); }; st_cli.load_pgs_checks_hook = [this]() { return on_load_pgs_checks_hook(); };
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); }; st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
st_cli.on_reload_hook = [this]() { st_cli.load_global_config(); };
peering_state = OSD_LOADING_PGS; peering_state = OSD_LOADING_PGS;
st_cli.load_global_config(); st_cli.load_global_config();
} }
@@ -396,14 +395,6 @@ void osd_t::on_load_config_hook(json11::Json::object & global_config)
parse_config(true); parse_config(true);
bind_socket(); bind_socket();
acquire_lease(); acquire_lease();
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_reload_config_hook(cfg); };
}
void osd_t::on_reload_config_hook(json11::Json::object & global_config)
{
etcd_global_config = global_config;
parse_config(false);
renew_lease(true);
} }
// Acquire lease // Acquire lease
@@ -433,7 +424,7 @@ void osd_t::acquire_lease()
); );
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id) tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
{ {
renew_lease(false); renew_lease();
}); });
} }
@@ -508,11 +499,11 @@ void osd_t::create_osd_state()
} }
// Renew lease // Renew lease
void osd_t::renew_lease(bool reload) void osd_t::renew_lease()
{ {
st_cli.etcd_call("/lease/keepalive", json11::Json::object { st_cli.etcd_call("/lease/keepalive", json11::Json::object {
{ "ID", etcd_lease_id } { "ID", etcd_lease_id }
}, st_cli.etcd_quick_timeout, 0, 0, [this, reload](std::string err, json11::Json data) }, st_cli.etcd_quick_timeout, 0, 0, [this](std::string err, json11::Json data)
{ {
if (err == "" && data["result"]["TTL"].string_value() == "") if (err == "" && data["result"]["TTL"].string_value() == "")
{ {
@@ -531,20 +522,15 @@ void osd_t::renew_lease(bool reload)
force_stop(1); force_stop(1);
} }
// Retry // Retry
tfd->set_timer(st_cli.etcd_quick_timeout, false, [this, reload](int timer_id) tfd->set_timer(st_cli.etcd_quick_timeout, false, [this](int timer_id)
{ {
renew_lease(reload); renew_lease();
}); });
} }
else else
{ {
etcd_failed_attempts = 0; etcd_failed_attempts = 0;
report_statistics(); report_statistics();
// Reload PGs
if (reload && run_primary)
{
st_cli.load_pgs();
}
} }
}); });
} }
@@ -574,6 +560,7 @@ void osd_t::force_stop(int exitcode)
json11::Json osd_t::on_load_pgs_checks_hook() json11::Json osd_t::on_load_pgs_checks_hook()
{ {
assert(this->pgs.size() == 0);
json11::Json::array checks = { json11::Json::array checks = {
json11::Json::object { json11::Json::object {
{ "target", "LEASE" }, { "target", "LEASE" },

View File

@@ -220,7 +220,7 @@ struct __attribute__((__packed__)) osd_reply_rw_t
// for reads: bitmap length // for reads: bitmap length
uint32_t bitmap_len; uint32_t bitmap_len;
uint32_t pad0; uint32_t pad0;
// for reads and writes: object version // for reads: object version
uint64_t version; uint64_t version;
}; };

View File

@@ -87,7 +87,8 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
// - op_data // - op_data
1, sizeof(osd_primary_op_data_t) + 1, sizeof(osd_primary_op_data_t) +
// - stripes // - stripes
stripe_count * sizeof(osd_rmw_stripe_t) + // - resulting bitmap buffers
stripe_count * (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) +
chain_size * ( chain_size * (
// - copy of the chain // - copy of the chain
sizeof(inode_t) + sizeof(inode_t) +
@@ -109,12 +110,11 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
op_data->pg_size = pg_it->second.pg_size; op_data->pg_size = pg_it->second.pg_size;
cur_op->op_data = op_data; cur_op->op_data = op_data;
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes); split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
// Resulting bitmaps have to survive op_data and be freed with the op itself // Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
assert(!cur_op->bitmap_buf);
cur_op->bitmap_buf = calloc_or_die(1, clean_entry_bitmap_size * stripe_count);
for (int i = 0; i < stripe_count; i++) for (int i = 0; i < stripe_count; i++)
{ {
op_data->stripes[i].bmp_buf = (uint8_t*)cur_op->bitmap_buf + clean_entry_bitmap_size * i; op_data->stripes[i].bmp_buf = data_buf;
data_buf = (uint8_t*)data_buf + clean_entry_bitmap_size;
} }
op_data->chain_size = chain_size; op_data->chain_size = chain_size;
if (chain_size > 0) if (chain_size > 0)
@@ -129,19 +129,16 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
data_buf = (uint8_t*)data_buf + chain_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size); data_buf = (uint8_t*)data_buf + chain_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size);
// Copy chain // Copy chain
int chain_num = 0; int chain_num = 0;
op_data->read_chain[chain_num] = cur_op->req.rw.inode; op_data->read_chain[chain_num++] = cur_op->req.rw.inode;
op_data->chain_states[chain_num] = NULL;
chain_num++;
auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode); auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode);
while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id && while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id &&
INODE_POOL(inode_it->second.parent_id) == pg_it->second.pool_id && INODE_POOL(inode_it->second.parent_id) == pg_it->second.pool_id &&
// Check for loops // Check for loops
inode_it->second.parent_id != cur_op->req.rw.inode) inode_it->second.parent_id != cur_op->req.rw.inode)
{ {
op_data->read_chain[chain_num] = inode_it->second.parent_id; op_data->read_chain[chain_num++] = inode_it->second.parent_id;
op_data->chain_states[chain_num] = NULL; op_data->chain_states[chain_num++] = NULL;
inode_it = st_cli.inode_config.find(inode_it->second.parent_id); inode_it = st_cli.inode_config.find(inode_it->second.parent_id);
chain_num++;
} }
} }
pg_it->second.inflight++; pg_it->second.inflight++;
@@ -647,6 +644,12 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
else if (op_data->st == 4) goto resume_4; else if (op_data->st == 4) goto resume_4;
else if (op_data->st == 5) goto resume_5; else if (op_data->st == 5) goto resume_5;
assert(op_data->st == 0); assert(op_data->st == 0);
// Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs
if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD))
{
finish_op(cur_op, -EBUSY);
return;
}
if (!check_write_queue(cur_op, pg)) if (!check_write_queue(cur_op, pg))
{ {
return; return;

View File

@@ -83,13 +83,11 @@ retry_1:
// Object is degraded/misplaced and will be moved to <write_osd_set> // Object is degraded/misplaced and will be moved to <write_osd_set>
op_data->stripes[0].read_start = 0; op_data->stripes[0].read_start = 0;
op_data->stripes[0].read_end = bs_block_size; op_data->stripes[0].read_end = bs_block_size;
assert(!cur_op->rmw_buf);
cur_op->rmw_buf = op_data->stripes[0].read_buf = memalign_or_die(MEM_ALIGNMENT, bs_block_size); cur_op->rmw_buf = op_data->stripes[0].read_buf = memalign_or_die(MEM_ALIGNMENT, bs_block_size);
} }
} }
else else
{ {
assert(!cur_op->rmw_buf);
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set, cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size); pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
if (!cur_op->rmw_buf) if (!cur_op->rmw_buf)

View File

@@ -35,11 +35,6 @@
#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value))) #define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
#define qobject_unref QDECREF #define qobject_unref QDECREF
#endif #endif
#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
#include "sysemu/replay.h"
#else
#include "sysemu/sysemu.h"
#endif
#include "vitastor_c.h" #include "vitastor_c.h"
@@ -53,13 +48,9 @@ void DSO_STAMP_FUN(void)
} }
#endif #endif
typedef struct VitastorFdData VitastorFdData;
typedef struct VitastorClient typedef struct VitastorClient
{ {
void *proxy; void *proxy;
int uring_eventfd;
void *watch; void *watch;
char *config_path; char *config_path;
char *etcd_host; char *etcd_host;
@@ -76,24 +67,12 @@ typedef struct VitastorClient
int rdma_gid_index; int rdma_gid_index;
int rdma_mtu; int rdma_mtu;
QemuMutex mutex; QemuMutex mutex;
AioContext *ctx;
VitastorFdData **fds;
int fd_count, fd_alloc;
int bh_uring_scheduled;
uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len; uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
uint32_t last_bitmap_granularity; uint32_t last_bitmap_granularity;
uint8_t *last_bitmap; uint8_t *last_bitmap;
} VitastorClient; } VitastorClient;
// Per-descriptor state passed to vitastor_aio_fd_read()/vitastor_aio_fd_write(),
// which take the client mutex and then call the stored handler with <opaque>
typedef struct VitastorFdData
{
// Owning client; its mutex is locked around handler invocations
VitastorClient *cli;
// File descriptor this entry was registered for (used as the lookup key in client->fds)
int fd;
// Original handlers supplied to vitastor_aio_set_fd_handler(); either may be NULL
IOHandler *fd_read, *fd_write;
// Argument passed to fd_read/fd_write
void *opaque;
} VitastorFdData;
typedef struct VitastorRPC typedef struct VitastorRPC
{ {
BlockDriverState *bs; BlockDriverState *bs;
@@ -104,21 +83,10 @@ typedef struct VitastorRPC
uint64_t inode, offset, len; uint64_t inode, offset, len;
uint32_t bitmap_granularity; uint32_t bitmap_granularity;
uint8_t *bitmap; uint8_t *bitmap;
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
QEMUBH *bh;
#endif
} VitastorRPC; } VitastorRPC;
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
// Heap-allocated context of a manually created bottom half; it is released by
// vitastor_bh_uring_handler() together with the bottom half itself
typedef struct VitastorBH
{
// Client whose io_uring events should be handled
VitastorClient *cli;
// The bottom half to delete after it has run
QEMUBH *bh;
} VitastorBH;
#endif
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task); static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
static void vitastor_co_generic_cb(void *opaque, long retval); static void vitastor_co_generic_bh_cb(void *opaque, long retval);
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version); static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
static void vitastor_close(BlockDriverState *bs); static void vitastor_close(BlockDriverState *bs);
@@ -234,54 +202,6 @@ out:
return; return;
} }
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
static void vitastor_uring_handler(void *opaque)
{
VitastorClient *client = (VitastorClient*)opaque;
qemu_mutex_lock(&client->mutex);
client->bh_uring_scheduled = 0;
vitastor_c_uring_handle_events(client->proxy);
qemu_mutex_unlock(&client->mutex);
}
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
// Bottom-half trampoline for QEMU versions without one-shot bottom halves:
// runs the io_uring event handler for the client, then deletes the bottom half
// and frees the VitastorBH context allocated by vitastor_schedule_uring_handler()
static void vitastor_bh_uring_handler(void *opaque)
{
    VitastorBH *vbh = opaque;
    // The handler takes the VitastorClient as its opaque argument
    vitastor_uring_handler(vbh->cli);
    qemu_bh_delete(vbh->bh);
    free(vbh);
}
#endif
// Schedule vitastor_uring_handler() to run as a bottom half in the client's AioContext.
// Does nothing when io_uring is not in use (uring_eventfd < 0) or when a run is already
// pending (bh_uring_scheduled is set until the handler starts).
static void vitastor_schedule_uring_handler(VitastorClient *client)
{
    void *opaque = client;
    if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled)
    {
        client->bh_uring_scheduled = 1;
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
        replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
        aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
#else
        // No one-shot API: create a bottom half manually, it is deleted by the trampoline
        VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
        if (!vbh)
        {
            // Nothing was scheduled, so allow the next call to retry
            client->bh_uring_scheduled = 0;
            return;
        }
        vbh->cli = client;
#if QEMU_VERSION_MAJOR >= 2
        // There is no RPC task in this scope - use the AioContext saved in the client
        vbh->bh = aio_bh_new(client->ctx, vitastor_bh_uring_handler, vbh);
#else
        vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
#endif
        qemu_bh_schedule(vbh->bh);
#endif
    }
}
#else
// No-op variant used when VITASTOR_C_API_VERSION is undefined or less than 2,
// so that callers don't need their own conditional compilation
static void vitastor_schedule_uring_handler(VitastorClient *client)
{
}
#endif
static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task) static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
{ {
BlockDriverState *bs = task->bs; BlockDriverState *bs = task->bs;
@@ -289,8 +209,7 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
task->co = qemu_coroutine_self(); task->co = qemu_coroutine_self();
qemu_mutex_lock(&client->mutex); qemu_mutex_lock(&client->mutex);
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task); vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
vitastor_schedule_uring_handler(client);
qemu_mutex_unlock(&client->mutex); qemu_mutex_unlock(&client->mutex);
while (!task->complete) while (!task->complete)
@@ -299,32 +218,14 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
} }
} }
static void vitastor_aio_fd_read(void *fddv) // FIXME: Fix thread safety of the driver - now it segfaults when iothread is enabled in QEMU
{ static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
VitastorFdData *fdd = (VitastorFdData*)fddv;
qemu_mutex_lock(&fdd->cli->mutex);
fdd->fd_read(fdd->opaque);
vitastor_schedule_uring_handler(fdd->cli);
qemu_mutex_unlock(&fdd->cli->mutex);
}
static void vitastor_aio_fd_write(void *fddv)
{
VitastorFdData *fdd = (VitastorFdData*)fddv;
qemu_mutex_lock(&fdd->cli->mutex);
fdd->fd_write(fdd->opaque);
vitastor_schedule_uring_handler(fdd->cli);
qemu_mutex_unlock(&fdd->cli->mutex);
}
static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
{ {
aio_set_fd_handler(ctx, fd, aio_set_fd_handler(ctx, fd,
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3 #if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
0 /*is_external*/, 0 /*is_external*/,
#endif #endif
fd_read, fd_read, fd_write,
fd_write,
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1 #if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
NULL /*io_flush*/, NULL /*io_flush*/,
#endif #endif
@@ -337,53 +238,6 @@ static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_
opaque); opaque);
} }
// FD handler registration callback given to the client library.
// Keeps a VitastorFdData entry per descriptor in client->fds and registers the
// vitastor_aio_fd_read/vitastor_aio_fd_write wrappers with the AioContext instead of
// the handlers passed by the caller.
//   vcli              - the VitastorClient
//   fd                - descriptor to (un)register
//   fd_read, fd_write - new handlers; both NULL means "remove the descriptor"
//   opaque            - argument for fd_read/fd_write
//   unused1, unused2  - ignored
static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
{
VitastorClient *client = (VitastorClient*)vcli;
VitastorFdData *fdd = NULL;
int i;
// Look for an existing entry for this descriptor
for (i = 0; i < client->fd_count; i++)
{
if (client->fds[i]->fd == fd)
{
if (fd_read || fd_write)
{
// Update the existing entry in place
fdd = client->fds[i];
fdd->opaque = opaque;
fdd->fd_read = fd_read;
fdd->fd_write = fd_write;
}
else
{
// Remove the entry from the array by shifting the tail down
// NOTE(review): the removed VitastorFdData is not freed here, which looks like a leak.
// Freeing it right away may be unsafe if removal happens from inside its own
// fd_read/fd_write callback, because the wrappers use fdd after the call - confirm
for (int j = i+1; j < client->fd_count; j++)
client->fds[j-1] = client->fds[j];
client->fd_count--;
}
break;
}
}
// New descriptor: allocate an entry and append it to the array
if ((fd_read || fd_write) && !fdd)
{
// NOTE(review): malloc()/realloc() results are not checked
fdd = (VitastorFdData*)malloc(sizeof(VitastorFdData));
fdd->cli = client;
fdd->fd = fd;
fdd->fd_read = fd_read;
fdd->fd_write = fd_write;
fdd->opaque = opaque;
if (client->fd_count >= client->fd_alloc)
{
// Double the capacity, starting from 16 entries
client->fd_alloc = client->fd_alloc*2;
if (client->fd_alloc < 16)
client->fd_alloc = 16;
client->fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * client->fd_alloc);
}
client->fds[client->fd_count++] = fdd;
}
// fdd is NULL here when the descriptor is being removed (both wrappers are NULL then too)
universal_aio_set_fd_handler(
client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
);
}
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
{ {
VitastorRPC task; VitastorRPC task;
@@ -401,36 +255,10 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0); client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0); client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0); client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
client->ctx = bdrv_get_aio_context(bs); client->proxy = vitastor_c_create_qemu(
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2 vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
client->proxy = vitastor_c_create_qemu_uring(
vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0 client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
); );
if (!client->proxy)
{
fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower\n", strerror(errno));
client->uring_eventfd = -1;
#endif
client->proxy = vitastor_c_create_qemu(
vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
);
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
}
else
{
client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
if (client->uring_eventfd < 0)
{
fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
error_setg(errp, "failed to create io_uring eventfd");
vitastor_close(bs);
return -1;
}
universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
}
#endif
image = client->image = g_strdup(qdict_get_try_str(options, "image")); image = client->image = g_strdup(qdict_get_try_str(options, "image"));
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0; client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
// Get image metadata (size and readonly flag) or just wait until the client is ready // Get image metadata (size and readonly flag) or just wait until the client is ready
@@ -510,12 +338,6 @@ static void vitastor_close(BlockDriverState *bs)
{ {
VitastorClient *client = bs->opaque; VitastorClient *client = bs->opaque;
vitastor_c_destroy(client->proxy); vitastor_c_destroy(client->proxy);
if (client->fds)
{
free(client->fds);
client->fds = NULL;
client->fd_alloc = client->fd_count = 0;
}
qemu_mutex_destroy(&client->mutex); qemu_mutex_destroy(&client->mutex);
if (client->config_path) if (client->config_path)
g_free(client->config_path); g_free(client->config_path);
@@ -632,44 +454,25 @@ static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
}; };
} }
static void vitastor_co_generic_bh_cb(void *opaque) static void vitastor_co_generic_bh_cb(void *opaque, long retval)
{ {
VitastorRPC *task = opaque; VitastorRPC *task = opaque;
task->ret = retval;
task->complete = 1; task->complete = 1;
if (qemu_coroutine_self() != task->co) if (qemu_coroutine_self() != task->co)
{ {
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8 #if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
aio_co_wake(task->co); aio_co_wake(task->co);
#else #else
#if QEMU_VERSION_MAJOR == 2
qemu_bh_delete(task->bh);
#endif
qemu_coroutine_enter(task->co, NULL); qemu_coroutine_enter(task->co, NULL);
qemu_aio_release(task); qemu_aio_release(task);
#endif #endif
} }
} }
// Generic completion callback: <opaque> is the VitastorRPC task, <retval> is the result.
// Stores the result in the task and schedules vitastor_co_generic_bh_cb() as a bottom half
// in the AioContext of task->bs instead of calling it directly.
static void vitastor_co_generic_cb(void *opaque, long retval)
{
VitastorRPC *task = opaque;
task->ret = retval;
// The scheduling API is selected by QEMU version: QEMU >= 4.2 first,
// then 2.8 up to 4.1, then older 2.x, then anything before 2.0
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
#elif QEMU_VERSION_MAJOR >= 2
// No one-shot API: the bottom half is kept in the task (task->bh)
task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
qemu_bh_schedule(task->bh);
#else
task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
qemu_bh_schedule(task->bh);
#endif
}
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version) static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
{ {
vitastor_co_generic_cb(opaque, retval); vitastor_co_generic_bh_cb(opaque, retval);
} }
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
@@ -688,7 +491,6 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode; uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
qemu_mutex_lock(&client->mutex); qemu_mutex_lock(&client->mutex);
vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task); vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
vitastor_schedule_uring_handler(client);
qemu_mutex_unlock(&client->mutex); qemu_mutex_unlock(&client->mutex);
while (!task.complete) while (!task.complete)
@@ -721,8 +523,7 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode; uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
qemu_mutex_lock(&client->mutex); qemu_mutex_lock(&client->mutex);
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task); vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
vitastor_schedule_uring_handler(client);
qemu_mutex_unlock(&client->mutex); qemu_mutex_unlock(&client->mutex);
while (!task.complete) while (!task.complete)
@@ -740,6 +541,7 @@ static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitma
VitastorRPC *task = opaque; VitastorRPC *task = opaque;
VitastorClient *client = task->bs->opaque; VitastorClient *client = task->bs->opaque;
task->ret = retval; task->ret = retval;
task->complete = 1;
if (retval >= 0) if (retval >= 0)
{ {
task->bitmap = bitmap; task->bitmap = bitmap;
@@ -751,17 +553,15 @@ static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitma
client->last_bitmap = bitmap; client->last_bitmap = bitmap;
} }
} }
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 if (qemu_coroutine_self() != task->co)
replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque); {
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8 #if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque); aio_co_wake(task->co);
#elif QEMU_VERSION_MAJOR >= 2
task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
qemu_bh_schedule(task->bh);
#else #else
task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque); qemu_coroutine_enter(task->co, NULL);
qemu_bh_schedule(task->bh); qemu_aio_release(task);
#endif #endif
}
} }
static int coroutine_fn vitastor_co_block_status( static int coroutine_fn vitastor_co_block_status(
@@ -802,7 +602,6 @@ static int coroutine_fn vitastor_co_block_status(
task.bitmap = client->last_bitmap = NULL; task.bitmap = client->last_bitmap = NULL;
qemu_mutex_lock(&client->mutex); qemu_mutex_lock(&client->mutex);
vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task); vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
vitastor_schedule_uring_handler(client);
qemu_mutex_unlock(&client->mutex); qemu_mutex_unlock(&client->mutex);
while (!task.complete) while (!task.complete)
{ {
@@ -888,8 +687,7 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
vitastor_co_init_task(bs, &task); vitastor_co_init_task(bs, &task);
qemu_mutex_lock(&client->mutex); qemu_mutex_lock(&client->mutex);
vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task); vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
vitastor_schedule_uring_handler(client);
qemu_mutex_unlock(&client->mutex); qemu_mutex_unlock(&client->mutex);
while (!task.complete) while (!task.complete)

View File

@@ -2,12 +2,9 @@
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details) // License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
#include <stdlib.h> #include <stdlib.h>
#include <unistd.h>
#include <stdexcept> #include <stdexcept>
#include <sys/eventfd.h>
#include "ringloop.h" #include "ringloop.h"
ring_loop_t::ring_loop_t(int qd) ring_loop_t::ring_loop_t(int qd)
@@ -35,10 +32,6 @@ ring_loop_t::~ring_loop_t()
free(free_ring_data); free(free_ring_data);
free(ring_datas); free(ring_datas);
io_uring_queue_exit(&ring); io_uring_queue_exit(&ring);
if (ring_eventfd)
{
close(ring_eventfd);
}
} }
void ring_loop_t::register_consumer(ring_consumer_t *consumer) void ring_loop_t::register_consumer(ring_consumer_t *consumer)
@@ -66,16 +59,6 @@ void ring_loop_t::unregister_consumer(ring_consumer_t *consumer)
void ring_loop_t::loop() void ring_loop_t::loop()
{ {
if (ring_eventfd >= 0)
{
// Reset eventfd counter
uint64_t ctr = 0;
int r = read(ring_eventfd, &ctr, 8);
if (r < 0 && errno != EAGAIN && errno != EINTR)
{
fprintf(stderr, "Error resetting eventfd: %s\n", strerror(errno));
}
}
struct io_uring_cqe *cqe; struct io_uring_cqe *cqe;
while (!io_uring_peek_cqe(&ring, &cqe)) while (!io_uring_peek_cqe(&ring, &cqe))
{ {
@@ -94,7 +77,7 @@ void ring_loop_t::loop()
} }
else else
{ {
fprintf(stderr, "Warning: empty callback in SQE\n"); printf("Warning: empty callback in SQE\n");
free_ring_data[free_ring_data_ptr++] = d - ring_datas; free_ring_data[free_ring_data_ptr++] = d - ring_datas;
} }
io_uring_cqe_seen(&ring, cqe); io_uring_cqe_seen(&ring, cqe);
@@ -144,24 +127,3 @@ int ring_loop_t::sqes_left()
} }
return left; return left;
} }
// Create (once) a non-blocking, close-on-exec eventfd and register it with the ring.
// Returns the eventfd descriptor on success - the same one on repeated calls -
// or a negative value on failure: -errno from eventfd(), or the result of
// io_uring_register_eventfd()
int ring_loop_t::register_eventfd()
{
    if (ring_eventfd < 0)
    {
        ring_eventfd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
        if (ring_eventfd < 0)
            return -errno;
        int res = io_uring_register_eventfd(&ring, ring_eventfd);
        if (res < 0)
        {
            // Registration failed - don't keep a useless descriptor around
            close(ring_eventfd);
            ring_eventfd = -1;
            return res;
        }
    }
    return ring_eventfd;
}

View File

@@ -126,13 +126,11 @@ class ring_loop_t
unsigned free_ring_data_ptr; unsigned free_ring_data_ptr;
bool loop_again; bool loop_again;
struct io_uring ring; struct io_uring ring;
int ring_eventfd = -1;
public: public:
ring_loop_t(int qd); ring_loop_t(int qd);
~ring_loop_t(); ~ring_loop_t();
void register_consumer(ring_consumer_t *consumer); void register_consumer(ring_consumer_t *consumer);
void unregister_consumer(ring_consumer_t *consumer); void unregister_consumer(ring_consumer_t *consumer);
int register_eventfd();
inline struct io_uring_sqe* get_sqe() inline struct io_uring_sqe* get_sqe()
{ {

View File

@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor Name: Vitastor
Description: Vitastor client library Description: Vitastor client library
Version: 0.9.5 Version: 0.9.2
Libs: -L${libdir} -lvitastor_client Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir} Cflags: -I${includedir}

View File

@@ -5,7 +5,6 @@
// Also acts as a C-C++ proxy for the QEMU driver (QEMU headers don't compile with g++) // Also acts as a C-C++ proxy for the QEMU driver (QEMU headers don't compile with g++)
#include <sys/epoll.h> #include <sys/epoll.h>
#include <sys/eventfd.h>
#include "ringloop.h" #include "ringloop.h"
#include "epoll_manager.h" #include "epoll_manager.h"
@@ -26,7 +25,6 @@ struct vitastor_c
epoll_manager_t *epmgr = NULL; epoll_manager_t *epmgr = NULL;
timerfd_manager_t *tfd = NULL; timerfd_manager_t *tfd = NULL;
cluster_client_t *cli = NULL; cluster_client_t *cli = NULL;
int uring_eventfd = -1;
QEMUSetFDHandler *aio_set_fd_handler = NULL; QEMUSetFDHandler *aio_set_fd_handler = NULL;
void *aio_ctx = NULL; void *aio_ctx = NULL;
@@ -72,8 +70,14 @@ static void vitastor_c_write_handler(void *opaque)
data->callback(data->fd, EPOLLOUT); data->callback(data->fd, EPOLLOUT);
} }
static vitastor_c *vitastor_c_create_qemu_common(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context) vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{ {
json11::Json cfg_json = vitastor_c_common_config(
config_path, etcd_host, etcd_prefix, use_rdma,
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
);
vitastor_c *self = new vitastor_c; vitastor_c *self = new vitastor_c;
self->aio_set_fd_handler = aio_set_fd_handler; self->aio_set_fd_handler = aio_set_fd_handler;
self->aio_ctx = aio_context; self->aio_ctx = aio_context;
@@ -91,77 +95,24 @@ static vitastor_c *vitastor_c_create_qemu_common(QEMUSetFDHandler *aio_set_fd_ha
self->aio_set_fd_handler(self->aio_ctx, fd, false, NULL, NULL, NULL, NULL); self->aio_set_fd_handler(self->aio_ctx, fd, false, NULL, NULL, NULL, NULL);
} }
}); });
return self;
}
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{
json11::Json cfg_json = vitastor_c_common_config(
config_path, etcd_host, etcd_prefix, use_rdma,
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
);
auto self = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
self->cli = new cluster_client_t(NULL, self->tfd, cfg_json); self->cli = new cluster_client_t(NULL, self->tfd, cfg_json);
return self; return self;
} }
// Create a QEMU-integrated client that additionally owns a ring loop.
//
// The ring loop (512 entries) is allocated first; if its constructor throws
// a std::exception, nothing else is created and NULL is returned.
// Otherwise the configuration is assembled with vitastor_c_common_config(),
// the QEMU part is set up by vitastor_c_create_qemu_common(), and the
// cluster client is created on top of the ring loop and the client's tfd.
vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
    const char *config_path, const char *etcd_host, const char *etcd_prefix,
    int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{
    ring_loop_t *uring = NULL;
    try
    {
        uring = new ring_loop_t(512);
    }
    catch (const std::exception &)
    {
        // Ring loop could not be constructed - report failure to the caller
        return NULL;
    }
    json11::Json cfg_json = vitastor_c_common_config(
        config_path, etcd_host, etcd_prefix, use_rdma,
        rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
    );
    vitastor_c *client = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
    client->ringloop = uring;
    client->cli = new cluster_client_t(uring, client->tfd, cfg_json);
    return client;
}
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix, vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level) int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{ {
ring_loop_t *ringloop = NULL;
try
{
ringloop = new ring_loop_t(512);
}
catch (std::exception & e)
{
return NULL;
}
json11::Json cfg_json = vitastor_c_common_config( json11::Json cfg_json = vitastor_c_common_config(
config_path, etcd_host, etcd_prefix, use_rdma, config_path, etcd_host, etcd_prefix, use_rdma,
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
); );
vitastor_c *self = new vitastor_c; vitastor_c *self = new vitastor_c;
self->ringloop = ringloop; self->ringloop = new ring_loop_t(512);
self->epmgr = new epoll_manager_t(self->ringloop); self->epmgr = new epoll_manager_t(self->ringloop);
self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json); self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json);
return self; return self;
} }
// Forward to register_eventfd() of the client's ring loop.
// Returns -EINVAL when the client has no ring loop; otherwise returns
// whatever ringloop->register_eventfd() returns.
int vitastor_c_uring_register_eventfd(vitastor_c *client)
{
    return client->ringloop
        ? client->ringloop->register_eventfd()
        : -EINVAL;
}
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len) vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len)
{ {
json11::Json::object cfg; json11::Json::object cfg;
@@ -215,11 +166,6 @@ void vitastor_c_uring_wait_events(vitastor_c *client)
client->ringloop->wait(); client->ringloop->wait();
} }
// Return the result of has_work() of the client's ring loop.
// NOTE(review): the ring loop pointer is dereferenced without a NULL check,
// so this presumably must only be called on clients that have one - confirm.
int vitastor_c_uring_has_work(vitastor_c *client)
{
    auto *loop = client->ringloop;
    return loop->has_work();
}
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque) struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque)
{ {

View File

@@ -7,7 +7,7 @@
#define VITASTOR_QEMU_PROXY_H #define VITASTOR_QEMU_PROXY_H
// C API wrapper version // C API wrapper version
#define VITASTOR_C_API_VERSION 2 #define VITASTOR_C_API_VERSION 1
#ifndef POOL_ID_BITS #ifndef POOL_ID_BITS
#define POOL_ID_BITS 16 #define POOL_ID_BITS 16
@@ -34,19 +34,14 @@ typedef void QEMUSetFDHandler(void *ctx, int fd, int is_external, IOHandler *fd_
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context, vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
const char *config_path, const char *etcd_host, const char *etcd_prefix, const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level); int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix, vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level); int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len); vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);
void vitastor_c_destroy(vitastor_c *client); void vitastor_c_destroy(vitastor_c *client);
int vitastor_c_is_ready(vitastor_c *client); int vitastor_c_is_ready(vitastor_c *client);
int vitastor_c_uring_register_eventfd(vitastor_c *client);
void vitastor_c_uring_wait_ready(vitastor_c *client); void vitastor_c_uring_wait_ready(vitastor_c *client);
void vitastor_c_uring_handle_events(vitastor_c *client); void vitastor_c_uring_handle_events(vitastor_c *client);
void vitastor_c_uring_wait_events(vitastor_c *client); void vitastor_c_uring_wait_events(vitastor_c *client);
int vitastor_c_uring_has_work(vitastor_c *client);
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque); struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version, void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,

View File

@@ -36,6 +36,9 @@ for i in $(seq 1 $OSD_COUNT); do
start_osd $i start_osd $i
done done
cd mon
npm install
cd ..
(while true; do node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 || true; done) &>./testdata/mon.log & (while true; do node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 || true; done) &>./testdata/mon.log &
MON_PID=$! MON_PID=$!
@@ -82,9 +85,7 @@ wait_up()
done done
} }
if [[ $OSD_COUNT -gt 0 ]]; then wait_up 60
wait_up 60
fi
try_reweight() try_reweight()
{ {

View File

@@ -16,35 +16,29 @@ SCHEME=ec ./test_change_pg_count.sh
./test_etcd_fail.sh ./test_etcd_fail.sh
./test_failure_domain.sh
./test_interrupted_rebalance.sh ./test_interrupted_rebalance.sh
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
SCHEME=ec ./test_interrupted_rebalance.sh SCHEME=ec ./test_interrupted_rebalance.sh
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
./test_failure_domain.sh
./test_snapshot.sh
SCHEME=ec ./test_snapshot.sh
./test_minsize_1.sh ./test_minsize_1.sh
./test_move_reappear.sh ./test_move_reappear.sh
./test_rm.sh
./test_snapshot_chain.sh
SCHEME=ec ./test_snapshot_chain.sh
./test_snapshot_down.sh
SCHEME=ec ./test_snapshot_down.sh
./test_splitbrain.sh
./test_rebalance_verify.sh ./test_rebalance_verify.sh
IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
SCHEME=ec ./test_rebalance_verify.sh SCHEME=ec ./test_rebalance_verify.sh
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
./test_rm.sh
./test_snapshot.sh
SCHEME=ec ./test_snapshot.sh
./test_splitbrain.sh
./test_write.sh ./test_write.sh
SCHEME=xor ./test_write.sh SCHEME=xor ./test_write.sh

View File

@@ -15,6 +15,9 @@ $ETCDCTL put /vitastor/osd/stats/7 '{"host":"host4","size":1073741824,"time":"'$
$ETCDCTL put /vitastor/osd/stats/8 '{"host":"host4","size":1073741824,"time":"'$TIME'"}' $ETCDCTL put /vitastor/osd/stats/8 '{"host":"host4","size":1073741824,"time":"'$TIME'"}'
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":4,"failure_domain":"rack"}}' $ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":4,"failure_domain":"rack"}}'
cd mon
npm install
cd ..
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log & node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
MON_PID=$! MON_PID=$!

View File

@@ -1,48 +0,0 @@
#!/bin/bash -ex
# Snapshot chain test.
# Creates a 32M image, takes 10 snapshots of it one after another, writes
# to the image after each snapshot and compares the image exported through
# qemu-img against a local mirror file after every step. Finally removes
# snapshots and re-checks both the image and the remaining snapshot.
# Runs with -e (stop on the first failing command) and -x (trace commands).
# Pull in the shared environment from the script's own directory.
# ETCD_URL, ETCD_PORT, check_qemu and format_green are not defined here,
# so they presumably come from this include - confirm.
. `dirname $0`/run_3osds.sh
check_qemu
# Test multiple snapshots
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
# Fill the image sequentially in 4M blocks. ./testdata/mirror.bin is later
# compared with exports of the image, so fio presumably mirrors everything
# it writes into that file (mirror_file option) - confirm.
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
for i in {1..10}; do
# Create a snapshot
build/src/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@$i
# Check that the new snapshot is see-through
# (the image exported right after snap-create must still equal the mirror;
# check.bin from the last iteration is reused at the end of the script)
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/check.bin
cmp ./testdata/check.bin ./testdata/mirror.bin
# Write something to it
# 1024 random 4k writes per iteration. $((i <= 2)) evaluates to 1 for the
# first two iterations and to 0 afterwards, so randrepeat is only enabled
# for i=1 and i=2. The buffer pattern is the decimal number 10+i repeated
# four times after "0x" (0x11111111 for i=1 ... 0x20202020 for i=10), so
# every iteration writes a different pattern.
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
# Check the new content
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
done
# Remove snapshots. NOTE(review): with two arguments this presumably removes
# the whole range of layers from testchain@1 to testchain@9 - the comment
# below treats testchain@10 as the only snapshot left. Confirm.
build/src/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
# Check the final image
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
# Check the last remaining snapshot
# (check.bin holds the image content exported right after testchain@10 was
# created, i.e. the content the snapshot is expected to have)
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
-O raw ./testdata/layer0.bin
cmp ./testdata/layer0.bin ./testdata/check.bin
format_green OK

View File

@@ -1,37 +0,0 @@
#!/bin/bash -ex
# Snapshot removal test with a single snapshot.
# Creates a 128M image, fills it, takes one snapshot, overwrites a few
# blocks of the image, removes the snapshot and verifies that the image
# exported through qemu-img still equals the local mirror file.
# Runs with -e (stop on the first failing command) and -x (trace commands).
# Pull in the shared environment from the script's own directory.
# ETCD_URL, ETCD_PORT, check_qemu and format_green are not defined here,
# so they presumably come from this include - confirm.
. `dirname $0`/run_3osds.sh
check_qemu
# Test merge to child (without "inverse rename" optimisation)
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
# Fill the image sequentially in 4M blocks. ./testdata/mirror.bin is later
# compared with exports of the image, so fio presumably mirrors everything
# it writes into that file (mirror_file option) - confirm.
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
# Create a snapshot
build/src/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
# Write something to it
# Only 8 random 1M writes (queue depth 4, randrepeat disabled), so just a
# small part of the 128M image is overwritten after the snapshot.
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
# Check the new content
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
# Merge
# (removing the only snapshot; the image content must not change)
build/src/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
# Check the final image
qemu-img convert -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
-O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin
format_green OK