Compare commits
58 Commits
Author | SHA1 | Date | |
---|---|---|---|
0c89886374 | |||
e79bef8751 | |||
ad76f84e1c | |||
db827cb34c | |||
e5c6d85ea1 | |||
6cc44c1f54 | |||
c20450c1f1 | |||
db63e58b3d | |||
31b7021330 | |||
2ebe3a468c | |||
9892fccfb0 | |||
0be86a306d | |||
d77a775948 | |||
8cc82bab39 | |||
f9d5e33ddd | |||
![]() |
f83418d93e | ||
fbf14fb0cb | |||
fb1c3e00f4 | |||
![]() |
d8332171e9 | ||
c24cc9bf0b | |||
9f57c75acf | |||
53b12641d1 | |||
![]() |
5c5c8825dc | ||
3a261ac3fc | |||
04514435de | |||
07303020fc | |||
feaf7a15cf | |||
29dda5066f | |||
1de53ef7e6 | |||
4793dbe9c3 | |||
918ea34af2 | |||
2db8184cd8 | |||
0e964b3c8c | |||
1b9296ff6c | |||
6bf136c199 | |||
b529f77264 | |||
bf9519dcdc | |||
4ba687738b | |||
8427f6fe46 | |||
efa6bc3e70 | |||
da33e9b12d | |||
![]() |
265127c1a7 | ||
2b30acfc1d | |||
7fbc38ef29 | |||
e5070e991a | |||
625552c441 | |||
78c95c94f6 | |||
488e20bf55 | |||
25d6281b3e | |||
1676e50b3a | |||
8049e3c14a | |||
93a30efd86 | |||
83fb121f36 | |||
afc97b757b | |||
68905cbf41 | |||
3fff667f13 | |||
980aec1d9b | |||
f515fcce62 |
@@ -20,7 +20,7 @@ RUN echo 'deb http://deb.debian.org/debian bullseye-backports main' >> /etc/apt/
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
|
||||
liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||
libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||
RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
||||
RUN apt-get update && apt-get -y install jq lp-solve sudo nfs-common fdisk parted
|
||||
RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
||||
|
@@ -144,6 +144,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_change_pg_count_online:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_change_pg_count_online.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_change_pg_size:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VITASTOR_VERSION "2.2.2")
|
||||
set(VITASTOR_VERSION "2.3.0")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -19,7 +19,7 @@ Vitastor нацелен в первую очередь на SSD и SSD+HDD кл
|
||||
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
|
||||
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
|
||||
|
||||
Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
|
||||
Vitastor поддерживает QEMU-драйвер, протоколы UBLK, NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
|
||||
Другие драйверы могут также быть легко реализованы.
|
||||
|
||||
Подробности смотрите в документации по ссылкам. Можете начать отсюда: [Быстрый старт](docs/intro/quickstart.ru.md).
|
||||
@@ -64,8 +64,9 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
|
||||
- [vitastor-cli](docs/usage/cli.ru.md) (консольный интерфейс)
|
||||
- [vitastor-disk](docs/usage/disk.ru.md) (управление дисками)
|
||||
- [fio](docs/usage/fio.ru.md) для тестов производительности
|
||||
- [NBD](docs/usage/nbd.ru.md) для монтирования ядром
|
||||
- [QEMU и qemu-img](docs/usage/qemu.ru.md)
|
||||
- [UBLK](docs/usage/ublk.ru.md) для монтирования ядром
|
||||
- [NBD](docs/usage/nbd.ru.md) - старый интерфейс для монтирования ядром
|
||||
- [QEMU, qemu-img и VDUSE](docs/usage/qemu.ru.md)
|
||||
- [NFS](docs/usage/nfs.ru.md) кластерная файловая система и псевдо-ФС прокси
|
||||
- [Администрирование](docs/usage/admin.ru.md)
|
||||
- Производительность
|
||||
|
@@ -19,7 +19,7 @@ supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1
|
||||
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
|
||||
or internal systems of public clouds.
|
||||
|
||||
Vitastor supports QEMU, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
|
||||
Vitastor supports QEMU, UBLK, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
|
||||
More drivers may be created easily.
|
||||
|
||||
Read more details in the documentation. You can start from here: [Quick Start](docs/intro/quickstart.en.md).
|
||||
@@ -64,8 +64,9 @@ Read more details in the documentation. You can start from here: [Quick Start](d
|
||||
- [vitastor-cli](docs/usage/cli.en.md) (command-line interface)
|
||||
- [vitastor-disk](docs/usage/disk.en.md) (disk management tool)
|
||||
- [fio](docs/usage/fio.en.md) for benchmarks
|
||||
- [NBD](docs/usage/nbd.en.md) for kernel mounts
|
||||
- [QEMU and qemu-img](docs/usage/qemu.en.md)
|
||||
- [UBLK](docs/usage/ublk.en.md) for kernel mounts
|
||||
- [NBD](docs/usage/nbd.en.md) - old interface for kernel mounts
|
||||
- [QEMU, qemu-img and VDUSE](docs/usage/qemu.en.md)
|
||||
- [NFS](docs/usage/nfs.en.md) clustered file system and pseudo-FS proxy
|
||||
- [Administration](docs/usage/admin.en.md)
|
||||
- Performance
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VITASTOR_VERSION ?= v2.2.2
|
||||
VITASTOR_VERSION ?= v2.3.0
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v2.2.2
|
||||
image: vitalif/vitastor-csi:v2.3.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -121,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v2.2.2
|
||||
image: vitalif/vitastor-csi:v2.3.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "2.2.2"
|
||||
vitastorCSIDriverVersion = "2.3.0"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
7
debian/build-vitastor-bookworm.sh
vendored
7
debian/build-vitastor-bookworm.sh
vendored
@@ -1,7 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=debian --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=bookworm -t vitastor-buildenv:bookworm -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=bookworm -v `dirname $0`/../:/root/vitastor vitastor-buildenv:bookworm /root/vitastor/debian/vitastor-build.sh
|
||||
|
7
debian/build-vitastor-bullseye.sh
vendored
7
debian/build-vitastor-bullseye.sh
vendored
@@ -1,7 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=bullseye -t vitastor-buildenv:bullseye -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=bullseye -v `dirname $0`/../:/root/vitastor vitastor-buildenv:bullseye /root/vitastor/debian/vitastor-build.sh
|
||||
|
7
debian/build-vitastor-buster.sh
vendored
7
debian/build-vitastor-buster.sh
vendored
@@ -1,7 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=debian --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=buster -t vitastor-buildenv:buster -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=buster -v `dirname $0`/../:/root/vitastor vitastor-buildenv:buster /root/vitastor/debian/vitastor-build.sh
|
||||
|
4
debian/build-vitastor-trixie.sh
vendored
Executable file
4
debian/build-vitastor-trixie.sh
vendored
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=trixie -t vitastor-buildenv:trixie -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=trixie -v `dirname $0`/../:/root/vitastor vitastor-buildenv:trixie /root/vitastor/debian/vitastor-build.sh
|
8
debian/build-vitastor-ubuntu-jammy.sh
vendored
8
debian/build-vitastor-ubuntu-jammy.sh
vendored
@@ -1,7 +1,5 @@
|
||||
#!/bin/bash
|
||||
# Ubuntu 22.04 Jammy Jellyfish
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=ubuntu --build-arg REL=jammy -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=ubuntu --build-arg REL=jammy -t vitastor-buildenv:jammy -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=jammy -v `dirname $0`/../:/root/vitastor vitastor-buildenv:jammy /root/vitastor/debian/vitastor-build.sh
|
||||
|
5
debian/build-vitastor-ubuntu-noble.sh
vendored
Executable file
5
debian/build-vitastor-ubuntu-noble.sh
vendored
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
# Ubuntu 24.04 Noble Numbat
|
||||
|
||||
docker build --build-arg DISTRO=ubuntu --build-arg REL=noble -t vitastor-buildenv:noble -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=noble -v `dirname $0`/../:/root/vitastor vitastor-buildenv:noble /root/vitastor/debian/vitastor-build.sh
|
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (2.2.2-1) unstable; urgency=medium
|
||||
vitastor (2.3.0-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
2
debian/control
vendored
2
debian/control
vendored
@@ -2,7 +2,7 @@ Source: vitastor
|
||||
Section: admin
|
||||
Priority: optional
|
||||
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8),
|
||||
Build-Depends: debhelper, g++ (>= 8), libstdc++6 (>= 8),
|
||||
linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev,
|
||||
libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
|
||||
node-bindings <!nocheck>, node-gyp, node-nan
|
||||
|
2
debian/patched-qemu.Dockerfile
vendored
2
debian/patched-qemu.Dockerfile
vendored
@@ -26,7 +26,7 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio libgoogle-perftools-dev devscripts
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y build-dep qemu
|
||||
# To build a custom version
|
||||
#RUN cp /root/packages/qemu-orig/* /root
|
||||
|
60
debian/vitastor-build.sh
vendored
Executable file
60
debian/vitastor-build.sh
vendored
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
# To be run inside the buildenv Docker container
|
||||
|
||||
set -e -x
|
||||
|
||||
[ -e /usr/lib/x86_64-linux-gnu/pkgconfig/libisal.pc ] || cp /root/vitastor/debian/libisal.pc /usr/lib/x86_64-linux-gnu/pkgconfig
|
||||
|
||||
mkdir -p /root/fio-build/
|
||||
cd /root/fio-build/
|
||||
rm -rf /root/fio-build/*
|
||||
dpkg-source -x /root/fio*.dsc
|
||||
|
||||
FULLVER=`head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'`
|
||||
VER=${FULLVER%%-*}
|
||||
rm -rf /root/vitastor-$VER
|
||||
mkdir /root/vitastor-$VER
|
||||
cd /root/vitastor
|
||||
cp -a $(ls | grep -v packages) /root/vitastor-$VER
|
||||
|
||||
rm -rf /root/vitastor/packages/vitastor-$REL
|
||||
mkdir -p /root/vitastor/packages/vitastor-$REL
|
||||
mv /root/vitastor-$VER /root/vitastor/packages/vitastor-$REL/
|
||||
|
||||
cd /root/vitastor/packages/vitastor-$REL/vitastor-$VER
|
||||
|
||||
rm -rf fio
|
||||
ln -s /root/fio-build/fio-*/ ./fio
|
||||
FIO=`head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'`
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h
|
||||
sh copy-fio-includes.sh
|
||||
rm fio
|
||||
mkdir -p a b debian/patches
|
||||
mv fio-copy b/fio
|
||||
diff -NaurpbB a b > debian/patches/fio-headers.patch || true
|
||||
echo fio-headers.patch >> debian/patches/series
|
||||
rm -rf a b
|
||||
|
||||
echo "dep:fio=$FIO" > debian/fio_version
|
||||
|
||||
cd /root/vitastor/packages/vitastor-$REL/vitastor-$VER
|
||||
mkdir mon/node_modules
|
||||
cd mon/node_modules
|
||||
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx
|
||||
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx
|
||||
|
||||
cd /root/vitastor/packages/vitastor-$REL
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER
|
||||
cd vitastor-$VER
|
||||
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa
|
||||
rm -rf /root/vitastor/packages/vitastor-$REL/vitastor-*/
|
||||
|
||||
# Why does ubuntu rename debug packages to *.ddeb?
|
||||
cd /root/vitastor/packages/vitastor-$REL
|
||||
if ls *.ddeb >/dev/null; then
|
||||
perl -i -pe 's/\.ddeb/.deb/' *.buildinfo *.changes
|
||||
for i in *.ddeb; do
|
||||
mv $i ${i%%.ddeb}.deb
|
||||
done
|
||||
fi
|
31
debian/vitastor-buildenv.Dockerfile
vendored
Normal file
31
debian/vitastor-buildenv.Dockerfile
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
# Build environment for building Vitastor packages for Debian inside a container
|
||||
# cd ..
|
||||
# docker build --build-arg DISTRO=debian --build-arg REL=bullseye -f debian/vitastor-buildenv.Dockerfile -t vitastor-buildenv:bullseye .
|
||||
# docker run --rm -e REL=bullseye -v ./:/root/vitastor vitastor-buildenv:bullseye /root/vitastor/debian/vitastor-build.sh
|
||||
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
FROM $DISTRO:$REL
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN set -e -x; \
|
||||
if [ "$REL" = "buster" ]; then \
|
||||
perl -i -pe 's/deb.debian.org/archive.debian.org/' /etc/apt/sources.list; \
|
||||
apt-get update; \
|
||||
apt-get -y install wget; \
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
|
||||
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
|
||||
fi; \
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/*.sources || true; \
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install fio libgoogle-perftools-dev devscripts libjerasure-dev cmake \
|
||||
libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
|
||||
apt-get -y build-dep fio && \
|
||||
apt-get --download-only source fio
|
1
debian/vitastor-client.install
vendored
1
debian/vitastor-client.install
vendored
@@ -2,6 +2,7 @@ usr/bin/vita
|
||||
usr/bin/vitastor-cli
|
||||
usr/bin/vitastor-rm
|
||||
usr/bin/vitastor-nbd
|
||||
usr/bin/vitastor-ublk
|
||||
usr/bin/vitastor-nfs
|
||||
usr/bin/vitastor-kv
|
||||
usr/bin/vitastor-kv-stress
|
||||
|
65
debian/vitastor.Dockerfile
vendored
65
debian/vitastor.Dockerfile
vendored
@@ -1,65 +0,0 @@
|
||||
# Build Vitastor packages for Debian inside a container
|
||||
# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
|
||||
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
FROM $DISTRO:$REL
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN set -e -x; \
|
||||
if [ "$REL" = "buster" ]; then \
|
||||
apt-get update; \
|
||||
apt-get -y install wget; \
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
|
||||
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
|
||||
fi; \
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake \
|
||||
libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
|
||||
apt-get -y build-dep fio && \
|
||||
apt-get --download-only source fio
|
||||
|
||||
ADD . /root/vitastor
|
||||
RUN set -e -x; \
|
||||
[ -e /usr/lib/x86_64-linux-gnu/pkgconfig/libisal.pc ] || cp /root/vitastor/debian/libisal.pc /usr/lib/x86_64-linux-gnu/pkgconfig; \
|
||||
mkdir -p /root/fio-build/; \
|
||||
cd /root/fio-build/; \
|
||||
rm -rf /root/fio-build/*; \
|
||||
dpkg-source -x /root/fio*.dsc; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
VER=${FULLVER%%-*}; \
|
||||
cp -r /root/vitastor vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
sh copy-fio-includes.sh; \
|
||||
rm fio; \
|
||||
mkdir -p a b debian/patches; \
|
||||
mv fio-copy b/fio; \
|
||||
diff -NaurpbB a b > debian/patches/fio-headers.patch || true; \
|
||||
echo fio-headers.patch >> debian/patches/series; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL/vitastor-$VER; \
|
||||
mkdir mon/node_modules; \
|
||||
cd mon/node_modules; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
rm -rf /root/packages/vitastor-$REL/vitastor-*/
|
@@ -1,4 +1,4 @@
|
||||
VITASTOR_VERSION ?= v2.2.2
|
||||
VITASTOR_VERSION ?= v2.3.0
|
||||
|
||||
all: build push
|
||||
|
||||
|
3
docker/etc/apt/preferences
Normal file
3
docker/etc/apt/preferences
Normal file
@@ -0,0 +1,3 @@
|
||||
Package: *
|
||||
Pin: release n=bookworm-backports
|
||||
Pin-Priority: 500
|
@@ -4,7 +4,7 @@
|
||||
#
|
||||
|
||||
# Desired Vitastor version
|
||||
VITASTOR_VERSION=v2.2.2
|
||||
VITASTOR_VERSION=v2.3.0
|
||||
|
||||
# Additional arguments for all containers
|
||||
# For example, you may want to specify a custom logging driver here
|
||||
|
@@ -25,6 +25,9 @@ affect their interaction with the cluster.
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
- [hostname](#hostname)
|
||||
- [ublk_queue_depth](#ublk_queue_depth)
|
||||
- [ublk_max_io_size](#ublk_max_io_size)
|
||||
- [qemu_file_mirror_path](#qemu_file_mirror_path)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
@@ -225,3 +228,28 @@ without destroying and recreating OSDs.
|
||||
Clients use host name to find their distance to OSDs when [localized reads](pool.en.md#local_reads)
|
||||
are enabled. By default, standard [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html)
|
||||
function is used to determine host name, but you can also override it with this parameter.
|
||||
|
||||
## ublk_queue_depth
|
||||
|
||||
- Type: integer
|
||||
- Default: 256
|
||||
|
||||
Default queue depth for [Vitastor ublk servers](../usage/ublk.en.md).
|
||||
|
||||
## ublk_max_io_size
|
||||
|
||||
- Type: integer
|
||||
|
||||
Default maximum I/O size for Vitastor [ublk servers](../usage/ublk.en.md).
|
||||
If not specified, the larger of 1 MB and the pool block size multiplied by the EC data chunk count is used.
|
||||
|
||||
## qemu_file_mirror_path
|
||||
|
||||
- Type: string
|
||||
|
||||
When set to an FS directory path (for example, `/mnt/vitastor/`), `qemu-img info` and similar
|
||||
QAPI commands return the name of the image inside this directory instead of normal
|
||||
`vitastor://?image=abc` URI as `filename`.
|
||||
|
||||
This allows you to then mount this path using [vitastor-nfs](../usage/nfs.en.md) and trick
|
||||
third-party systems like Veeam which rely on `filename` in the image info but don't support Vitastor.
|
||||
|
@@ -25,6 +25,9 @@
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
- [hostname](#hostname)
|
||||
- [ublk_queue_depth](#ublk_queue_depth)
|
||||
- [ublk_max_io_size](#ublk_max_io_size)
|
||||
- [qemu_file_mirror_path](#qemu_file_mirror_path)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
@@ -230,3 +233,30 @@ RDMA и хотите повысить пиковую производитель
|
||||
[локальные чтения](pool.ru.md#local_reads). По умолчанию для определения имени
|
||||
хоста используется стандартная функция [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html),
|
||||
но вы также можете задать имя хоста вручную данным параметром.
|
||||
|
||||
## ublk_queue_depth
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 256
|
||||
|
||||
Глубина очереди по умолчанию для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
|
||||
## ublk_max_io_size
|
||||
|
||||
- Тип: целое число
|
||||
|
||||
Максимальный размер запроса ввода-вывода для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
Если не задан, используется максимум из 1 МБ и размера блока пула, умноженного на число частей
|
||||
данных EC-пула.
|
||||
|
||||
## qemu_file_mirror_path
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Если установить эту опцию равной пути к каталогу в ФС, команда `qemu-img info` и подобные
|
||||
команды QAPI будут возвращать в поле `filename` имя образа внутри заданного каталога вместо
|
||||
обычного адреса типа `vitastor://?image=abc`.
|
||||
|
||||
Это позволяет смонтировать этот путь с помощью [vitastor-nfs](../usage/nfs.ru.md) и обмануть
|
||||
сторонние системы типа Veeam, которые полагаются на поле `filename` в информации об образе QEMU,
|
||||
но не поддерживают Vitastor.
|
||||
|
@@ -74,7 +74,7 @@ Consider `use_rdmacm` for such networks.
|
||||
## use_rdmacm
|
||||
|
||||
- Type: boolean
|
||||
- Default: true
|
||||
- Default: false
|
||||
|
||||
Use an alternative implementation of RDMA through RDMA-CM (Connection
|
||||
Manager). Works with all RDMA networks: Infiniband, iWARP and
|
||||
|
@@ -74,7 +74,7 @@ RDMA-устройства, но они не имеют соединения с
|
||||
## use_rdmacm
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: true
|
||||
- Значение по умолчанию: false
|
||||
|
||||
Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
|
||||
Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
|
||||
|
@@ -283,3 +283,36 @@
|
||||
[локальные чтения](pool.ru.md#local_reads). По умолчанию для определения имени
|
||||
хоста используется стандартная функция [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html),
|
||||
но вы также можете задать имя хоста вручную данным параметром.
|
||||
- name: ublk_queue_depth
|
||||
type: int
|
||||
default: 256
|
||||
online: false
|
||||
info: Default queue depth for [Vitastor ublk servers](../usage/ublk.en.md).
|
||||
info_ru: Глубина очереди по умолчанию для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
- name: ublk_max_io_size
|
||||
type: int
|
||||
online: false
|
||||
info: |
|
||||
Default maximum I/O size for Vitastor [ublk servers](../usage/ublk.en.md).
|
||||
If not specified, the larger of 1 MB and the pool block size multiplied by the EC data chunk count is used.
|
||||
info_ru: |
|
||||
Максимальный размер запроса ввода-вывода для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
Если не задан, используется максимум из 1 МБ и размера блока пула, умноженного на число частей
|
||||
данных EC-пула.
|
||||
- name: qemu_file_mirror_path
|
||||
type: string
|
||||
info: |
|
||||
When set to an FS directory path (for example, `/mnt/vitastor/`), `qemu-img info` and similar
|
||||
QAPI commands return the name of the image inside this directory instead of normal
|
||||
`vitastor://?image=abc` URI as `filename`.
|
||||
|
||||
This allows you to then mount this path using [vitastor-nfs](../usage/nfs.en.md) and trick
|
||||
third-party systems like Veeam which rely on `filename` in the image info but don't support Vitastor.
|
||||
info_ru: |
|
||||
Если установить эту опцию равной пути к каталогу в ФС, команда `qemu-img info` и подобные
|
||||
команды QAPI будут возвращать в поле `filename` имя образа внутри заданного каталога вместо
|
||||
обычного адреса типа `vitastor://?image=abc`.
|
||||
|
||||
Это позволяет смонтировать этот путь с помощью [vitastor-nfs](../usage/nfs.ru.md) и обмануть
|
||||
сторонние системы типа Veeam, которые полагаются на поле `filename` в информации об образе QEMU,
|
||||
но не поддерживают Vitastor.
|
||||
|
@@ -24,6 +24,8 @@
|
||||
|
||||
{{../../installation/kubernetes.en.md}}
|
||||
|
||||
{{../../installation/s3.en.md}}
|
||||
|
||||
{{../../installation/source.en.md}}
|
||||
|
||||
{{../../config.en.md|indent=1}}
|
||||
@@ -54,6 +56,8 @@
|
||||
|
||||
{{../../usage/fio.en.md}}
|
||||
|
||||
{{../../usage/ublk.en.md}}
|
||||
|
||||
{{../../usage/nbd.en.md}}
|
||||
|
||||
{{../../usage/qemu.en.md}}
|
||||
|
@@ -26,6 +26,8 @@
|
||||
|
||||
{{../../installation/source.ru.md}}
|
||||
|
||||
{{../../installation/s3.ru.md}}
|
||||
|
||||
{{../../config.ru.md|indent=1}}
|
||||
|
||||
{{../../config/common.ru.md|indent=2}}
|
||||
@@ -54,6 +56,8 @@
|
||||
|
||||
{{../../usage/fio.ru.md}}
|
||||
|
||||
{{../../usage/ublk.ru.md}}
|
||||
|
||||
{{../../usage/nbd.ru.md}}
|
||||
|
||||
{{../../usage/qemu.ru.md}}
|
||||
|
@@ -51,7 +51,7 @@
|
||||
Рассмотрите включение `use_rdmacm` для таких сетей.
|
||||
- name: use_rdmacm
|
||||
type: bool
|
||||
default: true
|
||||
default: false
|
||||
info: |
|
||||
Use an alternative implementation of RDMA through RDMA-CM (Connection
|
||||
Manager). Works with all RDMA networks: Infiniband, iWARP and
|
||||
|
@@ -26,9 +26,9 @@ at Vitastor Kubernetes operator: https://github.com/Antilles7227/vitastor-operat
|
||||
The instruction is very simple.
|
||||
|
||||
1. Download a Docker image of the desired version: \
|
||||
`docker pull vitastor:v2.2.2`
|
||||
`docker pull vitalif/vitastor:v2.3.0`
|
||||
2. Install scripts to the host system: \
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:v2.2.2 install.sh`
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitalif/vitastor:v2.3.0 install.sh`
|
||||
3. Reload udev rules: \
|
||||
`udevadm control --reload-rules`
|
||||
|
||||
|
@@ -25,9 +25,9 @@ Vitastor можно установить в Docker/Podman. При этом etcd,
|
||||
Инструкция по установке максимально простая.
|
||||
|
||||
1. Скачайте Docker-образ желаемой версии: \
|
||||
`docker pull vitastor:v2.2.2`
|
||||
`docker pull vitalif/vitastor:v2.3.0`
|
||||
2. Установите скрипты в хост-систему командой: \
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:v2.2.2 install.sh`
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitalif/vitastor:v2.3.0 install.sh`
|
||||
3. Перезагрузите правила udev: \
|
||||
`udevadm control --reload-rules`
|
||||
|
||||
|
@@ -11,12 +11,20 @@
|
||||
- Trust Vitastor package signing key:
|
||||
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||
- Add Vitastor package repository to your /etc/apt/sources.list:
|
||||
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 13 (Trixie/Sid): `deb https://vitastor.io/debian trixie main`
|
||||
- Debian 12 (Bookworm): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
|
||||
- Ubuntu 24.04 (Noble): `deb https://vitastor.io/debian noble main`
|
||||
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
|
||||
stable version from 0.9.x branch instead of 1.x
|
||||
- To always prefer vitastor-patched QEMU and Libvirt versions, add the following to `/etc/apt/preferences`:
|
||||
```
|
||||
Package: *
|
||||
Pin: origin "vitastor.io"
|
||||
Pin-Priority: 501
|
||||
```
|
||||
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
@@ -42,7 +50,6 @@
|
||||
recommended because io_uring is a relatively new technology and there is
|
||||
at least one bug which reproduces with io_uring and HP SmartArray
|
||||
controllers in 5.4
|
||||
- liburing 0.4 or newer
|
||||
- lp_solve
|
||||
- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs,
|
||||
for example [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
||||
|
@@ -11,12 +11,20 @@
|
||||
- Добавьте ключ репозитория Vitastor:
|
||||
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
||||
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 13 (Trixie/Sid): `deb https://vitastor.io/debian trixie main`
|
||||
- Debian 12 (Bookworm): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
|
||||
- Ubuntu 24.04 (Noble): `deb https://vitastor.io/debian noble main`
|
||||
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
|
||||
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
|
||||
- Чтобы всегда предпочитались версии пакетов QEMU и Libvirt с патчами Vitastor, добавьте в `/etc/apt/preferences`:
|
||||
```
|
||||
Package: *
|
||||
Pin: origin "vitastor.io"
|
||||
Pin-Priority: 501
|
||||
```
|
||||
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
@@ -41,7 +49,6 @@
|
||||
- Ядро Linux 5.4 или новее, для поддержки io_uring. Рекомендуется даже 5.8,
|
||||
так как io_uring - относительно новый интерфейс и в версиях до 5.8 встречались
|
||||
некоторые баги, например, зависание с io_uring и контроллером HP SmartArray
|
||||
- liburing 0.4 или новее
|
||||
- lp_solve
|
||||
- etcd 3.4.15 или новее. Более старые версии не будут работать из-за разных багов,
|
||||
например, [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
||||
|
@@ -9,7 +9,7 @@
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.x are supported):
|
||||
|
||||
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
||||
bookworm for 8.1+, pve8.0 for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
||||
trixie for 9.0+, bookworm for 8.1+, pve8.0 for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
||||
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
||||
- Define storage in `/etc/pve/storage.cfg` (see below)
|
||||
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
||||
|
@@ -9,7 +9,7 @@
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.x):
|
||||
|
||||
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
||||
bookworm для 8.1+, pve8.0 для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
||||
trixie для 9.0+, bookworm для 8.1+, pve8.0 для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
||||
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
||||
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
||||
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
||||
|
@@ -15,7 +15,7 @@
|
||||
- gcc and g++ 8 or newer, clang 10 or newer, or other compiler with C++11 plus
|
||||
designated initializers support from C++20
|
||||
- CMake
|
||||
- liburing, jerasure headers and libraries
|
||||
- jerasure headers and libraries
|
||||
- ISA-L, libibverbs and librdmacm headers and libraries (optional)
|
||||
- tcmalloc (google-perftools-dev)
|
||||
|
||||
|
@@ -15,7 +15,7 @@
|
||||
- gcc и g++ >= 8, либо clang >= 10, либо другой компилятор с поддержкой C++11 плюс
|
||||
назначенных инициализаторов (designated initializers) из C++20
|
||||
- CMake
|
||||
- Заголовки и библиотеки liburing, jerasure
|
||||
- Заголовки и библиотеки jerasure
|
||||
- Опционально - заголовки и библиотеки ISA-L, libibverbs, librdmacm
|
||||
- tcmalloc (google-perftools-dev)
|
||||
|
||||
|
@@ -52,7 +52,7 @@
|
||||
- Generic user-space client library
|
||||
- [Native QEMU driver](../usage/qemu.en.md)
|
||||
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
|
||||
- [NBD proxy for kernel mounts](../usage/nbd.en.md)
|
||||
- [UBLK](../usage/ublk.en.md) and [NBD](../usage/nbd.en.md) servers for kernel mounts
|
||||
- [Simplified NFS proxy for file-based image access emulation (suitable for VMWare)](../usage/nfs.en.md#pseudo-fs)
|
||||
|
||||
## Roadmap
|
||||
|
@@ -54,7 +54,7 @@
|
||||
- Общая пользовательская клиентская библиотека для работы с кластером
|
||||
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
|
||||
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
|
||||
- [NBD-прокси для монтирования образов ядром](../usage/nbd.ru.md) ("блочное устройство в режиме пользователя")
|
||||
- [UBLK](../usage/ublk.ru.md) и [NBD](../usage/nbd.ru.md) серверы для монтирования образов ядром ("блочное устройство в режиме пользователя")
|
||||
- [Упрощённая NFS-прокси для эмуляции файлового доступа к образам (подходит для VMWare)](../usage/nfs.ru.md#псевдо-фс)
|
||||
|
||||
## Планы развития
|
||||
|
@@ -89,6 +89,8 @@ POSIX features currently not implemented in VitastorFS:
|
||||
instead of actually allocated space
|
||||
- Access times (`atime`) are not tracked (like `-o noatime`)
|
||||
- Modification time (`mtime`) is updated lazily every second (like `-o lazytime`)
|
||||
- Permission enforcement is disabled by default (and Linux NFS client doesn't
|
||||
enforce them either). Use `--enforce 1` to enable it.
|
||||
|
||||
Other notable missing features which should be addressed in the future:
|
||||
- Inode ID reuse. Currently inode IDs always grow, the limit is 2^48 inodes, so
|
||||
@@ -258,4 +260,5 @@ Options:
|
||||
| `--nfspath <PATH>` | set NFS export path to \<PATH> (default is /) |
|
||||
| `--pidfile <FILE>` | write process ID to the specified file |
|
||||
| `--logfile <FILE>` | log to the specified file |
|
||||
| `--enforce 1` | enforce permissions at the server side (no by default) |
|
||||
| `--foreground 1` | stay in foreground, do not daemonize |
|
||||
|
@@ -91,6 +91,8 @@ JSON-формате :-). Для инспекции содержимого БД
|
||||
stat(2), так что `du` всегда показывает сумму размеров файлов, а не фактически занятое место
|
||||
- Времена доступа (`atime`) не отслеживаются (как будто ФС смонтирована с `-o noatime`)
|
||||
- Времена модификации (`mtime`) отслеживаются асинхронно (как будто ФС смонтирована с `-o lazytime`)
|
||||
- Привилегии доступа по умолчанию не проверяются сервером (клиент NFS Linux их также не проверяет).
|
||||
Чтобы включить проверки, используйте опцию `--enforce 1`.
|
||||
|
||||
Другие недостающие функции, которые нужно добавить в будущем:
|
||||
- Переиспользование номеров инодов. В текущей реализации номера инодов всё время
|
||||
@@ -270,4 +272,5 @@ VitastorFS из GPUDirect.
|
||||
| `--nfspath <PATH>` | установить путь NFS-экспорта в \<PATH> (по умолчанию /) |
|
||||
| `--pidfile <FILE>` | записать ID процесса в заданный файл |
|
||||
| `--logfile <FILE>` | записывать логи в заданный файл |
|
||||
| `--enforce 1` | проверять права доступа на стороне сервера (по умолчанию нет) |
|
||||
| `--foreground 1` | не уходить в фон после запуска |
|
||||
|
@@ -130,23 +130,16 @@ Linux kernel, starting with version 5.15, supports a new interface for attaching
|
||||
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
|
||||
exporting QEMU block devices over this protocol using qemu-storage-daemon.
|
||||
|
||||
VDUSE is currently the best interface to attach Vitastor disks as kernel devices because:
|
||||
- It avoids data copies and thus achieves much better performance than [NBD](nbd.en.md)
|
||||
- It doesn't have NBD timeout problem - the device doesn't die if an operation executes for too long
|
||||
VDUSE advantages:
|
||||
|
||||
- VDUSE copies memory 1 time instead of 2, and is thus faster than [NBD](nbd.en.md) for linear read/write.
|
||||
- It doesn't have the NBD timeout problem - the device doesn't die if an operation executes for too long.
|
||||
- It doesn't have the hung device problem - if the userspace process dies it can be restarted (!)
|
||||
and block device will continue operation
|
||||
- It doesn't seem to have the device number limit
|
||||
and block device will continue operation (UBLK can do it too).
|
||||
- It doesn't seem to have the device number limit (UBLK also doesn't).
|
||||
|
||||
Example performance comparison:
|
||||
|
||||
| | direct fio | NBD | VDUSE |
|
||||
|----------------------|-------------|-------------|-------------|
|
||||
| linear write | 3.85 GB/s | 1.12 GB/s | 3.85 GB/s |
|
||||
| 4k random write Q128 | 240000 iops | 120000 iops | 178000 iops |
|
||||
| 4k random write Q1 | 9500 iops | 7620 iops | 7640 iops |
|
||||
| linear read | 4.3 GB/s | 1.8 GB/s | 2.85 GB/s |
|
||||
| 4k random read Q128 | 287000 iops | 140000 iops | 189000 iops |
|
||||
| 4k random read Q1 | 9600 iops | 7640 iops | 7780 iops |
|
||||
At the same time, VDUSE may be slower or faster than [UBLK](ublk.en.md) for linear read/write,
|
||||
and iops-wise it's sometimes even slower than NBD. See performance comparison examples on the [UBLK](ublk.en.md) page.
|
||||
|
||||
To try VDUSE you need at least Linux 5.15, built with VDUSE support
|
||||
(CONFIG_VDPA=m, CONFIG_VDPA_USER=m, CONFIG_VIRTIO_VDPA=m).
|
||||
@@ -193,3 +186,12 @@ To remove the device:
|
||||
vdpa dev del test1
|
||||
kill <qemu-storage-daemon_process_PID>
|
||||
```
|
||||
|
||||
## Veeam
|
||||
|
||||
Vitastor QEMU driver has a feature that allows tricking third-party systems like Veeam which are not able to parse qemu-img
|
||||
vitastor URIs: [qemu_file_mirror_path](../config/client.en.md#qemu_file_mirror_path).
|
||||
|
||||
To make such systems work, you should set this option to an FS directory path (for example, `/mnt/vitastor/`) and
|
||||
mount this directory using [`vitastor-nfs mount --block`](../usage/nfs.en.md). It will make them access
|
||||
your images using files and, hopefully, succeed in doing their normal job :).
|
||||
|
@@ -132,24 +132,16 @@ qemu-system-x86_64 -enable-kvm -m 2048 -M accel=kvm,memory-backend=mem \
|
||||
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
|
||||
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
|
||||
|
||||
VDUSE - на данный момент лучший интерфейс для подключения дисков Vitastor в виде блочных
|
||||
устройств на уровне ядра, ибо:
|
||||
- VDUSE не копирует данные и поэтому достигает значительно лучшей производительности, чем [NBD](nbd.ru.md)
|
||||
- Также оно не имеет проблемы NBD-таймаута - устройство не умирает, если операция выполняется слишком долго
|
||||
- Также оно не имеет проблемы подвисающих устройств - если процесс-обработчик умирает, его можно
|
||||
перезапустить (!) и блочное устройство продолжит работать
|
||||
- По-видимому, у него нет предела числа подключаемых в систему устройств
|
||||
Преимущества VDUSE:
|
||||
|
||||
Пример сравнения производительности:
|
||||
- VDUSE копирует данные 1 раз, а не 2, и поэтому он быстрее, чем [NBD](nbd.ru.md) при линейном доступе.
|
||||
- VDUSE не имеет проблемы NBD-таймаута - устройство не умирает, если операция выполняется слишком долго.
|
||||
- VDUSE не имеет проблемы подвисающих устройств - если процесс-обработчик умирает, его можно
|
||||
перезапустить (!) и блочное устройство продолжит работать (в UBLK это тоже поддерживается).
|
||||
- По-видимому, у него нет предела числа подключаемых в систему устройств (в UBLK лимита тоже нет).
|
||||
|
||||
| | Прямой fio | NBD | VDUSE |
|
||||
|--------------------------|-------------|-------------|-------------|
|
||||
| линейная запись | 3.85 GB/s | 1.12 GB/s | 3.85 GB/s |
|
||||
| 4k случайная запись Q128 | 240000 iops | 120000 iops | 178000 iops |
|
||||
| 4k случайная запись Q1 | 9500 iops | 7620 iops | 7640 iops |
|
||||
| линейное чтение | 4.3 GB/s | 1.8 GB/s | 2.85 GB/s |
|
||||
| 4k случайное чтение Q128 | 287000 iops | 140000 iops | 189000 iops |
|
||||
| 4k случайное чтение Q1 | 9600 iops | 7640 iops | 7780 iops |
|
||||
Однако, при линейном доступе VDUSE может быть медленнее UBLK (а может быть и быстрее), а по iops
|
||||
VDUSE иногда даже медленнее NBD. Пример сравнения производительности смотрите на странице [UBLK](ublk.ru.md).
|
||||
|
||||
Чтобы попробовать VDUSE, вам нужно ядро Linux как минимум версии 5.15, собранное с поддержкой
|
||||
VDUSE (CONFIG_VDPA=m, CONFIG_VDPA_USER=m, CONFIG_VIRTIO_VDPA=m).
|
||||
@@ -196,3 +188,12 @@ vdpa dev add name test1 mgmtdev vduse
|
||||
vdpa dev del test1
|
||||
kill <PID_процесса_qemu-storage-daemon>
|
||||
```
|
||||
|
||||
## Veeam
|
||||
|
||||
Драйвер Vitastor QEMU имеет функцию, которая позволяет обманывать сторонние системы типа Veeam, которые
|
||||
не могут сами по себе разобрать адреса дисков в vitastor: [qemu_file_mirror_path](../config/client.ru.md#qemu_file_mirror_path).
|
||||
|
||||
Чтобы заставить такие системы работать, вам нужно установить эту опцию равной пути к некоторому каталогу
|
||||
в ФС (например, `/mnt/vitastor/`) и примонтировать этот каталог с помощью [`vitastor-nfs mount --block`](../usage/nfs.ru.md).
|
||||
Они начнут обращаться к образам как к файлам и, вероятно, смогут заработать корректно :).
|
||||
|
116
docs/usage/ublk.en.md
Normal file
116
docs/usage/ublk.en.md
Normal file
@@ -0,0 +1,116 @@
|
||||
[Documentation](../../README.md#documentation) → Usage → UBLK
|
||||
|
||||
-----
|
||||
|
||||
[Читать на русском](ublk.ru.md)
|
||||
|
||||
# UBLK
|
||||
|
||||
[ublk](https://docs.kernel.org/block/ublk.html) is a new io_uring-based Linux interface
|
||||
for user-space block device drivers, available since Linux 6.0.
|
||||
|
||||
It's not zero-copy, but it's still a fast implementation, outperforming both [NBD](nbd.en.md)
|
||||
and [VDUSE](qemu.en.md#vduse) iops-wise; it may or may not outperform VDUSE in linear I/O MB/s.
|
||||
ublk also allows recovering devices even if the server (vitastor-ublk process) dies.
|
||||
|
||||
## Example performance comparison
|
||||
|
||||
TCP (100G), 3 hosts each with 6 NVMe OSDs, 3 replicas, single client
|
||||
|
||||
| | direct fio | NBD | VDUSE | UBLK |
|
||||
|----------------------|-------------|-------------|------------|-------------|
|
||||
| linear write | 3807 MB/s | 1832 MB/s | 3226 MB/s | 3027 MB/s |
|
||||
| linear read | 3067 MB/s | 1885 MB/s | 1800 MB/s | 2076 MB/s |
|
||||
| 4k random write Q128 | 128624 iops | 91060 iops | 94621 iops | 149450 iops |
|
||||
| 4k random read Q128 | 117769 iops | 153408 iops | 93157 iops | 171987 iops |
|
||||
| 4k random write Q1 | 8090 iops | 6442 iops | 6316 iops | 7272 iops |
|
||||
| 4k random read Q1 | 9474 iops | 7200 iops | 6840 iops | 8038 iops |
|
||||
|
||||
RDMA (100G), 3 hosts each with 6 NVMe OSDs, 3 replicas, single client
|
||||
|
||||
| | direct fio | NBD | VDUSE | UBLK |
|
||||
|----------------------|-------------|-------------|-------------|-------------|
|
||||
| linear write | 6998 MB/s | 1878 MB/s | 4249 MB/s | 3140 MB/s |
|
||||
| linear read | 8628 MB/s | 3389 MB/s | 5062 MB/s | 3674 MB/s |
|
||||
| 4k random write Q128 | 222541 iops | 181589 iops | 138281 iops | 218222 iops |
|
||||
| 4k random read Q128 | 412647 iops | 239987 iops | 151663 iops | 269583 iops |
|
||||
| 4k random write Q1 | 11601 iops | 8592 iops | 9111 iops | 10000 iops |
|
||||
| 4k random read Q1 | 10102 iops | 7788 iops | 8111 iops | 8965 iops |
|
||||
|
||||
## Commands
|
||||
|
||||
vitastor-ublk supports the following commands:
|
||||
|
||||
- [map](#map)
|
||||
- [unmap](#unmap)
|
||||
- [ls](#ls)
|
||||
|
||||
## map
|
||||
|
||||
To create a local block device for a Vitastor image run:
|
||||
|
||||
```
|
||||
vitastor-ublk map [/dev/ublkbN] --image testimg
|
||||
```
|
||||
|
||||
It will output a block device name like /dev/ublkb0 which you can then use as a normal disk.
|
||||
|
||||
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
|
||||
|
||||
vitastor-ublk supports all usual Vitastor configuration options like `--config_path <path_to_config>` plus ublk-specific ones:
|
||||
|
||||
* `--recover` \
|
||||
Recover a mapped device if the previous ublk server is dead.
|
||||
* `--queue_depth 256` \
|
||||
Maximum queue size for the device.
|
||||
* `--max_io_size 1M` \
|
||||
Maximum single I/O size for the device. Default: `max(1 MB, pool block size * EC part count)`.
|
||||
* `--readonly` \
|
||||
Make the device read-only.
|
||||
* `--hdd` \
|
||||
Mark the device as rotational.
|
||||
* `--logfile /path/to/log/file.txt` \
|
||||
Write log messages to the specified file instead of dropping them (in background mode)
|
||||
or printing them to the standard output (in foreground mode).
|
||||
* `--dev_num N` \
|
||||
Use the specified device /dev/ublkbN instead of automatic selection (alternative syntax
|
||||
to /dev/ublkbN positional parameter).
|
||||
* `--foreground 1` \
|
||||
Stay in foreground, do not daemonize.
|
||||
|
||||
Note that `ublk_queue_depth` and `ublk_max_io_size` may also be specified
|
||||
in `/etc/vitastor/vitastor.conf` or in another configuration file specified with `--config_path`.
|
||||
|
||||
## unmap
|
||||
|
||||
To unmap the device run:
|
||||
|
||||
```
|
||||
vitastor-ublk unmap /dev/ublkb0
|
||||
```
|
||||
|
||||
## ls
|
||||
|
||||
```
|
||||
vitastor-ublk ls [--json]
|
||||
```
|
||||
|
||||
List mapped images.
|
||||
|
||||
Example output (normal format):
|
||||
|
||||
```
|
||||
/dev/ublkb0
|
||||
image: bench
|
||||
pid: 584536
|
||||
|
||||
/dev/ublkb1
|
||||
image: bench1
|
||||
pid: 584546
|
||||
```
|
||||
|
||||
Example output (JSON format):
|
||||
|
||||
```
|
||||
{"/dev/ublkb0": {"image": "bench", "pid": 584536}, "/dev/ublkb1": {"image": "bench1", "pid": 584546}}
|
||||
```
|
121
docs/usage/ublk.ru.md
Normal file
121
docs/usage/ublk.ru.md
Normal file
@@ -0,0 +1,121 @@
|
||||
[Документация](../../README-ru.md#документация) → Использование → UBLK
|
||||
|
||||
-----
|
||||
|
||||
[Read in English](ublk.en.md)
|
||||
|
||||
# UBLK
|
||||
|
||||
[ublk](https://docs.kernel.org/block/ublk.html) - это новый Linux-интерфейс на основе io_uring
|
||||
для реализации блочных устройств в пространстве пользователя, доступный, начиная с Linux 6.0.
|
||||
|
||||
ublk тоже копирует память (т.е. не является zero-copy), но по IOPS всё равно обгоняет и
|
||||
[NBD](nbd.ru.md), и [VDUSE](qemu.ru.md#vduse), и иногда может даже обгонять VDUSE по
|
||||
скорости линейного доступа. Также ublk позволяет оживлять устройства, у которых умер
|
||||
сервер (процесс-обработчик vitastor-ublk).
|
||||
|
||||
## Пример сравнения производительности
|
||||
|
||||
TCP (100G), 3 сервера с 6 NVMe OSD каждый, 3 реплики, один клиент
|
||||
|
||||
| | Прямой fio | NBD | VDUSE | UBLK |
|
||||
|--------------------------|-------------|-------------|------------|-------------|
|
||||
| линейная запись | 3807 MB/s | 1832 MB/s | 3226 MB/s | 3027 MB/s |
|
||||
| линейное чтение | 3067 MB/s | 1885 MB/s | 1800 MB/s | 2076 MB/s |
|
||||
| 4k случайная запись Q128 | 128624 iops | 91060 iops | 94621 iops | 149450 iops |
|
||||
| 4k случайное чтение Q128 | 117769 iops | 153408 iops | 93157 iops | 171987 iops |
|
||||
| 4k случайная запись Q1 | 8090 iops | 6442 iops | 6316 iops | 7272 iops |
|
||||
| 4k случайное чтение Q1 | 9474 iops | 7200 iops | 6840 iops | 8038 iops |
|
||||
|
||||
RDMA (100G), 3 сервера с 6 NVMe OSD каждый, 3 реплики, один клиент
|
||||
|
||||
| | Прямой fio | NBD | VDUSE | UBLK |
|
||||
|--------------------------|-------------|-------------|-------------|-------------|
|
||||
| линейная запись | 6998 MB/s | 1878 MB/s | 4249 MB/s | 3140 MB/s |
|
||||
| линейное чтение | 8628 MB/s | 3389 MB/s | 5062 MB/s | 3674 MB/s |
|
||||
| 4k случайная запись Q128 | 222541 iops | 181589 iops | 138281 iops | 218222 iops |
|
||||
| 4k случайное чтение Q128 | 412647 iops | 239987 iops | 151663 iops | 269583 iops |
|
||||
| 4k случайная запись Q1 | 11601 iops | 8592 iops | 9111 iops | 10000 iops |
|
||||
| 4k случайное чтение Q1 | 10102 iops | 7788 iops | 8111 iops | 8965 iops |
|
||||
|
||||
## Команды
|
||||
|
||||
vitastor-ublk поддерживает следующие команды:
|
||||
|
||||
- [map](#map)
|
||||
- [unmap](#unmap)
|
||||
- [ls](#ls)
|
||||
|
||||
## map
|
||||
|
||||
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
||||
|
||||
```
|
||||
vitastor-ublk map [/dev/ublkbN] --image testimg
|
||||
```
|
||||
|
||||
Команда напечатает название блочного устройства вида /dev/ublkb0, которое потом можно
|
||||
будет использовать как обычный диск.
|
||||
|
||||
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
|
||||
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
|
||||
|
||||
vitastor-ublk поддерживает все обычные опции Vitastor, например, `--config_path <path_to_config>`,
|
||||
плюс специфичные для ublk:
|
||||
|
||||
* `--recover` \
|
||||
Восстановить ранее подключённое устройство, у которого умер обработчик.
|
||||
* `--queue_depth 256` \
|
||||
Максимальная глубина очереди устройства.
|
||||
* `--max_io_size 1M` \
|
||||
Максимальный размер запроса ввода-вывода для устройства. По умолчанию: `max(1 MB, блок данных пула * число частей данных EC)`.
|
||||
* `--readonly` \
|
||||
Подключить устройство в режиме только для чтения.
|
||||
* `--hdd` \
|
||||
Пометить устройство как вращающийся жёсткий диск (флаг rotational).
|
||||
* `--logfile /path/to/log/file.txt` \
|
||||
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
|
||||
при фоновом режиме запуска или печати на стандартный вывод при запуске
|
||||
в консоли с `--foreground 1`.
|
||||
* `--dev_num N` \
|
||||
Использовать заданное устройство `/dev/ublkbN` вместо автоматического подбора.
|
||||
* `--foreground 1` \
|
||||
Не уводить процесс в фоновый режим.
|
||||
|
||||
Обратите внимание, что опции `ublk_queue_depth` и `ublk_max_io_size` можно
|
||||
также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
|
||||
заданном опцией `--config_path`.
|
||||
|
||||
## unmap
|
||||
|
||||
Для отключения устройства выполните:
|
||||
|
||||
```
|
||||
vitastor-ublk unmap /dev/ublkb0
|
||||
```
|
||||
|
||||
## ls
|
||||
|
||||
```
|
||||
vitastor-ublk ls [--json]
|
||||
```
|
||||
|
||||
Вывести подключённые устройства.
|
||||
|
||||
Пример вывода в обычном формате:
|
||||
|
||||
```
|
||||
/dev/ublkb0
|
||||
image: bench
|
||||
pid: 584536
|
||||
|
||||
/dev/ublkb1
|
||||
image: bench1
|
||||
pid: 584546
|
||||
```
|
||||
|
||||
Пример вывода в JSON-формате:
|
||||
|
||||
```
|
||||
{"/dev/ublkb0": {"image": "bench", "pid": 584536}, "/dev/ublkb1": {"image": "bench1", "pid": 584546}}
|
||||
```
|
@@ -96,6 +96,7 @@ class Mon
|
||||
}
|
||||
else
|
||||
{
|
||||
res.setHeader('Content-Type', 'text/plain; version=0.0.4; charset=utf-8');
|
||||
res.write(export_prometheus_metrics(this.state));
|
||||
}
|
||||
}
|
||||
|
@@ -179,7 +179,7 @@ function filter_osds_by_block_layout(orig_tree, osd_stats, block_size, bitmap_gr
|
||||
if (orig_tree[osd].level === 'osd')
|
||||
{
|
||||
const osd_stat = osd_stats[osd];
|
||||
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
|
||||
if (osd_stat && (osd_stat.data_block_size && osd_stat.data_block_size != block_size ||
|
||||
osd_stat.bitmap_granularity && osd_stat.bitmap_granularity != bitmap_granularity ||
|
||||
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
|
||||
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "2.2.2",
|
||||
"version": "2.3.0",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
@@ -9,7 +9,7 @@
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "UNLICENSED",
|
||||
"dependencies": {
|
||||
"antietcd": "^1.1.2",
|
||||
"antietcd": "^1.1.3",
|
||||
"sprintf-js": "^1.1.2",
|
||||
"ws": "^7.2.5"
|
||||
},
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor",
|
||||
"version": "2.2.2",
|
||||
"version": "2.3.0",
|
||||
"description": "Low-level native bindings to Vitastor client library",
|
||||
"main": "index.js",
|
||||
"keywords": [
|
||||
|
@@ -261,7 +261,7 @@ sub free_image
|
||||
my ($vtype, $name, $vmid, undef, undef, undef) = $class->parse_volname($volname);
|
||||
$class->deactivate_volume($storeid, $scfg, $volname);
|
||||
my $full_list = run_cli($scfg, [ 'ls', '-l' ]);
|
||||
my $list = _process_list($scfg, $storeid, $full_list);
|
||||
my $list = _process_list($scfg, $storeid, $full_list, 0);
|
||||
# Remove image and all its snapshots
|
||||
my $rm_names = {
|
||||
map { ($prefix.$_->{name} => 1) }
|
||||
@@ -269,6 +269,10 @@ sub free_image
|
||||
@$list
|
||||
};
|
||||
my $children = [ grep { $_->{parent_name} && $rm_names->{$_->{parent_name}} } @$full_list ];
|
||||
$children = [ grep {
|
||||
substr($_->{name}, 0, length($prefix.$name)) ne $prefix.$name &&
|
||||
substr($_->{name}, 0, length($prefix.$name)+1) ne $prefix.$name.'@'
|
||||
} @$children ];
|
||||
die "Image has children: ".join(', ', map {
|
||||
substr($_->{name}, 0, length $prefix) eq $prefix
|
||||
? substr($_->name, length $prefix)
|
||||
@@ -288,14 +292,15 @@ sub free_image
|
||||
|
||||
sub _process_list
|
||||
{
|
||||
my ($scfg, $storeid, $result) = @_;
|
||||
my ($scfg, $storeid, $result, $skip_snapshot) = @_;
|
||||
$skip_snapshot = 1 if !defined $skip_snapshot;
|
||||
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
|
||||
my $list = [];
|
||||
foreach my $el (@$result)
|
||||
{
|
||||
next if !$el->{name} || length($prefix) && substr($el->{name}, 0, length $prefix) ne $prefix;
|
||||
my $name = substr($el->{name}, length $prefix);
|
||||
next if $name =~ /@/;
|
||||
next if $skip_snapshot && $name =~ /@/;
|
||||
my ($owner) = $name =~ /^(?:vm|base)-(\d+)-/s;
|
||||
next if !defined $owner;
|
||||
my $parent = !defined $el->{parent_name}
|
||||
@@ -410,8 +415,8 @@ sub volume_size_info
|
||||
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
|
||||
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
|
||||
my $info = _process_list($scfg, $storeid, run_cli($scfg, [ 'ls', $prefix.$name ]))->[0];
|
||||
#return wantarray ? ($size, $format, $used, $parent, $st->ctime) : $size;
|
||||
return $info->{size};
|
||||
# (size, format, used, parent, ctime)
|
||||
return wantarray ? ($info->{size}, $info->{format}, $info->{size}, $info->{parent}, 0) : $info->{size};
|
||||
}
|
||||
|
||||
sub volume_resize
|
||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VITASTOR_VERSION = '2.2.2'
|
||||
VITASTOR_VERSION = '2.3.0'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
637
patches/libvirt-11.5-vitastor.diff
Normal file
637
patches/libvirt-11.5-vitastor.diff
Normal file
@@ -0,0 +1,637 @@
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3da1..5f5daa8341 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index 1e24e41a48..ce359a4cf8 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7435,7 +7435,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -31871,6 +31872,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index b28af7fa56..d1aae6e43e 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -504,6 +504,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -576,7 +577,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -584,10 +585,14 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
_("<snapshot> element is currently supported only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
-
|
||||
+ }
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
- _("<config> element is currently supported only with 'rbd' disks"));
|
||||
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index 183dd5db5e..dcc0d1a778 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -2066,6 +2066,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2416,6 +2445,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 1dc9365bf2..a8a736be81 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957cfe..720c07ef74 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index 8a063be244..dd9c7f11a2 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -89,6 +89,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1314,6 +1315,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index ebddf28cd6..873a2be65c 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -130,6 +130,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 59fa5da372..4739167f5f 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index db7660aac4..561df34709 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index bdd30dd65a..5353e00b4a 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -1081,6 +1081,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index ec8de30c01..61eab9606d 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1461,6 +1461,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index 32568d4ae6..e625fa0720 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -731,6 +731,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+/**
+ * qemuBlockStorageSourceGetVitastorProps:
+ * @src: network storage source using the Vitastor protocol
+ *
+ * Builds the properties object for the QEMU 'vitastor' block driver:
+ *   etcd-host    comma separated "name:port" list built from src->hosts
+ *   etcd-prefix  taken from src->query
+ *   config-path  taken from src->configFile
+ *   image        taken from src->path
+ * The first three use the "S:" conversion and 'image' uses "s:".
+ *
+ * Returns the new JSON object, or NULL on failure. An unsupported host
+ * transport is reported as VIR_ERR_CONFIG_UNSUPPORTED instead of failing
+ * silently.
+ */
+static virJSONValue *
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
+{
+    virJSONValue *ret = NULL;
+    g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+    g_autofree char *etcd = NULL;
+    size_t i;
+
+    for (i = 0; i < src->nhosts; i++) {
+        virStorageNetHostDef *host = src->hosts + i;
+
+        if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                           _("only TCP transport is supported for Vitastor etcd hosts"));
+            return NULL;
+        }
+
+        /* literal format string keeps printf-style format checking intact */
+        if (i > 0)
+            virBufferAddLit(&buf, ",");
+        virBufferAsprintf(&buf, "%s:%u", host->name, host->port);
+    }
+
+    /* 'etcd' stays NULL when no hosts are configured */
+    if (src->nhosts > 0)
+        etcd = virBufferContentAndReset(&buf);
+
+    if (virJSONValueObjectAdd(&ret,
+                              "S:etcd-host", etcd,
+                              "S:etcd-prefix", src->query,
+                              "S:config-path", src->configFile,
+                              "s:image", src->path,
+                              NULL) < 0)
+        return NULL;
+
+    return ret;
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSshProps(virStorageSource *src)
|
||||
{
|
||||
@@ -1082,6 +1114,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
driver = "ssh";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSshProps(src)))
|
||||
@@ -1985,6 +2023,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2365,6 +2404,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
if (srcPriv->nbdkitProcess) {
|
||||
/* disk creation not yet supported with nbdkit, and even if it
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index 0d2548d8d4..91121d6e1f 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -4526,7 +4526,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -8954,6 +8955,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index 8128154749..afb339b9b0 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -662,6 +662,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -887,6 +888,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index e19e032427..59f91f4710 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index 80681924ea..8a3ade9ec0 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+/**
+ * virStorageSourceParseVitastorColonString:
+ * @colonstr: legacy "[vitastor:]key=value[:key=value...]" string
+ * @src: storage source to fill in
+ *
+ * Splits @colonstr on ':' characters that are not preceded by a
+ * backslash and handles the following keys:
+ *   image=        stored in src->path (mandatory)
+ *   etcd-prefix=  stored in src->query
+ *   config-path=  stored in src->configFile
+ *   etcd-host=    hosts separated by "\,", "\;" or "\ ", each one handed
+ *                 to virStorageSourceRBDAddHost()
+ * Any other key is ignored. When a key is repeated the last value wins
+ * and the previous one is freed.
+ *
+ * Returns 0 on success, -1 on failure. A missing 'image' key is reported
+ * as VIR_ERR_INVALID_ARG.
+ */
+static int
+virStorageSourceParseVitastorColonString(const char *colonstr,
+                                         virStorageSource *src)
+{
+    char *p, *e, *next;
+    g_autofree char *options = NULL;
+
+    /* optionally skip the "vitastor:" prefix if provided */
+    if (STRPREFIX(colonstr, "vitastor:"))
+        colonstr += strlen("vitastor:");
+
+    /* work on a private copy: delimiters are overwritten with NUL below */
+    options = g_strdup(colonstr);
+
+    p = options;
+    while (*p) {
+        /* find : delimiter or end of string, stepping over escaped chars */
+        for (e = p; *e && *e != ':'; ++e) {
+            if (*e == '\\') {
+                e++;
+                if (*e == '\0')
+                    break;
+            }
+        }
+        if (*e == '\0') {
+            next = e; /* last kv pair */
+        } else {
+            next = e + 1;
+            *e = '\0';
+        }
+
+        if (STRPREFIX(p, "image=")) {
+            /* free a value from an earlier duplicate key to avoid a leak */
+            g_free(src->path);
+            src->path = g_strdup(p + strlen("image="));
+        } else if (STRPREFIX(p, "etcd-prefix=")) {
+            g_free(src->query);
+            src->query = g_strdup(p + strlen("etcd-prefix="));
+        } else if (STRPREFIX(p, "config-path=")) {
+            g_free(src->configFile);
+            src->configFile = g_strdup(p + strlen("config-path="));
+        } else if (STRPREFIX(p, "etcd-host=")) {
+            char *h, *sep;
+
+            h = p + strlen("etcd-host=");
+            while (h < e) {
+                /* cut the list at the next escaped separator, if any */
+                for (sep = h; sep < e; ++sep) {
+                    if (*sep == '\\' && (sep[1] == ',' ||
+                                         sep[1] == ';' ||
+                                         sep[1] == ' ')) {
+                        *sep = '\0';
+                        sep += 2;
+                        break;
+                    }
+                }
+
+                if (virStorageSourceRBDAddHost(src, h) < 0)
+                    return -1;
+
+                h = sep;
+            }
+        }
+
+        p = next;
+    }
+
+    if (!src->path) {
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("missing image name in Vitastor backing volume string"));
+        return -1;
+    }
+
+    return 0;
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+/**
+ * virStorageSourceParseBackingJSONVitastor:
+ * @src: storage source to fill in
+ * @json: JSON object describing the 'vitastor' backing volume
+ * @jsonstr: unused
+ * @opaque: unused
+ *
+ * Marks @src as a Vitastor network source. When @json carries a
+ * 'filename' string, it is parsed by
+ * virStorageSourceParseVitastorColonString() and nothing else is read.
+ * Otherwise 'image' (mandatory), 'config-path' and 'etcd-prefix' are
+ * copied into src->path, src->configFile and src->query, and every
+ * entry of the optional 'server' array becomes one element of
+ * src->hosts.
+ *
+ * NOTE(review): only a 'server' array is read here; an 'etcd-host'
+ * string member is not looked at - confirm this is intended.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
+                                         virJSONValue *json,
+                                         const char *jsonstr G_GNUC_UNUSED,
+                                         int opaque G_GNUC_UNUSED)
+{
+    const char *legacy = virJSONValueObjectGetString(json, "filename");
+    const char *name = NULL;
+    virJSONValue *hostarr = NULL;
+    size_t idx;
+
+    src->type = VIR_STORAGE_TYPE_NETWORK;
+    src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
+
+    /* legacy syntax passed via 'filename' option */
+    if (legacy)
+        return virStorageSourceParseVitastorColonString(legacy, src);
+
+    name = virJSONValueObjectGetString(json, "image");
+    if (!name) {
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("missing image name in Vitastor backing volume "
+                         "JSON specification"));
+        return -1;
+    }
+
+    src->path = g_strdup(name);
+    src->configFile = g_strdup(virJSONValueObjectGetString(json, "config-path"));
+    src->query = g_strdup(virJSONValueObjectGetString(json, "etcd-prefix"));
+
+    hostarr = virJSONValueObjectGetArray(json, "server");
+    if (!hostarr)
+        return 0;
+
+    src->nhosts = virJSONValueArraySize(hostarr);
+    src->hosts = g_new0(virStorageNetHostDef, src->nhosts);
+
+    for (idx = 0; idx < src->nhosts; idx++) {
+        virJSONValue *entry = virJSONValueArrayGet(hostarr, idx);
+
+        if (virStorageSourceParseBackingJSONInetSocketAddress(&src->hosts[idx],
+                                                              entry) < 0)
+            return -1;
+    }
+
+    return 0;
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index 25335d9002..cf54069fbe 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7340,6 +7340,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af746..8bd0a57bdd 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a937..852df0de16 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index d5c2531ab8..b19308ac38 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -57,6 +57,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index 2010ef1356..072e2ff9e8 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1187,6 +1187,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
172
patches/pve-qemu-10.0-vitastor.patch
Normal file
172
patches/pve-qemu-10.0-vitastor.patch
Normal file
@@ -0,0 +1,172 @@
|
||||
Index: pve-qemu-kvm-10.0.2/block/meson.build
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/block/meson.build
|
||||
+++ pve-qemu-kvm-10.0.2/block/meson.build
|
||||
@@ -126,6 +126,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
Index: pve-qemu-kvm-10.0.2/meson.build
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/meson.build
|
||||
+++ pve-qemu-kvm-10.0.2/meson.build
|
||||
@@ -1622,6 +1622,26 @@ if not get_option('rbd').auto() or have_
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2514,6 +2534,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4812,6 +4833,7 @@ summary_info += {'fdt support': fd
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
Index: pve-qemu-kvm-10.0.2/meson_options.txt
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/meson_options.txt
|
||||
+++ pve-qemu-kvm-10.0.2/meson_options.txt
|
||||
@@ -202,6 +202,8 @@ option('pvg', type: 'feature', value: 'a
|
||||
description: 'macOS paravirtualized graphics support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
Index: pve-qemu-kvm-10.0.2/qapi/block-core.json
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/qapi/block-core.json
|
||||
+++ pve-qemu-kvm-10.0.2/qapi/block-core.json
|
||||
@@ -3599,7 +3599,7 @@
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
'pbs',
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4725,6 +4725,28 @@
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -5194,6 +5216,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5674,6 +5697,20 @@
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -5895,6 +5932,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
Index: pve-qemu-kvm-10.0.2/scripts/meson-buildoptions.sh
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/scripts/meson-buildoptions.sh
|
||||
+++ pve-qemu-kvm-10.0.2/scripts/meson-buildoptions.sh
|
||||
@@ -175,6 +175,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' qpl Query Processing Library support'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rust Rust support'
|
||||
@@ -458,6 +459,8 @@ _meson_option_parse() {
|
||||
--disable-qpl) printf "%s" -Dqpl=disabled ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
172
patches/qemu-10.0-vitastor.patch
Normal file
172
patches/qemu-10.0-vitastor.patch
Normal file
@@ -0,0 +1,172 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index 34b1b2a306..24ca0f1e52 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -114,6 +114,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 41f68d3806..29eaed9ba4 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1622,6 +1622,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2506,6 +2526,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4813,6 +4834,7 @@ summary_info += {'fdt support': fdt_opt == 'internal' ? 'internal' : fdt}
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index 59d973bca0..a3e7123980 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -202,6 +202,8 @@ option('pvg', type: 'feature', value: 'auto',
|
||||
description: 'macOS paravirtualized graphics support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index b1937780e1..a511193620 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3216,7 +3216,7 @@
|
||||
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4299,6 +4299,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4767,6 +4789,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5240,6 +5263,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5462,6 +5499,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 3e8e00852b..45aff3b6a9 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -175,6 +175,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' qpl Query Processing Library support'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rust Rust support'
|
||||
@@ -458,6 +459,8 @@ _meson_option_parse() {
|
||||
--disable-qpl) printf "%s" -Dqpl=disabled ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
@@ -7,22 +7,24 @@ set -e
|
||||
VITASTOR=$(dirname $0)
|
||||
VITASTOR=$(realpath "$VITASTOR/..")
|
||||
|
||||
EL=$(rpm --eval '%dist')
|
||||
if [ "$EL" = ".el8" ]; then
|
||||
REL=$(rpm --eval '%dist')
|
||||
REL=${REL##.}
|
||||
if [ "$REL" = "el8" ]; then
|
||||
# CentOS 8
|
||||
. /opt/rh/gcc-toolset-9/enable
|
||||
elif [ "$EL" = ".el7" ]; then
|
||||
elif [ "$REL" = "el7" ]; then
|
||||
# CentOS 7
|
||||
. /opt/rh/devtoolset-9/enable
|
||||
fi
|
||||
cd ~/rpmbuild/SPECS
|
||||
rpmbuild -bp fio.spec
|
||||
cd $VITASTOR
|
||||
VER=$(grep ^Version: rpm/vitastor-el7.spec | awk '{print $2}')
|
||||
VER=$(grep ^Version: rpm/vitastor-$REL.spec | awk '{print $2}')
|
||||
rm -rf fio
|
||||
ln -s ~/rpmbuild/BUILD/fio*/ fio
|
||||
sh copy-fio-includes.sh
|
||||
rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform "s#^#vitastor-$VER/#" --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-$VER$(rpm --eval '%dist').tar.gz *
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-$REL.spec
|
||||
tar --transform "s#^#vitastor-$VER/#" --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-$VER.$REL.tar.gz $(ls | grep -v packages)
|
||||
|
16
rpm/vitastor-build.sh
Executable file
16
rpm/vitastor-build.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e -x
|
||||
REL=$(rpm --eval '%dist')
|
||||
REL=${REL##.}
|
||||
cd /root/vitastor/rpm
|
||||
./build-tarball.sh
|
||||
VER=$(grep ^Version: vitastor-$REL.spec | awk '{print $2}')
|
||||
cp /root/vitastor-$VER.$REL.tar.gz ~/rpmbuild/SOURCES
|
||||
cp vitastor-$REL.spec ~/rpmbuild/SPECS/vitastor.spec
|
||||
cd ~/rpmbuild/SPECS/
|
||||
rpmbuild -ba vitastor.spec
|
||||
mkdir -p /root/vitastor/packages/vitastor-$REL
|
||||
rm -rf /root/vitastor/packages/vitastor-$REL/*
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/vitastor/packages/vitastor-$REL/
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/vitastor/packages/vitastor-$REL/
|
@@ -1,5 +1,8 @@
|
||||
# Build packages for CentOS 7 inside a container
|
||||
# cd ..; podman build -t vitastor-el7 -v `pwd`/packages:/root/packages -f rpm/vitastor-el7.Dockerfile .
|
||||
# cd ..
|
||||
# docker build -t vitastor-buildenv:el7 -f rpm/vitastor-el7.Dockerfile .
|
||||
# docker run -i --rm -v ./:/root/vitastor vitastor-buildenv:el7 /root/vitastor/rpm/vitastor-build.sh
|
||||
|
||||
# localedef -i ru_RU -f UTF-8 ru_RU.UTF-8
|
||||
|
||||
FROM centos:7
|
||||
@@ -7,7 +10,9 @@ FROM centos:7
|
||||
WORKDIR /root
|
||||
|
||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||
RUN sed -i 's/^mirrorlist=/#mirrorlist=/; s!#baseurl=http://mirror.centos.org/centos/\$releasever!baseurl=http://vault.centos.org/7.9.2009!' /etc/yum.repos.d/*.repo
|
||||
RUN yum -y --enablerepo=extras install centos-release-scl epel-release yum-utils rpm-build
|
||||
RUN perl -i -pe 's!mirrorlist=!#mirrorlist=!s; s!#\s*baseurl=http://mirror.centos.org!baseurl=http://vault.centos.org!' /etc/yum.repos.d/CentOS-SCLo-scl*.repo
|
||||
RUN yum -y install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm
|
||||
RUN yum -y install devtoolset-9-gcc-c++ devtoolset-9-libatomic-devel gcc make cmake gperftools-devel \
|
||||
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libnl3-devel
|
||||
@@ -16,32 +21,3 @@ RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||
RUN cd ~/rpmbuild/SPECS && yum-builddep -y fio.spec
|
||||
RUN yum -y install cmake3
|
||||
|
||||
ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
|
||||
|
||||
RUN set -e; \
|
||||
rpm -i liburing*.src.rpm; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
. /opt/rh/devtoolset-9/enable; \
|
||||
rpmbuild -ba liburing.spec; \
|
||||
mkdir -p /root/packages/liburing-el7; \
|
||||
rm -rf /root/packages/liburing-el7/*; \
|
||||
cp ~/rpmbuild/RPMS/*/liburing* /root/packages/liburing-el7/; \
|
||||
cp ~/rpmbuild/SRPMS/liburing* /root/packages/liburing-el7/
|
||||
|
||||
RUN rpm -i `ls /root/packages/liburing-el7/liburing-*.x86_64.rpm | grep -v debug`
|
||||
|
||||
ADD . /root/vitastor
|
||||
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
VER=$(grep ^Version: vitastor-el7.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
mkdir -p /root/packages/vitastor-el7; \
|
||||
rm -rf /root/packages/vitastor-el7/*; \
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/packages/vitastor-el7/; \
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el7/
|
||||
|
@@ -1,13 +1,12 @@
|
||||
Name: vitastor
|
||||
Version: 2.2.2
|
||||
Version: 2.3.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-2.2.2.el7.tar.gz
|
||||
Source0: vitastor-2.3.0.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
BuildRequires: devtoolset-9-gcc-c++
|
||||
BuildRequires: rh-nodejs12
|
||||
@@ -35,8 +34,6 @@ size with configurable redundancy (replication or erasure codes/XOR).
|
||||
Summary: Vitastor - OSD
|
||||
Requires: libJerasure2
|
||||
Requires: libisa-l
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: util-linux
|
||||
Requires: parted
|
||||
@@ -60,8 +57,6 @@ scheduling cluster-level operations.
|
||||
|
||||
%package -n vitastor-client
|
||||
Summary: Vitastor - client
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
|
||||
|
||||
%description -n vitastor-client
|
||||
@@ -82,7 +77,7 @@ Vitastor library headers for development.
|
||||
Summary: Vitastor - fio drivers
|
||||
Group: Development/Libraries
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: fio = 3.7-1.el7
|
||||
Requires: fio = 3.7-2.el7
|
||||
|
||||
|
||||
%description -n vitastor-fio
|
||||
@@ -169,6 +164,7 @@ chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
%files -n vitastor-client
|
||||
%_bindir/vitastor-nbd
|
||||
%_bindir/vitastor-ublk
|
||||
%_bindir/vitastor-nfs
|
||||
%_bindir/vitastor-cli
|
||||
%_bindir/vitastor-rm
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# Build packages for CentOS 8 inside a container
|
||||
# cd ..; podman build -t vitastor-el8 -v `pwd`/packages:/root/packages -f rpm/vitastor-el8.Dockerfile .
|
||||
# cd ..
|
||||
# docker build -t vitastor-buildenv:el8 -f rpm/vitastor-el8.Dockerfile .
|
||||
# docker run -i --rm -v ./:/root/vitastor vitastor-buildenv:el8 /root/vitastor/rpm/vitastor-build.sh
|
||||
|
||||
FROM centos:8
|
||||
|
||||
@@ -15,32 +17,3 @@ RUN dnf -y install gcc-toolset-9 gcc-toolset-9-gcc-c++ gperftools-devel \
|
||||
RUN dnf download --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec
|
||||
|
||||
ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
|
||||
|
||||
RUN set -e; \
|
||||
rpm -i liburing*.src.rpm; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
. /opt/rh/gcc-toolset-9/enable; \
|
||||
rpmbuild -ba liburing.spec; \
|
||||
mkdir -p /root/packages/liburing-el8; \
|
||||
rm -rf /root/packages/liburing-el8/*; \
|
||||
cp ~/rpmbuild/RPMS/*/liburing* /root/packages/liburing-el8/; \
|
||||
cp ~/rpmbuild/SRPMS/liburing* /root/packages/liburing-el8/
|
||||
|
||||
RUN rpm -i `ls /root/packages/liburing-el8/liburing-*.x86_64.rpm | grep -v debug`
|
||||
|
||||
ADD . /root/vitastor
|
||||
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
VER=$(grep ^Version: vitastor-el8.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
mkdir -p /root/packages/vitastor-el8; \
|
||||
rm -rf /root/packages/vitastor-el8/*; \
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/packages/vitastor-el8/; \
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el8/
|
||||
|
@@ -1,13 +1,12 @@
|
||||
Name: vitastor
|
||||
Version: 2.2.2
|
||||
Version: 2.3.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-2.2.2.el8.tar.gz
|
||||
Source0: vitastor-2.3.0.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
BuildRequires: gcc-toolset-9-gcc-c++
|
||||
BuildRequires: nodejs >= 10
|
||||
@@ -34,8 +33,6 @@ size with configurable redundancy (replication or erasure codes/XOR).
|
||||
Summary: Vitastor - OSD
|
||||
Requires: libJerasure2
|
||||
Requires: libisa-l
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: util-linux
|
||||
Requires: parted
|
||||
@@ -58,8 +55,6 @@ scheduling cluster-level operations.
|
||||
|
||||
%package -n vitastor-client
|
||||
Summary: Vitastor - client
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
|
||||
|
||||
%description -n vitastor-client
|
||||
@@ -80,7 +75,7 @@ Vitastor library headers for development.
|
||||
Summary: Vitastor - fio drivers
|
||||
Group: Development/Libraries
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: fio = 3.7-3.el8
|
||||
Requires: fio = 3.19-3.el8
|
||||
|
||||
|
||||
%description -n vitastor-fio
|
||||
@@ -166,6 +161,7 @@ chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
%files -n vitastor-client
|
||||
%_bindir/vitastor-nbd
|
||||
%_bindir/vitastor-ublk
|
||||
%_bindir/vitastor-nfs
|
||||
%_bindir/vitastor-cli
|
||||
%_bindir/vitastor-rm
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# Build packages for AlmaLinux 9 inside a container
|
||||
# cd ..; podman build -t vitastor-el9 -v `pwd`/packages:/root/packages -f rpm/vitastor-el9.Dockerfile .
|
||||
# cd ..
|
||||
# docker build -t vitastor-buildenv:el9 -f rpm/vitastor-el9.Dockerfile .
|
||||
# docker run -i --rm -v ./:/root/vitastor vitastor-buildenv:el9 /root/vitastor/rpm/vitastor-build.sh
|
||||
|
||||
FROM almalinux:9
|
||||
|
||||
@@ -8,22 +10,7 @@ WORKDIR /root
|
||||
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
|
||||
RUN dnf -y install epel-release dnf-plugins-core
|
||||
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
|
||||
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake libnl3-devel
|
||||
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive cmake libnl3-devel
|
||||
RUN dnf download --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec
|
||||
|
||||
ADD . /root/vitastor
|
||||
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
VER=$(grep ^Version: vitastor-el9.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
mkdir -p /root/packages/vitastor-el9; \
|
||||
rm -rf /root/packages/vitastor-el9/*; \
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/packages/vitastor-el9/; \
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el9/
|
||||
|
@@ -1,13 +1,12 @@
|
||||
Name: vitastor
|
||||
Version: 2.2.2
|
||||
Version: 2.3.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-2.2.2.el9.tar.gz
|
||||
Source0: vitastor-2.3.0.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: nodejs >= 10
|
||||
@@ -159,6 +158,7 @@ chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
%files -n vitastor-client
|
||||
%_bindir/vitastor-nbd
|
||||
%_bindir/vitastor-ublk
|
||||
%_bindir/vitastor-nfs
|
||||
%_bindir/vitastor-cli
|
||||
%_bindir/vitastor-rm
|
||||
|
@@ -12,6 +12,7 @@ set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
|
||||
set(WITH_FIO true CACHE BOOL "Build FIO driver")
|
||||
set(QEMU_PLUGINDIR qemu CACHE STRING "QEMU plugin directory suffix (qemu-kvm on RHEL)")
|
||||
set(WITH_ASAN false CACHE BOOL "Build with AddressSanitizer")
|
||||
set(WITH_SYSTEM_LIBURING false CACHE BOOL "Use system liburing")
|
||||
if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
if(EXISTS "/etc/debian_version")
|
||||
set(CMAKE_INSTALL_LIBDIR "lib/${CMAKE_LIBRARY_ARCHITECTURE}")
|
||||
@@ -19,13 +20,16 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVITASTOR_VERSION="2.2.2")
|
||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_definitions(-DVITASTOR_VERSION="2.3.0")
|
||||
add_definitions(-D_GNU_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -fvisibility=hidden -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
add_definitions(-fsanitize=address)
|
||||
add_link_options(-fsanitize=address -fno-omit-frame-pointer)
|
||||
endif (${WITH_ASAN})
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvisibility-inlines-hidden")
|
||||
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -fvisibility-inlines-hidden")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fvisibility-inlines-hidden")
|
||||
|
||||
set(CMAKE_BUILD_TYPE RelWithDebInfo)
|
||||
string(REGEX REPLACE "([\\/\\-]O)[^ \t\r\n]*" "\\13" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||
@@ -49,7 +53,6 @@ endmacro(install_symlink)
|
||||
check_include_file("linux/nbd-netlink.h" HAVE_NBD_NETLINK_H)
|
||||
|
||||
find_package(PkgConfig)
|
||||
pkg_check_modules(LIBURING REQUIRED liburing)
|
||||
if (${WITH_QEMU})
|
||||
pkg_check_modules(GLIB REQUIRED glib-2.0)
|
||||
endif (${WITH_QEMU})
|
||||
@@ -66,6 +69,15 @@ if (RDMACM_LIBRARIES)
|
||||
add_definitions(-DWITH_RDMACM)
|
||||
endif (RDMACM_LIBRARIES)
|
||||
|
||||
if (${WITH_SYSTEM_LIBURING})
|
||||
pkg_check_modules(LIBURING REQUIRED liburing>=2.10)
|
||||
include_directories(${LIBURING_INCLUDE_DIRS})
|
||||
else()
|
||||
include_directories(${CMAKE_SOURCE_DIR}/src/liburing/include)
|
||||
add_subdirectory(liburing)
|
||||
set(LIBURING_LIBRARIES uring)
|
||||
endif (${WITH_SYSTEM_LIBURING})
|
||||
|
||||
add_custom_target(build_tests)
|
||||
add_custom_target(test
|
||||
COMMAND
|
||||
@@ -86,7 +98,6 @@ include_directories(
|
||||
${CMAKE_SOURCE_DIR}/src/test
|
||||
${CMAKE_SOURCE_DIR}/src/util
|
||||
/usr/include/jerasure
|
||||
${LIBURING_INCLUDE_DIRS}
|
||||
${IBVERBS_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
@@ -101,7 +112,7 @@ add_subdirectory(test)
|
||||
|
||||
### Install
|
||||
|
||||
install(TARGETS vitastor-osd vitastor-disk vitastor-nbd vitastor-nfs vitastor-cli vitastor-kv vitastor-kv-stress RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
install(TARGETS vitastor-osd vitastor-disk vitastor-nbd vitastor-ublk vitastor-nfs vitastor-cli vitastor-kv vitastor-kv-stress RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
install_symlink(vitastor-disk ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vitastor-dump-journal)
|
||||
install_symlink(vitastor-cli ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vitastor-rm)
|
||||
install_symlink(vitastor-cli ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vita)
|
||||
|
@@ -9,6 +9,7 @@ add_library(vitastor_blk SHARED
|
||||
)
|
||||
target_link_libraries(vitastor_blk
|
||||
${LIBURING_LIBRARIES}
|
||||
${ISAL_LIBRARIES}
|
||||
tcmalloc_minimal
|
||||
# for timerfd_manager
|
||||
vitastor_common
|
||||
|
@@ -42,8 +42,7 @@
|
||||
#define BS_OP_DELETE 6
|
||||
#define BS_OP_LIST 7
|
||||
#define BS_OP_ROLLBACK 8
|
||||
#define BS_OP_SYNC_STAB_ALL 9
|
||||
#define BS_OP_MAX 9
|
||||
#define BS_OP_MAX 8
|
||||
|
||||
#define BS_OP_PRIVATE_DATA_SIZE 256
|
||||
|
||||
@@ -113,14 +112,6 @@ Input:
|
||||
Output:
|
||||
- retval = 0 or negative error number (-ENOENT if no such version for stabilize)
|
||||
|
||||
## BS_OP_SYNC_STAB_ALL
|
||||
|
||||
ONLY FOR TESTS! Sync and mark all unstable object versions as stable, at once.
|
||||
|
||||
Input: Nothing except opcode
|
||||
Output:
|
||||
- retval = 0 or negative error number (-EINVAL)
|
||||
|
||||
## BS_OP_LIST
|
||||
|
||||
Get a list of all objects in this Blockstore.
|
||||
@@ -144,10 +135,10 @@ Output:
|
||||
|
||||
*/
|
||||
|
||||
struct blockstore_op_t
|
||||
struct __attribute__ ((visibility("default"))) blockstore_op_t
|
||||
{
|
||||
// operation
|
||||
uint64_t opcode;
|
||||
uint64_t opcode = 0;
|
||||
// finish callback
|
||||
std::function<void (blockstore_op_t*)> callback;
|
||||
union __attribute__((__packed__))
|
||||
@@ -171,9 +162,9 @@ struct blockstore_op_t
|
||||
uint32_t list_stable_limit;
|
||||
};
|
||||
};
|
||||
void *buf;
|
||||
void *bitmap;
|
||||
int retval;
|
||||
void *buf = NULL;
|
||||
void *bitmap = NULL;
|
||||
int retval = 0;
|
||||
|
||||
uint8_t private_data[BS_OP_PRIVATE_DATA_SIZE];
|
||||
};
|
||||
@@ -182,7 +173,7 @@ typedef std::map<std::string, std::string> blockstore_config_t;
|
||||
|
||||
class blockstore_impl_t;
|
||||
|
||||
class blockstore_t
|
||||
class __attribute__((visibility("default"))) blockstore_t
|
||||
{
|
||||
blockstore_impl_t *impl;
|
||||
public:
|
||||
|
@@ -361,6 +361,10 @@ bool journal_flusher_co::loop()
|
||||
else if (wait_state == 28) goto resume_28;
|
||||
else if (wait_state == 29) goto resume_29;
|
||||
else if (wait_state == 30) goto resume_30;
|
||||
else if (wait_state == 31) goto resume_31;
|
||||
else if (wait_state == 32) goto resume_32;
|
||||
else if (wait_state == 33) goto resume_33;
|
||||
else if (wait_state == 34) goto resume_34;
|
||||
resume_0:
|
||||
if (flusher->flush_queue.size() < flusher->min_flusher_count && !flusher->trim_wanted ||
|
||||
!flusher->flush_queue.size() || !flusher->dequeuing)
|
||||
@@ -486,13 +490,14 @@ resume_2:
|
||||
resume_10:
|
||||
resume_11:
|
||||
resume_12:
|
||||
resume_13:
|
||||
if (fill_incomplete && !clear_incomplete_csum_block_bits(5))
|
||||
return false;
|
||||
// Wait for journal data reads if the journal is not inmemory
|
||||
resume_13:
|
||||
resume_14:
|
||||
if (wait_journal_count > 0)
|
||||
{
|
||||
wait_state = wait_base+13;
|
||||
wait_state = wait_base+14;
|
||||
return false;
|
||||
}
|
||||
if (bs->dsk.csum_block_size)
|
||||
@@ -509,31 +514,26 @@ resume_2:
|
||||
{
|
||||
if (it->copy_flags == COPY_BUF_JOURNAL || it->copy_flags == (COPY_BUF_JOURNAL|COPY_BUF_COALESCED))
|
||||
{
|
||||
await_sqe(14);
|
||||
await_sqe(15);
|
||||
data->iov = (struct iovec){ it->buf, (size_t)it->len };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, bs->dsk.data_fd, &data->iov, 1, bs->dsk.data_offset + clean_loc + it->offset
|
||||
);
|
||||
wait_count++;
|
||||
}
|
||||
}
|
||||
// Wait for data writes and metadata reads
|
||||
resume_15:
|
||||
resume_16:
|
||||
if (!wait_meta_reads(15))
|
||||
resume_17:
|
||||
if (!wait_meta_reads(16))
|
||||
return false;
|
||||
// Sync data before writing metadata
|
||||
resume_17:
|
||||
resume_18:
|
||||
resume_19:
|
||||
if (copy_count && !fsync_batch(false, 17))
|
||||
resume_20:
|
||||
if (copy_count && !fsync_batch(false, 18))
|
||||
return false;
|
||||
// Modify the new metadata entry
|
||||
update_metadata_entry();
|
||||
// Update clean_db - it must be equal to the metadata entry
|
||||
update_clean_db();
|
||||
// And write metadata entries
|
||||
if (old_clean_loc != UINT64_MAX && old_clean_loc != clean_loc)
|
||||
{
|
||||
// zero out old metadata entry
|
||||
@@ -548,26 +548,56 @@ resume_2:
|
||||
}
|
||||
}
|
||||
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
|
||||
resume_20:
|
||||
if (meta_old.sector != meta_new.sector && !write_meta_block(meta_old, 20))
|
||||
return false;
|
||||
}
|
||||
if (meta_old.sector != meta_new.sector)
|
||||
{
|
||||
resume_21:
|
||||
if (!write_meta_block(meta_new, 21))
|
||||
return false;
|
||||
if (flusher->inflight_meta_sectors.find(meta_old.sector) != flusher->inflight_meta_sectors.end())
|
||||
{
|
||||
wait_state = wait_base+21;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.insert(meta_old.sector);
|
||||
resume_22:
|
||||
if (!write_meta_block(meta_old, 22))
|
||||
return false;
|
||||
resume_23:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = wait_base+23;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.erase(meta_old.sector);
|
||||
}
|
||||
}
|
||||
resume_24:
|
||||
if (flusher->inflight_meta_sectors.find(meta_new.sector) != flusher->inflight_meta_sectors.end())
|
||||
{
|
||||
wait_state = wait_base+24;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.insert(meta_new.sector);
|
||||
// Modify the new metadata entry
|
||||
update_metadata_entry();
|
||||
// Update clean_db - it must be equal to the metadata entry
|
||||
update_clean_db();
|
||||
// And write metadata entries
|
||||
resume_25:
|
||||
if (!write_meta_block(meta_new, 25))
|
||||
return false;
|
||||
resume_26:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = wait_base+22;
|
||||
wait_state = wait_base+26;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.erase(meta_new.sector);
|
||||
// Done, free all buffers
|
||||
free_buffers();
|
||||
// And sync metadata (in batches - not per each operation!)
|
||||
resume_23:
|
||||
resume_24:
|
||||
resume_25:
|
||||
if (!fsync_batch(true, 23))
|
||||
resume_27:
|
||||
resume_28:
|
||||
resume_29:
|
||||
if (!fsync_batch(true, 27))
|
||||
return false;
|
||||
// Free the data block only when metadata is synced
|
||||
free_data_blocks();
|
||||
@@ -590,12 +620,12 @@ resume_2:
|
||||
if (bs->journal_trim_interval && !((++flusher->journal_trim_counter) % bs->journal_trim_interval) ||
|
||||
flusher->trim_wanted > 0)
|
||||
{
|
||||
resume_26:
|
||||
resume_27:
|
||||
resume_28:
|
||||
resume_29:
|
||||
resume_30:
|
||||
if (!trim_journal(26))
|
||||
resume_31:
|
||||
resume_32:
|
||||
resume_33:
|
||||
resume_34:
|
||||
if (!trim_journal(30))
|
||||
return false;
|
||||
}
|
||||
// All done
|
||||
@@ -716,7 +746,7 @@ bool journal_flusher_co::write_meta_block(flusher_meta_write_t & meta_block, int
|
||||
await_sqe(0);
|
||||
data->iov = (struct iovec){ meta_block.buf, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + meta_block.sector
|
||||
);
|
||||
wait_count++;
|
||||
@@ -734,6 +764,7 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base)
|
||||
else if (wait_state == wait_base+5) goto resume_5;
|
||||
else if (wait_state == wait_base+6) goto resume_6;
|
||||
else if (wait_state == wait_base+7) goto resume_7;
|
||||
else if (wait_state == wait_base+8) goto resume_8;
|
||||
cleared_incomplete = false;
|
||||
for (auto it = v.begin(); it != v.end(); it++)
|
||||
{
|
||||
@@ -754,11 +785,18 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base)
|
||||
if (!wait_meta_reads(wait_base+0))
|
||||
return false;
|
||||
resume_2:
|
||||
if (wait_journal_count > 0)
|
||||
if (flusher->inflight_meta_sectors.find(meta_new.sector) != flusher->inflight_meta_sectors.end())
|
||||
{
|
||||
wait_state = wait_base+2;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.insert(meta_new.sector);
|
||||
resume_3:
|
||||
if (wait_journal_count > 0)
|
||||
{
|
||||
wait_state = wait_base+3;
|
||||
return false;
|
||||
}
|
||||
// Verify data checksums
|
||||
for (i = v.size()-1; i >= 0 && (v[i].copy_flags & COPY_BUF_CSUM_FILL); i--)
|
||||
{
|
||||
@@ -837,19 +875,20 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base)
|
||||
}
|
||||
}
|
||||
// Write and fsync the modified metadata entry
|
||||
resume_3:
|
||||
if (!write_meta_block(meta_new, wait_base+3))
|
||||
return false;
|
||||
resume_4:
|
||||
if (!write_meta_block(meta_new, wait_base+4))
|
||||
return false;
|
||||
resume_5:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = wait_base+4;
|
||||
wait_state = wait_base+5;
|
||||
return false;
|
||||
}
|
||||
resume_5:
|
||||
flusher->inflight_meta_sectors.erase(meta_new.sector);
|
||||
resume_6:
|
||||
resume_7:
|
||||
if (!fsync_batch(true, wait_base+5))
|
||||
resume_8:
|
||||
if (!fsync_batch(true, wait_base+6))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -1090,7 +1129,7 @@ bool journal_flusher_co::read_dirty(int wait_base)
|
||||
vi.buf = memalign_or_die(MEM_ALIGNMENT, vi.len);
|
||||
data->iov = (struct iovec){ vi.buf, (size_t)vi.len };
|
||||
data->callback = simple_callback_r;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe, bs->dsk.data_fd, &data->iov, 1, bs->dsk.data_offset + old_clean_loc + vi.offset
|
||||
);
|
||||
wait_count++;
|
||||
@@ -1122,7 +1161,7 @@ bool journal_flusher_co::read_dirty(int wait_base)
|
||||
await_sqe(1);
|
||||
data->iov = (struct iovec){ v[i].buf, (size_t)v[i].len };
|
||||
data->callback = simple_callback_rj;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + v[i].disk_offset
|
||||
);
|
||||
wait_journal_count++;
|
||||
@@ -1215,7 +1254,7 @@ bool journal_flusher_co::modify_meta_read(uint64_t meta_loc, flusher_meta_write_
|
||||
data->iov = (struct iovec){ wr.it->second.buf, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = simple_callback_r;
|
||||
wr.submitted = true;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + wr.sector
|
||||
);
|
||||
wait_count++;
|
||||
@@ -1313,7 +1352,7 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
|
||||
await_sqe(0);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_fsync(sqe, fsync_meta ? bs->dsk.meta_fd : bs->dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, fsync_meta ? bs->dsk.meta_fd : bs->dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
cur_sync->state = 1;
|
||||
wait_count++;
|
||||
resume_2:
|
||||
@@ -1383,7 +1422,7 @@ bool journal_flusher_co::trim_journal(int wait_base)
|
||||
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
|
||||
data->iov = (struct iovec){ flusher->journal_superblock, (size_t)bs->dsk.journal_block_size };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
wait_count++;
|
||||
resume_2:
|
||||
if (wait_count > 0)
|
||||
@@ -1394,7 +1433,7 @@ bool journal_flusher_co::trim_journal(int wait_base)
|
||||
if (!bs->disable_journal_fsync)
|
||||
{
|
||||
await_sqe(3);
|
||||
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback_w;
|
||||
wait_count++;
|
||||
|
@@ -119,7 +119,8 @@ class journal_flusher_t
|
||||
|
||||
std::map<uint64_t, meta_sector_t> meta_sectors;
|
||||
std::deque<object_id> flush_queue;
|
||||
std::map<object_id, uint64_t> flush_versions; // FIXME: consider unordered_map?
|
||||
std::unordered_map<object_id, uint64_t> flush_versions;
|
||||
std::unordered_set<uint64_t> inflight_meta_sectors;
|
||||
|
||||
bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
|
||||
bool try_find_other(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
|
||||
|
@@ -343,44 +343,6 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
|
||||
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
|
||||
return;
|
||||
}
|
||||
if (op->opcode == BS_OP_SYNC_STAB_ALL)
|
||||
{
|
||||
std::function<void(blockstore_op_t*)> *old_callback = new std::function<void(blockstore_op_t*)>(op->callback);
|
||||
op->opcode = BS_OP_SYNC;
|
||||
op->callback = [this, old_callback](blockstore_op_t *op)
|
||||
{
|
||||
if (op->retval >= 0 && unstable_writes.size() > 0)
|
||||
{
|
||||
op->opcode = BS_OP_STABLE;
|
||||
op->len = unstable_writes.size();
|
||||
obj_ver_id *vers = new obj_ver_id[op->len];
|
||||
op->buf = vers;
|
||||
int i = 0;
|
||||
for (auto it = unstable_writes.begin(); it != unstable_writes.end(); it++, i++)
|
||||
{
|
||||
vers[i] = {
|
||||
.oid = it->first,
|
||||
.version = it->second,
|
||||
};
|
||||
}
|
||||
unstable_writes.clear();
|
||||
op->callback = [old_callback](blockstore_op_t *op)
|
||||
{
|
||||
obj_ver_id *vers = (obj_ver_id*)op->buf;
|
||||
delete[] vers;
|
||||
op->buf = NULL;
|
||||
(*old_callback)(op);
|
||||
delete old_callback;
|
||||
};
|
||||
this->enqueue_op(op);
|
||||
}
|
||||
else
|
||||
{
|
||||
(*old_callback)(op);
|
||||
delete old_callback;
|
||||
}
|
||||
};
|
||||
}
|
||||
if ((op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE || op->opcode == BS_OP_DELETE) && !enqueue_write(op))
|
||||
{
|
||||
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
|
||||
|
@@ -19,6 +19,7 @@
|
||||
#include <deque>
|
||||
#include <new>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "cpp-btree/btree_map.h"
|
||||
|
||||
|
@@ -66,7 +66,7 @@ int blockstore_init_meta::loop()
|
||||
last_read_offset = 0;
|
||||
data->iov = { metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
io_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_1:
|
||||
@@ -104,7 +104,7 @@ resume_1:
|
||||
last_read_offset = 0;
|
||||
data->iov = (struct iovec){ metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_3:
|
||||
@@ -213,12 +213,12 @@ resume_2:
|
||||
data->iov = { bufs[i].buf, (size_t)bufs[i].size };
|
||||
data->callback = [this, i](ring_data_t *data) { handle_event(data, i); };
|
||||
if (!zero_on_init)
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
io_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
else
|
||||
{
|
||||
// Fill metadata with zeroes
|
||||
memset(data->iov.iov_base, 0, data->iov.iov_len);
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
}
|
||||
bs->ringloop->submit();
|
||||
break;
|
||||
@@ -245,7 +245,7 @@ resume_2:
|
||||
assert(bufs[i].size <= 0x7fffffff);
|
||||
data->iov = { bufs[i].buf, (size_t)bufs[i].size };
|
||||
data->callback = [this, i](ring_data_t *data) { handle_event(data, i); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
bs->ringloop->submit();
|
||||
bufs[i].state = INIT_META_WRITING;
|
||||
submitted++;
|
||||
@@ -274,7 +274,7 @@ resume_2:
|
||||
last_read_offset = (1+next_offset)*bs->dsk.meta_block_size;
|
||||
data->iov = { metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
io_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_5:
|
||||
@@ -291,7 +291,7 @@ resume_5:
|
||||
GET_SQE();
|
||||
data->iov = { metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_6:
|
||||
@@ -313,7 +313,7 @@ resume_6:
|
||||
if (zero_on_init && !bs->disable_meta_fsync)
|
||||
{
|
||||
GET_SQE();
|
||||
my_uring_prep_fsync(sqe, bs->dsk.meta_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.meta_fd, IORING_FSYNC_DATASYNC);
|
||||
last_read_offset = 0;
|
||||
data->iov = { 0 };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
@@ -495,7 +495,7 @@ int blockstore_init_journal::loop()
|
||||
data = ((ring_data_t*)sqe->user_data);
|
||||
data->iov = { submitted_buf, (size_t)bs->journal.block_size };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
io_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
bs->ringloop->submit();
|
||||
wait_count = 1;
|
||||
resume_1:
|
||||
@@ -536,7 +536,7 @@ resume_1:
|
||||
GET_SQE();
|
||||
data->iov = (struct iovec){ submitted_buf, (size_t)(2*bs->journal.block_size) };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
wait_count++;
|
||||
bs->ringloop->submit();
|
||||
resume_6:
|
||||
@@ -548,7 +548,7 @@ resume_1:
|
||||
if (!bs->disable_journal_fsync)
|
||||
{
|
||||
GET_SQE();
|
||||
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback;
|
||||
wait_count++;
|
||||
@@ -636,7 +636,7 @@ resume_1:
|
||||
(size_t)(end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE),
|
||||
};
|
||||
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
||||
io_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
||||
bs->ringloop->submit();
|
||||
}
|
||||
while (done.size() > 0)
|
||||
@@ -651,7 +651,7 @@ resume_1:
|
||||
GET_SQE();
|
||||
data->iov = { init_write_buf, (size_t)bs->journal.block_size };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||
io_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||
wait_count++;
|
||||
bs->ringloop->submit();
|
||||
resume_7:
|
||||
@@ -665,7 +665,7 @@ resume_1:
|
||||
GET_SQE();
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
wait_count++;
|
||||
bs->ringloop->submit();
|
||||
}
|
||||
|
@@ -190,7 +190,7 @@ void blockstore_impl_t::prepare_journal_sector_write(int cur_sector, blockstore_
|
||||
(size_t)journal.block_size
|
||||
};
|
||||
data->callback = [this, flush_id = journal.submit_id](ring_data_t *data) { handle_journal_write(data, flush_id); };
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, dsk.journal_fd, &data->iov, 1, journal.offset + journal.sector_info[cur_sector].offset
|
||||
);
|
||||
}
|
||||
@@ -326,31 +326,3 @@ void journal_t::dump_diagnostics()
|
||||
journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
|
||||
);
|
||||
}
|
||||
|
||||
static uint64_t zero_page[4096];
|
||||
|
||||
uint32_t crc32c_pad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad)
|
||||
{
|
||||
uint32_t r = prev_crc;
|
||||
while (left_pad >= 4096)
|
||||
{
|
||||
r = crc32c(r, zero_page, 4096);
|
||||
left_pad -= 4096;
|
||||
}
|
||||
if (left_pad > 0)
|
||||
r = crc32c(r, zero_page, left_pad);
|
||||
r = crc32c(r, buf, len);
|
||||
while (right_pad >= 4096)
|
||||
{
|
||||
r = crc32c(r, zero_page, 4096);
|
||||
right_pad -= 4096;
|
||||
}
|
||||
if (left_pad > 0)
|
||||
r = crc32c(r, zero_page, right_pad);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t crc32c_nopad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad)
|
||||
{
|
||||
return crc32c(0, buf, len);
|
||||
}
|
||||
|
@@ -216,6 +216,3 @@ struct blockstore_journal_check_t
|
||||
};
|
||||
|
||||
journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, uint32_t size);
|
||||
|
||||
uint32_t crc32c_pad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad);
|
||||
uint32_t crc32c_nopad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad);
|
||||
|
@@ -27,7 +27,7 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
data->iov = (struct iovec){ buf, (size_t)len };
|
||||
PRIV(op)->pending_ops++;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe,
|
||||
IS_JOURNAL(item_state) ? dsk.journal_fd : dsk.data_fd,
|
||||
&data->iov, 1,
|
||||
@@ -356,7 +356,7 @@ bool blockstore_impl_t::read_checksum_block(blockstore_op_t *op, int rv_pos, uin
|
||||
int n_cur = n_iov-n_pos < IOV_MAX ? n_iov-n_pos : IOV_MAX;
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
PRIV(op)->pending_ops++;
|
||||
my_uring_prep_readv(sqe, submit_fd, iov + n_pos, n_cur, submit_offset + clean_loc + item_start + d_pos);
|
||||
io_uring_prep_readv(sqe, submit_fd, iov + n_pos, n_cur, submit_offset + clean_loc + item_start + d_pos);
|
||||
data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
|
||||
if (n_pos > 0 || n_pos + IOV_MAX < n_iov)
|
||||
{
|
||||
@@ -702,7 +702,7 @@ uint8_t* blockstore_impl_t::read_clean_meta_block(blockstore_op_t *op, uint64_t
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
data->iov = (struct iovec){ buf, (size_t)dsk.meta_block_size };
|
||||
PRIV(op)->pending_ops++;
|
||||
my_uring_prep_readv(sqe, dsk.meta_fd, &data->iov, 1, dsk.meta_offset + dsk.meta_block_size + sector);
|
||||
io_uring_prep_readv(sqe, dsk.meta_fd, &data->iov, 1, dsk.meta_offset + dsk.meta_block_size + sector);
|
||||
data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
|
||||
// return pointer to checksums + bitmap
|
||||
return buf + pos + sizeof(clean_disk_entry);
|
||||
|
@@ -101,7 +101,7 @@ resume_2:
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
|
@@ -400,7 +400,7 @@ resume_2:
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
|
@@ -58,7 +58,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||
if (!disable_data_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
@@ -149,7 +149,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
|
@@ -169,26 +169,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||
if (op->opcode == BS_OP_WRITE_STABLE)
|
||||
state |= BS_ST_INSTANT;
|
||||
if (op->bitmap)
|
||||
{
|
||||
// Only allow to overwrite part of the object bitmap respective to the write's offset/len
|
||||
uint32_t bit = op->offset/dsk.bitmap_granularity;
|
||||
uint32_t bits_left = op->len/dsk.bitmap_granularity;
|
||||
while (!(bit % 8) && bits_left >= 8)
|
||||
{
|
||||
// Copy bytes
|
||||
dyn_ptr[bit/8] = ((uint8_t*)op->bitmap)[bit/8];
|
||||
bit += 8;
|
||||
bits_left -= 8;
|
||||
}
|
||||
while (bits_left > 0)
|
||||
{
|
||||
// Copy bits
|
||||
dyn_ptr[bit/8] = (dyn_ptr[bit/8] & ~(1 << (bit%8)))
|
||||
| (((uint8_t*)op->bitmap)[bit/8] & (1 << bit%8));
|
||||
bit++;
|
||||
bits_left--;
|
||||
}
|
||||
}
|
||||
memcpy(dyn_ptr, op->bitmap, dsk.clean_entry_bitmap_size);
|
||||
}
|
||||
// Calculate checksums
|
||||
// FIXME: Allow to receive checksums from outside?
|
||||
@@ -384,7 +365,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||
}
|
||||
data->iov.iov_len = op->len + stripe_offset + stripe_end; // to check it in the callback
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, dsk.data_fd, PRIV(op)->iov_zerofill, vcnt, dsk.data_offset + (loc << dsk.block_order) + op->offset - stripe_offset
|
||||
);
|
||||
PRIV(op)->pending_ops = 1;
|
||||
@@ -511,7 +492,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||
.op = op,
|
||||
});
|
||||
data2->callback = [this, flush_id = journal.submit_id](ring_data_t *data) { handle_journal_write(data, flush_id); };
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe2, dsk.journal_fd, &data2->iov, 1, journal.offset + journal.next_free
|
||||
);
|
||||
PRIV(op)->pending_ops++;
|
||||
|
@@ -5,14 +5,14 @@
|
||||
//
|
||||
// Initialize storage for tests:
|
||||
//
|
||||
// dd if=/dev/zero of=test_data.bin bs=1024 count=1048576
|
||||
// dd if=/dev/zero of=test_meta.bin bs=1024 count=256
|
||||
// dd if=/dev/zero of=test_journal.bin bs=1024 count=4096
|
||||
// dd if=/dev/zero of=test_data.bin bs=1M count=1024
|
||||
//
|
||||
// Random write:
|
||||
//
|
||||
// fio -thread -ioengine=./libfio_blockstore.so -name=test -bs=4k -direct=1 -fsync=16 -iodepth=16 -rw=randwrite \
|
||||
// -bs_config='{"data_device":"./test_data.bin"}' -size=1000M
|
||||
// [LD_PRELOAD=libasan.so.8] \
|
||||
// fio -name=test -thread -ioengine=../build/src/blockstore/libfio_vitastor_blk.so \
|
||||
// -bs=4k -direct=1 -rw=randwrite -iodepth=16 -size=900M -loops=10 \
|
||||
// -bs_config='{"data_device":"./test_data.bin","meta_offset":0,"journal_offset":16777216,"data_offset":33554432,"disable_data_fsync":true,"immediate_commit":"all","journal_no_same_sector_overwrites":true}'
|
||||
//
|
||||
// Linear write:
|
||||
//
|
||||
@@ -38,12 +38,14 @@ struct bs_data
|
||||
std::vector<io_u*> completed;
|
||||
int op_n = 0, inflight = 0;
|
||||
bool last_sync = false;
|
||||
bool trace = false;
|
||||
};
|
||||
|
||||
struct bs_options
|
||||
{
|
||||
int __pad;
|
||||
char *json_config = NULL;
|
||||
int trace = 0;
|
||||
};
|
||||
|
||||
static struct fio_option options[] = {
|
||||
@@ -56,6 +58,16 @@ static struct fio_option options[] = {
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = "bs_trace",
|
||||
.lname = "trace",
|
||||
.type = FIO_OPT_BOOL,
|
||||
.off1 = offsetof(struct bs_options, trace),
|
||||
.help = "Trace operations",
|
||||
.def = "0",
|
||||
.category = FIO_OPT_C_ENGINE,
|
||||
.group = FIO_OPT_G_FILENAME,
|
||||
},
|
||||
{
|
||||
.name = NULL,
|
||||
},
|
||||
@@ -63,6 +75,7 @@ static struct fio_option options[] = {
|
||||
|
||||
static int bs_setup(struct thread_data *td)
|
||||
{
|
||||
bs_options *o = (bs_options*)td->eo;
|
||||
bs_data *bsd;
|
||||
//fio_file *f;
|
||||
//int r;
|
||||
@@ -83,6 +96,8 @@ static int bs_setup(struct thread_data *td)
|
||||
td->o.open_files++;
|
||||
}
|
||||
|
||||
bsd->trace = o->trace ? true : false;
|
||||
|
||||
//f = td->files[0];
|
||||
//f->real_file_size = size;
|
||||
return 0;
|
||||
@@ -176,20 +191,19 @@ static enum fio_q_status bs_queue(struct thread_data *td, struct io_u *io)
|
||||
op->version = UINT64_MAX; // last unstable
|
||||
op->offset = io->offset % bsd->bs->get_block_size();
|
||||
op->len = io->xfer_buflen;
|
||||
op->callback = [io](blockstore_op_t *op)
|
||||
op->callback = [io, n = bsd->op_n](blockstore_op_t *op)
|
||||
{
|
||||
io->error = op->retval < 0 ? -op->retval : 0;
|
||||
bs_data *bsd = (bs_data*)io->engine_data;
|
||||
bsd->inflight--;
|
||||
bsd->completed.push_back(io);
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("--- OP_READ %llx n=%d retval=%d\n", io, n, op->retval);
|
||||
#endif
|
||||
if (bsd->trace)
|
||||
printf("--- OP_READ %zx n=%d retval=%d\n", (size_t)op, n, op->retval);
|
||||
delete op;
|
||||
};
|
||||
break;
|
||||
case DDIR_WRITE:
|
||||
op->opcode = BS_OP_WRITE;
|
||||
op->opcode = BS_OP_WRITE_STABLE;
|
||||
op->buf = io->xfer_buf;
|
||||
op->oid = {
|
||||
.inode = 1,
|
||||
@@ -198,30 +212,28 @@ static enum fio_q_status bs_queue(struct thread_data *td, struct io_u *io)
|
||||
op->version = 0; // assign automatically
|
||||
op->offset = io->offset % bsd->bs->get_block_size();
|
||||
op->len = io->xfer_buflen;
|
||||
op->callback = [io](blockstore_op_t *op)
|
||||
op->callback = [io, n = bsd->op_n](blockstore_op_t *op)
|
||||
{
|
||||
io->error = op->retval < 0 ? -op->retval : 0;
|
||||
bs_data *bsd = (bs_data*)io->engine_data;
|
||||
bsd->inflight--;
|
||||
bsd->completed.push_back(io);
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("--- OP_WRITE %llx n=%d retval=%d\n", io, n, op->retval);
|
||||
#endif
|
||||
if (bsd->trace)
|
||||
printf("--- OP_WRITE %zx n=%d retval=%d\n", (size_t)op, n, op->retval);
|
||||
delete op;
|
||||
};
|
||||
bsd->last_sync = false;
|
||||
break;
|
||||
case DDIR_SYNC:
|
||||
op->opcode = BS_OP_SYNC_STAB_ALL;
|
||||
op->callback = [io](blockstore_op_t *op)
|
||||
op->opcode = BS_OP_SYNC;
|
||||
op->callback = [io, n = bsd->op_n](blockstore_op_t *op)
|
||||
{
|
||||
bs_data *bsd = (bs_data*)io->engine_data;
|
||||
io->error = op->retval < 0 ? -op->retval : 0;
|
||||
bsd->completed.push_back(io);
|
||||
bsd->inflight--;
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("--- OP_SYNC %llx n=%d retval=%d\n", io, n, op->retval);
|
||||
#endif
|
||||
if (bsd->trace)
|
||||
printf("--- OP_SYNC %zx n=%d retval=%d\n", (size_t)op, n, op->retval);
|
||||
delete op;
|
||||
};
|
||||
bsd->last_sync = true;
|
||||
@@ -232,9 +244,8 @@ static enum fio_q_status bs_queue(struct thread_data *td, struct io_u *io)
|
||||
return FIO_Q_COMPLETED;
|
||||
}
|
||||
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("+++ %s %llx n=%d\n", op->opcode == OP_READ ? "OP_READ" : (op->opcode == OP_WRITE ? "OP_WRITE" : "OP_SYNC"), io, n);
|
||||
#endif
|
||||
if (bsd->trace)
|
||||
printf("+++ %s %zx n=%d\n", op->opcode == BS_OP_READ ? "OP_READ" : (op->opcode == BS_OP_WRITE_STABLE ? "OP_WRITE" : "OP_SYNC"), (size_t)op, bsd->op_n);
|
||||
io->error = 0;
|
||||
bsd->inflight++;
|
||||
bsd->bs->enqueue_op(op);
|
||||
@@ -290,7 +301,7 @@ static int bs_invalidate(struct thread_data *td, struct fio_file *f)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ioengine_ops ioengine = {
|
||||
struct ioengine_ops __attribute__((visibility("default"))) ioengine = {
|
||||
.name = "vitastor_blockstore",
|
||||
.version = FIO_IOOPS_VERSION,
|
||||
.flags = FIO_MEMALIGN | FIO_DISKLESSIO | FIO_NOEXTEND,
|
||||
|
@@ -68,6 +68,12 @@ if (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
|
||||
target_compile_definitions(vitastor-nbd PUBLIC HAVE_NBD_NETLINK_H)
|
||||
endif (HAVE_NBD_NETLINK_H AND NL3_LIBRARIES)
|
||||
|
||||
# vitastor-ublk
|
||||
add_executable(vitastor-ublk
|
||||
ublk_server.cpp
|
||||
)
|
||||
target_link_libraries(vitastor-ublk vitastor_client)
|
||||
|
||||
if (${WITH_QEMU})
|
||||
# qemu_driver.so
|
||||
add_library(qemu_vitastor SHARED
|
||||
|
@@ -57,6 +57,7 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
||||
st_cli.on_change_osd_state_hook = [this](uint64_t peer_osd) { on_change_osd_state_hook(peer_osd); };
|
||||
st_cli.on_change_pool_config_hook = [this]() { on_change_pool_config_hook(); };
|
||||
st_cli.on_change_pg_config_hook = [this]() { on_change_pool_config_hook(); };
|
||||
st_cli.on_change_pg_state_hook = [this](pool_id_t pool_id, pg_num_t pg_num, osd_num_t prev_primary) { on_change_pg_state_hook(pool_id, pg_num, prev_primary); };
|
||||
st_cli.on_change_node_placement_hook = [this]() { on_change_node_placement_hook(); };
|
||||
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
|
||||
@@ -563,7 +564,7 @@ osd_num_t cluster_client_t::select_nearest_osd(const std::vector<osd_num_t> & os
|
||||
|
||||
void cluster_client_t::on_load_pgs_hook(bool success)
|
||||
{
|
||||
for (auto pool_item: st_cli.pool_config)
|
||||
for (auto & pool_item: st_cli.pool_config)
|
||||
{
|
||||
pg_counts[pool_item.first] = pool_item.second.real_pg_count;
|
||||
}
|
||||
@@ -583,10 +584,15 @@ void cluster_client_t::on_load_pgs_hook(bool success)
|
||||
|
||||
void cluster_client_t::on_change_pool_config_hook()
|
||||
{
|
||||
for (auto pool_item: st_cli.pool_config)
|
||||
for (auto & pool_item: st_cli.pool_config)
|
||||
{
|
||||
if (pg_counts[pool_item.first] != pool_item.second.real_pg_count)
|
||||
{
|
||||
if (log_level > 2 && pg_counts[pool_item.first])
|
||||
{
|
||||
printf("Pool %u (%s) PG count changed from %lu to %lu\n", pool_item.first, pool_item.second.name.c_str(),
|
||||
pg_counts[pool_item.first], pool_item.second.real_pg_count);
|
||||
}
|
||||
// At this point, all pool operations should have been suspended
|
||||
// And now they have to be resliced!
|
||||
for (auto op = op_queue_head; op; op = op->next)
|
||||
|
@@ -31,7 +31,7 @@ struct cluster_op_part_t
|
||||
osd_op_t op;
|
||||
};
|
||||
|
||||
struct cluster_op_t
|
||||
struct __attribute__((visibility("default"))) cluster_op_t
|
||||
{
|
||||
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP, OSD_OP_READ_CHAIN_BITMAP
|
||||
uint64_t inode;
|
||||
@@ -81,7 +81,7 @@ struct inode_list_pg_t;
|
||||
class writeback_cache_t;
|
||||
|
||||
// FIXME: Split into public and private interfaces
|
||||
class cluster_client_t
|
||||
class __attribute__((visibility("default"))) cluster_client_t
|
||||
{
|
||||
#ifdef __MOCK__
|
||||
public:
|
||||
|
@@ -1056,6 +1056,10 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
}
|
||||
pool_item.second.real_pg_count = n;
|
||||
}
|
||||
if (on_change_pg_config_hook)
|
||||
{
|
||||
on_change_pg_config_hook();
|
||||
}
|
||||
}
|
||||
else if (key.substr(0, etcd_prefix.length()+12) == etcd_prefix+"/pg/history/")
|
||||
{
|
||||
@@ -1182,9 +1186,7 @@ void etcd_state_client_t::parse_state(const etcd_kv_t & kv)
|
||||
osd_num_t peer_osd = std::stoull(key.substr(etcd_prefix.length()+11));
|
||||
if (peer_osd > 0)
|
||||
{
|
||||
if (value.is_object() && value["state"] == "up" &&
|
||||
value["addresses"].is_array() &&
|
||||
value["port"].int64_value() > 0 && value["port"].int64_value() < 65536)
|
||||
if (value.is_object() && value["state"] == "up")
|
||||
{
|
||||
this->peer_states[peer_osd] = value;
|
||||
this->seen_peers.insert(peer_osd);
|
||||
|
@@ -92,7 +92,7 @@ struct inode_watch_t
|
||||
|
||||
struct http_co_t;
|
||||
|
||||
struct etcd_state_client_t
|
||||
struct __attribute__((visibility("default"))) etcd_state_client_t
|
||||
{
|
||||
protected:
|
||||
std::vector<std::string> local_ips;
|
||||
@@ -142,6 +142,7 @@ public:
|
||||
std::function<json11::Json()> load_pgs_checks_hook;
|
||||
std::function<void(bool)> on_load_pgs_hook;
|
||||
std::function<void()> on_change_pool_config_hook;
|
||||
std::function<void()> on_change_pg_config_hook;
|
||||
std::function<void(pool_id_t)> on_change_backfillfull_hook;
|
||||
std::function<void(pool_id_t, pg_num_t, osd_num_t)> on_change_pg_state_hook;
|
||||
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
|
||||
|
@@ -525,7 +525,7 @@ static int sec_invalidate(struct thread_data *td, struct fio_file *f)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ioengine_ops ioengine = {
|
||||
struct ioengine_ops __attribute__((visibility("default"))) ioengine = {
|
||||
.name = "vitastor_cluster",
|
||||
.version = FIO_IOOPS_VERSION,
|
||||
.flags = FIO_MEMALIGN | FIO_DISKLESSIO | FIO_NOEXTEND,
|
||||
|
@@ -437,7 +437,7 @@ static int sec_invalidate(struct thread_data *td, struct fio_file *f)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct ioengine_ops ioengine = {
|
||||
struct ioengine_ops __attribute__((visibility("default"))) ioengine = {
|
||||
.name = "vitastor_secondary_osd",
|
||||
.version = FIO_IOOPS_VERSION,
|
||||
.flags = FIO_MEMALIGN | FIO_DISKLESSIO | FIO_NOEXTEND,
|
||||
|
@@ -70,7 +70,7 @@ void msgr_iothread_t::add_to_ringloop(ring_loop_t *outer_loop)
|
||||
assert(sqe != NULL);
|
||||
this->outer_loop = outer_loop;
|
||||
this->outer_loop_data = ((ring_data_t*)sqe->user_data);
|
||||
my_uring_prep_poll_add(sqe, eventfd, POLLIN);
|
||||
io_uring_prep_poll_add(sqe, eventfd, POLLIN);
|
||||
outer_loop_data->callback = [this](ring_data_t *data)
|
||||
{
|
||||
if (data->res < 0)
|
||||
|
@@ -174,7 +174,7 @@ struct osd_messenger_t;
|
||||
struct rdmacm_connecting_t;
|
||||
#endif
|
||||
|
||||
struct osd_messenger_t
|
||||
struct __attribute__((visibility("default"))) osd_messenger_t
|
||||
{
|
||||
protected:
|
||||
int keepalive_timer_id = -1;
|
||||
|
@@ -152,7 +152,7 @@ struct blockstore_op_t;
|
||||
|
||||
struct osd_primary_op_data_t;
|
||||
|
||||
struct osd_op_t
|
||||
struct __attribute__((visibility("default"))) osd_op_t
|
||||
{
|
||||
timespec tv_begin = { 0 }, tv_end = { 0 };
|
||||
uint64_t op_type = OSD_OP_IN;
|
||||
|
@@ -515,7 +515,7 @@ void osd_messenger_t::rdmacm_established(rdma_cm_event *ev)
|
||||
cl->peer_fd = conn->peer_fd;
|
||||
cl->peer_state = PEER_RDMA;
|
||||
cl->connect_timeout_id = -1;
|
||||
cl->in_osd_num = peer_osd;
|
||||
cl->osd_num = peer_osd;
|
||||
cl->in_buf = malloc_or_die(receive_buffer_size);
|
||||
cl->rdma_conn = rc;
|
||||
clients[conn->peer_fd] = cl;
|
||||
|
@@ -48,7 +48,7 @@ void osd_messenger_t::read_requests()
|
||||
}
|
||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||
data->callback = [this, cl](ring_data_t *data) { handle_read(data->res, cl); };
|
||||
my_uring_prep_recvmsg(sqe, peer_fd, &cl->read_msg, 0);
|
||||
io_uring_prep_recvmsg(sqe, peer_fd, &cl->read_msg, 0);
|
||||
if (iothread)
|
||||
{
|
||||
iothread->add_sqe(sqe_local);
|
||||
|
@@ -218,11 +218,11 @@ bool osd_messenger_t::try_send(osd_client_t *cl)
|
||||
}
|
||||
if (use_zc)
|
||||
{
|
||||
my_uring_prep_sendmsg_zc(sqe, peer_fd, &cl->write_msg, MSG_WAITALL);
|
||||
io_uring_prep_sendmsg_zc(sqe, peer_fd, &cl->write_msg, MSG_WAITALL);
|
||||
}
|
||||
else
|
||||
{
|
||||
my_uring_prep_sendmsg(sqe, peer_fd, &cl->write_msg, MSG_WAITALL);
|
||||
io_uring_prep_sendmsg(sqe, peer_fd, &cl->write_msg, MSG_WAITALL);
|
||||
}
|
||||
if (iothread)
|
||||
{
|
||||
|
@@ -1008,7 +1008,7 @@ protected:
|
||||
data->callback = [this](ring_data_t *data) { handle_send(data->res); };
|
||||
send_msg.msg_iov = send_list.data();
|
||||
send_msg.msg_iovlen = send_list.size();
|
||||
my_uring_prep_sendmsg(sqe, nbd_fd, &send_msg, MSG_ZEROCOPY);
|
||||
io_uring_prep_sendmsg(sqe, nbd_fd, &send_msg, MSG_ZEROCOPY);
|
||||
}
|
||||
|
||||
void handle_send(int result)
|
||||
@@ -1076,7 +1076,7 @@ protected:
|
||||
}
|
||||
read_msg.msg_iov = &read_iov;
|
||||
read_msg.msg_iovlen = 1;
|
||||
my_uring_prep_recvmsg(sqe, nbd_fd, &read_msg, 0);
|
||||
io_uring_prep_recvmsg(sqe, nbd_fd, &read_msg, 0);
|
||||
}
|
||||
|
||||
void handle_read(int result)
|
||||
|
@@ -3,6 +3,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#pragma GCC visibility push(default)
|
||||
|
||||
// Placement group states
|
||||
// STARTING -> [acquire lock] -> PEERING -> INCOMPLETE|ACTIVE
|
||||
// ACTIVE -> REPEERING -> PEERING
|
||||
@@ -50,3 +52,5 @@ extern const int pg_state_bit_count;
|
||||
extern const int object_state_bits[];
|
||||
extern const char *object_state_names[];
|
||||
extern const int object_state_bit_count;
|
||||
|
||||
#pragma GCC visibility pop
|
||||
|
@@ -5,7 +5,6 @@
|
||||
|
||||
#ifdef VITASTOR_SOURCE_TREE
|
||||
#define BUILD_DSO
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/main-loop.h"
|
||||
@@ -14,27 +13,42 @@
|
||||
#endif
|
||||
#include "block/block_int.h"
|
||||
#include "qapi/error.h"
|
||||
#include "qapi/qmp/qdict.h"
|
||||
#include "qapi/qmp/qerror.h"
|
||||
#include "qemu/error-report.h"
|
||||
#include "qemu/module.h"
|
||||
#include "qemu/option.h"
|
||||
|
||||
#if QEMU_VERSION_MAJOR >= 3
|
||||
#include "qemu/units.h"
|
||||
#include "block/qdict.h"
|
||||
#include "qemu/cutils.h"
|
||||
#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10
|
||||
#include "qemu/cutils.h"
|
||||
#if QEMU_VERSION_MAJOR >= 10
|
||||
#include "qobject/qstring.h"
|
||||
#include "qobject/qdict.h"
|
||||
#include "qobject/qobject.h"
|
||||
#include "qobject/qjson.h"
|
||||
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10
|
||||
#include "qapi/qmp/qstring.h"
|
||||
#include "qapi/qmp/qdict.h"
|
||||
#include "qapi/qmp/qobject.h"
|
||||
#include "qapi/qmp/qjson.h"
|
||||
#else
|
||||
#include "qapi/qmp/qstring.h"
|
||||
#include "qapi/qmp/qint.h"
|
||||
#include "qapi/qmp/qdict.h"
|
||||
#include "qapi/qmp/qobject.h"
|
||||
#include "qapi/qmp/qjson.h"
|
||||
#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val)))
|
||||
#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
|
||||
#define qobject_unref QDECREF
|
||||
#endif
|
||||
#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
|
||||
|
||||
// parse_uint_full()
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 6
|
||||
#include "qemu/cutils.h"
|
||||
#else
|
||||
#include "qemu-common.h"
|
||||
#endif
|
||||
|
||||
#if QEMU_VERSION_MAJOR >= 10
|
||||
#include "system/replay.h"
|
||||
#elif QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
|
||||
#include "sysemu/replay.h"
|
||||
#else
|
||||
#include "sysemu/sysemu.h"
|
||||
@@ -43,11 +57,11 @@
|
||||
#include "vitastor_c.h"
|
||||
|
||||
#ifdef VITASTOR_SOURCE_TREE
|
||||
void qemu_module_dummy(void)
|
||||
void __attribute__((visibility("default"))) qemu_module_dummy(void)
|
||||
{
|
||||
}
|
||||
|
||||
void DSO_STAMP_FUN(void)
|
||||
void __attribute__((visibility("default"))) DSO_STAMP_FUN(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
@@ -58,12 +72,14 @@ typedef struct VitastorClient
|
||||
{
|
||||
void *proxy;
|
||||
int uring_eventfd;
|
||||
int auto_loop;
|
||||
|
||||
void *watch;
|
||||
char *config_path;
|
||||
char *etcd_host;
|
||||
char *etcd_prefix;
|
||||
char *image;
|
||||
char *file_mirror_path;
|
||||
int skip_parents;
|
||||
uint64_t inode;
|
||||
uint64_t pool;
|
||||
@@ -155,16 +171,19 @@ static void qemu_vitastor_unescape(char *src)
|
||||
// vitastor[:key=value]*
|
||||
// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
|
||||
// vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg
|
||||
// vitastor://?config_path=/etc/vitastor/vitastor.conf&image=testimg
|
||||
static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
|
||||
{
|
||||
const char *start;
|
||||
char *p, *buf;
|
||||
|
||||
if (!strstart(filename, "vitastor:", &start))
|
||||
int url_style = strstart(filename, "vitastor://?", &start);
|
||||
if (!url_style && !strstart(filename, "vitastor:", &start))
|
||||
{
|
||||
error_setg(errp, "File name must start with 'vitastor:'");
|
||||
return;
|
||||
}
|
||||
char delim = url_style ? '&' : ':';
|
||||
|
||||
buf = g_strdup(start);
|
||||
p = buf;
|
||||
@@ -184,7 +203,7 @@ static void vitastor_parse_filename(const char *filename, QDict *options, Error
|
||||
if (name[i] == '_')
|
||||
name[i] = '-';
|
||||
qemu_vitastor_unescape(name);
|
||||
value = qemu_vitastor_next_tok(p, ':', &p);
|
||||
value = qemu_vitastor_next_tok(p, delim, &p);
|
||||
qemu_vitastor_unescape(value);
|
||||
if (!strcmp(name, "inode") ||
|
||||
!strcmp(name, "pool") ||
|
||||
@@ -294,9 +313,8 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
|
||||
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
|
||||
vitastor_schedule_uring_handler(client);
|
||||
#endif
|
||||
if (!client->auto_loop)
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task->complete)
|
||||
@@ -429,6 +447,74 @@ static void strarray_free(str_array *a)
|
||||
}
|
||||
#endif
|
||||
|
||||
// Read the JSON config file at client->config_path and, if it parses to an
// object, copy its "qemu_file_mirror_path" string (if any) into
// client->file_mirror_path. This is best effort: every failure is reported
// to stderr and the function simply returns without touching the client.
static void check_config(VitastorClient *client)
{
    size_t bufsize = 64*1024, maxsize = 1024*1024, done = 0;
    ssize_t r = 0;
    char *data = NULL, *new_data = NULL;
    QObject *obj = NULL;
    QDict *qd = NULL;
    int fd = open(client->config_path, O_RDONLY);
    if (fd < 0)
    {
        fprintf(stderr, "vitastor: failed to read %s: %s\n", client->config_path, strerror(errno));
        return;
    }
    data = (char*)malloc(bufsize);
    if (!data)
    {
        fprintf(stderr, "vitastor: memory allocation failed\n");
        goto end_free;
    }
    while (1)
    {
        // Always keep one spare byte for the terminating NUL
        if (bufsize <= done+1)
        {
            if (bufsize >= maxsize)
            {
                fprintf(stderr, "vitastor: config file exceeds %zu bytes\n", maxsize);
                goto end_free;
            }
            bufsize *= 2;
            // Keep the old pointer until realloc succeeds so it can still be freed
            new_data = (char*)realloc(data, bufsize);
            if (!new_data)
            {
                fprintf(stderr, "vitastor: memory allocation failed\n");
                goto end_free;
            }
            data = new_data;
        }
        r = read(fd, data+done, bufsize-done-1);
        if (r < 0)
        {
            if (errno == EAGAIN || errno == EINTR)
                continue;
            fprintf(stderr, "vitastor: failed to read %s: %s\n", client->config_path, strerror(errno));
            // Do not try to parse a partially read file
            goto end_free;
        }
        if (!r)
        {
            break;
        }
        done += r;
    }
    // qobject_from_json() takes a C string, so the buffer must be terminated
    data[done] = '\0';
    obj = qobject_from_json(data
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
        , NULL
#endif
    );
    qd = qobject_to(QDict, obj);
    if (qd)
    {
        client->file_mirror_path = g_strdup(qdict_get_try_str(qd, "qemu_file_mirror_path"));
    }
end_free:
    if (obj)
        qobject_unref(obj);
    free(data);
    close(fd);
}
|
||||
|
||||
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||
{
|
||||
VitastorRPC task;
|
||||
@@ -447,8 +533,15 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
||||
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
||||
client->ctx = bdrv_get_aio_context(bs);
|
||||
if (client->config_path && strlen(client->config_path))
|
||||
{
|
||||
check_config(client);
|
||||
}
|
||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
str_array opt = {};
|
||||
char version_buffer[32];
|
||||
memset(version_buffer, ' ', sizeof(version_buffer));
|
||||
version_buffer[sizeof(version_buffer)-1] = '\0';
|
||||
strarray_push_kv(&opt, "config_path", qdict_get_try_str(options, "config-path"));
|
||||
strarray_push_kv(&opt, "etcd_address", qdict_get_try_str(options, "etcd-host"));
|
||||
strarray_push_kv(&opt, "etcd_prefix", qdict_get_try_str(options, "etcd-prefix"));
|
||||
@@ -458,7 +551,9 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||
strarray_push_kv(&opt, "rdma_gid_index", qdict_get_try_str(options, "rdma-gid-index"));
|
||||
strarray_push_kv(&opt, "rdma_mtu", qdict_get_try_str(options, "rdma-mtu"));
|
||||
strarray_push_kv(&opt, "client_writeback_allowed", (flags & BDRV_O_NOCACHE) ? "0" : "1");
|
||||
strarray_push_kv(&opt, "__version_check_buffer", version_buffer);
|
||||
client->proxy = vitastor_c_create_uring_json(opt.items, opt.len);
|
||||
client->auto_loop = 0;
|
||||
strarray_free(&opt);
|
||||
if (client->proxy)
|
||||
{
|
||||
@@ -471,6 +566,12 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||
return -1;
|
||||
}
|
||||
universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
|
||||
int vitastor_version[3] = { 0 };
|
||||
if (sscanf(version_buffer, "%d.%d.%d", &vitastor_version[0], &vitastor_version[1], &vitastor_version[2]) == 3 &&
|
||||
vitastor_version[0] >= 2)
|
||||
{
|
||||
client->auto_loop = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -604,6 +705,34 @@ static void vitastor_close(BlockDriverState *bs)
|
||||
client->last_bitmap = NULL;
|
||||
}
|
||||
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 2
|
||||
// Fill bs->exact_filename for this block device.
// When both an image name and a file mirror path are known, the result is
// simply "<file_mirror_path><image>". Otherwise a "vitastor://?k=v&k=v" URL
// is assembled from the image (or pool/inode/size) and the connection options.
// Each piece is appended only while there is still room in the buffer.
static void vitastor_refresh_filename(BlockDriverState *bs)
{
    VitastorClient *client = bs->opaque;
    char *out = bs->exact_filename;
    const size_t cap = sizeof(bs->exact_filename);
    size_t used = 0;
    // Number of parameters already written: the first is prefixed with '?', the rest with '&'
    int nparams = 0;

    if (client->image && client->file_mirror_path)
    {
        snprintf(out, cap, "%s%s", client->file_mirror_path, client->image);
        return;
    }

    used = snprintf(out, cap, "vitastor://");
    if (used < cap && client->image)
    {
        used += snprintf(out+used, cap-used, "%cimage=%s", (nparams++ ? '&' : '?'), client->image);
    }
    else if (used < cap)
    {
        used += snprintf(out+used, cap-used, "%cpool=%ju&inode=%ju&size=%ju", (nparams++ ? '&' : '?'), client->pool, client->inode, client->size);
    }
    if (client->config_path && used < cap)
    {
        used += snprintf(out+used, cap-used, "%cconfig_path=%s", (nparams++ ? '&' : '?'), client->config_path);
    }
    if (client->etcd_host && used < cap)
    {
        used += snprintf(out+used, cap-used, "%cetcd_host=%s", (nparams++ ? '&' : '?'), client->etcd_host);
    }
    if (client->etcd_prefix && used < cap)
    {
        used += snprintf(out+used, cap-used, "%cetcd_prefix=%s", (nparams++ ? '&' : '?'), client->etcd_prefix);
    }
}
|
||||
#endif
|
||||
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
||||
static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
|
||||
{
|
||||
@@ -767,9 +896,8 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
|
||||
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
|
||||
vitastor_schedule_uring_handler(client);
|
||||
#endif
|
||||
if (!client->auto_loop)
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task.complete)
|
||||
@@ -803,9 +931,8 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
|
||||
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
|
||||
vitastor_schedule_uring_handler(client);
|
||||
#endif
|
||||
if (!client->auto_loop)
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task.complete)
|
||||
@@ -885,9 +1012,8 @@ static int coroutine_fn vitastor_co_block_status(
|
||||
task.bitmap = client->last_bitmap = NULL;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
||||
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
|
||||
vitastor_schedule_uring_handler(client);
|
||||
#endif
|
||||
if (!client->auto_loop)
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
while (!task.complete)
|
||||
{
|
||||
@@ -974,9 +1100,8 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
|
||||
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
|
||||
#if !defined VITASTOR_C_API_VERSION || VITASTOR_C_API_VERSION < 5
|
||||
vitastor_schedule_uring_handler(client);
|
||||
#endif
|
||||
if (!client->auto_loop)
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task.complete)
|
||||
@@ -1038,6 +1163,9 @@ static BlockDriver bdrv_vitastor = {
|
||||
.bdrv_get_info = vitastor_get_info,
|
||||
.bdrv_getlength = vitastor_getlength,
|
||||
#endif
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 2
|
||||
.bdrv_refresh_filename = vitastor_refresh_filename,
|
||||
#endif
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
||||
.bdrv_probe_blocksizes = vitastor_probe_blocksizes,
|
||||
#endif
|
||||
|
900
src/client/ublk_server.cpp
Normal file
900
src/client/ublk_server.cpp
Normal file
@@ -0,0 +1,900 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
// ublk-based Vitastor block device in userspace
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <signal.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/un.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../liburing/include/ublk_cmd.h"
|
||||
#include "cluster_client.h"
|
||||
#include "epoll_manager.h"
|
||||
#include "str_util.h"
|
||||
|
||||
const char *exe_name = NULL;
|
||||
|
||||
const char *help_text =
|
||||
"Vitastor ublk server " VITASTOR_VERSION "\n"
|
||||
"(c) Vitaliy Filippov, 2025+ (VNPL-1.1)\n"
|
||||
"\n"
|
||||
"COMMANDS:\n"
|
||||
"\n"
|
||||
"vitastor-ublk map [OPTIONS] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
|
||||
" Map a ublk device. Options:\n"
|
||||
" --recover\n"
|
||||
" Recover a mapped device if the previous ublk server is dead.\n"
|
||||
" --queue_depth 256\n"
|
||||
" Maximum queue size for the device.\n"
|
||||
" --max_io_size 1M\n"
|
||||
" Maximum single I/O size for the device. Default: max(1 MB, pool block size * EC part count).\n"
|
||||
" --readonly\n"
|
||||
" Make the device read-only.\n"
|
||||
" --hdd\n"
|
||||
" Mark the device as rotational.\n"
|
||||
" --logfile /path/to/log/file.txt\n"
|
||||
" Write log messages to the specified file instead of dropping them (in background mode)\n"
|
||||
" or printing them to the standard output (in foreground mode).\n"
|
||||
" --dev_num N\n"
|
||||
" Use the specified device /dev/ublkbN instead of automatic selection (alternative syntax\n"
|
||||
" to /dev/ublkbN positional parameter).\n"
|
||||
" --foreground 1\n"
|
||||
" Stay in foreground, do not daemonize.\n"
|
||||
"\n"
|
||||
"vitastor-ublk unmap [--force] /dev/ublkb<N>\n"
|
||||
" Unmap a Vitastor ublk device. Do not check if it's actually mapped if --force is specified.\n"
|
||||
"\n"
|
||||
"vitastor-ublk ls [--json]\n"
|
||||
" List mapped Vitastor ublk devices, optionally in JSON format.\n"
|
||||
"\n"
|
||||
"Use vitastor-ublk --help <command> for command details or vitastor-ublk --help --all for all details.\n"
|
||||
"\n"
|
||||
"All usual Vitastor config options like --config_path <path_to_config> may also be specified in CLI.\n"
|
||||
;
|
||||
|
||||
class ublk_server
|
||||
{
|
||||
protected:
|
||||
std::string image_name;
|
||||
uint64_t inode = 0;
|
||||
uint64_t device_size = 0;
|
||||
int req_dev_num = -1;
|
||||
bool readonly = false;
|
||||
bool hdd = false;
|
||||
bool recover = false;
|
||||
uint16_t queue_depth = 256;
|
||||
uint32_t max_io_size = 0;
|
||||
|
||||
ring_loop_t *ringloop = NULL;
|
||||
epoll_manager_t *epmgr = NULL;
|
||||
cluster_client_t *cli = NULL;
|
||||
inode_watch_t *watch = NULL;
|
||||
|
||||
std::string logfile = "/dev/null";
|
||||
|
||||
public:
|
||||
ublk_server()
|
||||
{
|
||||
ringloop = new ring_loop_t(RINGLOOP_DEFAULT_SIZE, false, true);
|
||||
}
|
||||
|
||||
// Release what the server still holds: the control and character device
// descriptors, the per-tag I/O buffers and the ring loop.
~ublk_server()
{
    if (ctrl_fd >= 0)
        close(ctrl_fd);
    ctrl_fd = -1;
    if (cdev_fd >= 0)
        close(cdev_fd);
    cdev_fd = -1;
    for (size_t idx = 0; idx < buffers.size(); idx++)
        free(buffers[idx]);
    buffers.clear();
    // delete on a null pointer is a no-op
    delete ringloop;
    ringloop = NULL;
}
|
||||
|
||||
// Convert a command line into a JSON object.
// - "-h" / "--help" set cfg["help"] = 1.
// - "--name" options take the next argument as their value, except for the
//   boolean switches (json, all, readonly, hdd, recover, force) and a trailing
//   option without a value, which are stored as "1".
// - The first positional argument becomes cfg["command"], the second becomes
//   cfg["dev_num"] (a number when given as /dev/ublkbN or /dev/ublkcN, the raw
//   string otherwise). Further positional arguments are ignored.
static json11::Json::object parse_args(int narg, const char *args[])
{
    json11::Json::object cfg;
    int pos = 0;
    for (int i = 1; i < narg; i++)
    {
        const char *arg = args[i];
        if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
        {
            cfg["help"] = 1;
            continue;
        }
        if (arg[0] == '-' && arg[1] == '-')
        {
            const char *opt = arg+2;
            const bool is_switch = !strcmp(opt, "json") || !strcmp(opt, "all") ||
                !strcmp(opt, "readonly") || !strcmp(opt, "hdd") || !strcmp(opt, "recover") ||
                !strcmp(opt, "force");
            if (is_switch || i == narg-1)
                cfg[opt] = "1";
            else
                cfg[opt] = args[++i];
            continue;
        }
        if (pos == 0)
        {
            cfg["command"] = arg;
            pos++;
        }
        else if (pos == 1)
        {
            // The trailing %c makes sscanf return 2 when junk follows the number
            char c = 0;
            int n = 0;
            if (sscanf(arg, "/dev/ublkb%d%c", &n, &c) == 1 ||
                sscanf(arg, "/dev/ublkc%d%c", &n, &c) == 1)
            {
                cfg["dev_num"] = n;
            }
            else
            {
                cfg["dev_num"] = arg;
            }
            pos++;
        }
    }
    return cfg;
}
|
||||
|
||||
// Dispatch the parsed command line: "map", "unmap" or "ls"/"list"/"list-mapped".
// Help is printed (and the process exits with 0) when help was requested
// or the command is not recognised.
void exec(json11::Json cfg)
{
    if (!cfg["help"].bool_value())
    {
        if (cfg["command"] == "map")
        {
            start(cfg);
            return;
        }
        if (cfg["command"] == "unmap")
        {
            // A device is considered specified when dev_num is a number,
            // the literal string "0", or anything with a non-zero numeric value
            const bool have_dev = cfg["dev_num"].is_number() ||
                cfg["dev_num"].string_value() == "0" ||
                cfg["dev_num"].uint64_value();
            if (!have_dev)
            {
                fprintf(stderr, "device name or number is missing\n");
                exit(1);
            }
            open_control();
            unmap_device(cfg["dev_num"].uint64_value(), cfg["unpriv"].bool_value(), cfg["wait"].bool_value());
            return;
        }
        if (cfg["command"] == "ls" || cfg["command"] == "list" || cfg["command"] == "list-mapped")
        {
            auto mapped = list_mapped();
            print_mapped(mapped, !cfg["json"].is_null());
            return;
        }
    }
    print_help(help_text, "vitastor-ublk", cfg["command"].string_value(), cfg["all"].bool_value());
    exit(0);
}
|
||||
|
||||
// Implementation of the "map" command: validate options, connect to the
// cluster, create (or recover) the ublk device, optionally daemonize, then
// serve I/O until `stop` is set. On exit a final SYNC is sent and waited for
// before the client is destroyed.
void start(json11::Json cfg)
{
    // Check options
    if (cfg["dev_num"].string_value() != "" || cfg["dev_num"].is_number())
    {
        req_dev_num = cfg["dev_num"].uint64_value();
    }
    if (cfg["image"].string_value() != "")
    {
        // Use image name
        image_name = cfg["image"].string_value();
        inode = 0;
    }
    else
    {
        // Use pool, inode number and size
        device_size = cfg["size"].is_string()
            ? parse_size(cfg["size"].string_value())
            : cfg["size"].uint64_value();
        if (!device_size)
        {
            fprintf(stderr, "device size is missing\n");
            exit(1);
        }
        inode = cfg["inode"].uint64_value();
        uint64_t pool = cfg["pool"].uint64_value();
        if (pool)
        {
            inode = INODE_WITH_POOL(pool, inode);
        }
        if (!INODE_POOL(inode))
        {
            fprintf(stderr, "pool is missing\n");
            exit(1);
        }
    }
    if (cfg["client_writeback_allowed"].is_null())
    {
        // ublk is always aware of fsync, so we allow write-back cache
        // by default if it's enabled
        auto obj = cfg.object_items();
        obj["client_writeback_allowed"] = true;
        cfg = obj;
    }
    readonly = cfg["readonly"].bool_value();
    hdd = cfg["hdd"].bool_value();
    recover = cfg["recover"].bool_value();
    if (recover && req_dev_num < 0)
    {
        fprintf(stderr, "device is missing\n");
        exit(1);
    }

    // Create client
    epmgr = new epoll_manager_t(ringloop);
    cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);

    // cli->config contains merged config
    // Command line values take precedence over ublk_* values from the config
    if (!cfg["queue_depth"].is_null())
    {
        queue_depth = cfg["queue_depth"].uint64_value();
    }
    else if (cli->config.find("ublk_queue_depth") != cli->config.end())
    {
        queue_depth = cli->config["ublk_queue_depth"].uint64_value();
    }
    if (!cfg["max_io_size"].is_null())
    {
        max_io_size = parse_size(cfg["max_io_size"].string_value());
    }
    else if (cli->config.find("ublk_max_io_size") != cli->config.end())
    {
        max_io_size = cli->config["ublk_max_io_size"].is_string()
            ? parse_size(cli->config["ublk_max_io_size"].string_value())
            : cli->config["ublk_max_io_size"].uint64_value();
    }

    // Load image metadata
    while (!cli->is_ready())
    {
        ringloop->loop();
        if (cli->is_ready())
            break;
        ringloop->wait();
    }
    if (!inode)
    {
        watch = cli->st_cli.watch_inode(image_name);
        device_size = watch->cfg.size;
        if (!watch->cfg.num || !device_size)
        {
            // Image does not exist
            fprintf(stderr, "Image %s does not exist\n", image_name.c_str());
            exit(1);
        }
    }
    // NOTE(review): `inode` is 0 here when mapping by image name, and the result of
    // get_immediate_commit() is used directly as "volatile cache" - confirm both are intended
    const bool writeback = cli->get_immediate_commit(inode);
    auto pool_it = cli->st_cli.pool_config.find(INODE_POOL(inode ? inode : watch->cfg.num));
    if (pool_it == cli->st_cli.pool_config.end())
    {
        fprintf(stderr, "Pool %u does not exist\n", INODE_POOL(inode ? inode : watch->cfg.num));
        exit(1);
    }
    auto & pool_cfg = pool_it->second;
    uint32_t pg_data_size = pool_cfg.data_block_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED
        ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
    // Zero (unset) also passes this check
    if (max_io_size & (max_io_size-1))
    {
        fprintf(stderr, "max_io_size must be a power of 2\n");
        exit(1);
    }
    // Default buffer size is max(1 MB, pg_data_size)
    uint32_t buf_size = max_io_size ? max_io_size : (1024*1024 < pg_data_size ? pg_data_size : 1024*1024);
    uint32_t bitmap_granularity = pool_cfg.bitmap_granularity;

    load_module();

    // Run in background unless --foreground is given
    bool bg = cfg["foreground"].is_null();
    if (cfg["logfile"].string_value() != "")
    {
        logfile = cfg["logfile"].string_value();
    }

    open_control();
    if (recover)
    {
        recover_device(req_dev_num);
    }
    else
    {
        add_device(
            req_dev_num,
            (writeback ? UBLK_ATTR_VOLATILE_CACHE : 0) |
            (readonly ? UBLK_ATTR_READ_ONLY : 0) | (hdd ? UBLK_ATTR_ROTATIONAL : 0),
            queue_depth, bitmap_granularity, buf_size, pg_data_size, device_size
        );
    }
    // The socket pair lets the daemonized process report its startup status
    // to the original foreground process
    int notifyfd[2] = { -1, -1 };
    if (bg)
    {
        if (socketpair(AF_UNIX, SOCK_STREAM, 0, notifyfd) < 0)
        {
            perror("socketpair");
            exit(1);
        }
        daemonize_fork(notifyfd);
        close(notifyfd[0]);
    }
    start_device(recover);
    if (bg)
    {
        daemonize_reopen_stdio();
        int ok = 0;
        write(notifyfd[1], &ok, sizeof(ok));
        close(notifyfd[1]);
    }
    else
        printf("/dev/ublkb%d\n", ublk_dev.dev_id);
    stop = false;
    while (!stop)
    {
        ringloop->loop();
        ringloop->wait();
    }
    // `stop` is true at this point, so it has to be reset - otherwise the loop
    // below would be skipped and the client deleted with the SYNC still in flight
    stop = false;
    cluster_op_t *close_sync = new cluster_op_t;
    close_sync->opcode = OSD_OP_SYNC;
    close_sync->callback = [this](cluster_op_t *op)
    {
        stop = true;
        delete op;
    };
    cli->execute(close_sync);
    while (!stop)
    {
        ringloop->loop();
        ringloop->wait();
    }
    cli->flush();
    delete cli;
    delete epmgr;
    cli = NULL;
    epmgr = NULL;
}
|
||||
|
||||
// Make sure the ublk_drv kernel module is loaded: if /sys/module/ublk_drv is
// absent, run modprobe and exit with status 1 when that fails.
void load_module()
{
    if (access("/sys/module/ublk_drv", F_OK) != 0)
    {
        const int rc = system("modprobe ublk_drv");
        if (rc == 0)
            return;
        // A negative result means system() itself failed, so errno is meaningful
        if (rc < 0)
            perror("Failed to load ublk_drv kernel module");
        else
            fprintf(stderr, "Failed to load ublk_drv kernel module\n");
        exit(1);
    }
}
|
||||
|
||||
void daemonize_fork(int *notifyfd)
|
||||
{
|
||||
if (fork())
|
||||
{
|
||||
// Parent - check status
|
||||
close(notifyfd[1]);
|
||||
int child_errno = 1;
|
||||
read(notifyfd[0], &child_errno, sizeof(child_errno));
|
||||
if (!child_errno)
|
||||
printf("/dev/ublkb%d\n", ublk_dev.dev_id);
|
||||
exit(child_errno);
|
||||
}
|
||||
setsid();
|
||||
if (fork())
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void daemonize_reopen_stdio()
|
||||
{
|
||||
close(0);
|
||||
close(1);
|
||||
close(2);
|
||||
open("/dev/null", O_RDONLY);
|
||||
open(logfile.c_str(), O_WRONLY|O_APPEND|O_CREAT, 0666);
|
||||
open(logfile.c_str(), O_WRONLY|O_APPEND|O_CREAT, 0666);
|
||||
if (chdir("/") != 0)
|
||||
fprintf(stderr, "Warning: Failed to chdir into /\n");
|
||||
}
|
||||
|
||||
// Build a JSON object describing ublk devices served by this program.
// Keys are "/dev/ublkbN". A device whose server process has no readable
// /proc/<pid>/cmdline is reported as {"dead": true}; a device whose server
// executable has the same file name as ours and was started with the "map"
// command is reported with its parsed options plus "pid".
// Device numbers are probed from 0 upwards until as many devices were found
// through the control interface as there are ublkc* entries in /dev.
json11::Json::object list_mapped()
{
    // Count ublk character devices present in /dev
    DIR *dev_dir = opendir("/dev");
    if (!dev_dir)
    {
        fprintf(stderr, "Failed to list /dev: %s (code %d)\n", strerror(errno), errno);
        exit(1);
    }
    int n_in_dev = 0;
    for (dirent *ent = readdir(dev_dir); ent != NULL; ent = readdir(dev_dir))
    {
        if (!strncmp(ent->d_name, "ublkc", strlen("ublkc")))
            n_in_dev++;
    }
    closedir(dev_dir);

    // File name part of our own executable path
    const char *self_filename = exe_name;
    for (const char *p = exe_name; *p != 0; p++)
    {
        if (*p == '/')
            self_filename = p+1;
    }

    json11::Json::object mapped;
    char path[64] = { 0 };
    int n_in_ctrl = 0;
    open_control();
    for (int dev_num = 0; ; dev_num++)
    {
        int res = get_dev_info(dev_num, false);
        if (res == -ENODEV)
        {
            if (n_in_ctrl >= n_in_dev)
                break;
            continue;
        }
        n_in_ctrl++;
        sprintf(path, "/proc/%d/cmdline", ublk_dev.ublksrv_pid);
        std::string cmdline = read_file(path);
        const std::string dev_path = "/dev/ublkb"+std::to_string(dev_num);
        if (cmdline == "")
        {
            // Process is dead
            mapped[dev_path] = json11::Json::object{{"dead", true}};
            continue;
        }
        // cmdline is a sequence of NUL-terminated arguments; collect pointers to each
        std::vector<const char*> argv;
        int arg_start = 0;
        for (int pos = 0; pos < cmdline.size(); pos++)
        {
            if (cmdline[pos] == 0)
            {
                argv.push_back(cmdline.c_str()+arg_start);
                arg_start = pos+1;
            }
        }
        if (argv.empty())
            continue;
        // File name part of the server's executable path
        const char *pid_filename = argv[0];
        for (const char *p = argv[0]; *p != 0; p++)
        {
            if (*p == '/')
                pid_filename = p+1;
        }
        if (strcmp(pid_filename, self_filename) != 0)
            continue;
        json11::Json::object cfg = ublk_server::parse_args(argv.size(), argv.data());
        if (cfg["command"] == "map")
        {
            cfg.erase("command");
            cfg["pid"] = ublk_dev.ublksrv_pid;
            mapped[dev_path] = cfg;
        }
    }
    return mapped;
}
|
||||
|
||||
// Print the device list: either as one JSON document, or as plain text with
// the device path followed by its "key: value" lines and an empty line.
void print_mapped(json11::Json mapped, bool json)
{
    if (json)
    {
        printf("%s\n", mapped.dump().c_str());
        return;
    }
    const auto & devices = mapped.object_items();
    for (auto dev_it = devices.begin(); dev_it != devices.end(); dev_it++)
    {
        printf("%s\n", dev_it->first.c_str());
        const auto & props = dev_it->second.object_items();
        for (auto prop_it = props.begin(); prop_it != props.end(); prop_it++)
        {
            printf("%s: %s\n", prop_it->first.c_str(), prop_it->second.as_string().c_str());
        }
        printf("\n");
    }
}
|
||||
|
||||
// Read a whole file into a string.
// Returns an empty string when the file does not exist; any other open()
// failure is fatal (message + exit(1)). A read error ends reading and returns
// what was read so far, except EINTR which is retried.
std::string read_file(char *path)
{
    int fd = open(path, O_RDONLY);
    if (fd < 0)
    {
        if (errno == ENOENT)
            return "";
        auto err = "open "+std::string(path);
        perror(err.c_str());
        exit(1);
    }
    std::string r;
    // Read through a separate buffer instead of writing into the string via c_str()
    char chunk[1024];
    while (true)
    {
        ssize_t rd = read(fd, chunk, sizeof(chunk));
        if (rd < 0 && errno == EINTR)
            continue;
        if (rd <= 0)
            break;
        r.append(chunk, (size_t)rd);
    }
    close(fd);
    return r;
}
|
||||
|
||||
protected:
|
||||
bool stop = false;
|
||||
bool new_opcodes = true;
|
||||
uint64_t ublk_features = 0;
|
||||
int max_wait_time_ms = 5000;
|
||||
int ctrl_fd = -1, cdev_fd = -1;
|
||||
ublksrv_ctrl_dev_info ublk_dev = {};
|
||||
ublksrv_io_desc *ublk_queue = NULL;
|
||||
std::vector<uint8_t*> buffers;
|
||||
|
||||
void open_control()
|
||||
{
|
||||
ctrl_fd = open("/dev/ublk-control", O_RDWR);
|
||||
if (ctrl_fd < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to open /dev/ublk-control: %s (code %d)\n", strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
// Check features
|
||||
int res = sync_ublk_cmd(UBLK_U_CMD_GET_FEATURES, &ublk_features, 8, 0);
|
||||
if (res == -EOPNOTSUPP)
|
||||
{
|
||||
new_opcodes = false;
|
||||
}
|
||||
else if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to get ublk features: %s (code %d)\n", strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void add_device(int32_t dev_num, uint32_t attrs, uint16_t queue_depth, uint32_t phys_block_size,
|
||||
uint32_t max_io_buf_bytes, uint64_t opt_block_size, uint64_t device_size)
|
||||
{
|
||||
// Add device
|
||||
ublk_dev.dev_id = dev_num;
|
||||
ublk_dev.nr_hw_queues = 1;
|
||||
ublk_dev.queue_depth = queue_depth;
|
||||
ublk_dev.max_io_buf_bytes = max_io_buf_bytes;
|
||||
ublk_dev.flags = UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
|
||||
int res = sync_ublk_cmd(new_opcodes ? UBLK_U_CMD_ADD_DEV : UBLK_CMD_ADD_DEV, &ublk_dev, sizeof(ublk_dev));
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to add ublk device: %s (code %d)\n", strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
// Wait until the device appears
|
||||
std::string ublkc_path = "/dev/ublkc"+std::to_string(ublk_dev.dev_id);
|
||||
int wait_time = 0;
|
||||
while (wait_time < max_wait_time_ms)
|
||||
{
|
||||
cdev_fd = open(ublkc_path.c_str(), O_RDWR);
|
||||
if (cdev_fd >= 0)
|
||||
break;
|
||||
else if (errno != ENOENT)
|
||||
{
|
||||
fprintf(stderr, "Failed to open %s: %s (code %d)", ublkc_path.c_str(), strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
usleep(100);
|
||||
wait_time += 100;
|
||||
}
|
||||
close(cdev_fd);
|
||||
cdev_fd = -1;
|
||||
// Set device params
|
||||
uint8_t io_opt_shift = 0;
|
||||
while ((opt_block_size >> io_opt_shift) > 1)
|
||||
{
|
||||
io_opt_shift++;
|
||||
}
|
||||
uint8_t phys_shift = 0;
|
||||
while ((phys_block_size >> phys_shift) > 1)
|
||||
{
|
||||
phys_shift++;
|
||||
}
|
||||
ublk_params params = {
|
||||
.len = sizeof(ublk_params),
|
||||
.types = UBLK_PARAM_TYPE_BASIC,
|
||||
.basic = {
|
||||
.attrs = attrs, // UBLK_ATTR_READ_ONLY | UBLK_ATTR_ROTATIONAL | UBLK_ATTR_VOLATILE_CACHE | UBLK_ATTR_FUA
|
||||
.logical_bs_shift = 9,
|
||||
.physical_bs_shift = phys_shift,
|
||||
.io_opt_shift = io_opt_shift,
|
||||
.io_min_shift = phys_shift,
|
||||
.max_sectors = max_io_buf_bytes / phys_block_size,
|
||||
.chunk_sectors = 0,
|
||||
.dev_sectors = device_size / phys_block_size,
|
||||
.virt_boundary_mask = 0,
|
||||
},
|
||||
.discard = {
|
||||
.discard_alignment = 0,
|
||||
.discard_granularity = 0,
|
||||
.max_discard_sectors = 0,
|
||||
.max_write_zeroes_sectors = 0,
|
||||
.max_discard_segments = 0,
|
||||
},
|
||||
};
|
||||
res = sync_unpriv_cmd(false, new_opcodes ? UBLK_U_CMD_SET_PARAMS : UBLK_CMD_SET_PARAMS, ¶ms, sizeof(params));
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to set ublk device params: %s (code %d)\n", strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void map_ublk_queue()
|
||||
{
|
||||
const unsigned page_sz = getpagesize();
|
||||
size_t cmd_buf_size = (ublk_dev.queue_depth * sizeof(ublksrv_io_desc) + page_sz-1) / page_sz * page_sz;
|
||||
//const unsigned queue_offset = (UBLK_MAX_QUEUE_DEPTH * sizeof(ublksrv_io_desc) + page_sz-1) / page_sz * page_sz;
|
||||
//off = q_id * queue_offset;
|
||||
ublk_queue = (ublksrv_io_desc*)mmap(0, cmd_buf_size, PROT_READ, MAP_SHARED | MAP_POPULATE, cdev_fd, 0);
|
||||
if ((void*)ublk_queue == MAP_FAILED)
|
||||
{
|
||||
fprintf(stderr, "Failed to mmap() ublk queue buffer\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void recover_device(uint32_t dev_num)
|
||||
{
|
||||
ublk_dev.dev_id = dev_num;
|
||||
int res = sync_ublk_cmd(new_opcodes ? UBLK_U_CMD_GET_DEV_INFO : UBLK_CMD_GET_DEV_INFO, &ublk_dev, sizeof(ublk_dev));
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to get /dev/ublkb%u device info: %s (code %d)\n", dev_num, strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
if (ublk_dev.nr_hw_queues != 1)
|
||||
{
|
||||
fprintf(stderr, "Device /dev/ublkb%u is not supported because it has %d queues\n", dev_num, ublk_dev.nr_hw_queues);
|
||||
exit(1);
|
||||
}
|
||||
if (ublk_dev.ublksrv_pid != 0)
|
||||
{
|
||||
res = kill(ublk_dev.ublksrv_pid, 0);
|
||||
if (res == 0)
|
||||
{
|
||||
fprintf(stderr, "Device /dev/ublkb%u is still alive, daemon PID is %u\n", dev_num, ublk_dev.ublksrv_pid);
|
||||
exit(1);
|
||||
}
|
||||
else if (errno != ESRCH)
|
||||
{
|
||||
fprintf(stderr, "Device /dev/ublkb%u is still alive, failed to check if the daemon with PID %u is running: %s (code %d)\n",
|
||||
dev_num, ublk_dev.ublksrv_pid, strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// Send the "start recovery" command
|
||||
res = sync_unpriv_cmd(false, new_opcodes ? UBLK_U_CMD_START_USER_RECOVERY : UBLK_CMD_START_USER_RECOVERY, NULL, 0);
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to start /dev/ublkb%u device recovery: %s (code %d)\n", dev_num, strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void start_device(bool recover)
|
||||
{
|
||||
std::string ublkc_path = "/dev/ublkc"+std::to_string(ublk_dev.dev_id);
|
||||
cdev_fd = open(ublkc_path.c_str(), O_RDWR|O_NONBLOCK);
|
||||
if (cdev_fd < 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to open %s: %s (code %d)", ublkc_path.c_str(), strerror(errno), errno);
|
||||
exit(1);
|
||||
}
|
||||
// FIXME Here we could optionally do ublk_get_queue_affinity
|
||||
// Map queue command buffer
|
||||
map_ublk_queue();
|
||||
// submit initial fetch requests to ublk driver
|
||||
for (int i = 0; i < ublk_dev.queue_depth; i++)
|
||||
{
|
||||
buffers.push_back((uint8_t*)memalign_or_die(MEM_ALIGNMENT, ublk_dev.max_io_buf_bytes));
|
||||
submit_request(new_opcodes ? UBLK_U_IO_FETCH_REQ : UBLK_IO_FETCH_REQ, i, 0);
|
||||
}
|
||||
ringloop->submit();
|
||||
// start device
|
||||
ublk_dev.ublksrv_pid = getpid();
|
||||
int res = sync_unpriv_cmd(false, (recover
|
||||
? (new_opcodes ? UBLK_U_CMD_END_USER_RECOVERY : UBLK_CMD_END_USER_RECOVERY)
|
||||
: (new_opcodes ? UBLK_U_CMD_START_DEV : UBLK_CMD_START_DEV)), NULL, 0, ublk_dev.ublksrv_pid);
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to start ublk device: %s (code %d)\n", strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
close(ctrl_fd);
|
||||
ctrl_fd = -1;
|
||||
}
|
||||
|
||||
void submit_request(uint64_t ublk_cmd, int i, int res)
|
||||
{
|
||||
io_uring_sqe *sqe = ringloop->get_sqe();
|
||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||
sqe->fd = cdev_fd;
|
||||
sqe->opcode = IORING_OP_URING_CMD;
|
||||
//sqe->flags = IOSQE_FIXED_FILE;
|
||||
sqe->flags = 0;
|
||||
sqe->rw_flags = 0;
|
||||
sqe->off = ublk_cmd;
|
||||
ublksrv_io_cmd *cmd = (ublksrv_io_cmd *)&sqe->addr3; // sqe128 command buffer address
|
||||
cmd->q_id = 0;
|
||||
cmd->tag = i;
|
||||
cmd->addr = (uint64_t)buffers[i];
|
||||
cmd->result = res;
|
||||
data->callback = [this, i](ring_data_t *data) { exec_request(data->res, i); };
|
||||
}
|
||||
|
||||
void exec_request(int res, int i)
|
||||
{
|
||||
if (res != 0)
|
||||
{
|
||||
// Note: res may be also UBLK_IO_RES_NEED_GET_DATA if UBLK_F_NEED_GET_DATA is enabled,
|
||||
// in this case you should submit_request(UBLK_IO_NEED_GET_DATA, i) again with buffer
|
||||
if (res == -ENODEV)
|
||||
{
|
||||
// ublk device is removed
|
||||
stop = true;
|
||||
return;
|
||||
}
|
||||
fprintf(stderr, "Fetching ublk request failed: %s (code %d)\n", strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
ublksrv_io_desc *iod = &ublk_queue[i];
|
||||
uint8_t opcode = ublksrv_get_op(iod);
|
||||
if (opcode == UBLK_IO_OP_FLUSH)
|
||||
{
|
||||
cluster_op_t *op = new cluster_op_t;
|
||||
op->opcode = OSD_OP_SYNC;
|
||||
op->callback = [this, i](cluster_op_t *op)
|
||||
{
|
||||
submit_request(new_opcodes ? UBLK_U_IO_COMMIT_AND_FETCH_REQ : UBLK_IO_COMMIT_AND_FETCH_REQ, i, op->retval);
|
||||
delete op;
|
||||
};
|
||||
cli->execute(op);
|
||||
}
|
||||
else if (opcode == UBLK_IO_OP_WRITE_ZEROES || opcode == UBLK_IO_OP_DISCARD)
|
||||
{
|
||||
submit_request(new_opcodes ? UBLK_U_IO_COMMIT_AND_FETCH_REQ : UBLK_IO_COMMIT_AND_FETCH_REQ, i, -EINVAL);
|
||||
}
|
||||
else if (opcode == UBLK_IO_OP_READ || opcode == UBLK_IO_OP_WRITE)
|
||||
{
|
||||
cluster_op_t *op = new cluster_op_t;
|
||||
op->opcode = opcode == UBLK_IO_OP_READ ? OSD_OP_READ : OSD_OP_WRITE;
|
||||
op->inode = inode ? inode : watch->cfg.num;
|
||||
op->offset = iod->start_sector * 512;
|
||||
op->len = iod->nr_sectors * 512;
|
||||
op->iov.push_back(buffers[i], op->len);
|
||||
op->callback = [this, i](cluster_op_t *op)
|
||||
{
|
||||
submit_request(new_opcodes ? UBLK_U_IO_COMMIT_AND_FETCH_REQ : UBLK_IO_COMMIT_AND_FETCH_REQ, i, op->retval);
|
||||
delete op;
|
||||
};
|
||||
cli->execute(op);
|
||||
}
|
||||
else
|
||||
{
|
||||
submit_request(new_opcodes ? UBLK_U_IO_COMMIT_AND_FETCH_REQ : UBLK_IO_COMMIT_AND_FETCH_REQ, i, -EINVAL);
|
||||
}
|
||||
}
|
||||
|
||||
int get_dev_info(int dev_num, bool unpriv)
|
||||
{
|
||||
// Get device info
|
||||
ublk_dev.dev_id = dev_num;
|
||||
int res = unpriv
|
||||
? sync_unpriv_cmd(true, new_opcodes ? UBLK_U_CMD_GET_DEV_INFO2 : UBLK_CMD_GET_DEV_INFO2, &ublk_dev, sizeof(ublk_dev))
|
||||
: sync_ublk_cmd(new_opcodes ? UBLK_U_CMD_GET_DEV_INFO : UBLK_CMD_GET_DEV_INFO, &ublk_dev, sizeof(ublk_dev));
|
||||
if (res != 0 && res != -ENODEV)
|
||||
{
|
||||
fprintf(stderr, "Failed to get device info from /dev/ublkc%d: %s (code %d)\n", dev_num, strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void unmap_device(int dev_num, bool unpriv, bool wait)
|
||||
{
|
||||
int res = 0;
|
||||
// Stop the device
|
||||
ublk_dev.dev_id = dev_num;
|
||||
res = sync_unpriv_cmd(unpriv, new_opcodes ? UBLK_U_CMD_STOP_DEV : UBLK_CMD_STOP_DEV, NULL, 0);
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to stop device /dev/ublkc%d: %s (code %d)\n", dev_num, strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
// Delete the device
|
||||
res = sync_unpriv_cmd(unpriv, new_opcodes ? (wait ? UBLK_U_CMD_DEL_DEV : UBLK_U_CMD_DEL_DEV_ASYNC) : UBLK_CMD_DEL_DEV, NULL, 0);
|
||||
if (res != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to delete device /dev/ublkc%d: %s (code %d)\n", dev_num, strerror(-res), res);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int sync_unpriv_cmd(bool unpriv, uint32_t cmd_op, void *addr, uint32_t len, uint64_t data0 = 0)
|
||||
{
|
||||
int res;
|
||||
if (unpriv)
|
||||
{
|
||||
static const int path_max = 64;
|
||||
char buf[path_max + len];
|
||||
memset(buf, 0, path_max);
|
||||
memcpy(buf + path_max, addr, len);
|
||||
snprintf(buf, path_max, "/dev/ublkc%d", ublk_dev.dev_id);
|
||||
res = sync_ublk_cmd(cmd_op, buf, sizeof(buf), path_max, data0);
|
||||
if (!res)
|
||||
memcpy(addr, buf + path_max, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = sync_ublk_cmd(cmd_op, addr, len, 0, data0);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
int sync_ublk_cmd(uint32_t cmd_op, void *addr, uint32_t len, uint16_t dev_path_len = 0, uint64_t data0 = 0)
|
||||
{
|
||||
io_uring_sqe *sqe = ringloop->get_sqe();
|
||||
sqe->fd = ctrl_fd;
|
||||
sqe->opcode = IORING_OP_URING_CMD;
|
||||
sqe->ioprio = 0;
|
||||
sqe->off = cmd_op;
|
||||
ublksrv_ctrl_cmd *cmd = (ublksrv_ctrl_cmd *)&sqe->addr3; // sqe128 command buffer address
|
||||
cmd->dev_id = ublk_dev.dev_id;
|
||||
cmd->queue_id = -1;
|
||||
cmd->addr = (uint64_t)addr;
|
||||
cmd->len = len;
|
||||
cmd->data[0] = data0;
|
||||
cmd->dev_path_len = dev_path_len;
|
||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||
bool done = false;
|
||||
int res = 0;
|
||||
data->callback = [&](ring_data_t *data)
|
||||
{
|
||||
res = data->res;
|
||||
done = true;
|
||||
};
|
||||
ringloop->submit();
|
||||
while (!done)
|
||||
{
|
||||
ringloop->loop();
|
||||
if (!done)
|
||||
ringloop->wait();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
int main(int narg, const char *args[])
|
||||
{
|
||||
setvbuf(stdout, NULL, _IONBF, 0);
|
||||
setvbuf(stderr, NULL, _IONBF, 0);
|
||||
exe_name = args[0];
|
||||
ublk_server *p = new ublk_server();
|
||||
p->exec(ublk_server::parse_args(narg, args));
|
||||
delete p;
|
||||
return 0;
|
||||
}
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: Vitastor
|
||||
Description: Vitastor client library
|
||||
Version: 2.2.2
|
||||
Version: 2.3.0
|
||||
Libs: -L${libdir} -lvitastor_client
|
||||
Cflags: -I${includedir}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user