forked from vitalif/vitastor
Compare commits
49 Commits
Author | SHA1 | Date | |
---|---|---|---|
1c10430ae1 | |||
dfce91d168 | |||
332a13ba30 | |||
d0e257ee81 | |||
004912aac0 | |||
c18e92273e | |||
9815d70ffc | |||
4a4627dcab | |||
b963f2fd93 | |||
ba7427020e | |||
a0aac7eb2a | |||
ac7b834af3 | |||
ee0c78fd74 | |||
e6646a5b2f | |||
ae69662b17 | |||
57ad4c3636 | |||
b7e4d0c9bf | |||
161a23c966 | |||
2f999d8607 | |||
d007a374f2 | |||
45c0694853 | |||
57bcba2406 | |||
30ac899074 | |||
2348d39cf4 | |||
3de7929fe5 | |||
07b2196bc2 | |||
b8e30608d6 | |||
a612cdca47 | |||
c8d61568b5 | |||
84ed3c6395 | |||
a7b57386c0 | |||
9d4ea5f764 | |||
000e4944ec | |||
8426616d89 | |||
1a841344ec | |||
8603b5cb1d | |||
f12b8e45a9 | |||
878ccbb6ea | |||
b14220b4d0 | |||
181d6ba407 | |||
63c2b9832c | |||
10e2e6a7c8 | |||
a598428992 | |||
08a677b684 | |||
7c8fbdad16 | |||
2f9353df60 | |||
57c744f288 | |||
a11ca56fb1 | |||
b84927b340 |
@@ -10,6 +10,9 @@ RUN set -e -x; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
ln -s /root/qemu-build/qemu-*/ ./qemu; \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
cd mon; \
|
||||
npm install; \
|
||||
cd ..; \
|
||||
mkdir build; \
|
||||
cd build; \
|
||||
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
|
||||
|
@@ -190,24 +190,6 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_failure_domain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_failure_domain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_interrupted_rebalance:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -280,6 +262,60 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_failure_domain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_failure_domain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_minsize_1:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -316,6 +352,114 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_rm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_rm.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_chain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_chain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_chain_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_down:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_down.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_down_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_splitbrain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_splitbrain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_rebalance_verify:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -388,78 +532,6 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_rm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_rm.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_splitbrain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_splitbrain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_write:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "0.9.2")
|
||||
set(VERSION "0.9.4")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -15,7 +15,7 @@ Vitastor архитектурно похож на Ceph, что означает
|
||||
и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
|
||||
избыточности - репликацией или с произвольными кодами коррекции ошибок.
|
||||
|
||||
Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
|
||||
Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
|
||||
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
|
||||
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
|
||||
|
||||
|
@@ -14,8 +14,8 @@ Vitastor is architecturally similar to Ceph which means strong consistency,
|
||||
primary-replication, symmetric clustering and automatic data distribution over any
|
||||
number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
|
||||
|
||||
Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
|
||||
TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
|
||||
Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network,
|
||||
supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
|
||||
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
|
||||
or internal systems of public clouds.
|
||||
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VERSION ?= v0.9.2
|
||||
VERSION ?= v0.9.4
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v0.9.2
|
||||
image: vitalif/vitastor-csi:v0.9.4
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -116,7 +116,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v0.9.2
|
||||
image: vitalif/vitastor-csi:v0.9.4
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "0.9.2"
|
||||
vitastorCSIDriverVersion = "0.9.4"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
58
debian/build-pve-qemu.sh
vendored
Normal file
58
debian/build-pve-qemu.sh
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
exit
|
||||
|
||||
git clone https://git.yourcmc.ru/vitalif/pve-qemu .
|
||||
|
||||
# bookworm
|
||||
|
||||
# Start the bookworm build container; image and container name must match the bookworm repos configured below
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bookworm debian:bookworm bash
|
||||
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
|
||||
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
|
||||
echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
|
||||
echo 'ru_RU UTF-8' >> /etc/locale.gen
|
||||
echo 'en_US UTF-8' >> /etc/locale.gen
|
||||
apt-get update
|
||||
apt-get install wget ca-certificates
|
||||
wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
|
||||
apt-get update
|
||||
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
|
||||
mk-build-deps --install ./control
|
||||
|
||||
# bullseye
|
||||
|
||||
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
|
||||
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
|
||||
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
|
||||
echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
|
||||
echo 'ru_RU UTF-8' >> /etc/locale.gen
|
||||
echo 'en_US UTF-8' >> /etc/locale.gen
|
||||
apt-get update
|
||||
# ca-certificates is required for the HTTPS downloads that follow; it is not pulled in automatically because Install-Recommends is disabled above
apt-get install wget ca-certificates
|
||||
wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
|
||||
apt-get update
|
||||
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
|
||||
mk-build-deps --install ./control
|
||||
|
||||
# buster
|
||||
|
||||
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash
|
||||
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
|
||||
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
|
||||
echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
|
||||
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
|
||||
echo 'ru_RU UTF-8' >> /etc/locale.gen
|
||||
echo 'en_US UTF-8' >> /etc/locale.gen
|
||||
apt-get update
|
||||
apt-get install wget ca-certificates
|
||||
wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
|
||||
apt-get update
|
||||
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
|
||||
mk-build-deps --install ./control
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@@ -1,10 +1,10 @@
|
||||
vitastor (0.9.2-1) unstable; urgency=medium
|
||||
vitastor (0.9.4-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||
|
||||
vitastor (0.9.2-1) unstable; urgency=medium
|
||||
vitastor (0.9.4-1) unstable; urgency=medium
|
||||
|
||||
* Implement NFS proxy
|
||||
* Add documentation
|
||||
|
18
debian/patched-qemu.Dockerfile
vendored
18
debian/patched-qemu.Dockerfile
vendored
@@ -28,13 +28,19 @@ RUN apt-get --download-only source qemu
|
||||
|
||||
ADD patches /root/vitastor/patches
|
||||
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
|
||||
|
||||
#RUN set -e; \
|
||||
# apt-get install -y wget; \
|
||||
# wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
|
||||
# (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
|
||||
# (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||
# apt-get update; \
|
||||
# apt-get install -y vitastor-client vitastor-client-dev quilt
|
||||
|
||||
RUN set -e; \
|
||||
apt-get install -y wget; \
|
||||
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
|
||||
(echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
|
||||
(echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||
dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
|
||||
apt-get update; \
|
||||
apt-get install -y vitastor-client vitastor-client-dev quilt; \
|
||||
apt-get install -y quilt; \
|
||||
mkdir -p /root/packages/qemu-$REL; \
|
||||
rm -rf /root/packages/qemu-$REL/*; \
|
||||
cd /root/packages/qemu-$REL; \
|
||||
@@ -48,7 +54,7 @@ RUN set -e; \
|
||||
quilt add block/vitastor.c; \
|
||||
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
|
||||
quilt refresh; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor3; \
|
||||
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
rm -rf /root/packages/qemu-$REL/qemu-*/
|
||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -35,8 +35,8 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-0.9.2; \
|
||||
cd vitastor-0.9.2; \
|
||||
cp -r /root/vitastor vitastor-0.9.4; \
|
||||
cd vitastor-0.9.4; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
@@ -49,8 +49,8 @@ RUN set -e -x; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
|
||||
cd vitastor-0.9.2; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.4.orig.tar.xz vitastor-0.9.4; \
|
||||
cd vitastor-0.9.4; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
|
@@ -21,7 +21,7 @@ Configuration parameters can be set in 3 places:
|
||||
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
|
||||
doesn't allow setting all variables directly, but it allows overriding the
|
||||
configuration file and set everything you need inside it.
|
||||
- OSD superblocks created by [vitastor-disk](../usage/disk.en.md) contain
|
||||
- OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain
|
||||
primarily disk layout parameters of specific OSDs. In fact, these parameters
|
||||
are automatically passed into the command line of vitastor-osd process, so
|
||||
they have the same "status" as command-line parameters.
|
||||
|
@@ -23,7 +23,7 @@
|
||||
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
|
||||
как правило, не включают полный набор параметров напрямую, но позволяют
|
||||
определить путь к файлу конфигурации и задать любые параметры в нём.
|
||||
- В суперблоке OSD, записываемом [vitastor-disk](../usage/disk.ru.md) - параметры,
|
||||
- В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры,
|
||||
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
|
||||
при запуске OSD эти параметры автоматически передаются в командную строку
|
||||
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
|
||||
|
@@ -33,12 +33,13 @@ Size of objects (data blocks) into which all physical and virtual drives
|
||||
in Vitastor, affects memory usage, write amplification and I/O load
|
||||
distribution effectiveness.
|
||||
|
||||
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
|
||||
it's possible to use 4 MB for SSD too - it will lower memory usage, but
|
||||
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
|
||||
it's possible to use 1 MB for SSD too - it will lower memory usage, but
|
||||
may increase average WA and reduce linear performance.
|
||||
|
||||
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
||||
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
||||
With 1 MB it's 8 times lower.
|
||||
|
||||
## bitmap_granularity
|
||||
|
||||
|
@@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
|
||||
настроек, влияет на потребление памяти, объём избыточной записи (write
|
||||
amplification) и эффективность распределения нагрузки по OSD.
|
||||
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
|
||||
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
|
||||
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
|
||||
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
||||
среднем увеличит WA.
|
||||
|
||||
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
||||
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
||||
стандартном 128 КБ блоке.
|
||||
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
|
||||
|
||||
## bitmap_granularity
|
||||
|
||||
|
145
docs/config/src/include.js
Executable file
145
docs/config/src/include.js
Executable file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/nodejs
|
||||
|
||||
const fsp = require('fs').promises;
|
||||
|
||||
// Print the error and mark the process as failed so that a broken run is detectable by the caller
run(process.argv).catch(e => { console.error(e); process.exitCode = 1; });
|
||||
|
||||
async function run(argv)
|
||||
{
|
||||
if (argv.length < 3)
|
||||
{
|
||||
console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
|
||||
return;
|
||||
}
|
||||
const index_file = await fsp.realpath(argv[2]);
|
||||
const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
|
||||
let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
|
||||
text = text.split(re);
|
||||
let included = {};
|
||||
let heading = 0, heading_name = '', m;
|
||||
for (let i = 0; i < text.length; i++)
|
||||
{
|
||||
if (text[i].substr(0, 2) == '{{')
|
||||
{
|
||||
// Inclusion
|
||||
let incfile = text[i].substr(2, text[i].length-4);
|
||||
let section = null;
|
||||
let indent = heading;
|
||||
incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
|
||||
incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
|
||||
let inc_heading = section;
|
||||
incfile = rel2abs(index_file, incfile);
|
||||
let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
|
||||
inc = inc.trim().replace(/^[\s\S]+?\n#/, '#'); // remove until the first header
|
||||
inc = inc.split(re);
|
||||
const indent_str = new Array(indent+1).join('#');
|
||||
let section_start = -1, section_end = -1;
|
||||
for (let j = 0; j < inc.length; j++)
|
||||
{
|
||||
if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
|
||||
{
|
||||
if (!inc_heading)
|
||||
{
|
||||
inc_heading = m[3].trim();
|
||||
}
|
||||
if (section)
|
||||
{
|
||||
if (m[3].trim() == section)
|
||||
section_start = j;
|
||||
else if (section_start >= 0)
|
||||
{
|
||||
section_end = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
inc[j] = m[1] + indent_str + m[2] + m[3];
|
||||
}
|
||||
else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
|
||||
{
|
||||
const abs_m2 = rel2abs(incfile, m[2]);
|
||||
const rel_m = abs2rel(__filename, abs_m2);
|
||||
if (rel_m.substr(0, 9) == '../../../') // outside docs
|
||||
inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
|
||||
else
|
||||
inc[j] = m[1] + abs_m2 + m[3];
|
||||
}
|
||||
}
|
||||
if (section)
|
||||
{
|
||||
inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
|
||||
}
|
||||
if (inc.length)
|
||||
{
|
||||
if (!inc_heading)
|
||||
inc_heading = heading_name||'';
|
||||
included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
|
||||
inc[0] = inc[0].replace(/^\s+/, '');
|
||||
inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
|
||||
}
|
||||
text.splice(i, 1, ...inc);
|
||||
i = i + inc.length - 1;
|
||||
}
|
||||
else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
|
||||
{
|
||||
// Heading
|
||||
heading = m[1].length;
|
||||
heading_name = m[2].trim();
|
||||
}
|
||||
}
|
||||
for (let i = 0; i < text.length; i++)
|
||||
{
|
||||
if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
|
||||
{
|
||||
const p = m[2].indexOf('#');
|
||||
if (included[m[2]])
|
||||
{
|
||||
text[i] = m[1]+included[m[2]]+m[3];
|
||||
}
|
||||
else if (p >= 0 && included[m[2].substr(0, p)])
|
||||
{
|
||||
text[i] = m[1]+m[2].substr(p)+m[3];
|
||||
}
|
||||
}
|
||||
}
|
||||
console.log(text.join(''));
|
||||
}
|
||||
|
||||
function rel2abs(ref, rel)
|
||||
{
|
||||
rel = [ ...ref.replace(/^(.*)\/[^\/]+$/, '$1').split(/\/+/), ...rel.split(/\/+/) ];
|
||||
return killdots(rel).join('/');
|
||||
}
|
||||
|
||||
function abs2rel(ref, abs)
|
||||
{
|
||||
ref = ref.split(/\/+/);
|
||||
abs = abs.split(/\/+/);
|
||||
while (ref.length > 1 && ref[0] == abs[0])
|
||||
{
|
||||
ref.shift();
|
||||
abs.shift();
|
||||
}
|
||||
for (let i = 1; i < ref.length; i++)
|
||||
{
|
||||
abs.unshift('..');
|
||||
}
|
||||
return killdots(abs).join('/');
|
||||
}
|
||||
|
||||
function killdots(rel)
|
||||
{
|
||||
for (let i = 0; i < rel.length; i++)
|
||||
{
|
||||
if (rel[i] == '.')
|
||||
{
|
||||
rel.splice(i, 1);
|
||||
i--;
|
||||
}
|
||||
else if (i >= 1 && rel[i] == '..' && rel[i-1] != '..')
|
||||
{
|
||||
rel.splice(i-1, 2);
|
||||
i -= 2;
|
||||
}
|
||||
}
|
||||
return rel;
|
||||
}
|
65
docs/config/src/included.en.md
Normal file
65
docs/config/src/included.en.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Vitastor
|
||||
|
||||
{{../../../README.md#The Idea}}
|
||||
|
||||
{{../../../README.md#Talks and presentations}}
|
||||
|
||||
{{../../intro/features.en.md}}
|
||||
|
||||
{{../../intro/quickstart.en.md}}
|
||||
|
||||
{{../../intro/architecture.en.md}}
|
||||
|
||||
## Installation
|
||||
|
||||
{{../../installation/packages.en.md}}
|
||||
|
||||
{{../../installation/proxmox.en.md}}
|
||||
|
||||
{{../../installation/openstack.en.md}}
|
||||
|
||||
{{../../installation/kubernetes.en.md}}
|
||||
|
||||
{{../../installation/source.en.md}}
|
||||
|
||||
{{../../config.en.md|indent=1}}
|
||||
|
||||
{{../../config/common.en.md|indent=2}}
|
||||
|
||||
{{../../config/network.en.md|indent=2}}
|
||||
|
||||
{{../../config/layout-cluster.en.md|indent=2}}
|
||||
|
||||
{{../../config/layout-osd.en.md|indent=2}}
|
||||
|
||||
{{../../config/osd.en.md|indent=2}}
|
||||
|
||||
{{../../config/monitor.en.md|indent=2}}
|
||||
|
||||
{{../../config/pool.en.md|indent=2}}
|
||||
|
||||
{{../../config/inode.en.md|indent=2}}
|
||||
|
||||
## Usage
|
||||
|
||||
{{../../usage/cli.en.md}}
|
||||
|
||||
{{../../usage/disk.en.md}}
|
||||
|
||||
{{../../usage/fio.en.md}}
|
||||
|
||||
{{../../usage/nbd.en.md}}
|
||||
|
||||
{{../../usage/qemu.en.md}}
|
||||
|
||||
{{../../usage/nfs.en.md}}
|
||||
|
||||
## Performance
|
||||
|
||||
{{../../performance/understanding.en.md}}
|
||||
|
||||
{{../../performance/theoretical.en.md}}
|
||||
|
||||
{{../../performance/comparison1.en.md}}
|
||||
|
||||
{{../../intro/author.en.md|indent=1}}
|
65
docs/config/src/included.ru.md
Normal file
65
docs/config/src/included.ru.md
Normal file
@@ -0,0 +1,65 @@
|
||||
# Vitastor
|
||||
|
||||
{{../../../README-ru.md#Идея|indent=0}}
|
||||
|
||||
{{../../../README-ru.md#Презентации и записи докладов|indent=0}}
|
||||
|
||||
{{../../intro/features.ru.md}}
|
||||
|
||||
{{../../intro/quickstart.ru.md}}
|
||||
|
||||
{{../../intro/architecture.ru.md}}
|
||||
|
||||
## Установка
|
||||
|
||||
{{../../installation/packages.ru.md}}
|
||||
|
||||
{{../../installation/proxmox.ru.md}}
|
||||
|
||||
{{../../installation/openstack.ru.md}}
|
||||
|
||||
{{../../installation/kubernetes.ru.md}}
|
||||
|
||||
{{../../installation/source.ru.md}}
|
||||
|
||||
{{../../config.ru.md|indent=1}}
|
||||
|
||||
{{../../config/common.ru.md|indent=2}}
|
||||
|
||||
{{../../config/network.ru.md|indent=2}}
|
||||
|
||||
{{../../config/layout-cluster.ru.md|indent=2}}
|
||||
|
||||
{{../../config/layout-osd.ru.md|indent=2}}
|
||||
|
||||
{{../../config/osd.ru.md|indent=2}}
|
||||
|
||||
{{../../config/monitor.ru.md|indent=2}}
|
||||
|
||||
{{../../config/pool.ru.md|indent=2}}
|
||||
|
||||
{{../../config/inode.ru.md|indent=2}}
|
||||
|
||||
## Использование
|
||||
|
||||
{{../../usage/cli.ru.md}}
|
||||
|
||||
{{../../usage/disk.ru.md}}
|
||||
|
||||
{{../../usage/fio.ru.md}}
|
||||
|
||||
{{../../usage/nbd.ru.md}}
|
||||
|
||||
{{../../usage/qemu.ru.md}}
|
||||
|
||||
{{../../usage/nfs.ru.md}}
|
||||
|
||||
## Производительность
|
||||
|
||||
{{../../performance/understanding.ru.md}}
|
||||
|
||||
{{../../performance/theoretical.ru.md}}
|
||||
|
||||
{{../../performance/comparison1.ru.md}}
|
||||
|
||||
{{../../intro/author.ru.md|indent=1}}
|
@@ -7,26 +7,27 @@
|
||||
in Vitastor, affects memory usage, write amplification and I/O load
|
||||
distribution effectiveness.
|
||||
|
||||
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
|
||||
it's possible to use 4 MB for SSD too - it will lower memory usage, but
|
||||
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
|
||||
it's possible to use 1 MB for SSD too - it will lower memory usage, but
|
||||
may increase average WA and reduce linear performance.
|
||||
|
||||
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
||||
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
||||
With 1 MB it's 8 times lower.
|
||||
info_ru: |
|
||||
Размер объектов (блоков данных), на которые делятся физические и виртуальные
|
||||
диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
|
||||
настроек, влияет на потребление памяти, объём избыточной записи (write
|
||||
amplification) и эффективность распределения нагрузки по OSD.
|
||||
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
|
||||
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
|
||||
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
|
||||
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
||||
среднем увеличит WA.
|
||||
|
||||
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
||||
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
||||
стандартном 128 КБ блоке.
|
||||
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
|
||||
- name: bitmap_granularity
|
||||
type: int
|
||||
default: 4096
|
||||
|
@@ -8,13 +8,13 @@
|
||||
|
||||
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
|
||||
|
||||
Для установки возьмите манифесты из директории [csi/deploy/](../csi/deploy/), поместите
|
||||
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../csi/deploy/001-csi-config-map.yaml),
|
||||
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../csi/deploy/009-storage-class.yaml)
|
||||
Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
|
||||
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
|
||||
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
|
||||
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
|
||||
|
||||
```
|
||||
for i in ./???-*.yaml; do kubectl apply -f $i; done
|
||||
```
|
||||
|
||||
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../csi/deploy/example-pvc.yaml).
|
||||
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../../csi/deploy/example-pvc.yaml).
|
||||
|
@@ -36,5 +36,5 @@ vitastor_pool_id = 1
|
||||
image_upload_use_cinder_backend = True
|
||||
```
|
||||
|
||||
To put Glance images in Vitastor, use [https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](volume-backed images),
|
||||
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
||||
although the support has not been verified yet.
|
||||
|
@@ -36,5 +36,5 @@ image_upload_use_cinder_backend = True
|
||||
```
|
||||
|
||||
Чтобы помещать в Vitastor Glance-образы, нужно использовать
|
||||
[https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](образы на основе томов Cinder),
|
||||
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
||||
однако, поддержка этой функции ещё не проверялась.
|
||||
|
@@ -6,10 +6,10 @@
|
||||
|
||||
# Proxmox VE
|
||||
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-7.4 are supported):
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.0 are supported):
|
||||
|
||||
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
||||
buster for 6.4, bullseye for 7.4, pve7.1 for 7.1, pve7.2 for 7.2, pve7.3 for 7.3
|
||||
bookworm for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
||||
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
||||
- Define storage in `/etc/pve/storage.cfg` (see below)
|
||||
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
||||
@@ -35,5 +35,5 @@ vitastor: vitastor
|
||||
vitastor_nbd 0
|
||||
```
|
||||
|
||||
\* Note: you can also manually copy [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to Proxmox hosts
|
||||
\* Note: you can also manually copy [patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) to Proxmox hosts
|
||||
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.
|
||||
|
@@ -1,15 +1,15 @@
|
||||
[Документация](../../README-ru.md#документация) → Установка → Proxmox
|
||||
[Документация](../../README-ru.md#документация) → Установка → Proxmox VE
|
||||
|
||||
-----
|
||||
|
||||
[Read in English](proxmox.en.md)
|
||||
|
||||
# Proxmox
|
||||
# Proxmox VE
|
||||
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.4):
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.0):
|
||||
|
||||
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
||||
buster для 6.4, bullseye для 7.4, pve7.1 для 7.1, pve7.2 для 7.2, pve7.3 для 7.3
|
||||
bookworm для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
||||
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
||||
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
||||
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
||||
@@ -35,5 +35,5 @@ vitastor: vitastor
|
||||
```
|
||||
|
||||
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
|
||||
[patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) на хосты Proxmox как
|
||||
[patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) на хосты Proxmox как
|
||||
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.
|
||||
|
@@ -21,7 +21,7 @@
|
||||
|
||||
## Basic instructions
|
||||
|
||||
Download source, for example using git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
|
||||
Download source, for example using git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
|
||||
|
||||
Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine,
|
||||
you can disable it by passing `-DWITH_FIO=no` to cmake.
|
||||
@@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
|
||||
QEMU build process. To do that:
|
||||
- Install vitastor client library headers (from source or from vitastor-client-dev package)
|
||||
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
|
||||
- Copy `src/qemu_driver.c` to QEMU source directory as `block/block-vitastor.c`
|
||||
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
|
||||
- Build QEMU as usual
|
||||
|
||||
But it is also possible to build it out-of-tree. To do that:
|
||||
|
@@ -21,7 +21,7 @@
|
||||
|
||||
## Базовая инструкция
|
||||
|
||||
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
|
||||
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
|
||||
|
||||
Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них
|
||||
в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio,
|
||||
@@ -41,7 +41,7 @@ cmake .. && make -j8 install
|
||||
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
|
||||
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
|
||||
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
|
||||
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/block-vitastor.c`
|
||||
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
|
||||
- Соберите QEMU как обычно
|
||||
|
||||
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
|
||||
@@ -60,7 +60,7 @@ cmake .. && make -j8 install
|
||||
* Для QEMU 2.0+: `<qemu>/qapi-types.h` → `<vitastor>/qemu/b/qemu/qapi-types.h`
|
||||
- `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки
|
||||
- Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы
|
||||
используете RHEL-подобый дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
|
||||
используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
|
||||
- После этого в процессе сборки Vitastor также будет собираться подходящий для вашей
|
||||
версии QEMU `block-vitastor.so`.
|
||||
- Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае
|
||||
|
@@ -44,7 +44,7 @@
|
||||
depends linearly on drive capacity and data store block size which is 128 KB by default.
|
||||
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
|
||||
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
|
||||
clusters, where it's beneficial to increase it, [inmemory_journal](docs/config/osd.en.md#inmemory_journal) can be disabled.
|
||||
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled.
|
||||
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
|
||||
it's possible to create a good copy-on-write storage, but it's much harder and makes performance
|
||||
less deterministic, so CoW isn't used in Vitastor.
|
||||
|
@@ -156,7 +156,7 @@
|
||||
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
|
||||
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
|
||||
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
|
||||
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../docs/config/osd.ru.md#inmemory_journal).
|
||||
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal).
|
||||
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
|
||||
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
|
||||
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.
|
||||
|
@@ -35,7 +35,7 @@
|
||||
|
||||
- [Debian and CentOS packages](../installation/packages.en.md)
|
||||
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
|
||||
- [Disk management CLI (vitastor-disk)](docs/usage/disk.en.md)
|
||||
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
|
||||
- Generic user-space client library
|
||||
- [Native QEMU driver](../usage/qemu.en.md)
|
||||
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
|
||||
|
@@ -13,7 +13,7 @@
|
||||
## Серверные функции
|
||||
|
||||
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
||||
- [Производительность](../comparison1.ru.md) ;-D
|
||||
- [Производительность](../performance/comparison1.ru.md) ;-D
|
||||
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
||||
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
|
||||
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
||||
@@ -37,7 +37,7 @@
|
||||
|
||||
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
|
||||
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
|
||||
- [Инструмент управления дисками (vitastor-disk)](docs/usage/disk.ru.md)
|
||||
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
|
||||
- Общая пользовательская клиентская библиотека для работы с кластером
|
||||
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
|
||||
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
|
||||
|
@@ -7,6 +7,7 @@
|
||||
# Quick Start
|
||||
|
||||
- [Preparation](#preparation)
|
||||
- [Recommended drives](#recommended-drives)
|
||||
- [Configure monitors](#configure-monitors)
|
||||
- [Configure OSDs](#configure-osds)
|
||||
- [Create a pool](#create-a-pool)
|
||||
@@ -19,10 +20,20 @@
|
||||
- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
|
||||
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
|
||||
[here](../config/layout-cluster.en.md#immediate_commit).
|
||||
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
|
||||
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
|
||||
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
|
||||
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
|
||||
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||
- [Install Vitastor packages](../installation/packages.en.md).
|
||||
|
||||
## Recommended drives
|
||||
|
||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
||||
|
||||
## Configure monitors
|
||||
|
||||
On the monitor hosts:
|
||||
@@ -45,9 +56,10 @@ On the monitor hosts:
|
||||
}
|
||||
```
|
||||
- Initialize OSDs:
|
||||
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
|
||||
`--disable_data_fsync off` to leave disk cache enabled if you use desktop
|
||||
SSDs without capacitors.
|
||||
- SSD-only or HDD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Add `--disable_data_fsync off` to leave disk write cache enabled if you use
|
||||
desktop SSDs without capacitors. Do NOT add `--disable_data_fsync off` if you
|
||||
use HDDs or SSD+HDD.
|
||||
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Pass all your devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own.
|
||||
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
|
||||
|
@@ -7,6 +7,7 @@
|
||||
# Быстрый старт
|
||||
|
||||
- [Подготовка](#подготовка)
|
||||
- [Рекомендуемые диски](#рекомендуемые-диски)
|
||||
- [Настройте мониторы](#настройте-мониторы)
|
||||
- [Настройте OSD](#настройте-osd)
|
||||
- [Создайте пул](#создайте-пул)
|
||||
@@ -19,10 +20,20 @@
|
||||
- Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
|
||||
использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
|
||||
О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
|
||||
- Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
|
||||
Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
|
||||
обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
|
||||
- Возьмите быструю сеть, минимум 10 Гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
|
||||
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||
- [Установите пакеты Vitastor](../installation/packages.ru.md).
|
||||
|
||||
## Рекомендуемые диски
|
||||
|
||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
||||
|
||||
## Настройте мониторы
|
||||
|
||||
На хостах, выделенных под мониторы:
|
||||
@@ -45,9 +56,10 @@
|
||||
}
|
||||
```
|
||||
- Инициализируйте OSD:
|
||||
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
|
||||
десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
|
||||
опцию `--disable_data_fsync off`.
|
||||
- Только SSD или только HDD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Если вы используете десктопные SSD без конденсаторов, добавьте опцию `--disable_data_fsync off`,
|
||||
чтобы оставить кэш записи диска включённым. НЕ добавляйте эту опцию, если используете
|
||||
жёсткие диски (HDD).
|
||||
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
|
||||
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
|
||||
|
@@ -13,6 +13,8 @@ remains decent (see an example [here](../performance/comparison1.en.md#vitastor-
|
||||
|
||||
Vitastor Kubernetes CSI driver is based on NBD.
|
||||
|
||||
See also [VDUSE](qemu.en.md#vduse).
|
||||
|
||||
## Map image
|
||||
|
||||
To create a local block device for a Vitastor image run:
|
||||
|
@@ -16,6 +16,8 @@ NBD немного снижает производительность из-за
|
||||
|
||||
CSI-драйвер Kubernetes Vitastor основан на NBD.
|
||||
|
||||
Смотрите также [VDUSE](qemu.ru.md#vduse).
|
||||
|
||||
## Подключить устройство
|
||||
|
||||
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
||||
|
@@ -29,7 +29,7 @@ vitastor-nfs [--etcd_address ADDR] [ДРУГИЕ ОПЦИИ]
|
||||
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
|
||||
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
|
||||
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
|
||||
--pool <POOL> исползовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
|
||||
--pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
|
||||
--foreground 1 не уходить в фон после запуска
|
||||
```
|
||||
|
||||
|
@@ -83,3 +83,43 @@ qemu-img rebase -u -b '' testimg.qcow2
|
||||
This can be used for backups. Just note that exporting an image that is currently being written to
|
||||
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
|
||||
on a live VM.
|
||||
|
||||
## VDUSE
|
||||
|
||||
The Linux kernel, starting with version 5.15, supports a new interface for attaching virtual disks
|
||||
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
|
||||
exporting QEMU block devices over this protocol using qemu-storage-daemon.
|
||||
|
||||
VDUSE has the same problem as other FUSE-like interfaces in Linux: if a userspace process hangs,
|
||||
for example, if it loses connectivity with Vitastor cluster - active processes doing I/O may
|
||||
hang in the D state (uninterruptible sleep) and you won't be able to kill them even with kill -9.
|
||||
In this case a reboot will be the only way to remove VDUSE devices from the system.
|
||||
|
||||
On the other hand, VDUSE is faster than [NBD](nbd.en.md), so you may prefer to use it if
|
||||
performance is important for you. Approximate performance numbers:
|
||||
direct fio benchmark - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
|
||||
|
||||
To try VDUSE you need at least Linux 5.15, built with VDUSE support
|
||||
(CONFIG_VIRTIO_VDPA=m and CONFIG_VDPA_USER=m). Debian Linux kernels have these options
|
||||
disabled for now, so if you want to try it on Debian, use a kernel from Ubuntu
|
||||
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) or Proxmox.
|
||||
|
||||
Commands to attach Vitastor image as a VDUSE device:
|
||||
|
||||
```
|
||||
modprobe vduse virtio-vdpa
|
||||
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
|
||||
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
|
||||
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
|
||||
vdpa dev add name test1 mgmtdev vduse
|
||||
```
|
||||
|
||||
After running these commands, the /dev/vda device will appear in the system and you'll be able to
|
||||
use it as a normal disk.
|
||||
|
||||
To remove the device:
|
||||
|
||||
```
|
||||
vdpa dev del test1
|
||||
kill <qemu-storage-daemon_process_PID>
|
||||
```
|
||||
|
@@ -87,3 +87,43 @@ qemu-img rebase -u -b '' testimg.qcow2
|
||||
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
|
||||
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
|
||||
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
|
||||
|
||||
## VDUSE
|
||||
|
||||
В Linux, начиная с версии ядра 5.15, доступен новый интерфейс для подключения виртуальных дисков
|
||||
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
|
||||
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
|
||||
|
||||
VDUSE страдает общей проблемой FUSE-подобных интерфейсов в Linux: если пользовательский процесс
|
||||
подвиснет, например, если будет потеряна связь с кластером Vitastor - читающие/пишущие в кластер
|
||||
процессы могут "залипнуть" в состоянии D (непрерываемый сон) и их будет невозможно убить даже
|
||||
через kill -9. В этом случае удалить из системы устройство можно только перезагрузившись.
|
||||
|
||||
С другой стороны, VDUSE быстрее по сравнению с [NBD](nbd.ru.md), поэтому его может
|
||||
быть предпочтительно использовать там, где производительность важнее. Порядок показателей:
|
||||
прямое тестирование через fio - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
|
||||
|
||||
Чтобы использовать VDUSE, вам нужно ядро Linux версии хотя бы 5.15, собранное с поддержкой
|
||||
VDUSE (CONFIG_VIRTIO_VDPA=m и CONFIG_VDPA_USER=m). В ядрах в Debian Linux поддержка пока
|
||||
отключена - если хотите попробовать эту функцию на Debian, поставьте ядро из Ubuntu
|
||||
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) или из Proxmox.
|
||||
|
||||
Команды для подключения виртуального диска через VDUSE:
|
||||
|
||||
```
|
||||
modprobe vduse virtio-vdpa
|
||||
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
|
||||
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
|
||||
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
|
||||
vdpa dev add name test1 mgmtdev vduse
|
||||
```
|
||||
|
||||
После этого в системе появится устройство /dev/vda, которое можно будет использовать как
|
||||
обычный диск.
|
||||
|
||||
Для удаления устройства из системы:
|
||||
|
||||
```
|
||||
vdpa dev del test1
|
||||
kill <PID_процесса_qemu-storage-daemon>
|
||||
```
|
||||
|
@@ -63,8 +63,9 @@ Wants=network-online.target local-fs.target time-sync.target
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
ExecStart=/usr/local/bin/etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
|
||||
--advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
|
||||
Environment=GOGC=50
|
||||
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
|
||||
--snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
|
||||
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
|
||||
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
|
||||
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
|
||||
|
26
mon/mon.js
26
mon/mon.js
@@ -1608,7 +1608,7 @@ class Mon
|
||||
}
|
||||
}
|
||||
}
|
||||
return inode_stats;
|
||||
return { inode_stats, seen_pools };
|
||||
}
|
||||
|
||||
serialize_bigints(obj)
|
||||
@@ -1634,7 +1634,7 @@ class Mon
|
||||
const timestamp = Date.now();
|
||||
const { object_counts, object_bytes } = this.sum_object_counts();
|
||||
let stats = this.sum_op_stats(timestamp, this.prev_stats);
|
||||
let inode_stats = this.sum_inode_stats(
|
||||
let { inode_stats, seen_pools } = this.sum_inode_stats(
|
||||
this.prev_stats ? this.prev_stats.inode_stats : null,
|
||||
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
|
||||
);
|
||||
@@ -1669,12 +1669,22 @@ class Mon
|
||||
}
|
||||
for (const pool_id in this.state.pool.stats)
|
||||
{
|
||||
const pool_stats = { ...this.state.pool.stats[pool_id] };
|
||||
this.serialize_bigints(pool_stats);
|
||||
txn.push({ requestPut: {
|
||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(pool_stats)),
|
||||
} });
|
||||
if (!seen_pools[pool_id])
|
||||
{
|
||||
txn.push({ requestDeleteRange: {
|
||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
} });
|
||||
delete this.state.pool.stats[pool_id];
|
||||
}
|
||||
else
|
||||
{
|
||||
const pool_stats = { ...this.state.pool.stats[pool_id] };
|
||||
this.serialize_bigints(pool_stats);
|
||||
txn.push({ requestPut: {
|
||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(pool_stats)),
|
||||
} });
|
||||
}
|
||||
}
|
||||
if (txn.length)
|
||||
{
|
||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '0.9.2'
|
||||
VERSION = '0.9.4'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
190
patches/pve-qemu-8.0-vitastor.patch
Normal file
190
patches/pve-qemu-8.0-vitastor.patch
Normal file
@@ -0,0 +1,190 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index 382bec0e7d..af6207dbce 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -114,6 +114,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index c44d05a13f..ebedb42843 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -1882,6 +1902,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||
@@ -4020,6 +4041,7 @@ if spice_protocol.found()
|
||||
summary_info += {' spice server support': spice}
|
||||
endif
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index fc9447d267..c4ac55c283 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index c05ad0c07e..f5eb701604 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3308,7 +3308,7 @@
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
'pbs',
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4338,6 +4338,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4787,6 +4809,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5187,6 +5210,17 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5385,6 +5419,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
index 6e8983f39c..1b0b9fcf3e 100755
|
||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -32,7 +32,7 @@
|
||||
--with-git=meson \
|
||||
--with-git-submodules=update \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -179,6 +179,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 009fab1515..95914e6ebc 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -144,6 +144,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qed qed image format support'
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' sdl SDL user interface'
|
||||
@@ -392,6 +393,8 @@ _meson_option_parse() {
|
||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
176
patches/qemu-2.12-vitastor.patch
Normal file
176
patches/qemu-2.12-vitastor.patch
Normal file
@@ -0,0 +1,176 @@
|
||||
diff --git a/block/Makefile.objs b/block/Makefile.objs
|
||||
index d644bac60a..e404236291 100644
|
||||
--- a/block/Makefile.objs
|
||||
+++ b/block/Makefile.objs
|
||||
@@ -19,6 +19,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
|
||||
block-obj-$(CONFIG_LIBNFS) += nfs.o
|
||||
block-obj-$(CONFIG_CURL) += curl.o
|
||||
block-obj-$(CONFIG_RBD) += rbd.o
|
||||
+block-obj-$(CONFIG_VITASTOR) += vitastor.o
|
||||
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
|
||||
block-obj-$(CONFIG_VXHS) += vxhs.o
|
||||
block-obj-$(CONFIG_LIBSSH2) += ssh.o
|
||||
@@ -39,6 +40,8 @@ curl.o-cflags := $(CURL_CFLAGS)
|
||||
curl.o-libs := $(CURL_LIBS)
|
||||
rbd.o-cflags := $(RBD_CFLAGS)
|
||||
rbd.o-libs := $(RBD_LIBS)
|
||||
+vitastor.o-cflags := $(VITASTOR_CFLAGS)
|
||||
+vitastor.o-libs := $(VITASTOR_LIBS)
|
||||
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
|
||||
gluster.o-libs := $(GLUSTERFS_LIBS)
|
||||
vxhs.o-libs := $(VXHS_LIBS)
|
||||
diff --git a/configure b/configure
|
||||
index 0a19b033bc..58b7fbf24c 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -398,6 +398,7 @@ trace_backends="log"
|
||||
trace_file="trace"
|
||||
spice=""
|
||||
rbd=""
|
||||
+vitastor=""
|
||||
smartcard=""
|
||||
libusb=""
|
||||
usb_redir=""
|
||||
@@ -1213,6 +1214,10 @@ for opt do
|
||||
;;
|
||||
--enable-rbd) rbd="yes"
|
||||
;;
|
||||
+ --disable-vitastor) vitastor="no"
|
||||
+ ;;
|
||||
+ --enable-vitastor) vitastor="yes"
|
||||
+ ;;
|
||||
--disable-xfsctl) xfs="no"
|
||||
;;
|
||||
--enable-xfsctl) xfs="yes"
|
||||
@@ -1601,6 +1606,7 @@ disabled with --disable-FEATURE, default is enabled if available:
|
||||
vhost-crypto vhost-crypto acceleration support
|
||||
spice spice
|
||||
rbd rados block device (rbd)
|
||||
+ vitastor vitastor block device
|
||||
libiscsi iscsi support
|
||||
libnfs nfs support
|
||||
smartcard smartcard support (libcacard)
|
||||
@@ -3594,6 +3600,27 @@ EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
+##########################################
|
||||
+# vitastor probe
|
||||
+if test "$vitastor" != "no" ; then
|
||||
+ cat > $TMPC <<EOF
|
||||
+#include <vitastor_c.h>
|
||||
+int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+}
|
||||
+EOF
|
||||
+ vitastor_libs="-lvitastor_client"
|
||||
+ if compile_prog "" "$vitastor_libs" ; then
|
||||
+ vitastor=yes
|
||||
+ else
|
||||
+ if test "$vitastor" = "yes" ; then
|
||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
||||
+ fi
|
||||
+ vitastor=no
|
||||
+ fi
|
||||
+fi
|
||||
+
|
||||
##########################################
|
||||
# libssh2 probe
|
||||
min_libssh2_version=1.2.8
|
||||
@@ -5837,6 +5864,7 @@ echo "Trace output file $trace_file-<pid>"
|
||||
fi
|
||||
echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
|
||||
echo "rbd support $rbd"
|
||||
+echo "vitastor support $vitastor"
|
||||
echo "xfsctl support $xfs"
|
||||
echo "smartcard support $smartcard"
|
||||
echo "libusb $libusb"
|
||||
@@ -6416,6 +6444,11 @@ if test "$rbd" = "yes" ; then
|
||||
echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
|
||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
||||
fi
|
||||
+if test "$vitastor" = "yes" ; then
|
||||
+ echo "CONFIG_VITASTOR=m" >> $config_host_mak
|
||||
+ echo "VITASTOR_CFLAGS=$vitastor_cflags" >> $config_host_mak
|
||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
||||
+fi
|
||||
|
||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
||||
if test "$coroutine_pool" = "yes" ; then
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index c50517bff3..c780bb2c1c 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -2514,7 +2514,7 @@
|
||||
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
|
||||
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
|
||||
'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
|
||||
- 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
|
||||
+ 'quorum', 'raw', 'rbd', 'vitastor', 'replication', 'sheepdog', 'ssh',
|
||||
'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
|
||||
|
||||
##
|
||||
@@ -3217,6 +3217,28 @@
|
||||
'*snap-id': 'uint32',
|
||||
'*tag': 'str' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -3547,6 +3569,7 @@
|
||||
'rbd': 'BlockdevOptionsRbd',
|
||||
'replication':'BlockdevOptionsReplication',
|
||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'ssh': 'BlockdevOptionsSsh',
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
@@ -3991,6 +4014,17 @@
|
||||
'*subformat': 'BlockdevVhdxSubformat',
|
||||
'*block-state-zero': 'bool' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVpcSubformat:
|
||||
#
|
||||
@@ -4074,6 +4108,7 @@
|
||||
'rbd': 'BlockdevCreateOptionsRbd',
|
||||
'replication': 'BlockdevCreateNotSupported',
|
||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'throttle': 'BlockdevCreateNotSupported',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
181
patches/qemu-5.2-vitastor.patch
Normal file
181
patches/qemu-5.2-vitastor.patch
Normal file
@@ -0,0 +1,181 @@
|
||||
Index: qemu-5.2+dfsg/qapi/block-core.json
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/qapi/block-core.json
|
||||
+++ qemu-5.2+dfsg/qapi/block-core.json
|
||||
@@ -2831,7 +2831,7 @@
|
||||
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
|
||||
'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
|
||||
- 'sheepdog',
|
||||
+ 'sheepdog', 'vitastor',
|
||||
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||
|
||||
##
|
||||
@@ -3668,6 +3668,28 @@
|
||||
'*tag': 'str' } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -4015,6 +4037,7 @@
|
||||
'replication': { 'type': 'BlockdevOptionsReplication',
|
||||
'if': 'defined(CONFIG_REPLICATION)' },
|
||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'ssh': 'BlockdevOptionsSsh',
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
@@ -4404,6 +4427,17 @@
|
||||
'*cluster-size' : 'size' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -4665,6 +4699,7 @@
|
||||
'qed': 'BlockdevCreateOptionsQed',
|
||||
'rbd': 'BlockdevCreateOptionsRbd',
|
||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
Index: qemu-5.2+dfsg/block/meson.build
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/block/meson.build
|
||||
+++ qemu-5.2+dfsg/block/meson.build
|
||||
@@ -76,6 +76,7 @@ foreach m : [
|
||||
['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'],
|
||||
['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'],
|
||||
['CONFIG_RBD', 'rbd', rbd, 'rbd.c'],
|
||||
+ ['CONFIG_VITASTOR', 'vitastor', vitastor, 'vitastor.c'],
|
||||
]
|
||||
if config_host.has_key(m[0])
|
||||
if enable_modules
|
||||
Index: qemu-5.2+dfsg/configure
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/configure
|
||||
+++ qemu-5.2+dfsg/configure
|
||||
@@ -372,6 +372,7 @@ trace_backends="log"
|
||||
trace_file="trace"
|
||||
spice=""
|
||||
rbd=""
|
||||
+vitastor=""
|
||||
smartcard=""
|
||||
u2f="auto"
|
||||
libusb=""
|
||||
@@ -1263,6 +1264,10 @@ for opt do
|
||||
;;
|
||||
--enable-rbd) rbd="yes"
|
||||
;;
|
||||
+ --disable-vitastor) vitastor="no"
|
||||
+ ;;
|
||||
+ --enable-vitastor) vitastor="yes"
|
||||
+ ;;
|
||||
--disable-xfsctl) xfs="no"
|
||||
;;
|
||||
--enable-xfsctl) xfs="yes"
|
||||
@@ -1827,6 +1832,7 @@ disabled with --disable-FEATURE, default
|
||||
vhost-vdpa vhost-vdpa kernel backend support
|
||||
spice spice
|
||||
rbd rados block device (rbd)
|
||||
+ vitastor vitastor block device
|
||||
libiscsi iscsi support
|
||||
libnfs nfs support
|
||||
smartcard smartcard support (libcacard)
|
||||
@@ -3719,6 +3725,27 @@ EOF
|
||||
fi
|
||||
|
||||
##########################################
|
||||
+# vitastor probe
|
||||
+if test "$vitastor" != "no" ; then
|
||||
+ cat > $TMPC <<EOF
|
||||
+#include <vitastor_c.h>
|
||||
+int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+}
|
||||
+EOF
|
||||
+ vitastor_libs="-lvitastor_client"
|
||||
+ if compile_prog "" "$vitastor_libs" ; then
|
||||
+ vitastor=yes
|
||||
+ else
|
||||
+ if test "$vitastor" = "yes" ; then
|
||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
||||
+ fi
|
||||
+ vitastor=no
|
||||
+ fi
|
||||
+fi
|
||||
+
|
||||
+##########################################
|
||||
# libssh probe
|
||||
if test "$libssh" != "no" ; then
|
||||
if $pkg_config --exists libssh; then
|
||||
@@ -6456,6 +6483,10 @@ if test "$rbd" = "yes" ; then
|
||||
echo "CONFIG_RBD=y" >> $config_host_mak
|
||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
||||
fi
|
||||
+if test "$vitastor" = "yes" ; then
|
||||
+ echo "CONFIG_VITASTOR=y" >> $config_host_mak
|
||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
||||
+fi
|
||||
|
||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
||||
if test "$coroutine_pool" = "yes" ; then
|
||||
Index: qemu-5.2+dfsg/meson.build
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/meson.build
|
||||
+++ qemu-5.2+dfsg/meson.build
|
||||
@@ -596,6 +596,10 @@ rbd = not_found
|
||||
if 'CONFIG_RBD' in config_host
|
||||
rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split())
|
||||
endif
|
||||
+vitastor = not_found
|
||||
+if 'CONFIG_VITASTOR' in config_host
|
||||
+ vitastor = declare_dependency(link_args: config_host['VITASTOR_LIBS'].split())
|
||||
+endif
|
||||
glusterfs = not_found
|
||||
if 'CONFIG_GLUSTERFS' in config_host
|
||||
glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(),
|
||||
@@ -2145,6 +2149,7 @@ endif
|
||||
# TODO: add back protocol and server version
|
||||
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
|
||||
summary_info += {'rbd support': config_host.has_key('CONFIG_RBD')}
|
||||
+summary_info += {'vitastor support': config_host.has_key('CONFIG_VITASTOR')}
|
||||
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
|
||||
summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')}
|
||||
summary_info += {'U2F support': u2f.found()}
|
@@ -24,4 +24,4 @@ rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform 's#^#vitastor-0.9.4/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.4$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -22,7 +22,7 @@
|
||||
Name: qemu-kvm
|
||||
Version: 4.2.0
|
||||
-Release: 29.vitastor%{?dist}.6
|
||||
+Release: 32.vitastor%{?dist}.6
|
||||
+Release: 34.vitastor%{?dist}.6
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
Epoch: 15
|
||||
License: GPLv2 and GPLv2+ and CC-BY
|
||||
|
@@ -13,7 +13,7 @@
|
||||
Name: qemu-kvm
|
||||
Version: 4.2.0
|
||||
-Release: 29%{?dist}.6
|
||||
+Release: 32.vitastor%{?dist}.6
|
||||
+Release: 33.vitastor%{?dist}.6
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
Epoch: 15
|
||||
License: GPLv2 and GPLv2+ and CC-BY
|
||||
|
103
rpm/qemu-kvm-6.2-el8.spec.patch
Normal file
103
rpm/qemu-kvm-6.2-el8.spec.patch
Normal file
@@ -0,0 +1,103 @@
|
||||
--- qemu-kvm-6.2.spec.orig 2023-07-18 13:52:57.636625440 +0000
|
||||
+++ qemu-kvm-6.2.spec 2023-07-18 13:52:19.011683886 +0000
|
||||
@@ -73,6 +73,7 @@ Requires: %{name}-hw-usbredir = %{epoch}
|
||||
%endif \
|
||||
Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \
|
||||
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
||||
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
||||
Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release}
|
||||
|
||||
# Macro to properly setup RHEL/RHEV conflict handling
|
||||
@@ -83,7 +84,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version
|
||||
Summary: QEMU is a machine emulator and virtualizer
|
||||
Name: qemu-kvm
|
||||
Version: 6.2.0
|
||||
-Release: 32%{?rcrel}%{?dist}
|
||||
+Release: 32.vitastor%{?rcrel}%{?dist}
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
Epoch: 15
|
||||
License: GPLv2 and GPLv2+ and CC-BY
|
||||
@@ -122,6 +123,7 @@ Source37: tests_data_acpi_pc_SSDT.dimmpx
|
||||
Source38: tests_data_acpi_q35_FACP.slic
|
||||
Source39: tests_data_acpi_q35_SSDT.dimmpxm
|
||||
Source40: tests_data_acpi_virt_SSDT.memhp
|
||||
+Source41: qemu-vitastor.c
|
||||
|
||||
Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch
|
||||
Patch0005: 0005-Initial-redhat-build.patch
|
||||
@@ -652,6 +654,7 @@ Patch255: kvm-scsi-protect-req-aiocb-wit
|
||||
Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
|
||||
# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]
|
||||
Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
|
||||
+Patch258: qemu-6.2-vitastor.patch
|
||||
|
||||
BuildRequires: wget
|
||||
BuildRequires: rpm-build
|
||||
@@ -689,6 +692,7 @@ BuildRequires: libcurl-devel
|
||||
BuildRequires: libssh-devel
|
||||
BuildRequires: librados-devel
|
||||
BuildRequires: librbd-devel
|
||||
+BuildRequires: vitastor-client-devel
|
||||
%if %{have_gluster}
|
||||
# For gluster block driver
|
||||
BuildRequires: glusterfs-api-devel
|
||||
@@ -926,6 +930,14 @@ Install this package if you want to acce
|
||||
using the rbd protocol.
|
||||
|
||||
|
||||
+%package block-vitastor
|
||||
+Summary: QEMU Vitastor block driver
|
||||
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
+
|
||||
+%description block-vitastor
|
||||
+This package provides the additional Vitastor block driver for QEMU.
|
||||
+
|
||||
+
|
||||
%package block-ssh
|
||||
Summary: QEMU SSH block driver
|
||||
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
@@ -979,6 +991,7 @@ This package provides usbredir support.
|
||||
rm -fr slirp
|
||||
mkdir slirp
|
||||
%autopatch -p1
|
||||
+cp %{SOURCE41} ./block/vitastor.c
|
||||
|
||||
%global qemu_kvm_build qemu_kvm_build
|
||||
mkdir -p %{qemu_kvm_build}
|
||||
@@ -994,7 +1007,7 @@ cp -f %{SOURCE40} tests/data/acpi/virt/S
|
||||
# --build-id option is used for giving info to the debug packages.
|
||||
buildldflags="VL_LDFLAGS=-Wl,--build-id"
|
||||
|
||||
-%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle
|
||||
+%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle
|
||||
|
||||
%if 0%{have_gluster}
|
||||
%global block_drivers_list %{block_drivers_list},gluster
|
||||
@@ -1149,9 +1162,7 @@ pushd %{qemu_kvm_build}
|
||||
--firmwarepath=%{_prefix}/share/qemu-firmware \
|
||||
--meson="git" \
|
||||
--target-list="%{buildarch}" \
|
||||
- --block-drv-rw-whitelist=%{block_drivers_list} \
|
||||
--audio-drv-list= \
|
||||
- --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \
|
||||
--with-coroutine=ucontext \
|
||||
--with-git=git \
|
||||
--tls-priority=@QEMU,SYSTEM \
|
||||
@@ -1197,6 +1208,7 @@ pushd %{qemu_kvm_build}
|
||||
%endif
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+ --enable-vitastor \
|
||||
%if 0%{have_librdma}
|
||||
--enable-rdma \
|
||||
%endif
|
||||
@@ -1794,6 +1806,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.
|
||||
%files block-rbd
|
||||
%{_libdir}/qemu-kvm/block-rbd.so
|
||||
|
||||
+%files block-vitastor
|
||||
+%{_libdir}/qemu-kvm/block-vitastor.so
|
||||
+
|
||||
%files block-ssh
|
||||
%{_libdir}/qemu-kvm/block-ssh.so
|
||||
|
93
rpm/qemu-kvm-7.2-el9.spec.patch
Normal file
93
rpm/qemu-kvm-7.2-el9.spec.patch
Normal file
@@ -0,0 +1,93 @@
|
||||
--- qemu-kvm-7.2.spec.orig 2023-06-22 13:56:19.000000000 +0000
|
||||
+++ qemu-kvm-7.2.spec 2023-07-18 07:55:22.347090196 +0000
|
||||
@@ -100,8 +100,6 @@
|
||||
%endif
|
||||
|
||||
%global target_list %{kvm_target}-softmmu
|
||||
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
|
||||
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
|
||||
%define qemudocdir %{_docdir}/%{name}
|
||||
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
|
||||
|
||||
@@ -126,6 +124,7 @@ Requires: %{name}-device-usb-host = %{ep
|
||||
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
|
||||
%endif \
|
||||
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
||||
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
||||
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
|
||||
|
||||
# Since SPICE is removed from RHEL-9, the following Obsoletes:
|
||||
@@ -148,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
|
||||
Summary: QEMU is a machine emulator and virtualizer
|
||||
Name: qemu-kvm
|
||||
Version: 7.2.0
|
||||
-Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.1
|
||||
+Release: 14.vitastor%{?rcrel}%{?dist}%{?cc_suffix}.1
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
# Epoch 15 used for RHEL 8
|
||||
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
|
||||
@@ -171,6 +170,7 @@ Source28: 95-kvm-memlock.conf
|
||||
Source30: kvm-s390x.conf
|
||||
Source31: kvm-x86.conf
|
||||
Source36: README.tests
|
||||
+Source37: qemu-vitastor.c
|
||||
|
||||
|
||||
Patch0004: 0004-Initial-redhat-build.patch
|
||||
@@ -418,6 +418,7 @@ Patch134: kvm-target-i386-Fix-BZHI-instr
|
||||
Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
|
||||
# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z]
|
||||
Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
|
||||
+Patch137: qemu-7.2-vitastor.patch
|
||||
|
||||
%if %{have_clang}
|
||||
BuildRequires: clang
|
||||
@@ -449,6 +450,7 @@ BuildRequires: libcurl-devel
|
||||
%if %{have_block_rbd}
|
||||
BuildRequires: librbd-devel
|
||||
%endif
|
||||
+BuildRequires: vitastor-client-devel
|
||||
# We need both because the 'stap' binary is probed for by configure
|
||||
BuildRequires: systemtap
|
||||
BuildRequires: systemtap-sdt-devel
|
||||
@@ -642,6 +644,14 @@ using the rbd protocol.
|
||||
%endif
|
||||
|
||||
|
||||
+%package block-vitastor
|
||||
+Summary: QEMU Vitastor block driver
|
||||
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
+
|
||||
+%description block-vitastor
|
||||
+This package provides the additional Vitastor block driver for QEMU.
|
||||
+
|
||||
+
|
||||
%package audio-pa
|
||||
Summary: QEMU PulseAudio audio driver
|
||||
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
@@ -719,6 +729,7 @@ This package provides usbredir support.
|
||||
%prep
|
||||
%setup -q -n qemu-%{version}%{?rcstr}
|
||||
%autopatch -p1
|
||||
+cp %{SOURCE37} ./block/vitastor.c
|
||||
|
||||
%global qemu_kvm_build qemu_kvm_build
|
||||
mkdir -p %{qemu_kvm_build}
|
||||
@@ -946,6 +957,7 @@ run_configure \
|
||||
%if %{have_block_rbd}
|
||||
--enable-rbd \
|
||||
%endif
|
||||
+ --enable-vitastor \
|
||||
%if %{have_librdma}
|
||||
--enable-rdma \
|
||||
%endif
|
||||
@@ -1426,6 +1438,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
|
||||
%files block-rbd
|
||||
%{_libdir}/%{name}/block-rbd.so
|
||||
%endif
|
||||
+%files block-vitastor
|
||||
+%{_libdir}/%{name}/block-vitastor.so
|
||||
+
|
||||
%files audio-pa
|
||||
%{_libdir}/%{name}/audio-pa.so
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.9.4.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.9.2
|
||||
Version: 0.9.4
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.9.2.el7.tar.gz
|
||||
Source0: vitastor-0.9.4.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.9.4.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.9.2
|
||||
Version: 0.9.4
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.9.2.el8.tar.gz
|
||||
Source0: vitastor-0.9.4.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -18,7 +18,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.9.4.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.9.2
|
||||
Version: 0.9.4
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.9.2.el9.tar.gz
|
||||
Source0: vitastor-0.9.4.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="0.9.2")
|
||||
add_definitions(-DVERSION="0.9.4")
|
||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||
if (${WITH_ASAN})
|
||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||
@@ -56,11 +56,6 @@ if (ISAL_LIBRARIES)
|
||||
add_definitions(-DWITH_ISAL)
|
||||
endif (ISAL_LIBRARIES)
|
||||
|
||||
find_package(OpenSSL)
|
||||
if (OPENSSL_FOUND)
|
||||
add_definitions(-DWITH_OPENSSL)
|
||||
endif (OPENSSL_FOUND)
|
||||
|
||||
add_custom_target(build_tests)
|
||||
add_custom_target(test
|
||||
COMMAND
|
||||
|
@@ -714,9 +714,15 @@ resume_1:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (new_trim_pos < bs->journal.used_start
|
||||
? (bs->journal.dirty_start >= bs->journal.used_start || bs->journal.dirty_start < new_trim_pos)
|
||||
: (bs->journal.dirty_start >= bs->journal.used_start && bs->journal.dirty_start < new_trim_pos))
|
||||
{
|
||||
bs->journal.dirty_start = new_trim_pos;
|
||||
}
|
||||
bs->journal.used_start = new_trim_pos;
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
|
||||
printf("Journal trimmed to %08lx (next_free=%08lx dirty_start=%08lx)\n", bs->journal.used_start, bs->journal.next_free, bs->journal.dirty_start);
|
||||
#endif
|
||||
if (bs->journal.flush_journal && !flusher->flush_queue.size())
|
||||
{
|
||||
|
@@ -103,6 +103,7 @@ public:
|
||||
journal_flusher_t(blockstore_impl_t *bs);
|
||||
~journal_flusher_t();
|
||||
void loop();
|
||||
bool is_trim_wanted() { return trim_wanted; }
|
||||
bool is_active();
|
||||
void mark_trim_possible();
|
||||
void request_trim();
|
||||
|
@@ -218,7 +218,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||
auto used = --journal.used_sectors[dirty_it->second.journal_sector];
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf(
|
||||
"remove usage of journal offset %08lx by %lx:%lx v%lu (%d refs)\n", dirty_it->second.journal_sector,
|
||||
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
|
||||
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, used
|
||||
);
|
||||
#endif
|
||||
|
@@ -661,8 +661,13 @@ void blockstore_impl_t::release_journal_sectors(blockstore_op_t *op)
|
||||
uint64_t s = PRIV(op)->min_flushed_journal_sector;
|
||||
while (1)
|
||||
{
|
||||
if (s != (1+journal.cur_sector) && journal.sector_info[s-1].flush_count == 0)
|
||||
if (!journal.sector_info[s-1].dirty && journal.sector_info[s-1].flush_count == 0)
|
||||
{
|
||||
if (s == (1+journal.cur_sector))
|
||||
{
|
||||
// Forcibly move to the next sector and move dirty position
|
||||
journal.in_sector_pos = journal.block_size;
|
||||
}
|
||||
// We know for sure that we won't write into this sector anymore
|
||||
uint64_t new_ds = journal.sector_info[s-1].offset + journal.block_size;
|
||||
if (new_ds >= journal.len)
|
||||
|
@@ -56,14 +56,15 @@ struct image_lister_t
|
||||
{
|
||||
continue;
|
||||
}
|
||||
auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(ic.second.num));
|
||||
auto pool_it = parent->cli->st_cli.pool_config.find(INODE_POOL(ic.second.num));
|
||||
bool good_pool = pool_it != parent->cli->st_cli.pool_config.end();
|
||||
auto item = json11::Json::object {
|
||||
{ "name", ic.second.name },
|
||||
{ "size", ic.second.size },
|
||||
{ "used_size", 0 },
|
||||
{ "readonly", ic.second.readonly },
|
||||
{ "pool_id", (uint64_t)INODE_POOL(ic.second.num) },
|
||||
{ "pool_name", pool_cfg.name },
|
||||
{ "pool_name", good_pool ? pool_it->second.name : "? (ID:"+std::to_string(INODE_POOL(ic.second.num))+")" },
|
||||
{ "inode_num", INODE_NO_POOL(ic.second.num) },
|
||||
{ "inode_id", ic.second.num },
|
||||
};
|
||||
@@ -247,6 +248,8 @@ resume_1:
|
||||
if (state == 1)
|
||||
goto resume_1;
|
||||
get_list();
|
||||
if (state == 100)
|
||||
return;
|
||||
if (show_stats)
|
||||
{
|
||||
resume_1:
|
||||
@@ -269,7 +272,7 @@ resume_1:
|
||||
{ "key", "name" },
|
||||
{ "title", "NAME" },
|
||||
});
|
||||
if (!list_pool_id)
|
||||
if (list_pool_name == "")
|
||||
{
|
||||
cols.push_back(json11::Json::object{
|
||||
{ "key", "pool_name" },
|
||||
|
@@ -41,7 +41,7 @@ struct snap_merger_t
|
||||
int fsync_interval = 128;
|
||||
|
||||
// -- STATE --
|
||||
inode_t target;
|
||||
inode_t target, to_num;
|
||||
int target_rank;
|
||||
bool inside_continue = false;
|
||||
int state = 0;
|
||||
@@ -98,6 +98,7 @@ struct snap_merger_t
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
to_num = to_cfg->num;
|
||||
// Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them
|
||||
std::vector<inode_t> chain_list;
|
||||
inode_config_t *cur = to_cfg;
|
||||
@@ -451,7 +452,7 @@ struct snap_merger_t
|
||||
{
|
||||
cluster_op_t *op = &rwo->op;
|
||||
op->opcode = OSD_OP_READ;
|
||||
op->inode = target;
|
||||
op->inode = to_num;
|
||||
op->offset = rwo->offset;
|
||||
op->len = target_block_size;
|
||||
op->iov.push_back(rwo->buf, target_block_size);
|
||||
@@ -483,7 +484,7 @@ struct snap_merger_t
|
||||
{
|
||||
// write start->end
|
||||
rwo->todo++;
|
||||
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
|
||||
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas && to_num == target ? 1+rwo->op.version : 0);
|
||||
rwo->start = rwo->end;
|
||||
if (use_cas)
|
||||
{
|
||||
@@ -502,7 +503,7 @@ struct snap_merger_t
|
||||
{
|
||||
// write start->end
|
||||
rwo->todo++;
|
||||
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
|
||||
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas && to_num == target ? 1+rwo->op.version : 0);
|
||||
rwo->start = rwo->end;
|
||||
if (use_cas)
|
||||
{
|
||||
@@ -532,7 +533,7 @@ struct snap_merger_t
|
||||
if (use_cas && subop->retval == -EINTR)
|
||||
{
|
||||
// CAS failure - reread and repeat optimistically
|
||||
rwo->start = subop->offset - rwo->offset;
|
||||
rwo->start = rwo->end = 0;
|
||||
rwo_read(rwo);
|
||||
delete subop;
|
||||
return;
|
||||
@@ -542,7 +543,7 @@ struct snap_merger_t
|
||||
rwo->error_read = false;
|
||||
}
|
||||
// Increment CAS version
|
||||
rwo->op.version++;
|
||||
rwo->op.version = subop->version;
|
||||
if (use_cas)
|
||||
next_write(rwo);
|
||||
else
|
||||
|
@@ -65,6 +65,9 @@ struct snap_remover_t
|
||||
int current_child = 0;
|
||||
std::function<bool(cli_result_t &)> cb;
|
||||
|
||||
std::vector<std::string> rebased_images, deleted_images;
|
||||
std::vector<uint64_t> deleted_ids;
|
||||
std::string inverse_child_name, inverse_parent_name;
|
||||
cli_result_t result;
|
||||
|
||||
bool is_done()
|
||||
@@ -122,6 +125,7 @@ resume_1:
|
||||
{
|
||||
if (merge_children[current_child] == inverse_child)
|
||||
continue;
|
||||
rebased_images.push_back(parent->cli->st_cli.inode_config.at(merge_children[current_child]).name);
|
||||
start_merge_child(merge_children[current_child], merge_children[current_child]);
|
||||
if (state == 100)
|
||||
return;
|
||||
@@ -134,9 +138,12 @@ resume_2:
|
||||
cb = NULL;
|
||||
if (result.err)
|
||||
{
|
||||
result.data = my_result(result.data);
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
else if (parent->progress)
|
||||
printf("%s\n", result.text.c_str());
|
||||
parent->change_parent(merge_children[current_child], new_parent, &result);
|
||||
state = 3;
|
||||
resume_3:
|
||||
@@ -144,6 +151,7 @@ resume_3:
|
||||
return;
|
||||
if (result.err)
|
||||
{
|
||||
result.data = my_result(result.data);
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
@@ -165,9 +173,12 @@ resume_4:
|
||||
cb = NULL;
|
||||
if (result.err)
|
||||
{
|
||||
result.data = my_result(result.data);
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
else if (parent->progress)
|
||||
printf("%s\n", result.text.c_str());
|
||||
// Delete "inverse" child data
|
||||
start_delete_source(inverse_child);
|
||||
if (state == 100)
|
||||
@@ -181,9 +192,12 @@ resume_5:
|
||||
cb = NULL;
|
||||
if (result.err)
|
||||
{
|
||||
result.data = my_result(result.data);
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
else if (parent->progress)
|
||||
printf("%s\n", result.text.c_str());
|
||||
// Delete "inverse" child metadata, rename parent over it,
|
||||
// and also change parent links of the previous "inverse" child
|
||||
rename_inverse_parent();
|
||||
@@ -199,6 +213,12 @@ resume_6:
|
||||
{
|
||||
if (chain_list[current_child] == inverse_parent)
|
||||
continue;
|
||||
{
|
||||
auto parent_it = parent->cli->st_cli.inode_config.find(chain_list[current_child]);
|
||||
if (parent_it != parent->cli->st_cli.inode_config.end())
|
||||
deleted_images.push_back(parent_it->second.name);
|
||||
deleted_ids.push_back(chain_list[current_child]);
|
||||
}
|
||||
start_delete_source(chain_list[current_child]);
|
||||
resume_7:
|
||||
while (!cb(result))
|
||||
@@ -209,9 +229,12 @@ resume_7:
|
||||
cb = NULL;
|
||||
if (result.err)
|
||||
{
|
||||
result.data = my_result(result.data);
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
else if (parent->progress)
|
||||
printf("%s\n", result.text.c_str());
|
||||
delete_inode_config(chain_list[current_child]);
|
||||
if (state == 100)
|
||||
return;
|
||||
@@ -221,11 +244,26 @@ resume_8:
|
||||
return;
|
||||
}
|
||||
state = 100;
|
||||
result = (cli_result_t){
|
||||
.text = "",
|
||||
.data = my_result(result.data),
|
||||
};
|
||||
resume_100:
|
||||
// Done
|
||||
return;
|
||||
}
|
||||
|
||||
json11::Json my_result(json11::Json src)
|
||||
{
|
||||
auto obj = src.object_items();
|
||||
obj["deleted_ids"] = deleted_ids;
|
||||
obj["deleted_images"] = deleted_images;
|
||||
obj["rebased_images"] = rebased_images;
|
||||
obj["renamed_from"] = inverse_parent_name;
|
||||
obj["renamed_to"] = inverse_child_name;
|
||||
return obj;
|
||||
}
|
||||
|
||||
void get_merge_children()
|
||||
{
|
||||
// Get all children of from..to
|
||||
@@ -338,7 +376,11 @@ resume_100:
|
||||
}
|
||||
for (auto inode_result: data["responses"].array_items())
|
||||
{
|
||||
auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["kvs"][0]);
|
||||
if (inode_result["response_range"]["kvs"].array_items().size() == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["response_range"]["kvs"][0]);
|
||||
pool_id_t pool_id = 0;
|
||||
inode_t inode = 0;
|
||||
char null_byte = 0;
|
||||
@@ -377,7 +419,7 @@ resume_100:
|
||||
inode_t child = cp.first;
|
||||
uint64_t child_used = inode_used[child];
|
||||
int rank = cp.second;
|
||||
for (int i = chain_list.size()-rank; i < chain_list.size(); i++)
|
||||
for (int i = chain_list.size()-1-rank; i < chain_list.size(); i++)
|
||||
{
|
||||
inode_t parent = chain_list[i];
|
||||
uint64_t parent_used = inode_used[parent];
|
||||
@@ -413,8 +455,8 @@ resume_100:
|
||||
}
|
||||
inode_config_t *child_cfg = &child_it->second;
|
||||
inode_config_t *target_cfg = &target_it->second;
|
||||
std::string child_name = child_cfg->name;
|
||||
std::string target_name = target_cfg->name;
|
||||
inverse_child_name = child_cfg->name;
|
||||
inverse_parent_name = target_cfg->name;
|
||||
std::string child_cfg_key = base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+
|
||||
"/config/inode/"+std::to_string(INODE_POOL(inverse_child))+
|
||||
@@ -425,6 +467,9 @@ resume_100:
|
||||
"/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+
|
||||
"/"+std::to_string(INODE_NO_POOL(inverse_parent))
|
||||
);
|
||||
std::string target_idx_key = base64_encode(
|
||||
parent->cli->st_cli.etcd_prefix+"/index/image/"+inverse_parent_name
|
||||
);
|
||||
// Fill new configuration
|
||||
inode_config_t new_cfg = *child_cfg;
|
||||
new_cfg.num = target_cfg->num;
|
||||
@@ -449,6 +494,11 @@ resume_100:
|
||||
{ "key", child_cfg_key },
|
||||
} },
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_delete_range", json11::Json::object {
|
||||
{ "key", target_idx_key },
|
||||
} },
|
||||
},
|
||||
json11::Json::object {
|
||||
{ "request_put", json11::Json::object {
|
||||
{ "key", target_cfg_key },
|
||||
@@ -495,12 +545,12 @@ resume_100:
|
||||
parent->cli->st_cli.etcd_txn_slow(json11::Json::object {
|
||||
{ "compare", cmp },
|
||||
{ "success", txn },
|
||||
}, [this, target_name, child_name](std::string err, json11::Json res)
|
||||
}, [this](std::string err, json11::Json res)
|
||||
{
|
||||
parent->waiting--;
|
||||
if (err != "")
|
||||
{
|
||||
result = (cli_result_t){ .err = EIO, .text = "Error renaming "+target_name+" to "+child_name+": "+err };
|
||||
result = (cli_result_t){ .err = EIO, .text = "Error renaming "+inverse_parent_name+" to "+inverse_child_name+": "+err };
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
@@ -508,14 +558,14 @@ resume_100:
|
||||
{
|
||||
result = (cli_result_t){
|
||||
.err = EAGAIN,
|
||||
.text = "Parent ("+target_name+"), child ("+child_name+"), or one of its children"
|
||||
.text = "Parent ("+inverse_parent_name+"), child ("+inverse_child_name+"), or one of its children"
|
||||
" configuration was modified during rename",
|
||||
};
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
if (parent->progress)
|
||||
printf("Layer %s renamed to %s\n", target_name.c_str(), child_name.c_str());
|
||||
printf("Layer %s renamed to %s\n", inverse_parent_name.c_str(), inverse_child_name.c_str());
|
||||
parent->ringloop->wakeup();
|
||||
});
|
||||
}
|
||||
|
@@ -28,6 +28,7 @@ struct rm_inode_t
|
||||
cli_tool_t *parent = NULL;
|
||||
inode_list_t *lister = NULL;
|
||||
std::vector<rm_pg_t*> lists;
|
||||
std::vector<osd_num_t> inactive_osds;
|
||||
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
|
||||
uint64_t pgs_to_list = 0;
|
||||
bool lists_done = false;
|
||||
@@ -86,6 +87,16 @@ struct rm_inode_t
|
||||
state = 100;
|
||||
return;
|
||||
}
|
||||
inactive_osds = parent->cli->list_inode_get_inactive_osds(lister);
|
||||
if (inactive_osds.size() && !parent->json_output)
|
||||
{
|
||||
fprintf(stderr, "Some data may remain after delete on OSDs which are currently down: ");
|
||||
for (int i = 0; i < inactive_osds.size(); i++)
|
||||
{
|
||||
fprintf(stderr, i > 0 ? ", %lu" : "%lu", inactive_osds[i]);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
pgs_to_list = parent->cli->list_pg_count(lister);
|
||||
parent->cli->list_inode_next(lister, parent->parallel_osds);
|
||||
}
|
||||
@@ -167,16 +178,33 @@ struct rm_inode_t
|
||||
}
|
||||
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
|
||||
{
|
||||
printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
|
||||
fprintf(stderr, "\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
|
||||
total_prev_pct = total_done*1000/total_count;
|
||||
}
|
||||
if (lists_done && !lists.size())
|
||||
{
|
||||
if (parent->progress && total_count > 0)
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
if (parent->progress && (total_done < total_count || inactive_osds.size() > 0))
|
||||
{
|
||||
fprintf(
|
||||
stderr, "Warning: Pool:%u,ID:%lu inode data may not have been fully removed.\n"
|
||||
" Use `vitastor-cli rm-data --pool %u --inode %lu` if you encounter it in listings.\n",
|
||||
pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode)
|
||||
);
|
||||
}
|
||||
result = (cli_result_t){
|
||||
.err = error_count > 0 ? EIO : 0,
|
||||
.text = error_count > 0 ? "Some blocks were not removed" : (
|
||||
"Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
|
||||
std::to_string(pool_id)+" removed"),
|
||||
.data = json11::Json::object {
|
||||
{ "removed_objects", total_done },
|
||||
{ "total_objects", total_count },
|
||||
{ "inactive_osds", inactive_osds },
|
||||
},
|
||||
};
|
||||
state = 100;
|
||||
}
|
||||
|
@@ -1209,6 +1209,10 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
||||
copy_part_bitmap(op, part);
|
||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||
}
|
||||
else if (op->opcode == OSD_OP_WRITE)
|
||||
{
|
||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||
}
|
||||
if (op->inflight_count == 0)
|
||||
{
|
||||
if (op->opcode == OSD_OP_SYNC)
|
||||
|
@@ -130,6 +130,7 @@ public:
|
||||
inode_list_t *list_inode_start(inode_t inode,
|
||||
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
|
||||
int list_pg_count(inode_list_t *lst);
|
||||
const std::vector<osd_num_t> & list_inode_get_inactive_osds(inode_list_t *lst);
|
||||
void list_inode_next(inode_list_t *lst, int next_pgs);
|
||||
//inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; }
|
||||
//inline uint64_t get_bs_block_size() { return st_cli.global_block_size; }
|
||||
|
@@ -36,6 +36,7 @@ struct inode_list_t
|
||||
inode_t inode = 0;
|
||||
int done_pgs = 0;
|
||||
int want = 0;
|
||||
std::vector<osd_num_t> inactive_osds;
|
||||
std::vector<inode_list_pg_t*> pgs;
|
||||
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
|
||||
};
|
||||
@@ -60,6 +61,7 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
||||
lst->inode = inode;
|
||||
lst->callback = callback;
|
||||
auto pool_cfg = st_cli.pool_config[pool_id];
|
||||
std::set<osd_num_t> inactive_osd_set;
|
||||
for (auto & pg_item: pool_cfg.pg_config)
|
||||
{
|
||||
auto & pg = pg_item.second;
|
||||
@@ -106,11 +108,18 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
||||
}
|
||||
for (osd_num_t peer_osd: all_peers)
|
||||
{
|
||||
r->list_osds.push_back((inode_list_osd_t){
|
||||
.pg = r,
|
||||
.osd_num = peer_osd,
|
||||
.sent = false,
|
||||
});
|
||||
if (st_cli.peer_states.find(peer_osd) != st_cli.peer_states.end())
|
||||
{
|
||||
r->list_osds.push_back((inode_list_osd_t){
|
||||
.pg = r,
|
||||
.osd_num = peer_osd,
|
||||
.sent = false,
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
inactive_osd_set.insert(peer_osd);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -132,6 +141,7 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
||||
{
|
||||
lst->pgs[i]->pos = i;
|
||||
}
|
||||
lst->inactive_osds.insert(lst->inactive_osds.end(), inactive_osd_set.begin(), inactive_osd_set.end());
|
||||
lists.push_back(lst);
|
||||
return lst;
|
||||
}
|
||||
@@ -141,6 +151,11 @@ int cluster_client_t::list_pg_count(inode_list_t *lst)
|
||||
return lst->pgs.size();
|
||||
}
|
||||
|
||||
const std::vector<osd_num_t> & cluster_client_t::list_inode_get_inactive_osds(inode_list_t *lst)
|
||||
{
|
||||
return lst->inactive_osds;
|
||||
}
|
||||
|
||||
void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
|
||||
{
|
||||
if (next_pgs >= 0)
|
||||
|
@@ -99,15 +99,16 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
||||
if (options["journal_size"] == "")
|
||||
{
|
||||
if (options["journal_device"] == "")
|
||||
options["journal_size"] = "32M";
|
||||
options["journal_size"] = is_hdd ? "128M" : "32M";
|
||||
else if (is_hdd)
|
||||
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
|
||||
}
|
||||
bool is_hybrid = is_hdd && options["journal_device"] != "" && options["journal_device"] != options["data_device"];
|
||||
if (is_hdd)
|
||||
{
|
||||
if (options["block_size"] == "")
|
||||
options["block_size"] = "1M";
|
||||
if (options["throttle_small_writes"] == "")
|
||||
if (is_hybrid && options["throttle_small_writes"] == "")
|
||||
options["throttle_small_writes"] = "1";
|
||||
}
|
||||
json11::Json::object sb;
|
||||
@@ -134,7 +135,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
||||
{ "meta_offset", 4096 + (dsk.meta_device == dsk.journal_device ? dsk.journal_len : 0) },
|
||||
{ "data_offset", 4096 + (dsk.data_device == dsk.meta_device ? dsk.meta_len : 0) +
|
||||
(dsk.data_device == dsk.journal_device ? dsk.journal_len : 0) },
|
||||
{ "journal_no_same_sector_overwrites", true },
|
||||
{ "journal_no_same_sector_overwrites", !is_hdd || is_hybrid },
|
||||
{ "journal_sector_buffer_count", 1024 },
|
||||
{ "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
|
||||
{ "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },
|
||||
|
@@ -187,22 +187,30 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
|
||||
check_addr = addr;
|
||||
if (pos == std::string::npos)
|
||||
addr += "/v3";
|
||||
bool local = false;
|
||||
int i;
|
||||
for (i = 0; i < local_ips.size(); i++)
|
||||
{
|
||||
if (local_ips[i] == check_addr)
|
||||
{
|
||||
this->etcd_local.push_back(addr);
|
||||
local = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i >= local_ips.size())
|
||||
this->etcd_addresses.push_back(addr);
|
||||
auto & to = local ? this->etcd_local : this->etcd_addresses;
|
||||
for (i = 0; i < to.size(); i++)
|
||||
{
|
||||
if (to[i] == addr)
|
||||
break;
|
||||
}
|
||||
if (i >= to.size())
|
||||
to.push_back(addr);
|
||||
}
|
||||
}
|
||||
|
||||
void etcd_state_client_t::parse_config(const json11::Json & config)
|
||||
{
|
||||
this->etcd_local.clear();
|
||||
this->etcd_addresses.clear();
|
||||
if (config["etcd_address"].is_string())
|
||||
{
|
||||
@@ -349,7 +357,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||
etcd_watches_initialised++;
|
||||
if (etcd_watches_initialised == 4 && this->log_level > 0)
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s\n", selected_etcd_address.c_str());
|
||||
fprintf(stderr, "Successfully subscribed to etcd at %s\n", cur_addr.c_str());
|
||||
}
|
||||
if (data["result"]["canceled"].bool_value())
|
||||
{
|
||||
@@ -360,15 +368,17 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
// so we should restart from the beginning if we can
|
||||
if (on_reload_hook != NULL)
|
||||
{
|
||||
fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
|
||||
data["result"]["compact_revision"].uint64_value());
|
||||
if (etcd_watch_ws)
|
||||
// check to not trigger on_reload_hook multiple times
|
||||
if (etcd_watch_ws != NULL)
|
||||
{
|
||||
fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
|
||||
data["result"]["compact_revision"].uint64_value());
|
||||
http_close(etcd_watch_ws);
|
||||
etcd_watch_ws = NULL;
|
||||
etcd_watch_revision = 0;
|
||||
on_reload_hook();
|
||||
}
|
||||
etcd_watch_revision = 0;
|
||||
on_reload_hook();
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -415,13 +425,9 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
}
|
||||
if (msg->eof)
|
||||
{
|
||||
fprintf(stderr, "Disconnected from etcd %s\n", cur_addr.c_str());
|
||||
if (cur_addr == selected_etcd_address)
|
||||
{
|
||||
fprintf(stderr, "Disconnected from etcd %s\n", selected_etcd_address.c_str());
|
||||
selected_etcd_address = "";
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "Disconnected from etcd\n");
|
||||
if (etcd_watch_ws)
|
||||
{
|
||||
http_close(etcd_watch_ws);
|
||||
@@ -438,6 +444,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
||||
else if (etcd_watches_initialised > 0)
|
||||
{
|
||||
// Connection was live, retry immediately
|
||||
etcd_watches_initialised = 0;
|
||||
start_etcd_watcher();
|
||||
}
|
||||
}
|
||||
|
@@ -27,19 +27,10 @@ static void parse_http_headers(std::string & res, http_response_t *parsed);
|
||||
|
||||
struct http_co_t
|
||||
{
|
||||
#ifdef WITH_OPENSSL
|
||||
static SSL_CTX *ssl_ctx = NULL;
|
||||
SSL *ssl_cli = NULL;
|
||||
BIO *ssl_rbio = NULL;
|
||||
BIO *ssl_wbio = NULL;
|
||||
std::vector<uint8_t> encrypted_out;
|
||||
#endif
|
||||
|
||||
timerfd_manager_t *tfd;
|
||||
std::function<void(const http_response_t*)> response_callback;
|
||||
|
||||
int request_timeout = 0;
|
||||
bool ssl = false;
|
||||
std::string host;
|
||||
std::string request;
|
||||
std::string ws_outbox;
|
||||
@@ -55,7 +46,7 @@ struct http_co_t
|
||||
int timeout_id = -1;
|
||||
int epoll_events = 0;
|
||||
int sent = 0;
|
||||
std::vector<uint8_t> rbuf;
|
||||
std::vector<char> rbuf;
|
||||
iovec read_iov, send_iov;
|
||||
msghdr read_msg = { 0 }, send_msg = { 0 };
|
||||
http_response_t parsed;
|
||||
@@ -268,12 +259,6 @@ void http_response_t::parse_json_response(std::string & error, json11::Json & r)
|
||||
|
||||
http_co_t::~http_co_t()
|
||||
{
|
||||
#ifdef WITH_OPENSSL
|
||||
if (ssl_cli)
|
||||
{
|
||||
SSL_free(ssl_cli);
|
||||
}
|
||||
#endif
|
||||
close_connection();
|
||||
}
|
||||
|
||||
@@ -290,16 +275,6 @@ void http_co_t::close_connection()
|
||||
close(peer_fd);
|
||||
peer_fd = -1;
|
||||
}
|
||||
#ifdef WITH_OPENSSL
|
||||
if (ssl_ctx)
|
||||
{
|
||||
// Frees context, client and bios at once
|
||||
SSL_free(ssl_ctx);
|
||||
ssl_rbio = NULL;
|
||||
ssl_wbio = NULL;
|
||||
ssl_cli = NULL;
|
||||
}
|
||||
#endif
|
||||
state = HTTP_CO_CLOSED;
|
||||
connected_host = "";
|
||||
response = "";
|
||||
@@ -329,27 +304,6 @@ void http_co_t::start_connection()
|
||||
}
|
||||
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||
epoll_events = 0;
|
||||
#ifdef WITH_OPENSSL
|
||||
// https://wiki.openssl.org/index.php/Hostname_validation
|
||||
if (ssl)
|
||||
{
|
||||
if (!ssl_ctx)
|
||||
ssl_ctx = SSL_CTX_new(TLS_method());
|
||||
ssl_rbio = BIO_new(BIO_s_mem());
|
||||
ssl_wbio = BIO_new(BIO_s_mem());
|
||||
ssl_cli = SSL_new(ssl_ctx);
|
||||
if (!ssl_ctx || !ssl_cli || !ssl_rbio || !ssl_wbio)
|
||||
{
|
||||
parsed = { .error = std::string("openssl initialization failed: ")+ERR_get_error(NULL) };
|
||||
response_callback(&parsed);
|
||||
response_callback = NULL;
|
||||
stackout();
|
||||
return;
|
||||
}
|
||||
SSL_set_connect_state(ssl_cli);
|
||||
SSL_set_bio(ssl_cli, ssl_rbio, ssl_wbio);
|
||||
}
|
||||
#endif
|
||||
// Finally call connect
|
||||
int r = ::connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
||||
if (r < 0 && errno != EINPROGRESS)
|
||||
@@ -478,11 +432,11 @@ void http_co_t::submit_read(bool check_timeout)
|
||||
stackin();
|
||||
int res;
|
||||
again:
|
||||
if (rbuf.capacity()-rbuf.size() < READ_BUFFER_SIZE)
|
||||
if (rbuf.size() != READ_BUFFER_SIZE)
|
||||
{
|
||||
rbuf.reserve(rbuf.size() + READ_BUFFER_SIZE);
|
||||
rbuf.resize(READ_BUFFER_SIZE);
|
||||
}
|
||||
read_iov = { .iov_base = rbuf.data()+rbuf.size(), .iov_len = READ_BUFFER_SIZE };
|
||||
read_iov = { .iov_base = rbuf.data(), .iov_len = READ_BUFFER_SIZE };
|
||||
read_msg.msg_iov = &read_iov;
|
||||
read_msg.msg_iovlen = 1;
|
||||
res = recvmsg(peer_fd, &read_msg, 0);
|
||||
@@ -512,177 +466,22 @@ again:
|
||||
else if (res <= 0)
|
||||
{
|
||||
// < 0 means error, 0 means EOF
|
||||
on_read_error(res);
|
||||
epoll_events = epoll_events & ~EPOLLIN;
|
||||
if (state == HTTP_CO_HEADERS_RECEIVED)
|
||||
std::swap(parsed.body, response);
|
||||
close_connection();
|
||||
if (res < 0)
|
||||
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
|
||||
run_cb_and_clear();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ssl)
|
||||
handle_ssl_read(rbuf);
|
||||
else
|
||||
response += std::string((char*)rbuf.data(), res);
|
||||
rbuf.resize(0);
|
||||
response += std::string(rbuf.data(), res);
|
||||
handle_read();
|
||||
}
|
||||
stackout();
|
||||
}
|
||||
|
||||
void http_co_t::on_read_error(int res)
|
||||
{
|
||||
epoll_events = epoll_events & ~EPOLLIN;
|
||||
if (state == HTTP_CO_HEADERS_RECEIVED)
|
||||
std::swap(parsed.body, response);
|
||||
close_connection();
|
||||
if (res < 0)
|
||||
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
|
||||
run_cb_and_clear();
|
||||
}
|
||||
|
||||
int http_co_t::do_ssl_handshake()
|
||||
{
|
||||
stackin();
|
||||
int r;
|
||||
while (1)
|
||||
{
|
||||
r = SSL_do_handshake(ssl_cli);
|
||||
if (r == SSL_ERROR_WANT_WRITE)
|
||||
{
|
||||
r = ssl_encrypt();
|
||||
if (r >= 0)
|
||||
submit_send();
|
||||
else
|
||||
{
|
||||
r = -r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (r == SSL_ERROR_WANT_READ || r == SSL_ERROR_NONE)
|
||||
{
|
||||
// OK or wait until we have more incoming data
|
||||
r = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
stackout();
|
||||
return r;
|
||||
}
|
||||
|
||||
// Enqueue outbound encrypted TLS data
|
||||
int http_co_t::ssl_encrypt()
|
||||
{
|
||||
stackin();
|
||||
int queued = 0;
|
||||
while (true)
|
||||
{
|
||||
if (encrypted_out.size() >= encrypted_out.capacity()/2)
|
||||
encrypted_out.reserve(encrypted_out.size() < READ_BUFFER_SIZE ? encrypted_out.size() + READ_BUFFER_SIZE : 2*encrypted_out.size());
|
||||
int r = BIO_read(ssl_wbio, encrypted_out.data()+encrypted_out.size(), encrypted_out.capacity()-encrypted_out.size());
|
||||
if (r > 0)
|
||||
{
|
||||
queued += r;
|
||||
encrypted_out.resize(encrypted_out.size()+r);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!BIO_should_retry(ssl_wbio))
|
||||
queued = r;
|
||||
break;
|
||||
}
|
||||
}
|
||||
stackout();
|
||||
return queued;
|
||||
}
|
||||
|
||||
void http_co_t::handle_ssl_write()
|
||||
{
|
||||
stackin();
|
||||
int r = 0;
|
||||
while (sent < request.size())
|
||||
{
|
||||
if (!SSL_is_init_finished(ssl_cli))
|
||||
{
|
||||
if (do_ssl_handshake() != 0)
|
||||
{
|
||||
on_read_error(-EIO);
|
||||
break;
|
||||
}
|
||||
if (!SSL_is_init_finished(ssl_cli))
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
int n = SSL_write(ssl_cli, request.data()+sent, request.size()-sent);
|
||||
if (n > 0)
|
||||
sent += n;
|
||||
else if (get_sslstatus(ssl_cli, n) == SSLSTATUS_FAIL)
|
||||
{
|
||||
on_read_error(-EIO);
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
r = ssl_encrypt();
|
||||
if (r >= 0)
|
||||
submit_send();
|
||||
else
|
||||
{
|
||||
on_read_error(-EIO);
|
||||
break;
|
||||
}
|
||||
}
|
||||
stackout();
|
||||
}
|
||||
|
||||
// Process incoming encrypted TLS data
|
||||
void http_co_t::handle_ssl_read()
|
||||
{
|
||||
stackin();
|
||||
int size = rbuf.size();
|
||||
int done = 0;
|
||||
while (done < size)
|
||||
{
|
||||
int n = BIO_write(ssl_rbio, rbuf.data()+done, size-done);
|
||||
if (n > 0)
|
||||
{
|
||||
done += n;
|
||||
}
|
||||
if (n <= 0)
|
||||
{
|
||||
on_read_error(-EIO);
|
||||
break;
|
||||
}
|
||||
if (!SSL_is_init_finished(ssl_cli))
|
||||
{
|
||||
if (do_ssl_handshake() != 0)
|
||||
{
|
||||
on_read_error(-EIO);
|
||||
break;
|
||||
}
|
||||
if (!SSL_is_init_finished(ssl_cli))
|
||||
break;
|
||||
}
|
||||
do
|
||||
{
|
||||
if (response.capacity() - response.size() < READ_BUFFER_SIZE)
|
||||
response.reserve(2*response.size() < response.size() + READ_BUFFER_SIZE ? response.size() + READ_BUFFER_SIZE : 2*response.size());
|
||||
n = SSL_read(ssl_cli, response.data() + response.size(), READ_BUFFER_SIZE);
|
||||
if (n <= 0)
|
||||
{
|
||||
n = SSL_get_error(ssl_cli, n);
|
||||
if (n == SSL_ERROR_WANT_READ)
|
||||
break;
|
||||
}
|
||||
} while (n > 0);
|
||||
}
|
||||
if (done < size)
|
||||
memmove(rbuf.data(), rbuf.data()+done, size-done);
|
||||
rbuf.resize(size-done);
|
||||
stackout();
|
||||
}
|
||||
|
||||
bool http_co_t::handle_read()
|
||||
{
|
||||
stackin();
|
||||
|
@@ -9,6 +9,10 @@ osd_op_t::~osd_op_t()
|
||||
{
|
||||
assert(!bs_op);
|
||||
assert(!op_data);
|
||||
if (bitmap_buf)
|
||||
{
|
||||
free(bitmap_buf);
|
||||
}
|
||||
if (rmw_buf)
|
||||
{
|
||||
free(rmw_buf);
|
||||
|
@@ -165,6 +165,7 @@ struct osd_op_t
|
||||
void *bitmap = NULL;
|
||||
unsigned bitmap_len = 0;
|
||||
unsigned bmp_data = 0;
|
||||
void *bitmap_buf = NULL;
|
||||
void *rmw_buf = NULL;
|
||||
osd_primary_op_data_t* op_data = NULL;
|
||||
std::function<void(osd_op_t*)> callback;
|
||||
|
@@ -369,7 +369,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
||||
op->buf = malloc_or_die(op->reply.hdr.retval);
|
||||
cl->recv_list.push_back(op->buf, op->reply.hdr.retval);
|
||||
}
|
||||
else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.hdr.retval > 0)
|
||||
else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.describe.result_bytes > 0)
|
||||
{
|
||||
delete cl->read_op;
|
||||
cl->read_op = op;
|
||||
|
@@ -84,9 +84,12 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
|
||||
{
|
||||
for (int i = 0; i < cur_op->iov.count; i++)
|
||||
{
|
||||
assert(cur_op->iov.buf[i].iov_base);
|
||||
to_send_list.push_back(cur_op->iov.buf[i]);
|
||||
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
||||
if (cur_op->iov.buf[i].iov_len > 0)
|
||||
{
|
||||
assert(cur_op->iov.buf[i].iov_base);
|
||||
to_send_list.push_back(cur_op->iov.buf[i]);
|
||||
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
||||
}
|
||||
}
|
||||
}
|
||||
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
|
||||
|
@@ -149,7 +149,7 @@ public:
|
||||
" --dev_num N\n"
|
||||
" Use the specified device /dev/nbdN instead of automatic selection.\n"
|
||||
" --foreground 1\n"
|
||||
" Stay in foreground, do not daemonize.n",
|
||||
" Stay in foreground, do not daemonize.\n",
|
||||
exe_name, exe_name, exe_name
|
||||
);
|
||||
exit(0);
|
||||
|
@@ -198,13 +198,14 @@ class osd_t
|
||||
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
|
||||
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
|
||||
void on_load_config_hook(json11::Json::object & changes);
|
||||
void on_reload_config_hook(json11::Json::object & changes);
|
||||
json11::Json on_load_pgs_checks_hook();
|
||||
void on_load_pgs_hook(bool success);
|
||||
void bind_socket();
|
||||
void acquire_lease();
|
||||
json11::Json get_osd_state();
|
||||
void create_osd_state();
|
||||
void renew_lease();
|
||||
void renew_lease(bool reload);
|
||||
void print_stats();
|
||||
void print_slow();
|
||||
void reset_stats();
|
||||
|
@@ -70,6 +70,7 @@ void osd_t::init_cluster()
|
||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
||||
st_cli.load_pgs_checks_hook = [this]() { return on_load_pgs_checks_hook(); };
|
||||
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
|
||||
st_cli.on_reload_hook = [this]() { st_cli.load_global_config(); };
|
||||
peering_state = OSD_LOADING_PGS;
|
||||
st_cli.load_global_config();
|
||||
}
|
||||
@@ -395,6 +396,14 @@ void osd_t::on_load_config_hook(json11::Json::object & global_config)
|
||||
parse_config(true);
|
||||
bind_socket();
|
||||
acquire_lease();
|
||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_reload_config_hook(cfg); };
|
||||
}
|
||||
|
||||
void osd_t::on_reload_config_hook(json11::Json::object & global_config)
|
||||
{
|
||||
etcd_global_config = global_config;
|
||||
parse_config(false);
|
||||
renew_lease(true);
|
||||
}
|
||||
|
||||
// Acquire lease
|
||||
@@ -424,7 +433,7 @@ void osd_t::acquire_lease()
|
||||
);
|
||||
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
|
||||
{
|
||||
renew_lease();
|
||||
renew_lease(false);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -499,11 +508,11 @@ void osd_t::create_osd_state()
|
||||
}
|
||||
|
||||
// Renew lease
|
||||
void osd_t::renew_lease()
|
||||
void osd_t::renew_lease(bool reload)
|
||||
{
|
||||
st_cli.etcd_call("/lease/keepalive", json11::Json::object {
|
||||
{ "ID", etcd_lease_id }
|
||||
}, st_cli.etcd_quick_timeout, 0, 0, [this](std::string err, json11::Json data)
|
||||
}, st_cli.etcd_quick_timeout, 0, 0, [this, reload](std::string err, json11::Json data)
|
||||
{
|
||||
if (err == "" && data["result"]["TTL"].string_value() == "")
|
||||
{
|
||||
@@ -522,15 +531,20 @@ void osd_t::renew_lease()
|
||||
force_stop(1);
|
||||
}
|
||||
// Retry
|
||||
tfd->set_timer(st_cli.etcd_quick_timeout, false, [this](int timer_id)
|
||||
tfd->set_timer(st_cli.etcd_quick_timeout, false, [this, reload](int timer_id)
|
||||
{
|
||||
renew_lease();
|
||||
renew_lease(reload);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
etcd_failed_attempts = 0;
|
||||
report_statistics();
|
||||
// Reload PGs
|
||||
if (reload && run_primary)
|
||||
{
|
||||
st_cli.load_pgs();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -560,7 +574,6 @@ void osd_t::force_stop(int exitcode)
|
||||
|
||||
json11::Json osd_t::on_load_pgs_checks_hook()
|
||||
{
|
||||
assert(this->pgs.size() == 0);
|
||||
json11::Json::array checks = {
|
||||
json11::Json::object {
|
||||
{ "target", "LEASE" },
|
||||
|
@@ -220,7 +220,7 @@ struct __attribute__((__packed__)) osd_reply_rw_t
|
||||
// for reads: bitmap length
|
||||
uint32_t bitmap_len;
|
||||
uint32_t pad0;
|
||||
// for reads: object version
|
||||
// for reads and writes: object version
|
||||
uint64_t version;
|
||||
};
|
||||
|
||||
|
@@ -87,8 +87,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||
// - op_data
|
||||
1, sizeof(osd_primary_op_data_t) +
|
||||
// - stripes
|
||||
// - resulting bitmap buffers
|
||||
stripe_count * (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) +
|
||||
stripe_count * sizeof(osd_rmw_stripe_t) +
|
||||
chain_size * (
|
||||
// - copy of the chain
|
||||
sizeof(inode_t) +
|
||||
@@ -110,11 +109,12 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||
op_data->pg_size = pg_it->second.pg_size;
|
||||
cur_op->op_data = op_data;
|
||||
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
|
||||
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
|
||||
// Resulting bitmaps have to survive op_data and be freed with the op itself
|
||||
assert(!cur_op->bitmap_buf);
|
||||
cur_op->bitmap_buf = calloc_or_die(1, clean_entry_bitmap_size * stripe_count);
|
||||
for (int i = 0; i < stripe_count; i++)
|
||||
{
|
||||
op_data->stripes[i].bmp_buf = data_buf;
|
||||
data_buf = (uint8_t*)data_buf + clean_entry_bitmap_size;
|
||||
op_data->stripes[i].bmp_buf = (uint8_t*)cur_op->bitmap_buf + clean_entry_bitmap_size * i;
|
||||
}
|
||||
op_data->chain_size = chain_size;
|
||||
if (chain_size > 0)
|
||||
@@ -129,16 +129,19 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
||||
data_buf = (uint8_t*)data_buf + chain_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size);
|
||||
// Copy chain
|
||||
int chain_num = 0;
|
||||
op_data->read_chain[chain_num++] = cur_op->req.rw.inode;
|
||||
op_data->read_chain[chain_num] = cur_op->req.rw.inode;
|
||||
op_data->chain_states[chain_num] = NULL;
|
||||
chain_num++;
|
||||
auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode);
|
||||
while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id &&
|
||||
INODE_POOL(inode_it->second.parent_id) == pg_it->second.pool_id &&
|
||||
// Check for loops
|
||||
inode_it->second.parent_id != cur_op->req.rw.inode)
|
||||
{
|
||||
op_data->read_chain[chain_num++] = inode_it->second.parent_id;
|
||||
op_data->chain_states[chain_num++] = NULL;
|
||||
op_data->read_chain[chain_num] = inode_it->second.parent_id;
|
||||
op_data->chain_states[chain_num] = NULL;
|
||||
inode_it = st_cli.inode_config.find(inode_it->second.parent_id);
|
||||
chain_num++;
|
||||
}
|
||||
}
|
||||
pg_it->second.inflight++;
|
||||
@@ -644,12 +647,6 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
|
||||
else if (op_data->st == 4) goto resume_4;
|
||||
else if (op_data->st == 5) goto resume_5;
|
||||
assert(op_data->st == 0);
|
||||
// Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs
|
||||
if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD))
|
||||
{
|
||||
finish_op(cur_op, -EBUSY);
|
||||
return;
|
||||
}
|
||||
if (!check_write_queue(cur_op, pg))
|
||||
{
|
||||
return;
|
||||
|
@@ -83,11 +83,13 @@ retry_1:
|
||||
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
||||
op_data->stripes[0].read_start = 0;
|
||||
op_data->stripes[0].read_end = bs_block_size;
|
||||
assert(!cur_op->rmw_buf);
|
||||
cur_op->rmw_buf = op_data->stripes[0].read_buf = memalign_or_die(MEM_ALIGNMENT, bs_block_size);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(!cur_op->rmw_buf);
|
||||
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
||||
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||
if (!cur_op->rmw_buf)
|
||||
|
@@ -35,6 +35,11 @@
|
||||
#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
|
||||
#define qobject_unref QDECREF
|
||||
#endif
|
||||
#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
|
||||
#include "sysemu/replay.h"
|
||||
#else
|
||||
#include "sysemu/sysemu.h"
|
||||
#endif
|
||||
|
||||
#include "vitastor_c.h"
|
||||
|
||||
@@ -48,9 +53,13 @@ void DSO_STAMP_FUN(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef struct VitastorFdData VitastorFdData;
|
||||
|
||||
typedef struct VitastorClient
|
||||
{
|
||||
void *proxy;
|
||||
int uring_eventfd;
|
||||
|
||||
void *watch;
|
||||
char *config_path;
|
||||
char *etcd_host;
|
||||
@@ -67,12 +76,24 @@ typedef struct VitastorClient
|
||||
int rdma_gid_index;
|
||||
int rdma_mtu;
|
||||
QemuMutex mutex;
|
||||
AioContext *ctx;
|
||||
VitastorFdData **fds;
|
||||
int fd_count, fd_alloc;
|
||||
int bh_uring_scheduled;
|
||||
|
||||
uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
||||
uint32_t last_bitmap_granularity;
|
||||
uint8_t *last_bitmap;
|
||||
} VitastorClient;
|
||||
|
||||
// Book-keeping record for one file descriptor registered through vitastor_aio_set_fd_handler().
// A pointer to this record is what gets passed to QEMU as the handler's opaque argument.
typedef struct VitastorFdData
|
||||
{
|
||||
VitastorClient *cli; // client owning this record; its mutex is taken around the callbacks below
|
||||
int fd; // descriptor this record was registered for (lookup key in cli->fds)
|
||||
IOHandler *fd_read, *fd_write; // callbacks supplied by the client library, invoked with `opaque`
|
||||
void *opaque; // argument forwarded to fd_read / fd_write
|
||||
} VitastorFdData;
|
||||
|
||||
typedef struct VitastorRPC
|
||||
{
|
||||
BlockDriverState *bs;
|
||||
@@ -83,10 +104,21 @@ typedef struct VitastorRPC
|
||||
uint64_t inode, offset, len;
|
||||
uint32_t bitmap_granularity;
|
||||
uint8_t *bitmap;
|
||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
QEMUBH *bh;
|
||||
#endif
|
||||
} VitastorRPC;
|
||||
|
||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
// Heap-allocated context for a one-shot bottom half on QEMU < 2.8:
// vitastor_schedule_uring_handler() allocates it, vitastor_bh_uring_handler() deletes `bh` and frees it.
typedef struct VitastorBH
|
||||
{
|
||||
VitastorClient *cli; // client whose io_uring events must be handled
|
||||
QEMUBH *bh; // the bottom half itself, kept so the handler can delete it
|
||||
} VitastorBH;
|
||||
#endif
|
||||
|
||||
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
||||
static void vitastor_co_generic_bh_cb(void *opaque, long retval);
|
||||
static void vitastor_co_generic_cb(void *opaque, long retval);
|
||||
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
|
||||
static void vitastor_close(BlockDriverState *bs);
|
||||
|
||||
@@ -202,6 +234,57 @@ out:
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
static void vitastor_uring_handler(void *opaque)
|
||||
{
|
||||
VitastorClient *client = (VitastorClient*)opaque;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
client->bh_uring_scheduled = 0;
|
||||
do
|
||||
{
|
||||
vitastor_c_uring_handle_events(client->proxy);
|
||||
} while (vitastor_c_uring_has_work(client->proxy));
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
}
|
||||
|
||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
||||
static void vitastor_bh_uring_handler(void *opaque)
|
||||
{
|
||||
VitastorBH *vbh = opaque;
|
||||
vitastor_bh_handler(vbh->cli);
|
||||
qemu_bh_delete(vbh->bh);
|
||||
free(vbh);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Queues one deferred run of vitastor_uring_handler() in the client's AioContext.
// Does nothing when io_uring is not in use (uring_eventfd < 0) or when a run is
// already pending (bh_uring_scheduled is set; the handler clears it).
// All call sites visible in this file invoke it with client->mutex held.
static void vitastor_schedule_uring_handler(VitastorClient *client)
{
    void *opaque = client;
    if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled)
    {
        client->bh_uring_scheduled = 1;
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
        replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
        aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
#else
        // No one-shot API: emulate it with a heap-allocated wrapper that
        // vitastor_bh_uring_handler() deletes and frees after running.
        VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
        if (!vbh)
        {
            // Could not allocate: undo the flag so that a later call retries
            client->bh_uring_scheduled = 0;
            return;
        }
        vbh->cli = client;
#if QEMU_VERSION_MAJOR >= 2
        // There is no `task` in this function - use the AioContext stored in the client
        vbh->bh = aio_bh_new(client->ctx, vitastor_bh_uring_handler, vbh);
#else
        vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
#endif
        qemu_bh_schedule(vbh->bh);
#endif
    }
}
#else
// Client library without io_uring support (C API version < 2): nothing to schedule
static void vitastor_schedule_uring_handler(VitastorClient *client)
{
}
|
||||
#endif
|
||||
|
||||
static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
{
|
||||
BlockDriverState *bs = task->bs;
|
||||
@@ -209,7 +292,8 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
task->co = qemu_coroutine_self();
|
||||
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
|
||||
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task->complete)
|
||||
@@ -218,14 +302,32 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Fix thread safety of the driver - now it segfaults when iothread is enabled in QEMU
|
||||
static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
|
||||
static void vitastor_aio_fd_read(void *fddv)
|
||||
{
|
||||
VitastorFdData *fdd = (VitastorFdData*)fddv;
|
||||
qemu_mutex_lock(&fdd->cli->mutex);
|
||||
fdd->fd_read(fdd->opaque);
|
||||
vitastor_schedule_uring_handler(fdd->cli);
|
||||
qemu_mutex_unlock(&fdd->cli->mutex);
|
||||
}
|
||||
|
||||
static void vitastor_aio_fd_write(void *fddv)
|
||||
{
|
||||
VitastorFdData *fdd = (VitastorFdData*)fddv;
|
||||
qemu_mutex_lock(&fdd->cli->mutex);
|
||||
fdd->fd_write(fdd->opaque);
|
||||
vitastor_schedule_uring_handler(fdd->cli);
|
||||
qemu_mutex_unlock(&fdd->cli->mutex);
|
||||
}
|
||||
|
||||
static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
|
||||
{
|
||||
aio_set_fd_handler(ctx, fd,
|
||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
||||
0 /*is_external*/,
|
||||
#endif
|
||||
fd_read, fd_write,
|
||||
fd_read,
|
||||
fd_write,
|
||||
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
||||
NULL /*io_flush*/,
|
||||
#endif
|
||||
@@ -238,6 +340,53 @@ static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandle
|
||||
opaque);
|
||||
}
|
||||
|
||||
// Callback given to the client library for (un)registering fd handlers.
//   vcli              - the VitastorClient (passed as the library's "aio context")
//   fd                - descriptor to watch
//   fd_read, fd_write - library callbacks; both NULL means "stop watching fd"
//   opaque            - argument for the library callbacks
//   unused1, unused2  - ignored
// The library callbacks are not handed to QEMU directly: they are stored in a
// VitastorFdData record kept in client->fds, and QEMU gets the wrappers
// vitastor_aio_fd_read() / vitastor_aio_fd_write() with that record as opaque.
static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
{
    VitastorClient *client = (VitastorClient*)vcli;
    VitastorFdData *entry = NULL;
    const int registering = (fd_read || fd_write) ? 1 : 0;
    int pos = 0, k;
    // Look for an existing record of this descriptor
    while (pos < client->fd_count && client->fds[pos]->fd != fd)
    {
        pos++;
    }
    if (pos < client->fd_count)
    {
        if (registering)
        {
            // Known descriptor: just replace the callbacks
            entry = client->fds[pos];
            entry->opaque = opaque;
            entry->fd_read = fd_read;
            entry->fd_write = fd_write;
        }
        else
        {
            // Unregistering: close the gap in the array.
            // NOTE(review): the removed record itself is not freed here.
            for (k = pos; k+1 < client->fd_count; k++)
            {
                client->fds[k] = client->fds[k+1];
            }
            client->fd_count--;
        }
    }
    else if (registering)
    {
        // New descriptor: allocate a record and append it, growing the array when full
        entry = (VitastorFdData*)malloc(sizeof(VitastorFdData));
        entry->cli = client;
        entry->fd = fd;
        entry->fd_read = fd_read;
        entry->fd_write = fd_write;
        entry->opaque = opaque;
        if (client->fd_count >= client->fd_alloc)
        {
            // Double the capacity, but never go below 16 slots
            int grown = client->fd_alloc*2;
            client->fd_alloc = grown < 16 ? 16 : grown;
            client->fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * client->fd_alloc);
        }
        client->fds[client->fd_count++] = entry;
    }
    // entry stays NULL when the descriptor is being unregistered
    universal_aio_set_fd_handler(
        client->ctx, fd,
        fd_read ? vitastor_aio_fd_read : NULL,
        fd_write ? vitastor_aio_fd_write : NULL,
        entry
    );
}
|
||||
|
||||
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||
{
|
||||
VitastorRPC task;
|
||||
@@ -255,10 +404,36 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
||||
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
||||
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
||||
client->proxy = vitastor_c_create_qemu(
|
||||
vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
client->ctx = bdrv_get_aio_context(bs);
|
||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
client->proxy = vitastor_c_create_qemu_uring(
|
||||
vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
);
|
||||
if (!client->proxy)
|
||||
{
|
||||
fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower\n", strerror(errno));
|
||||
client->uring_eventfd = -1;
|
||||
#endif
|
||||
client->proxy = vitastor_c_create_qemu(
|
||||
vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
);
|
||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
||||
}
|
||||
else
|
||||
{
|
||||
client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
|
||||
if (client->uring_eventfd < 0)
|
||||
{
|
||||
fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
|
||||
error_setg(errp, "failed to create io_uring eventfd");
|
||||
vitastor_close(bs);
|
||||
return -1;
|
||||
}
|
||||
universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
|
||||
}
|
||||
#endif
|
||||
image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
||||
// Get image metadata (size and readonly flag) or just wait until the client is ready
|
||||
@@ -338,6 +513,12 @@ static void vitastor_close(BlockDriverState *bs)
|
||||
{
|
||||
VitastorClient *client = bs->opaque;
|
||||
vitastor_c_destroy(client->proxy);
|
||||
if (client->fds)
|
||||
{
|
||||
free(client->fds);
|
||||
client->fds = NULL;
|
||||
client->fd_alloc = client->fd_count = 0;
|
||||
}
|
||||
qemu_mutex_destroy(&client->mutex);
|
||||
if (client->config_path)
|
||||
g_free(client->config_path);
|
||||
@@ -454,25 +635,44 @@ static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
|
||||
};
|
||||
}
|
||||
|
||||
static void vitastor_co_generic_bh_cb(void *opaque, long retval)
|
||||
static void vitastor_co_generic_bh_cb(void *opaque)
|
||||
{
|
||||
VitastorRPC *task = opaque;
|
||||
task->ret = retval;
|
||||
task->complete = 1;
|
||||
if (qemu_coroutine_self() != task->co)
|
||||
{
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||
aio_co_wake(task->co);
|
||||
#else
|
||||
#if QEMU_VERSION_MAJOR == 2
|
||||
qemu_bh_delete(task->bh);
|
||||
#endif
|
||||
qemu_coroutine_enter(task->co, NULL);
|
||||
qemu_aio_release(task);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void vitastor_co_generic_cb(void *opaque, long retval)
|
||||
{
|
||||
VitastorRPC *task = opaque;
|
||||
task->ret = retval;
|
||||
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
||||
replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
#elif QEMU_VERSION_MAJOR >= 2
|
||||
task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
qemu_bh_schedule(task->bh);
|
||||
#else
|
||||
task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
||||
qemu_bh_schedule(task->bh);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
|
||||
{
|
||||
vitastor_co_generic_bh_cb(opaque, retval);
|
||||
vitastor_co_generic_cb(opaque, retval);
|
||||
}
|
||||
|
||||
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
||||
@@ -491,6 +691,7 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task.complete)
|
||||
@@ -523,7 +724,8 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
||||
|
||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
||||
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task.complete)
|
||||
@@ -541,7 +743,6 @@ static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitma
|
||||
VitastorRPC *task = opaque;
|
||||
VitastorClient *client = task->bs->opaque;
|
||||
task->ret = retval;
|
||||
task->complete = 1;
|
||||
if (retval >= 0)
|
||||
{
|
||||
task->bitmap = bitmap;
|
||||
@@ -553,15 +754,17 @@ static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitma
|
||||
client->last_bitmap = bitmap;
|
||||
}
|
||||
}
|
||||
if (qemu_coroutine_self() != task->co)
|
||||
{
|
||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||
aio_co_wake(task->co);
|
||||
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
||||
replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
#elif QEMU_VERSION_MAJOR >= 2
|
||||
task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
||||
qemu_bh_schedule(task->bh);
|
||||
#else
|
||||
qemu_coroutine_enter(task->co, NULL);
|
||||
qemu_aio_release(task);
|
||||
task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
||||
qemu_bh_schedule(task->bh);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static int coroutine_fn vitastor_co_block_status(
|
||||
@@ -602,6 +805,7 @@ static int coroutine_fn vitastor_co_block_status(
|
||||
task.bitmap = client->last_bitmap = NULL;
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
while (!task.complete)
|
||||
{
|
||||
@@ -687,7 +891,8 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
|
||||
vitastor_co_init_task(bs, &task);
|
||||
|
||||
qemu_mutex_lock(&client->mutex);
|
||||
vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
|
||||
vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
|
||||
vitastor_schedule_uring_handler(client);
|
||||
qemu_mutex_unlock(&client->mutex);
|
||||
|
||||
while (!task.complete)
|
||||
|
@@ -2,9 +2,12 @@
|
||||
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
#include "ringloop.h"
|
||||
|
||||
ring_loop_t::ring_loop_t(int qd)
|
||||
@@ -32,6 +35,10 @@ ring_loop_t::~ring_loop_t()
|
||||
free(free_ring_data);
|
||||
free(ring_datas);
|
||||
io_uring_queue_exit(&ring);
|
||||
if (ring_eventfd)
|
||||
{
|
||||
close(ring_eventfd);
|
||||
}
|
||||
}
|
||||
|
||||
void ring_loop_t::register_consumer(ring_consumer_t *consumer)
|
||||
@@ -127,3 +134,24 @@ int ring_loop_t::sqes_left()
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
int ring_loop_t::register_eventfd()
|
||||
{
|
||||
if (ring_eventfd >= 0)
|
||||
{
|
||||
return ring_eventfd;
|
||||
}
|
||||
ring_eventfd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
|
||||
if (ring_eventfd < 0)
|
||||
{
|
||||
return -errno;
|
||||
}
|
||||
int r = io_uring_register_eventfd(&ring, ring_eventfd);
|
||||
if (r < 0)
|
||||
{
|
||||
close(ring_eventfd);
|
||||
ring_eventfd = -1;
|
||||
return r;
|
||||
}
|
||||
return ring_eventfd;
|
||||
}
|
||||
|
@@ -126,11 +126,13 @@ class ring_loop_t
|
||||
unsigned free_ring_data_ptr;
|
||||
bool loop_again;
|
||||
struct io_uring ring;
|
||||
int ring_eventfd = -1;
|
||||
public:
|
||||
ring_loop_t(int qd);
|
||||
~ring_loop_t();
|
||||
void register_consumer(ring_consumer_t *consumer);
|
||||
void unregister_consumer(ring_consumer_t *consumer);
|
||||
int register_eventfd();
|
||||
|
||||
inline struct io_uring_sqe* get_sqe()
|
||||
{
|
||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: Vitastor
|
||||
Description: Vitastor client library
|
||||
Version: 0.9.2
|
||||
Version: 0.9.4
|
||||
Libs: -L${libdir} -lvitastor_client
|
||||
Cflags: -I${includedir}
|
||||
|
||||
|
@@ -5,6 +5,7 @@
|
||||
// Also acts as a C-C++ proxy for the QEMU driver (QEMU headers don't compile with g++)
|
||||
|
||||
#include <sys/epoll.h>
|
||||
#include <sys/eventfd.h>
|
||||
|
||||
#include "ringloop.h"
|
||||
#include "epoll_manager.h"
|
||||
@@ -25,6 +26,7 @@ struct vitastor_c
|
||||
epoll_manager_t *epmgr = NULL;
|
||||
timerfd_manager_t *tfd = NULL;
|
||||
cluster_client_t *cli = NULL;
|
||||
int uring_eventfd = -1;
|
||||
|
||||
QEMUSetFDHandler *aio_set_fd_handler = NULL;
|
||||
void *aio_ctx = NULL;
|
||||
@@ -70,14 +72,8 @@ static void vitastor_c_write_handler(void *opaque)
|
||||
data->callback(data->fd, EPOLLOUT);
|
||||
}
|
||||
|
||||
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
||||
static vitastor_c *vitastor_c_create_qemu_common(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context)
|
||||
{
|
||||
json11::Json cfg_json = vitastor_c_common_config(
|
||||
config_path, etcd_host, etcd_prefix, use_rdma,
|
||||
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
||||
);
|
||||
vitastor_c *self = new vitastor_c;
|
||||
self->aio_set_fd_handler = aio_set_fd_handler;
|
||||
self->aio_ctx = aio_context;
|
||||
@@ -95,24 +91,77 @@ vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *a
|
||||
self->aio_set_fd_handler(self->aio_ctx, fd, false, NULL, NULL, NULL, NULL);
|
||||
}
|
||||
});
|
||||
self->cli = new cluster_client_t(NULL, self->tfd, cfg_json);
|
||||
return self;
|
||||
}
|
||||
|
||||
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||
// Creates a client driven purely by QEMU fd handlers (no io_uring ring loop):
// the cluster client is constructed with a NULL ring loop and the timer
// manager prepared by vitastor_c_create_qemu_common().
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
    const char *config_path, const char *etcd_host, const char *etcd_prefix,
    int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{
    // Build the configuration first, then the proxy object, as before
    json11::Json config = vitastor_c_common_config(
        config_path, etcd_host, etcd_prefix, use_rdma,
        rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
    );
    vitastor_c *proxy = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
    proxy->cli = new cluster_client_t(NULL, proxy->tfd, config);
    return proxy;
}
|
||||
|
||||
// Same as vitastor_c_create_qemu(), but the cluster client is additionally
// given an io_uring ring loop (queue depth 512).
// Returns NULL when the ring loop cannot be constructed (its constructor
// throws a std::exception), before anything else is set up.
vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
    const char *config_path, const char *etcd_host, const char *etcd_prefix,
    int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{
    ring_loop_t *uring = NULL;
    try
    {
        uring = new ring_loop_t(512);
    }
    catch (std::exception &)
    {
        return NULL;
    }
    json11::Json config = vitastor_c_common_config(
        config_path, etcd_host, etcd_prefix, use_rdma,
        rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
    );
    vitastor_c *proxy = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
    proxy->ringloop = uring;
    proxy->cli = new cluster_client_t(uring, proxy->tfd, config);
    return proxy;
}
|
||||
|
||||
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
||||
{
|
||||
ring_loop_t *ringloop = NULL;
|
||||
try
|
||||
{
|
||||
ringloop = new ring_loop_t(512);
|
||||
}
|
||||
catch (std::exception & e)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
json11::Json cfg_json = vitastor_c_common_config(
|
||||
config_path, etcd_host, etcd_prefix, use_rdma,
|
||||
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
||||
);
|
||||
vitastor_c *self = new vitastor_c;
|
||||
self->ringloop = new ring_loop_t(512);
|
||||
self->ringloop = ringloop;
|
||||
self->epmgr = new epoll_manager_t(self->ringloop);
|
||||
self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json);
|
||||
return self;
|
||||
}
|
||||
|
||||
// Registers an eventfd with the client's ring loop and returns its descriptor.
// Returns -EINVAL for a client that has no ring loop, otherwise whatever
// ring_loop_t::register_eventfd() returns.
int vitastor_c_uring_register_eventfd(vitastor_c *client)
{
    ring_loop_t *loop = client->ringloop;
    return loop ? loop->register_eventfd() : -EINVAL;
}
|
||||
|
||||
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len)
|
||||
{
|
||||
json11::Json::object cfg;
|
||||
@@ -166,6 +215,11 @@ void vitastor_c_uring_wait_events(vitastor_c *client)
|
||||
client->ringloop->wait();
|
||||
}
|
||||
|
||||
// Tells whether the client's ring loop has pending work (ring_loop_t::has_work()).
// A client without a ring loop (e.g. one made by vitastor_c_create_qemu()) never
// has io_uring work, so 0 is returned for it instead of dereferencing NULL -
// the same guard vitastor_c_uring_register_eventfd() applies.
int vitastor_c_uring_has_work(vitastor_c *client)
{
    if (!client->ringloop)
    {
        return 0;
    }
    return client->ringloop->has_work();
}
|
||||
|
||||
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
||||
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque)
|
||||
{
|
||||
|
@@ -7,7 +7,7 @@
|
||||
#define VITASTOR_QEMU_PROXY_H
|
||||
|
||||
// C API wrapper version
|
||||
#define VITASTOR_C_API_VERSION 1
|
||||
#define VITASTOR_C_API_VERSION 2
|
||||
|
||||
#ifndef POOL_ID_BITS
|
||||
#define POOL_ID_BITS 16
|
||||
@@ -34,14 +34,19 @@ typedef void QEMUSetFDHandler(void *ctx, int fd, int is_external, IOHandler *fd_
|
||||
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
||||
vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
||||
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
||||
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);
|
||||
void vitastor_c_destroy(vitastor_c *client);
|
||||
int vitastor_c_is_ready(vitastor_c *client);
|
||||
int vitastor_c_uring_register_eventfd(vitastor_c *client);
|
||||
void vitastor_c_uring_wait_ready(vitastor_c *client);
|
||||
void vitastor_c_uring_handle_events(vitastor_c *client);
|
||||
void vitastor_c_uring_wait_events(vitastor_c *client);
|
||||
int vitastor_c_uring_has_work(vitastor_c *client);
|
||||
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
||||
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
|
||||
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
|
||||
|
@@ -36,9 +36,6 @@ for i in $(seq 1 $OSD_COUNT); do
|
||||
start_osd $i
|
||||
done
|
||||
|
||||
cd mon
|
||||
npm install
|
||||
cd ..
|
||||
(while true; do node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 || true; done) &>./testdata/mon.log &
|
||||
MON_PID=$!
|
||||
|
||||
@@ -85,7 +82,9 @@ wait_up()
|
||||
done
|
||||
}
|
||||
|
||||
wait_up 60
|
||||
if [[ $OSD_COUNT -gt 0 ]]; then
|
||||
wait_up 60
|
||||
fi
|
||||
|
||||
try_reweight()
|
||||
{
|
||||
|
@@ -16,29 +16,35 @@ SCHEME=ec ./test_change_pg_count.sh
|
||||
|
||||
./test_etcd_fail.sh
|
||||
|
||||
./test_failure_domain.sh
|
||||
|
||||
./test_interrupted_rebalance.sh
|
||||
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||
SCHEME=ec ./test_interrupted_rebalance.sh
|
||||
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||
|
||||
./test_failure_domain.sh
|
||||
|
||||
./test_snapshot.sh
|
||||
SCHEME=ec ./test_snapshot.sh
|
||||
|
||||
./test_minsize_1.sh
|
||||
|
||||
./test_move_reappear.sh
|
||||
|
||||
./test_rm.sh
|
||||
|
||||
./test_snapshot_chain.sh
|
||||
SCHEME=ec ./test_snapshot_chain.sh
|
||||
|
||||
./test_snapshot_down.sh
|
||||
SCHEME=ec ./test_snapshot_down.sh
|
||||
|
||||
./test_splitbrain.sh
|
||||
|
||||
./test_rebalance_verify.sh
|
||||
IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
||||
SCHEME=ec ./test_rebalance_verify.sh
|
||||
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
||||
|
||||
./test_rm.sh
|
||||
|
||||
./test_snapshot.sh
|
||||
SCHEME=ec ./test_snapshot.sh
|
||||
|
||||
./test_splitbrain.sh
|
||||
|
||||
./test_write.sh
|
||||
SCHEME=xor ./test_write.sh
|
||||
|
||||
|
@@ -15,9 +15,6 @@ $ETCDCTL put /vitastor/osd/stats/7 '{"host":"host4","size":1073741824,"time":"'$
|
||||
$ETCDCTL put /vitastor/osd/stats/8 '{"host":"host4","size":1073741824,"time":"'$TIME'"}'
|
||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":4,"failure_domain":"rack"}}'
|
||||
|
||||
cd mon
|
||||
npm install
|
||||
cd ..
|
||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
||||
MON_PID=$!
|
||||
|
||||
|
48
tests/test_snapshot_chain.sh
Executable file
48
tests/test_snapshot_chain.sh
Executable file
@@ -0,0 +1,48 @@
|
||||
#!/bin/bash -ex

# Snapshot chain test: fills an image, then 10 times takes a snapshot, checks
# that the image still reads the same through the new layer, overwrites part of
# it and checks the result. Finally removes snapshots and re-checks the image
# and the last snapshot. fio's -mirror_file keeps a local copy of what was
# written, which every qemu-img dump is compared against.
# Requires ETCD_URL and ETCD_PORT, check_qemu and format_green; these are
# expected to come from run_3osds.sh.

. "$(dirname "$0")/run_3osds.sh"
check_qemu

# Test multiple snapshots

build/src/vitastor-cli --etcd_address "$ETCD_URL" create -s 32M testchain

LD_PRELOAD="build/src/libfio_vitastor.so" \
    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
        -etcd="$ETCD_URL" -image=testchain -mirror_file=./testdata/mirror.bin

for i in {1..10}; do
    # Create a snapshot
    build/src/vitastor-cli --etcd_address "$ETCD_URL" snap-create "testchain@$i"
    # Check that the new snapshot is see-through
    qemu-img convert -p \
        -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
        -O raw ./testdata/check.bin
    cmp ./testdata/check.bin ./testdata/mirror.bin
    # Write something to it.
    # -randrepeat is 1 for the first two iterations and 0 afterwards;
    # the buffer pattern is the number 10+i repeated four times after "0x".
    LD_PRELOAD="build/src/libfio_vitastor.so" \
        fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
            -randrepeat="$((i <= 2))" -buffer_pattern="0x$((10+i))$((10+i))$((10+i))$((10+i))" \
            -etcd="$ETCD_URL" -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
    # Check the new content
    qemu-img convert -p \
        -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
        -O raw ./testdata/layer1.bin
    cmp ./testdata/layer1.bin ./testdata/mirror.bin
done

# NOTE(review): presumably removes the whole range of layers from @1 to @9 - confirm against vitastor-cli
build/src/vitastor-cli --etcd_address "$ETCD_URL" rm testchain@1 testchain@9

# Check the final image
qemu-img convert -p \
    -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
    -O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin

# Check the last remaining snapshot
# (check.bin still holds the dump taken right after testchain@10 was created)
qemu-img convert -p \
    -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
    -O raw ./testdata/layer0.bin
cmp ./testdata/layer0.bin ./testdata/check.bin

format_green OK
|
37
tests/test_snapshot_down.sh
Executable file
37
tests/test_snapshot_down.sh
Executable file
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash -ex

# Snapshot merge test: fills an image, takes one snapshot, overwrites a few
# blocks of the image, removes the snapshot and checks that the image content
# is unchanged by the removal. fio's -mirror_file keeps a local copy of what
# was written, which the qemu-img dumps are compared against.
# Requires ETCD_URL and ETCD_PORT, check_qemu and format_green; these are
# expected to come from run_3osds.sh.

. "$(dirname "$0")/run_3osds.sh"
check_qemu

# Test merge to child (without "inverse rename" optimisation)

build/src/vitastor-cli --etcd_address "$ETCD_URL" create -s 128M testchain

LD_PRELOAD="build/src/libfio_vitastor.so" \
    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
        -etcd="$ETCD_URL" -image=testchain -mirror_file=./testdata/mirror.bin

# Create a snapshot
build/src/vitastor-cli --etcd_address "$ETCD_URL" snap-create testchain@0

# Write something to it
LD_PRELOAD="build/src/libfio_vitastor.so" \
    fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
        -randrepeat=0 -etcd="$ETCD_URL" -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin

# Check the new content
qemu-img convert -p \
    -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
    -O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin

# Merge
build/src/vitastor-cli --etcd_address "$ETCD_URL" rm testchain@0

# Check the final image
qemu-img convert -p \
    -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
    -O raw ./testdata/layer1.bin
cmp ./testdata/layer1.bin ./testdata/mirror.bin

format_green OK
|
Reference in New Issue
Block a user