Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
0d1b6d0760 |
@@ -10,9 +10,6 @@ RUN set -e -x; \
|
|||||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||||
ln -s /root/qemu-build/qemu-*/ ./qemu; \
|
ln -s /root/qemu-build/qemu-*/ ./qemu; \
|
||||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||||
cd mon; \
|
|
||||||
npm install; \
|
|
||||||
cd ..; \
|
|
||||||
mkdir build; \
|
mkdir build; \
|
||||||
cd build; \
|
cd build; \
|
||||||
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
|
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
|
||||||
|
@@ -190,6 +190,24 @@ jobs:
|
|||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
|
test_failure_domain:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||||
|
steps:
|
||||||
|
- name: Run test
|
||||||
|
id: test
|
||||||
|
timeout-minutes: 3
|
||||||
|
run: /root/vitastor/tests/test_failure_domain.sh
|
||||||
|
- name: Print logs
|
||||||
|
if: always() && steps.test.outcome == 'failure'
|
||||||
|
run: |
|
||||||
|
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||||
|
echo "-------- $i --------"
|
||||||
|
cat $i
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
test_interrupted_rebalance:
|
test_interrupted_rebalance:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: build
|
needs: build
|
||||||
@@ -262,60 +280,6 @@ jobs:
|
|||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
test_failure_domain:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: /root/vitastor/tests/test_failure_domain.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_snapshot:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: /root/vitastor/tests/test_snapshot.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_snapshot_ec:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_minsize_1:
|
test_minsize_1:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: build
|
needs: build
|
||||||
@@ -352,114 +316,6 @@ jobs:
|
|||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
test_rm:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: /root/vitastor/tests/test_rm.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_snapshot_chain:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: /root/vitastor/tests/test_snapshot_chain.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_snapshot_chain_ec:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_snapshot_down:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: /root/vitastor/tests/test_snapshot_down.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_snapshot_down_ec:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_splitbrain:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
needs: build
|
|
||||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
|
||||||
steps:
|
|
||||||
- name: Run test
|
|
||||||
id: test
|
|
||||||
timeout-minutes: 3
|
|
||||||
run: /root/vitastor/tests/test_splitbrain.sh
|
|
||||||
- name: Print logs
|
|
||||||
if: always() && steps.test.outcome == 'failure'
|
|
||||||
run: |
|
|
||||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
|
||||||
echo "-------- $i --------"
|
|
||||||
cat $i
|
|
||||||
echo ""
|
|
||||||
done
|
|
||||||
|
|
||||||
test_rebalance_verify:
|
test_rebalance_verify:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: build
|
needs: build
|
||||||
@@ -532,6 +388,78 @@ jobs:
|
|||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
|
test_rm:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||||
|
steps:
|
||||||
|
- name: Run test
|
||||||
|
id: test
|
||||||
|
timeout-minutes: 3
|
||||||
|
run: /root/vitastor/tests/test_rm.sh
|
||||||
|
- name: Print logs
|
||||||
|
if: always() && steps.test.outcome == 'failure'
|
||||||
|
run: |
|
||||||
|
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||||
|
echo "-------- $i --------"
|
||||||
|
cat $i
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
|
test_snapshot:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||||
|
steps:
|
||||||
|
- name: Run test
|
||||||
|
id: test
|
||||||
|
timeout-minutes: 3
|
||||||
|
run: /root/vitastor/tests/test_snapshot.sh
|
||||||
|
- name: Print logs
|
||||||
|
if: always() && steps.test.outcome == 'failure'
|
||||||
|
run: |
|
||||||
|
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||||
|
echo "-------- $i --------"
|
||||||
|
cat $i
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
|
test_snapshot_ec:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||||
|
steps:
|
||||||
|
- name: Run test
|
||||||
|
id: test
|
||||||
|
timeout-minutes: 3
|
||||||
|
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
|
||||||
|
- name: Print logs
|
||||||
|
if: always() && steps.test.outcome == 'failure'
|
||||||
|
run: |
|
||||||
|
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||||
|
echo "-------- $i --------"
|
||||||
|
cat $i
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
|
test_splitbrain:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
needs: build
|
||||||
|
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||||
|
steps:
|
||||||
|
- name: Run test
|
||||||
|
id: test
|
||||||
|
timeout-minutes: 3
|
||||||
|
run: /root/vitastor/tests/test_splitbrain.sh
|
||||||
|
- name: Print logs
|
||||||
|
if: always() && steps.test.outcome == 'failure'
|
||||||
|
run: |
|
||||||
|
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||||
|
echo "-------- $i --------"
|
||||||
|
cat $i
|
||||||
|
echo ""
|
||||||
|
done
|
||||||
|
|
||||||
test_write:
|
test_write:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: build
|
needs: build
|
||||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
|||||||
|
|
||||||
project(vitastor)
|
project(vitastor)
|
||||||
|
|
||||||
set(VERSION "0.9.5")
|
set(VERSION "0.9.2")
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@@ -15,7 +15,7 @@ Vitastor архитектурно похож на Ceph, что означает
|
|||||||
и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
|
и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
|
||||||
избыточности - репликацией или с произвольными кодами коррекции ошибок.
|
избыточности - репликацией или с произвольными кодами коррекции ошибок.
|
||||||
|
|
||||||
Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
|
Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
|
||||||
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
|
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
|
||||||
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
|
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
|
||||||
|
|
||||||
|
@@ -14,8 +14,8 @@ Vitastor is architecturally similar to Ceph which means strong consistency,
|
|||||||
primary-replication, symmetric clustering and automatic data distribution over any
|
primary-replication, symmetric clustering and automatic data distribution over any
|
||||||
number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
|
number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
|
||||||
|
|
||||||
Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network,
|
Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
|
||||||
supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
|
TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
|
||||||
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
|
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
|
||||||
or internal systems of public clouds.
|
or internal systems of public clouds.
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
VERSION ?= v0.9.5
|
VERSION ?= v0.9.2
|
||||||
|
|
||||||
all: build push
|
all: build push
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ spec:
|
|||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
allowPrivilegeEscalation: true
|
allowPrivilegeEscalation: true
|
||||||
image: vitalif/vitastor-csi:v0.9.5
|
image: vitalif/vitastor-csi:v0.9.2
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -116,7 +116,7 @@ spec:
|
|||||||
privileged: true
|
privileged: true
|
||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
image: vitalif/vitastor-csi:v0.9.5
|
image: vitalif/vitastor-csi:v0.9.2
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -5,7 +5,7 @@ package vitastor
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
vitastorCSIDriverName = "csi.vitastor.io"
|
vitastorCSIDriverName = "csi.vitastor.io"
|
||||||
vitastorCSIDriverVersion = "0.9.5"
|
vitastorCSIDriverVersion = "0.9.2"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Config struct fills the parameters of request or user input
|
// Config struct fills the parameters of request or user input
|
||||||
|
58
debian/build-pve-qemu.sh
vendored
58
debian/build-pve-qemu.sh
vendored
@@ -1,58 +0,0 @@
|
|||||||
exit
|
|
||||||
|
|
||||||
git clone https://git.yourcmc.ru/vitalif/pve-qemu .
|
|
||||||
|
|
||||||
# bookworm
|
|
||||||
|
|
||||||
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
|
|
||||||
|
|
||||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
|
|
||||||
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
|
|
||||||
echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
|
|
||||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
|
|
||||||
echo 'ru_RU UTF-8' >> /etc/locale.gen
|
|
||||||
echo 'en_US UTF-8' >> /etc/locale.gen
|
|
||||||
apt-get update
|
|
||||||
apt-get install wget ca-certificates
|
|
||||||
wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
|
|
||||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
|
|
||||||
apt-get update
|
|
||||||
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
|
|
||||||
mk-build-deps --install ./control
|
|
||||||
|
|
||||||
# bullseye
|
|
||||||
|
|
||||||
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
|
|
||||||
|
|
||||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
|
|
||||||
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
|
|
||||||
echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
|
|
||||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
|
|
||||||
echo 'ru_RU UTF-8' >> /etc/locale.gen
|
|
||||||
echo 'en_US UTF-8' >> /etc/locale.gen
|
|
||||||
apt-get update
|
|
||||||
apt-get install wget
|
|
||||||
wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
|
|
||||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
|
|
||||||
apt-get update
|
|
||||||
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
|
|
||||||
mk-build-deps --install ./control
|
|
||||||
|
|
||||||
# buster
|
|
||||||
|
|
||||||
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash
|
|
||||||
|
|
||||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
|
|
||||||
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
|
|
||||||
echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
|
|
||||||
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
|
|
||||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
|
|
||||||
echo 'ru_RU UTF-8' >> /etc/locale.gen
|
|
||||||
echo 'en_US UTF-8' >> /etc/locale.gen
|
|
||||||
apt-get update
|
|
||||||
apt-get install wget ca-certificates
|
|
||||||
wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
|
|
||||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
|
|
||||||
apt-get update
|
|
||||||
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
|
|
||||||
mk-build-deps --install ./control
|
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@@ -1,10 +1,10 @@
|
|||||||
vitastor (0.9.5-1) unstable; urgency=medium
|
vitastor (0.9.2-1) unstable; urgency=medium
|
||||||
|
|
||||||
* Bugfixes
|
* Bugfixes
|
||||||
|
|
||||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||||
|
|
||||||
vitastor (0.9.5-1) unstable; urgency=medium
|
vitastor (0.9.2-1) unstable; urgency=medium
|
||||||
|
|
||||||
* Implement NFS proxy
|
* Implement NFS proxy
|
||||||
* Add documentation
|
* Add documentation
|
||||||
|
18
debian/patched-qemu.Dockerfile
vendored
18
debian/patched-qemu.Dockerfile
vendored
@@ -28,19 +28,13 @@ RUN apt-get --download-only source qemu
|
|||||||
|
|
||||||
ADD patches /root/vitastor/patches
|
ADD patches /root/vitastor/patches
|
||||||
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
|
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
|
||||||
|
|
||||||
#RUN set -e; \
|
|
||||||
# apt-get install -y wget; \
|
|
||||||
# wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
|
|
||||||
# (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
|
|
||||||
# (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
|
||||||
# apt-get update; \
|
|
||||||
# apt-get install -y vitastor-client vitastor-client-dev quilt
|
|
||||||
|
|
||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
|
apt-get install -y wget; \
|
||||||
|
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
|
||||||
|
(echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
|
||||||
|
(echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||||
apt-get update; \
|
apt-get update; \
|
||||||
apt-get install -y quilt; \
|
apt-get install -y vitastor-client vitastor-client-dev quilt; \
|
||||||
mkdir -p /root/packages/qemu-$REL; \
|
mkdir -p /root/packages/qemu-$REL; \
|
||||||
rm -rf /root/packages/qemu-$REL/*; \
|
rm -rf /root/packages/qemu-$REL/*; \
|
||||||
cd /root/packages/qemu-$REL; \
|
cd /root/packages/qemu-$REL; \
|
||||||
@@ -54,7 +48,7 @@ RUN set -e; \
|
|||||||
quilt add block/vitastor.c; \
|
quilt add block/vitastor.c; \
|
||||||
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
|
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
|
||||||
quilt refresh; \
|
quilt refresh; \
|
||||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor3; \
|
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
|
||||||
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
|
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
|
||||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||||
rm -rf /root/packages/qemu-$REL/qemu-*/
|
rm -rf /root/packages/qemu-$REL/qemu-*/
|
||||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -35,8 +35,8 @@ RUN set -e -x; \
|
|||||||
mkdir -p /root/packages/vitastor-$REL; \
|
mkdir -p /root/packages/vitastor-$REL; \
|
||||||
rm -rf /root/packages/vitastor-$REL/*; \
|
rm -rf /root/packages/vitastor-$REL/*; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
cp -r /root/vitastor vitastor-0.9.5; \
|
cp -r /root/vitastor vitastor-0.9.2; \
|
||||||
cd vitastor-0.9.5; \
|
cd vitastor-0.9.2; \
|
||||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||||
@@ -49,8 +49,8 @@ RUN set -e -x; \
|
|||||||
rm -rf a b; \
|
rm -rf a b; \
|
||||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.5.orig.tar.xz vitastor-0.9.5; \
|
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
|
||||||
cd vitastor-0.9.5; \
|
cd vitastor-0.9.2; \
|
||||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||||
|
@@ -21,7 +21,7 @@ Configuration parameters can be set in 3 places:
|
|||||||
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
|
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
|
||||||
doesn't allow to set all variables directly, but it allows to override the
|
doesn't allow to set all variables directly, but it allows to override the
|
||||||
configuration file and set everything you need inside it.
|
configuration file and set everything you need inside it.
|
||||||
- OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain
|
- OSD superblocks created by [vitastor-disk](../usage/disk.en.md) contain
|
||||||
primarily disk layout parameters of specific OSDs. In fact, these parameters
|
primarily disk layout parameters of specific OSDs. In fact, these parameters
|
||||||
are automatically passed into the command line of vitastor-osd process, so
|
are automatically passed into the command line of vitastor-osd process, so
|
||||||
they have the same "status" as command-line parameters.
|
they have the same "status" as command-line parameters.
|
||||||
|
@@ -23,7 +23,7 @@
|
|||||||
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
|
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
|
||||||
как правило, не включают полный набор параметров напрямую, но позволяют
|
как правило, не включают полный набор параметров напрямую, но позволяют
|
||||||
определить путь к файлу конфигурации и задать любые параметры в нём.
|
определить путь к файлу конфигурации и задать любые параметры в нём.
|
||||||
- В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры,
|
- В суперблоке OSD, записываемом [vitastor-disk](../usage/disk.ru.md) - параметры,
|
||||||
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
|
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
|
||||||
при запуске OSD эти параметры автоматически передаются в командную строку
|
при запуске OSD эти параметры автоматически передаются в командную строку
|
||||||
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
|
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
|
||||||
|
@@ -33,13 +33,12 @@ Size of objects (data blocks) into which all physical and virtual drives
|
|||||||
in Vitastor, affects memory usage, write amplification and I/O load
|
in Vitastor, affects memory usage, write amplification and I/O load
|
||||||
distribution effectiveness.
|
distribution effectiveness.
|
||||||
|
|
||||||
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
|
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
|
||||||
it's possible to use 1 MB for SSD too - it will lower memory usage, but
|
it's possible to use 4 MB for SSD too - it will lower memory usage, but
|
||||||
may increase average WA and reduce linear performance.
|
may increase average WA and reduce linear performance.
|
||||||
|
|
||||||
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
||||||
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
||||||
With 1 MB it's 8 times lower.
|
|
||||||
|
|
||||||
## bitmap_granularity
|
## bitmap_granularity
|
||||||
|
|
||||||
|
@@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
|
|||||||
настроек, влияет на потребление памяти, объём избыточной записи (write
|
настроек, влияет на потребление памяти, объём избыточной записи (write
|
||||||
amplification) и эффективность распределения нагрузки по OSD.
|
amplification) и эффективность распределения нагрузки по OSD.
|
||||||
|
|
||||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
|
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
|
||||||
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
|
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
|
||||||
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
||||||
среднем увеличит WA.
|
среднем увеличит WA.
|
||||||
|
|
||||||
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
||||||
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
||||||
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
|
стандартном 128 КБ блоке.
|
||||||
|
|
||||||
## bitmap_granularity
|
## bitmap_granularity
|
||||||
|
|
||||||
|
@@ -1,145 +0,0 @@
|
|||||||
#!/usr/bin/nodejs
|
|
||||||
|
|
||||||
const fsp = require('fs').promises;
|
|
||||||
|
|
||||||
run(process.argv).catch(console.error);
|
|
||||||
|
|
||||||
async function run(argv)
|
|
||||||
{
|
|
||||||
if (argv.length < 3)
|
|
||||||
{
|
|
||||||
console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const index_file = await fsp.realpath(argv[2]);
|
|
||||||
const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
|
|
||||||
let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
|
|
||||||
text = text.split(re);
|
|
||||||
let included = {};
|
|
||||||
let heading = 0, heading_name = '', m;
|
|
||||||
for (let i = 0; i < text.length; i++)
|
|
||||||
{
|
|
||||||
if (text[i].substr(0, 2) == '{{')
|
|
||||||
{
|
|
||||||
// Inclusion
|
|
||||||
let incfile = text[i].substr(2, text[i].length-4);
|
|
||||||
let section = null;
|
|
||||||
let indent = heading;
|
|
||||||
incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
|
|
||||||
incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
|
|
||||||
let inc_heading = section;
|
|
||||||
incfile = rel2abs(index_file, incfile);
|
|
||||||
let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
|
|
||||||
inc = inc.trim().replace(/^[\s\S]+?\n#/, '#'); // remove until the first header
|
|
||||||
inc = inc.split(re);
|
|
||||||
const indent_str = new Array(indent+1).join('#');
|
|
||||||
let section_start = -1, section_end = -1;
|
|
||||||
for (let j = 0; j < inc.length; j++)
|
|
||||||
{
|
|
||||||
if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
|
|
||||||
{
|
|
||||||
if (!inc_heading)
|
|
||||||
{
|
|
||||||
inc_heading = m[3].trim();
|
|
||||||
}
|
|
||||||
if (section)
|
|
||||||
{
|
|
||||||
if (m[3].trim() == section)
|
|
||||||
section_start = j;
|
|
||||||
else if (section_start >= 0)
|
|
||||||
{
|
|
||||||
section_end = j;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
inc[j] = m[1] + indent_str + m[2] + m[3];
|
|
||||||
}
|
|
||||||
else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
|
|
||||||
{
|
|
||||||
const abs_m2 = rel2abs(incfile, m[2]);
|
|
||||||
const rel_m = abs2rel(__filename, abs_m2);
|
|
||||||
if (rel_m.substr(0, 9) == '../../../') // outside docs
|
|
||||||
inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
|
|
||||||
else
|
|
||||||
inc[j] = m[1] + abs_m2 + m[3];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (section)
|
|
||||||
{
|
|
||||||
inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
|
|
||||||
}
|
|
||||||
if (inc.length)
|
|
||||||
{
|
|
||||||
if (!inc_heading)
|
|
||||||
inc_heading = heading_name||'';
|
|
||||||
included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
|
|
||||||
inc[0] = inc[0].replace(/^\s+/, '');
|
|
||||||
inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
|
|
||||||
}
|
|
||||||
text.splice(i, 1, ...inc);
|
|
||||||
i = i + inc.length - 1;
|
|
||||||
}
|
|
||||||
else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
|
|
||||||
{
|
|
||||||
// Heading
|
|
||||||
heading = m[1].length;
|
|
||||||
heading_name = m[2].trim();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (let i = 0; i < text.length; i++)
|
|
||||||
{
|
|
||||||
if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
|
|
||||||
{
|
|
||||||
const p = m[2].indexOf('#');
|
|
||||||
if (included[m[2]])
|
|
||||||
{
|
|
||||||
text[i] = m[1]+included[m[2]]+m[3];
|
|
||||||
}
|
|
||||||
else if (p >= 0 && included[m[2].substr(0, p)])
|
|
||||||
{
|
|
||||||
text[i] = m[1]+m[2].substr(p)+m[3];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
console.log(text.join(''));
|
|
||||||
}
|
|
||||||
|
|
||||||
function rel2abs(ref, rel)
|
|
||||||
{
|
|
||||||
rel = [ ...ref.replace(/^(.*)\/[^\/]+$/, '$1').split(/\/+/), ...rel.split(/\/+/) ];
|
|
||||||
return killdots(rel).join('/');
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Build a path to `abs` that is relative to the directory of the file `ref`.
 * Both arguments are '/'-separated paths sharing the same base.
 *
 * @param {string} ref  path of the file the result should be relative to
 * @param {string} abs  path of the target
 * @returns {string} relative path, normalized by killdots()
 */
function abs2rel(ref, abs)
{
    const from = ref.split(/\/+/);
    const to = abs.split(/\/+/);
    // Count leading components shared by both paths, but never consume the
    // last component of ref (its file name)
    let common = 0;
    while (from.length - common > 1 && from[common] == to[common])
    {
        common++;
    }
    // One '..' for every remaining directory level of ref
    const ups = new Array(from.length - common - 1).fill('..');
    return killdots([ ...ups, ...to.slice(common) ]).join('/');
}
|
|
||||||
|
|
||||||
/**
 * Normalize an array of path components in place: remove every '.' and
 * collapse each 'name/..' pair. A '..' that has nothing to cancel against
 * (it is first, or follows another '..') is kept.
 *
 * @param {string[]} rel  path components; modified in place
 * @returns {string[]} the same array instance
 */
function killdots(rel)
{
    let pos = 0;
    while (pos < rel.length)
    {
        const part = rel[pos];
        if (part == '.')
        {
            // Drop the component and re-examine whatever moved into its slot
            rel.splice(pos, 1);
            continue;
        }
        if (part == '..' && pos >= 1 && rel[pos-1] != '..')
        {
            // Remove the preceding component together with this '..',
            // then continue from the position the pair started at
            rel.splice(pos-1, 2);
            pos--;
            continue;
        }
        pos++;
    }
    return rel;
}
|
|
@@ -1,65 +0,0 @@
|
|||||||
# Vitastor
|
|
||||||
|
|
||||||
{{../../../README.md#The Idea}}
|
|
||||||
|
|
||||||
{{../../../README.md#Talks and presentations}}
|
|
||||||
|
|
||||||
{{../../intro/features.en.md}}
|
|
||||||
|
|
||||||
{{../../intro/quickstart.en.md}}
|
|
||||||
|
|
||||||
{{../../intro/architecture.en.md}}
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
{{../../installation/packages.en.md}}
|
|
||||||
|
|
||||||
{{../../installation/proxmox.en.md}}
|
|
||||||
|
|
||||||
{{../../installation/openstack.en.md}}
|
|
||||||
|
|
||||||
{{../../installation/kubernetes.en.md}}
|
|
||||||
|
|
||||||
{{../../installation/source.en.md}}
|
|
||||||
|
|
||||||
{{../../config.en.md|indent=1}}
|
|
||||||
|
|
||||||
{{../../config/common.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/network.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/layout-cluster.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/layout-osd.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/osd.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/monitor.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/pool.en.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/inode.en.md|indent=2}}
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
{{../../usage/cli.en.md}}
|
|
||||||
|
|
||||||
{{../../usage/disk.en.md}}
|
|
||||||
|
|
||||||
{{../../usage/fio.en.md}}
|
|
||||||
|
|
||||||
{{../../usage/nbd.en.md}}
|
|
||||||
|
|
||||||
{{../../usage/qemu.en.md}}
|
|
||||||
|
|
||||||
{{../../usage/nfs.en.md}}
|
|
||||||
|
|
||||||
## Performance
|
|
||||||
|
|
||||||
{{../../performance/understanding.en.md}}
|
|
||||||
|
|
||||||
{{../../performance/theoretical.en.md}}
|
|
||||||
|
|
||||||
{{../../performance/comparison1.en.md}}
|
|
||||||
|
|
||||||
{{../../intro/author.en.md|indent=1}}
|
|
@@ -1,65 +0,0 @@
|
|||||||
# Vitastor
|
|
||||||
|
|
||||||
{{../../../README-ru.md#Идея|indent=0}}
|
|
||||||
|
|
||||||
{{../../../README-ru.md#Презентации и записи докладов|indent=0}}
|
|
||||||
|
|
||||||
{{../../intro/features.ru.md}}
|
|
||||||
|
|
||||||
{{../../intro/quickstart.ru.md}}
|
|
||||||
|
|
||||||
{{../../intro/architecture.ru.md}}
|
|
||||||
|
|
||||||
## Установка
|
|
||||||
|
|
||||||
{{../../installation/packages.ru.md}}
|
|
||||||
|
|
||||||
{{../../installation/proxmox.ru.md}}
|
|
||||||
|
|
||||||
{{../../installation/openstack.ru.md}}
|
|
||||||
|
|
||||||
{{../../installation/kubernetes.ru.md}}
|
|
||||||
|
|
||||||
{{../../installation/source.ru.md}}
|
|
||||||
|
|
||||||
{{../../config.ru.md|indent=1}}
|
|
||||||
|
|
||||||
{{../../config/common.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/network.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/layout-cluster.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/layout-osd.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/osd.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/monitor.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/pool.ru.md|indent=2}}
|
|
||||||
|
|
||||||
{{../../config/inode.ru.md|indent=2}}
|
|
||||||
|
|
||||||
## Использование
|
|
||||||
|
|
||||||
{{../../usage/cli.ru.md}}
|
|
||||||
|
|
||||||
{{../../usage/disk.ru.md}}
|
|
||||||
|
|
||||||
{{../../usage/fio.ru.md}}
|
|
||||||
|
|
||||||
{{../../usage/nbd.ru.md}}
|
|
||||||
|
|
||||||
{{../../usage/qemu.ru.md}}
|
|
||||||
|
|
||||||
{{../../usage/nfs.ru.md}}
|
|
||||||
|
|
||||||
## Производительность
|
|
||||||
|
|
||||||
{{../../performance/understanding.ru.md}}
|
|
||||||
|
|
||||||
{{../../performance/theoretical.ru.md}}
|
|
||||||
|
|
||||||
{{../../performance/comparison1.ru.md}}
|
|
||||||
|
|
||||||
{{../../intro/author.ru.md|indent=1}}
|
|
@@ -7,27 +7,26 @@
|
|||||||
in Vitastor, affects memory usage, write amplification and I/O load
|
in Vitastor, affects memory usage, write amplification and I/O load
|
||||||
distribution effectiveness.
|
distribution effectiveness.
|
||||||
|
|
||||||
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
|
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
|
||||||
it's possible to use 1 MB for SSD too - it will lower memory usage, but
|
it's possible to use 4 MB for SSD too - it will lower memory usage, but
|
||||||
may increase average WA and reduce linear performance.
|
may increase average WA and reduce linear performance.
|
||||||
|
|
||||||
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
||||||
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
||||||
With 1 MB it's 8 times lower.
|
|
||||||
info_ru: |
|
info_ru: |
|
||||||
Размер объектов (блоков данных), на которые делятся физические и виртуальные
|
Размер объектов (блоков данных), на которые делятся физические и виртуальные
|
||||||
диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
|
диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
|
||||||
настроек, влияет на потребление памяти, объём избыточной записи (write
|
настроек, влияет на потребление памяти, объём избыточной записи (write
|
||||||
amplification) и эффективность распределения нагрузки по OSD.
|
amplification) и эффективность распределения нагрузки по OSD.
|
||||||
|
|
||||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
|
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
|
||||||
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
|
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
|
||||||
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
||||||
среднем увеличит WA.
|
среднем увеличит WA.
|
||||||
|
|
||||||
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
||||||
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
||||||
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
|
стандартном 128 КБ блоке.
|
||||||
- name: bitmap_granularity
|
- name: bitmap_granularity
|
||||||
type: int
|
type: int
|
||||||
default: 4096
|
default: 4096
|
||||||
|
@@ -8,13 +8,13 @@
|
|||||||
|
|
||||||
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
|
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
|
||||||
|
|
||||||
Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
|
Для установки возьмите манифесты из директории [csi/deploy/](../csi/deploy/), поместите
|
||||||
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
|
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../csi/deploy/001-csi-config-map.yaml),
|
||||||
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
|
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../csi/deploy/009-storage-class.yaml)
|
||||||
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
|
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
|
||||||
|
|
||||||
```
|
```
|
||||||
for i in ./???-*.yaml; do kubectl apply -f $i; done
|
for i in ./???-*.yaml; do kubectl apply -f $i; done
|
||||||
```
|
```
|
||||||
|
|
||||||
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../../csi/deploy/example-pvc.yaml).
|
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../csi/deploy/example-pvc.yaml).
|
||||||
|
@@ -36,5 +36,5 @@ vitastor_pool_id = 1
|
|||||||
image_upload_use_cinder_backend = True
|
image_upload_use_cinder_backend = True
|
||||||
```
|
```
|
||||||
|
|
||||||
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
||||||
although the support has not been verified yet.
|
although the support has not been verified yet.
|
||||||
|
@@ -36,5 +36,5 @@ image_upload_use_cinder_backend = True
|
|||||||
```
|
```
|
||||||
|
|
||||||
Чтобы помещать в Vitastor Glance-образы, нужно использовать
|
Чтобы помещать в Vitastor Glance-образы, нужно использовать
|
||||||
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
||||||
однако, поддержка этой функции ещё не проверялась.
|
однако, поддержка этой функции ещё не проверялась.
|
||||||
|
@@ -6,10 +6,10 @@
|
|||||||
|
|
||||||
# Proxmox VE
|
# Proxmox VE
|
||||||
|
|
||||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.0 are supported):
|
To enable Vitastor support in Proxmox Virtual Environment (6.4-7.4 are supported):
|
||||||
|
|
||||||
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
||||||
bookworm for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
buster for 6.4, bullseye for 7.4, pve7.1 for 7.1, pve7.2 for 7.2, pve7.3 for 7.3
|
||||||
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
||||||
- Define storage in `/etc/pve/storage.cfg` (see below)
|
- Define storage in `/etc/pve/storage.cfg` (see below)
|
||||||
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
||||||
@@ -35,5 +35,5 @@ vitastor: vitastor
|
|||||||
vitastor_nbd 0
|
vitastor_nbd 0
|
||||||
```
|
```
|
||||||
|
|
||||||
\* Note: you can also manually copy [patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) to Proxmox hosts
|
\* Note: you can also manually copy [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to Proxmox hosts
|
||||||
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.
|
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.
|
||||||
|
@@ -1,15 +1,15 @@
|
|||||||
[Документация](../../README-ru.md#документация) → Установка → Proxmox VE
|
[Документация](../../README-ru.md#документация) → Установка → Proxmox
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
[Read in English](proxmox.en.md)
|
[Read in English](proxmox.en.md)
|
||||||
|
|
||||||
# Proxmox VE
|
# Proxmox
|
||||||
|
|
||||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.0):
|
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.4):
|
||||||
|
|
||||||
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
||||||
bookworm для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
buster для 6.4, bullseye для 7.4, pve7.1 для 7.1, pve7.2 для 7.2, pve7.3 для 7.3
|
||||||
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
||||||
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
||||||
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
||||||
@@ -35,5 +35,5 @@ vitastor: vitastor
|
|||||||
```
|
```
|
||||||
|
|
||||||
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
|
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
|
||||||
[patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) на хосты Proxmox как
|
[patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) на хосты Proxmox как
|
||||||
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.
|
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.
|
||||||
|
@@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
## Basic instructions
|
## Basic instructions
|
||||||
|
|
||||||
Download source, for example using git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
|
Download source, for example using git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
|
||||||
|
|
||||||
Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine,
|
Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine,
|
||||||
you can disable it by passing `-DWITH_FIO=no` to cmake.
|
you can disable it by passing `-DWITH_FIO=no` to cmake.
|
||||||
@@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
|
|||||||
QEMU build process. To do that:
|
QEMU build process. To do that:
|
||||||
- Install vitastor client library headers (from source or from vitastor-client-dev package)
|
- Install vitastor client library headers (from source or from vitastor-client-dev package)
|
||||||
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
|
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
|
||||||
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
|
- Copy `src/qemu_driver.c` to QEMU source directory as `block/block-vitastor.c`
|
||||||
- Build QEMU as usual
|
- Build QEMU as usual
|
||||||
|
|
||||||
But it is also possible to build it out-of-tree. To do that:
|
But it is also possible to build it out-of-tree. To do that:
|
||||||
|
@@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
## Базовая инструкция
|
## Базовая инструкция
|
||||||
|
|
||||||
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
|
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
|
||||||
|
|
||||||
Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них
|
Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них
|
||||||
в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio,
|
в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio,
|
||||||
@@ -41,7 +41,7 @@ cmake .. && make -j8 install
|
|||||||
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
|
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
|
||||||
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
|
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
|
||||||
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
|
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
|
||||||
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
|
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/block-vitastor.c`
|
||||||
- Соберите QEMU как обычно
|
- Соберите QEMU как обычно
|
||||||
|
|
||||||
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
|
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
|
||||||
@@ -60,7 +60,7 @@ cmake .. && make -j8 install
|
|||||||
* Для QEMU 2.0+: `<qemu>/qapi-types.h` → `<vitastor>/qemu/b/qemu/qapi-types.h`
|
* Для QEMU 2.0+: `<qemu>/qapi-types.h` → `<vitastor>/qemu/b/qemu/qapi-types.h`
|
||||||
- `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки
|
- `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки
|
||||||
- Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы
|
- Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы
|
||||||
используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
|
используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
|
||||||
- После этого в процессе сборки Vitastor также будет собираться подходящий для вашей
|
- После этого в процессе сборки Vitastor также будет собираться подходящий для вашей
|
||||||
версии QEMU `block-vitastor.so`.
|
версии QEMU `block-vitastor.so`.
|
||||||
- Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае
|
- Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае
|
||||||
|
@@ -44,7 +44,7 @@
|
|||||||
depends linearly on drive capacity and data store block size which is 128 KB by default.
|
depends linearly on drive capacity and data store block size which is 128 KB by default.
|
||||||
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
|
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
|
||||||
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
|
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
|
||||||
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled.
|
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled.
|
||||||
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
|
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
|
||||||
it's possible to create a good copy-on-write storage, but it's much harder and makes performance
|
it's possible to create a good copy-on-write storage, but it's much harder and makes performance
|
||||||
less deterministic, so CoW isn't used in Vitastor.
|
less deterministic, so CoW isn't used in Vitastor.
|
||||||
|
@@ -156,7 +156,7 @@
|
|||||||
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
|
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
|
||||||
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
|
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
|
||||||
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
|
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
|
||||||
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal).
|
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal).
|
||||||
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
|
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
|
||||||
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
|
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
|
||||||
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.
|
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.
|
||||||
|
@@ -35,7 +35,7 @@
|
|||||||
|
|
||||||
- [Debian and CentOS packages](../installation/packages.en.md)
|
- [Debian and CentOS packages](../installation/packages.en.md)
|
||||||
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
|
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
|
||||||
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
|
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
|
||||||
- Generic user-space client library
|
- Generic user-space client library
|
||||||
- [Native QEMU driver](../usage/qemu.en.md)
|
- [Native QEMU driver](../usage/qemu.en.md)
|
||||||
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
|
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
|
||||||
|
@@ -13,7 +13,7 @@
|
|||||||
## Серверные функции
|
## Серверные функции
|
||||||
|
|
||||||
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
||||||
- [Производительность](../performance/comparison1.ru.md) ;-D
|
- [Производительность](../performance/comparison1.ru.md) ;-D
|
||||||
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
||||||
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
|
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
|
||||||
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
|
|
||||||
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
|
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
|
||||||
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
|
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
|
||||||
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
|
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
|
||||||
- Общая пользовательская клиентская библиотека для работы с кластером
|
- Общая пользовательская клиентская библиотека для работы с кластером
|
||||||
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
|
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
|
||||||
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
|
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
|
||||||
|
@@ -7,7 +7,6 @@
|
|||||||
# Quick Start
|
# Quick Start
|
||||||
|
|
||||||
- [Preparation](#preparation)
|
- [Preparation](#preparation)
|
||||||
- [Recommended drives](#recommended-drives)
|
|
||||||
- [Configure monitors](#configure-monitors)
|
- [Configure monitors](#configure-monitors)
|
||||||
- [Configure OSDs](#configure-osds)
|
- [Configure OSDs](#configure-osds)
|
||||||
- [Create a pool](#create-a-pool)
|
- [Create a pool](#create-a-pool)
|
||||||
@@ -20,20 +19,10 @@
|
|||||||
- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
|
- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
|
||||||
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
|
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
|
||||||
[here](../config/layout-cluster.en.md#immediate_commit).
|
[here](../config/layout-cluster.en.md#immediate_commit).
|
||||||
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
|
|
||||||
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
|
|
||||||
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
|
|
||||||
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
|
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
|
||||||
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||||
- [Install Vitastor packages](../installation/packages.en.md).
|
- [Install Vitastor packages](../installation/packages.en.md).
|
||||||
|
|
||||||
## Recommended drives
|
|
||||||
|
|
||||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
|
||||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
|
||||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
|
||||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
|
||||||
|
|
||||||
## Configure monitors
|
## Configure monitors
|
||||||
|
|
||||||
On the monitor hosts:
|
On the monitor hosts:
|
||||||
@@ -56,10 +45,9 @@ On the monitor hosts:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
- Initialize OSDs:
|
- Initialize OSDs:
|
||||||
- SSD-only or HDD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
|
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
|
||||||
Add `--disable_data_fsync off` to leave disk write cache enabled if you use
|
`--disable_data_fsync off` to leave disk cache enabled if you use desktop
|
||||||
desktop SSDs without capacitors. Do NOT add `--disable_data_fsync off` if you
|
SSDs without capacitors.
|
||||||
use HDDs or SSD+HDD.
|
|
||||||
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||||
Pass all your devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own.
|
Pass all your devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own.
|
||||||
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
|
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
|
||||||
|
@@ -7,7 +7,6 @@
|
|||||||
# Быстрый старт
|
# Быстрый старт
|
||||||
|
|
||||||
- [Подготовка](#подготовка)
|
- [Подготовка](#подготовка)
|
||||||
- [Рекомендуемые диски](#рекомендуемые-диски)
|
|
||||||
- [Настройте мониторы](#настройте-мониторы)
|
- [Настройте мониторы](#настройте-мониторы)
|
||||||
- [Настройте OSD](#настройте-osd)
|
- [Настройте OSD](#настройте-osd)
|
||||||
- [Создайте пул](#создайте-пул)
|
- [Создайте пул](#создайте-пул)
|
||||||
@@ -20,20 +19,10 @@
|
|||||||
- Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
|
- Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
|
||||||
использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
|
использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
|
||||||
О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
|
О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
|
||||||
- Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
|
|
||||||
Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
|
|
||||||
обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
|
|
||||||
- Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
|
- Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
|
||||||
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||||
- [Установите пакеты Vitastor](../installation/packages.ru.md).
|
- [Установите пакеты Vitastor](../installation/packages.ru.md).
|
||||||
|
|
||||||
## Рекомендуемые диски
|
|
||||||
|
|
||||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
|
||||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
|
||||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
|
||||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
|
||||||
|
|
||||||
## Настройте мониторы
|
## Настройте мониторы
|
||||||
|
|
||||||
На хостах, выделенных под мониторы:
|
На хостах, выделенных под мониторы:
|
||||||
@@ -56,10 +45,9 @@
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
- Инициализуйте OSD:
|
- Инициализуйте OSD:
|
||||||
- Только SSD или только HDD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
|
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
|
||||||
Если вы используете десктопные SSD без конденсаторов, добавьте опцию `--disable_data_fsync off`,
|
десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
|
||||||
чтобы оставить кэш записи диска включённым. НЕ добавляйте эту опцию, если используете
|
опцию `--disable_data_fsync off`.
|
||||||
жёсткие диски (HDD).
|
|
||||||
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||||
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
|
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
|
||||||
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
|
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
|
||||||
|
@@ -13,8 +13,6 @@ remains decent (see an example [here](../performance/comparison1.en.md#vitastor-
|
|||||||
|
|
||||||
Vitastor Kubernetes CSI driver is based on NBD.
|
Vitastor Kubernetes CSI driver is based on NBD.
|
||||||
|
|
||||||
See also [VDUSE](qemu.en.md#vduse).
|
|
||||||
|
|
||||||
## Map image
|
## Map image
|
||||||
|
|
||||||
To create a local block device for a Vitastor image run:
|
To create a local block device for a Vitastor image run:
|
||||||
|
@@ -16,8 +16,6 @@ NBD немного снижает производительность из-за
|
|||||||
|
|
||||||
CSI-драйвер Kubernetes Vitastor основан на NBD.
|
CSI-драйвер Kubernetes Vitastor основан на NBD.
|
||||||
|
|
||||||
Смотрите также [VDUSE](qemu.ru.md#vduse).
|
|
||||||
|
|
||||||
## Подключить устройство
|
## Подключить устройство
|
||||||
|
|
||||||
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
||||||
|
@@ -29,7 +29,7 @@ vitastor-nfs [--etcd_address ADDR] [ДРУГИЕ ОПЦИИ]
|
|||||||
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
|
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
|
||||||
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
|
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
|
||||||
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
|
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
|
||||||
--pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
|
--pool <POOL> исползовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
|
||||||
--foreground 1 не уходить в фон после запуска
|
--foreground 1 не уходить в фон после запуска
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@@ -83,43 +83,3 @@ qemu-img rebase -u -b '' testimg.qcow2
|
|||||||
This can be used for backups. Just note that exporting an image that is currently being written to
|
This can be used for backups. Just note that exporting an image that is currently being written to
|
||||||
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
|
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
|
||||||
on a live VM.
|
on a live VM.
|
||||||
|
|
||||||
## VDUSE
|
|
||||||
|
|
||||||
Linux kernel, starting with version 5.15, supports a new interface for attaching virtual disks
|
|
||||||
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
|
|
||||||
exporting QEMU block devices over this protocol using qemu-storage-daemon.
|
|
||||||
|
|
||||||
VDUSE has the same problem as other FUSE-like interfaces in Linux: if a userspace process hangs,
|
|
||||||
for example, if it loses connectivity with the Vitastor cluster - active processes doing I/O may
|
|
||||||
hang in the D state (uninterruptible sleep) and you won't be able to kill them even with kill -9.
|
|
||||||
In this case a reboot will be the only way to remove VDUSE devices from the system.
|
|
||||||
|
|
||||||
On the other hand, VDUSE is faster than [NBD](nbd.en.md), so you may prefer to use it if
|
|
||||||
performance is important for you. Approximate performance numbers:
|
|
||||||
direct fio benchmark - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
|
|
||||||
|
|
||||||
To try VDUSE you need at least Linux 5.15, built with VDUSE support
|
|
||||||
(CONFIG_VIRTIO_VDPA=m and CONFIG_VDPA_USER=m). Debian Linux kernels have these options
|
|
||||||
disabled for now, so if you want to try it on Debian, use a kernel from Ubuntu
|
|
||||||
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) or Proxmox.
|
|
||||||
|
|
||||||
Commands to attach a Vitastor image as a VDUSE device:
|
|
||||||
|
|
||||||
```
|
|
||||||
modprobe vduse virtio-vdpa
|
|
||||||
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
|
|
||||||
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
|
|
||||||
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
|
|
||||||
vdpa dev add name test1 mgmtdev vduse
|
|
||||||
```
|
|
||||||
|
|
||||||
After running these commands, the /dev/vda device will appear in the system and you'll be able to
|
|
||||||
use it as a normal disk.
|
|
||||||
|
|
||||||
To remove the device:
|
|
||||||
|
|
||||||
```
|
|
||||||
vdpa dev del test1
|
|
||||||
kill <qemu-storage-daemon_process_PID>
|
|
||||||
```
|
|
||||||
|
@@ -87,43 +87,3 @@ qemu-img rebase -u -b '' testimg.qcow2
|
|||||||
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
|
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
|
||||||
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
|
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
|
||||||
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
|
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
|
||||||
|
|
||||||
## VDUSE
|
|
||||||
|
|
||||||
В Linux, начиная с версии ядра 5.15, доступен новый интерфейс для подключения виртуальных дисков
|
|
||||||
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
|
|
||||||
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
|
|
||||||
|
|
||||||
VDUSE страдает общей проблемой FUSE-подобных интерфейсов в Linux: если пользовательский процесс
|
|
||||||
подвиснет, например, если будет потеряна связь с кластером Vitastor - читающие/пишущие в кластер
|
|
||||||
процессы могут "залипнуть" в состоянии D (непрерываемый сон) и их будет невозможно убить даже
|
|
||||||
через kill -9. В этом случае удалить из системы устройство можно только перезагрузившись.
|
|
||||||
|
|
||||||
С другой стороны, VDUSE быстрее по сравнению с [NBD](nbd.ru.md), поэтому его может
|
|
||||||
быть предпочтительно использовать там, где производительность важнее. Порядок показателей:
|
|
||||||
прямое тестирование через fio - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
|
|
||||||
|
|
||||||
Чтобы использовать VDUSE, вам нужно ядро Linux версии хотя бы 5.15, собранное с поддержкой
|
|
||||||
VDUSE (CONFIG_VIRTIO_VDPA=m и CONFIG_VDPA_USER=m). В ядрах в Debian Linux поддержка пока
|
|
||||||
отключена - если хотите попробовать эту функцию на Debian, поставьте ядро из Ubuntu
|
|
||||||
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) или из Proxmox.
|
|
||||||
|
|
||||||
Команды для подключения виртуального диска через VDUSE:
|
|
||||||
|
|
||||||
```
|
|
||||||
modprobe vduse virtio-vdpa
|
|
||||||
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
|
|
||||||
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
|
|
||||||
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
|
|
||||||
vdpa dev add name test1 mgmtdev vduse
|
|
||||||
```
|
|
||||||
|
|
||||||
После этого в системе появится устройство /dev/vda, которое можно будет использовать как
|
|
||||||
обычный диск.
|
|
||||||
|
|
||||||
Для удаления устройства из системы:
|
|
||||||
|
|
||||||
```
|
|
||||||
vdpa dev del test1
|
|
||||||
kill <PID_процесса_qemu-storage-daemon>
|
|
||||||
```
|
|
||||||
|
@@ -63,9 +63,8 @@ Wants=network-online.target local-fs.target time-sync.target
|
|||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Restart=always
|
Restart=always
|
||||||
Environment=GOGC=50
|
ExecStart=/usr/local/bin/etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
|
||||||
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
|
--advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
|
||||||
--snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
|
|
||||||
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
|
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
|
||||||
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
|
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
|
||||||
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
|
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
|
||||||
|
26
mon/mon.js
26
mon/mon.js
@@ -1608,7 +1608,7 @@ class Mon
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return { inode_stats, seen_pools };
|
return inode_stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
serialize_bigints(obj)
|
serialize_bigints(obj)
|
||||||
@@ -1634,7 +1634,7 @@ class Mon
|
|||||||
const timestamp = Date.now();
|
const timestamp = Date.now();
|
||||||
const { object_counts, object_bytes } = this.sum_object_counts();
|
const { object_counts, object_bytes } = this.sum_object_counts();
|
||||||
let stats = this.sum_op_stats(timestamp, this.prev_stats);
|
let stats = this.sum_op_stats(timestamp, this.prev_stats);
|
||||||
let { inode_stats, seen_pools } = this.sum_inode_stats(
|
let inode_stats = this.sum_inode_stats(
|
||||||
this.prev_stats ? this.prev_stats.inode_stats : null,
|
this.prev_stats ? this.prev_stats.inode_stats : null,
|
||||||
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
|
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
|
||||||
);
|
);
|
||||||
@@ -1669,22 +1669,12 @@ class Mon
|
|||||||
}
|
}
|
||||||
for (const pool_id in this.state.pool.stats)
|
for (const pool_id in this.state.pool.stats)
|
||||||
{
|
{
|
||||||
if (!seen_pools[pool_id])
|
const pool_stats = { ...this.state.pool.stats[pool_id] };
|
||||||
{
|
this.serialize_bigints(pool_stats);
|
||||||
txn.push({ requestDeleteRange: {
|
txn.push({ requestPut: {
|
||||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||||
} });
|
value: b64(JSON.stringify(pool_stats)),
|
||||||
delete this.state.pool.stats[pool_id];
|
} });
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const pool_stats = { ...this.state.pool.stats[pool_id] };
|
|
||||||
this.serialize_bigints(pool_stats);
|
|
||||||
txn.push({ requestPut: {
|
|
||||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
|
||||||
value: b64(JSON.stringify(pool_stats)),
|
|
||||||
} });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (txn.length)
|
if (txn.length)
|
||||||
{
|
{
|
||||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
|||||||
from cinder.volume import driver
|
from cinder.volume import driver
|
||||||
from cinder.volume import volume_utils
|
from cinder.volume import volume_utils
|
||||||
|
|
||||||
VERSION = '0.9.5'
|
VERSION = '0.9.2'
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@@ -1,190 +0,0 @@
|
|||||||
diff --git a/block/meson.build b/block/meson.build
|
|
||||||
index 382bec0e7d..af6207dbce 100644
|
|
||||||
--- a/block/meson.build
|
|
||||||
+++ b/block/meson.build
|
|
||||||
@@ -114,6 +114,7 @@ foreach m : [
|
|
||||||
[libnfs, 'nfs', files('nfs.c')],
|
|
||||||
[libssh, 'ssh', files('ssh.c')],
|
|
||||||
[rbd, 'rbd', files('rbd.c')],
|
|
||||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
|
||||||
]
|
|
||||||
if m[0].found()
|
|
||||||
module_ss = ss.source_set()
|
|
||||||
diff --git a/meson.build b/meson.build
|
|
||||||
index c44d05a13f..ebedb42843 100644
|
|
||||||
--- a/meson.build
|
|
||||||
+++ b/meson.build
|
|
||||||
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
+vitastor = not_found
|
|
||||||
+if not get_option('vitastor').auto() or have_block
|
|
||||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
|
||||||
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
|
||||||
+ if libvitastor_client.found()
|
|
||||||
+ if cc.links('''
|
|
||||||
+ #include <vitastor_c.h>
|
|
||||||
+ int main(void) {
|
|
||||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
|
||||||
+ return 0;
|
|
||||||
+ }''', dependencies: libvitastor_client)
|
|
||||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
|
||||||
+ elif get_option('vitastor').enabled()
|
|
||||||
+ error('could not link libvitastor_client')
|
|
||||||
+ else
|
|
||||||
+ warning('could not link libvitastor_client, disabling')
|
|
||||||
+ endif
|
|
||||||
+ endif
|
|
||||||
+endif
|
|
||||||
+
|
|
||||||
glusterfs = not_found
|
|
||||||
glusterfs_ftruncate_has_stat = false
|
|
||||||
glusterfs_iocb_has_stat = false
|
|
||||||
@@ -1882,6 +1902,7 @@ endif
|
|
||||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
|
||||||
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
|
||||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
|
||||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
|
||||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
|
||||||
config_host_data.set('CONFIG_SDL', sdl.found())
|
|
||||||
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
|
||||||
@@ -4020,6 +4041,7 @@ if spice_protocol.found()
|
|
||||||
summary_info += {' spice server support': spice}
|
|
||||||
endif
|
|
||||||
summary_info += {'rbd support': rbd}
|
|
||||||
+summary_info += {'vitastor support': vitastor}
|
|
||||||
summary_info += {'smartcard support': cacard}
|
|
||||||
summary_info += {'U2F support': u2f}
|
|
||||||
summary_info += {'libusb': libusb}
|
|
||||||
diff --git a/meson_options.txt b/meson_options.txt
|
|
||||||
index fc9447d267..c4ac55c283 100644
|
|
||||||
--- a/meson_options.txt
|
|
||||||
+++ b/meson_options.txt
|
|
||||||
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
|
|
||||||
description: 'lzo compression support')
|
|
||||||
option('rbd', type : 'feature', value : 'auto',
|
|
||||||
description: 'Ceph block device driver')
|
|
||||||
+option('vitastor', type : 'feature', value : 'auto',
|
|
||||||
+ description: 'Vitastor block device driver')
|
|
||||||
option('opengl', type : 'feature', value : 'auto',
|
|
||||||
description: 'OpenGL support')
|
|
||||||
option('rdma', type : 'feature', value : 'auto',
|
|
||||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
|
||||||
index c05ad0c07e..f5eb701604 100644
|
|
||||||
--- a/qapi/block-core.json
|
|
||||||
+++ b/qapi/block-core.json
|
|
||||||
@@ -3308,7 +3308,7 @@
|
|
||||||
'raw', 'rbd',
|
|
||||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
|
||||||
'pbs',
|
|
||||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
|
||||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
|
||||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
|
||||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
|
||||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
|
||||||
@@ -4338,6 +4338,28 @@
|
|
||||||
'*key-secret': 'str',
|
|
||||||
'*server': ['InetSocketAddressBase'] } }
|
|
||||||
|
|
||||||
+##
|
|
||||||
+# @BlockdevOptionsVitastor:
|
|
||||||
+#
|
|
||||||
+# Driver specific block device options for vitastor
|
|
||||||
+#
|
|
||||||
+# @image: Image name
|
|
||||||
+# @inode: Inode number
|
|
||||||
+# @pool: Pool ID
|
|
||||||
+# @size: Desired image size in bytes
|
|
||||||
+# @config-path: Path to Vitastor configuration
|
|
||||||
+# @etcd-host: etcd connection address(es)
|
|
||||||
+# @etcd-prefix: etcd key/value prefix
|
|
||||||
+##
|
|
||||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
|
||||||
+ 'data': { '*inode': 'uint64',
|
|
||||||
+ '*pool': 'uint64',
|
|
||||||
+ '*size': 'uint64',
|
|
||||||
+ '*image': 'str',
|
|
||||||
+ '*config-path': 'str',
|
|
||||||
+ '*etcd-host': 'str',
|
|
||||||
+ '*etcd-prefix': 'str' } }
|
|
||||||
+
|
|
||||||
##
|
|
||||||
# @ReplicationMode:
|
|
||||||
#
|
|
||||||
@@ -4787,6 +4809,7 @@
|
|
||||||
'throttle': 'BlockdevOptionsThrottle',
|
|
||||||
'vdi': 'BlockdevOptionsGenericFormat',
|
|
||||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
|
||||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
|
||||||
'virtio-blk-vfio-pci':
|
|
||||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
|
||||||
'if': 'CONFIG_BLKIO' },
|
|
||||||
@@ -5187,6 +5210,17 @@
|
|
||||||
'*cluster-size' : 'size',
|
|
||||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
|
||||||
|
|
||||||
+##
|
|
||||||
+# @BlockdevCreateOptionsVitastor:
|
|
||||||
+#
|
|
||||||
+# Driver specific image creation options for Vitastor.
|
|
||||||
+#
|
|
||||||
+# @size: Size of the virtual disk in bytes
|
|
||||||
+##
|
|
||||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
|
||||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
|
||||||
+ 'size': 'size' } }
|
|
||||||
+
|
|
||||||
##
|
|
||||||
# @BlockdevVmdkSubformat:
|
|
||||||
#
|
|
||||||
@@ -5385,6 +5419,7 @@
|
|
||||||
'ssh': 'BlockdevCreateOptionsSsh',
|
|
||||||
'vdi': 'BlockdevCreateOptionsVdi',
|
|
||||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
|
||||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
|
||||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
|
||||||
'vpc': 'BlockdevCreateOptionsVpc'
|
|
||||||
} }
|
|
||||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
|
||||||
index 6e8983f39c..1b0b9fcf3e 100755
|
|
||||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
|
||||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
|
||||||
@@ -32,7 +32,7 @@
|
|
||||||
--with-git=meson \
|
|
||||||
--with-git-submodules=update \
|
|
||||||
--target-list="x86_64-softmmu" \
|
|
||||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
|
||||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
|
||||||
--audio-drv-list="" \
|
|
||||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
|
||||||
--with-coroutine=ucontext \
|
|
||||||
@@ -179,6 +179,7 @@
|
|
||||||
--enable-opengl \
|
|
||||||
--enable-pie \
|
|
||||||
--enable-rbd \
|
|
||||||
+--enable-vitastor \
|
|
||||||
--enable-rdma \
|
|
||||||
--enable-seccomp \
|
|
||||||
--enable-snappy \
|
|
||||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
|
||||||
index 009fab1515..95914e6ebc 100644
|
|
||||||
--- a/scripts/meson-buildoptions.sh
|
|
||||||
+++ b/scripts/meson-buildoptions.sh
|
|
||||||
@@ -144,6 +144,7 @@ meson_options_help() {
|
|
||||||
printf "%s\n" ' qed qed image format support'
|
|
||||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
|
||||||
printf "%s\n" ' rbd Ceph block device driver'
|
|
||||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
|
||||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
|
||||||
printf "%s\n" ' replication replication support'
|
|
||||||
printf "%s\n" ' sdl SDL user interface'
|
|
||||||
@@ -392,6 +393,8 @@ _meson_option_parse() {
|
|
||||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
|
||||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
|
||||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
|
||||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
|
||||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
|
||||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
|
||||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
|
||||||
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
|
@@ -1,176 +0,0 @@
|
|||||||
diff --git a/block/Makefile.objs b/block/Makefile.objs
|
|
||||||
index d644bac60a..e404236291 100644
|
|
||||||
--- a/block/Makefile.objs
|
|
||||||
+++ b/block/Makefile.objs
|
|
||||||
@@ -19,6 +19,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
|
|
||||||
block-obj-$(CONFIG_LIBNFS) += nfs.o
|
|
||||||
block-obj-$(CONFIG_CURL) += curl.o
|
|
||||||
block-obj-$(CONFIG_RBD) += rbd.o
|
|
||||||
+block-obj-$(CONFIG_VITASTOR) += vitastor.o
|
|
||||||
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
|
|
||||||
block-obj-$(CONFIG_VXHS) += vxhs.o
|
|
||||||
block-obj-$(CONFIG_LIBSSH2) += ssh.o
|
|
||||||
@@ -39,6 +40,8 @@ curl.o-cflags := $(CURL_CFLAGS)
|
|
||||||
curl.o-libs := $(CURL_LIBS)
|
|
||||||
rbd.o-cflags := $(RBD_CFLAGS)
|
|
||||||
rbd.o-libs := $(RBD_LIBS)
|
|
||||||
+vitastor.o-cflags := $(VITASTOR_CFLAGS)
|
|
||||||
+vitastor.o-libs := $(VITASTOR_LIBS)
|
|
||||||
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
|
|
||||||
gluster.o-libs := $(GLUSTERFS_LIBS)
|
|
||||||
vxhs.o-libs := $(VXHS_LIBS)
|
|
||||||
diff --git a/configure b/configure
|
|
||||||
index 0a19b033bc..58b7fbf24c 100755
|
|
||||||
--- a/configure
|
|
||||||
+++ b/configure
|
|
||||||
@@ -398,6 +398,7 @@ trace_backends="log"
|
|
||||||
trace_file="trace"
|
|
||||||
spice=""
|
|
||||||
rbd=""
|
|
||||||
+vitastor=""
|
|
||||||
smartcard=""
|
|
||||||
libusb=""
|
|
||||||
usb_redir=""
|
|
||||||
@@ -1213,6 +1214,10 @@ for opt do
|
|
||||||
;;
|
|
||||||
--enable-rbd) rbd="yes"
|
|
||||||
;;
|
|
||||||
+ --disable-vitastor) vitastor="no"
|
|
||||||
+ ;;
|
|
||||||
+ --enable-vitastor) vitastor="yes"
|
|
||||||
+ ;;
|
|
||||||
--disable-xfsctl) xfs="no"
|
|
||||||
;;
|
|
||||||
--enable-xfsctl) xfs="yes"
|
|
||||||
@@ -1601,6 +1606,7 @@ disabled with --disable-FEATURE, default is enabled if available:
|
|
||||||
vhost-crypto vhost-crypto acceleration support
|
|
||||||
spice spice
|
|
||||||
rbd rados block device (rbd)
|
|
||||||
+ vitastor vitastor block device
|
|
||||||
libiscsi iscsi support
|
|
||||||
libnfs nfs support
|
|
||||||
smartcard smartcard support (libcacard)
|
|
||||||
@@ -3594,6 +3600,27 @@ EOF
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
+##########################################
|
|
||||||
+# vitastor probe
|
|
||||||
+if test "$vitastor" != "no" ; then
|
|
||||||
+ cat > $TMPC <<EOF
|
|
||||||
+#include <vitastor_c.h>
|
|
||||||
+int main(void) {
|
|
||||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+EOF
|
|
||||||
+ vitastor_libs="-lvitastor_client"
|
|
||||||
+ if compile_prog "" "$vitastor_libs" ; then
|
|
||||||
+ vitastor=yes
|
|
||||||
+ else
|
|
||||||
+ if test "$vitastor" = "yes" ; then
|
|
||||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
|
||||||
+ fi
|
|
||||||
+ vitastor=no
|
|
||||||
+ fi
|
|
||||||
+fi
|
|
||||||
+
|
|
||||||
##########################################
|
|
||||||
# libssh2 probe
|
|
||||||
min_libssh2_version=1.2.8
|
|
||||||
@@ -5837,6 +5864,7 @@ echo "Trace output file $trace_file-<pid>"
|
|
||||||
fi
|
|
||||||
echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
|
|
||||||
echo "rbd support $rbd"
|
|
||||||
+echo "vitastor support $vitastor"
|
|
||||||
echo "xfsctl support $xfs"
|
|
||||||
echo "smartcard support $smartcard"
|
|
||||||
echo "libusb $libusb"
|
|
||||||
@@ -6416,6 +6444,11 @@ if test "$rbd" = "yes" ; then
|
|
||||||
echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
|
|
||||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
|
||||||
fi
|
|
||||||
+if test "$vitastor" = "yes" ; then
|
|
||||||
+ echo "CONFIG_VITASTOR=m" >> $config_host_mak
|
|
||||||
+ echo "VITASTOR_CFLAGS=$vitastor_cflags" >> $config_host_mak
|
|
||||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
|
||||||
+fi
|
|
||||||
|
|
||||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
|
||||||
if test "$coroutine_pool" = "yes" ; then
|
|
||||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
|
||||||
index c50517bff3..c780bb2c1c 100644
|
|
||||||
--- a/qapi/block-core.json
|
|
||||||
+++ b/qapi/block-core.json
|
|
||||||
@@ -2514,7 +2514,7 @@
|
|
||||||
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
|
|
||||||
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
|
|
||||||
'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
|
|
||||||
- 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
|
|
||||||
+ 'quorum', 'raw', 'rbd', 'vitastor', 'replication', 'sheepdog', 'ssh',
|
|
||||||
'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
|
|
||||||
|
|
||||||
##
|
|
||||||
@@ -3217,6 +3217,28 @@
|
|
||||||
'*snap-id': 'uint32',
|
|
||||||
'*tag': 'str' } }
|
|
||||||
|
|
||||||
+##
|
|
||||||
+# @BlockdevOptionsVitastor:
|
|
||||||
+#
|
|
||||||
+# Driver specific block device options for vitastor
|
|
||||||
+#
|
|
||||||
+# @image: Image name
|
|
||||||
+# @inode: Inode number
|
|
||||||
+# @pool: Pool ID
|
|
||||||
+# @size: Desired image size in bytes
|
|
||||||
+# @config-path: Path to Vitastor configuration
|
|
||||||
+# @etcd-host: etcd connection address(es)
|
|
||||||
+# @etcd-prefix: etcd key/value prefix
|
|
||||||
+##
|
|
||||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
|
||||||
+ 'data': { '*inode': 'uint64',
|
|
||||||
+ '*pool': 'uint64',
|
|
||||||
+ '*size': 'uint64',
|
|
||||||
+ '*image': 'str',
|
|
||||||
+ '*config-path': 'str',
|
|
||||||
+ '*etcd-host': 'str',
|
|
||||||
+ '*etcd-prefix': 'str' } }
|
|
||||||
+
|
|
||||||
##
|
|
||||||
# @ReplicationMode:
|
|
||||||
#
|
|
||||||
@@ -3547,6 +3569,7 @@
|
|
||||||
'rbd': 'BlockdevOptionsRbd',
|
|
||||||
'replication':'BlockdevOptionsReplication',
|
|
||||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
|
||||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
|
||||||
'ssh': 'BlockdevOptionsSsh',
|
|
||||||
'throttle': 'BlockdevOptionsThrottle',
|
|
||||||
'vdi': 'BlockdevOptionsGenericFormat',
|
|
||||||
@@ -3991,6 +4014,17 @@
|
|
||||||
'*subformat': 'BlockdevVhdxSubformat',
|
|
||||||
'*block-state-zero': 'bool' } }
|
|
||||||
|
|
||||||
+##
|
|
||||||
+# @BlockdevCreateOptionsVitastor:
|
|
||||||
+#
|
|
||||||
+# Driver specific image creation options for Vitastor.
|
|
||||||
+#
|
|
||||||
+# @size: Size of the virtual disk in bytes
|
|
||||||
+##
|
|
||||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
|
||||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
|
||||||
+ 'size': 'size' } }
|
|
||||||
+
|
|
||||||
##
|
|
||||||
# @BlockdevVpcSubformat:
|
|
||||||
#
|
|
||||||
@@ -4074,6 +4108,7 @@
|
|
||||||
'rbd': 'BlockdevCreateOptionsRbd',
|
|
||||||
'replication': 'BlockdevCreateNotSupported',
|
|
||||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
|
||||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
|
||||||
'ssh': 'BlockdevCreateOptionsSsh',
|
|
||||||
'throttle': 'BlockdevCreateNotSupported',
|
|
||||||
'vdi': 'BlockdevCreateOptionsVdi',
|
|
@@ -1,181 +0,0 @@
|
|||||||
Index: qemu-5.2+dfsg/qapi/block-core.json
|
|
||||||
===================================================================
|
|
||||||
--- qemu-5.2+dfsg.orig/qapi/block-core.json
|
|
||||||
+++ qemu-5.2+dfsg/qapi/block-core.json
|
|
||||||
@@ -2831,7 +2831,7 @@
|
|
||||||
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
|
|
||||||
'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
|
||||||
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
|
|
||||||
- 'sheepdog',
|
|
||||||
+ 'sheepdog', 'vitastor',
|
|
||||||
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
|
||||||
|
|
||||||
##
|
|
||||||
@@ -3668,6 +3668,28 @@
|
|
||||||
'*tag': 'str' } }
|
|
||||||
|
|
||||||
##
|
|
||||||
+# @BlockdevOptionsVitastor:
|
|
||||||
+#
|
|
||||||
+# Driver specific block device options for vitastor
|
|
||||||
+#
|
|
||||||
+# @image: Image name
|
|
||||||
+# @inode: Inode number
|
|
||||||
+# @pool: Pool ID
|
|
||||||
+# @size: Desired image size in bytes
|
|
||||||
+# @config-path: Path to Vitastor configuration
|
|
||||||
+# @etcd-host: etcd connection address(es)
|
|
||||||
+# @etcd-prefix: etcd key/value prefix
|
|
||||||
+##
|
|
||||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
|
||||||
+ 'data': { '*inode': 'uint64',
|
|
||||||
+ '*pool': 'uint64',
|
|
||||||
+ '*size': 'uint64',
|
|
||||||
+ '*image': 'str',
|
|
||||||
+ '*config-path': 'str',
|
|
||||||
+ '*etcd-host': 'str',
|
|
||||||
+ '*etcd-prefix': 'str' } }
|
|
||||||
+
|
|
||||||
+##
|
|
||||||
# @ReplicationMode:
|
|
||||||
#
|
|
||||||
# An enumeration of replication modes.
|
|
||||||
@@ -4015,6 +4037,7 @@
|
|
||||||
'replication': { 'type': 'BlockdevOptionsReplication',
|
|
||||||
'if': 'defined(CONFIG_REPLICATION)' },
|
|
||||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
|
||||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
|
||||||
'ssh': 'BlockdevOptionsSsh',
|
|
||||||
'throttle': 'BlockdevOptionsThrottle',
|
|
||||||
'vdi': 'BlockdevOptionsGenericFormat',
|
|
||||||
@@ -4404,6 +4427,17 @@
|
|
||||||
'*cluster-size' : 'size' } }
|
|
||||||
|
|
||||||
##
|
|
||||||
+# @BlockdevCreateOptionsVitastor:
|
|
||||||
+#
|
|
||||||
+# Driver specific image creation options for Vitastor.
|
|
||||||
+#
|
|
||||||
+# @size: Size of the virtual disk in bytes
|
|
||||||
+##
|
|
||||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
|
||||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
|
||||||
+ 'size': 'size' } }
|
|
||||||
+
|
|
||||||
+##
|
|
||||||
# @BlockdevVmdkSubformat:
|
|
||||||
#
|
|
||||||
# Subformat options for VMDK images
|
|
||||||
@@ -4665,6 +4699,7 @@
|
|
||||||
'qed': 'BlockdevCreateOptionsQed',
|
|
||||||
'rbd': 'BlockdevCreateOptionsRbd',
|
|
||||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
|
||||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
|
||||||
'ssh': 'BlockdevCreateOptionsSsh',
|
|
||||||
'vdi': 'BlockdevCreateOptionsVdi',
|
|
||||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
|
||||||
Index: qemu-5.2+dfsg/block/meson.build
|
|
||||||
===================================================================
|
|
||||||
--- qemu-5.2+dfsg.orig/block/meson.build
|
|
||||||
+++ qemu-5.2+dfsg/block/meson.build
|
|
||||||
@@ -76,6 +76,7 @@ foreach m : [
|
|
||||||
['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'],
|
|
||||||
['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'],
|
|
||||||
['CONFIG_RBD', 'rbd', rbd, 'rbd.c'],
|
|
||||||
+ ['CONFIG_VITASTOR', 'vitastor', vitastor, 'vitastor.c'],
|
|
||||||
]
|
|
||||||
if config_host.has_key(m[0])
|
|
||||||
if enable_modules
|
|
||||||
Index: qemu-5.2+dfsg/configure
|
|
||||||
===================================================================
|
|
||||||
--- qemu-5.2+dfsg.orig/configure
|
|
||||||
+++ qemu-5.2+dfsg/configure
|
|
||||||
@@ -372,6 +372,7 @@ trace_backends="log"
|
|
||||||
trace_file="trace"
|
|
||||||
spice=""
|
|
||||||
rbd=""
|
|
||||||
+vitastor=""
|
|
||||||
smartcard=""
|
|
||||||
u2f="auto"
|
|
||||||
libusb=""
|
|
||||||
@@ -1263,6 +1264,10 @@ for opt do
|
|
||||||
;;
|
|
||||||
--enable-rbd) rbd="yes"
|
|
||||||
;;
|
|
||||||
+ --disable-vitastor) vitastor="no"
|
|
||||||
+ ;;
|
|
||||||
+ --enable-vitastor) vitastor="yes"
|
|
||||||
+ ;;
|
|
||||||
--disable-xfsctl) xfs="no"
|
|
||||||
;;
|
|
||||||
--enable-xfsctl) xfs="yes"
|
|
||||||
@@ -1827,6 +1832,7 @@ disabled with --disable-FEATURE, default
|
|
||||||
vhost-vdpa vhost-vdpa kernel backend support
|
|
||||||
spice spice
|
|
||||||
rbd rados block device (rbd)
|
|
||||||
+ vitastor vitastor block device
|
|
||||||
libiscsi iscsi support
|
|
||||||
libnfs nfs support
|
|
||||||
smartcard smartcard support (libcacard)
|
|
||||||
@@ -3719,6 +3725,27 @@ EOF
|
|
||||||
fi
|
|
||||||
|
|
||||||
##########################################
|
|
||||||
+# vitastor probe
|
|
||||||
+if test "$vitastor" != "no" ; then
|
|
||||||
+ cat > $TMPC <<EOF
|
|
||||||
+#include <vitastor_c.h>
|
|
||||||
+int main(void) {
|
|
||||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+EOF
|
|
||||||
+ vitastor_libs="-lvitastor_client"
|
|
||||||
+ if compile_prog "" "$vitastor_libs" ; then
|
|
||||||
+ vitastor=yes
|
|
||||||
+ else
|
|
||||||
+ if test "$vitastor" = "yes" ; then
|
|
||||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
|
||||||
+ fi
|
|
||||||
+ vitastor=no
|
|
||||||
+ fi
|
|
||||||
+fi
|
|
||||||
+
|
|
||||||
+##########################################
|
|
||||||
# libssh probe
|
|
||||||
if test "$libssh" != "no" ; then
|
|
||||||
if $pkg_config --exists libssh; then
|
|
||||||
@@ -6456,6 +6483,10 @@ if test "$rbd" = "yes" ; then
|
|
||||||
echo "CONFIG_RBD=y" >> $config_host_mak
|
|
||||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
|
||||||
fi
|
|
||||||
+if test "$vitastor" = "yes" ; then
|
|
||||||
+ echo "CONFIG_VITASTOR=y" >> $config_host_mak
|
|
||||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
|
||||||
+fi
|
|
||||||
|
|
||||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
|
||||||
if test "$coroutine_pool" = "yes" ; then
|
|
||||||
Index: qemu-5.2+dfsg/meson.build
|
|
||||||
===================================================================
|
|
||||||
--- qemu-5.2+dfsg.orig/meson.build
|
|
||||||
+++ qemu-5.2+dfsg/meson.build
|
|
||||||
@@ -596,6 +596,10 @@ rbd = not_found
|
|
||||||
if 'CONFIG_RBD' in config_host
|
|
||||||
rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split())
|
|
||||||
endif
|
|
||||||
+vitastor = not_found
|
|
||||||
+if 'CONFIG_VITASTOR' in config_host
|
|
||||||
+ vitastor = declare_dependency(link_args: config_host['VITASTOR_LIBS'].split())
|
|
||||||
+endif
|
|
||||||
glusterfs = not_found
|
|
||||||
if 'CONFIG_GLUSTERFS' in config_host
|
|
||||||
glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(),
|
|
||||||
@@ -2145,6 +2149,7 @@ endif
|
|
||||||
# TODO: add back protocol and server version
|
|
||||||
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
|
|
||||||
summary_info += {'rbd support': config_host.has_key('CONFIG_RBD')}
|
|
||||||
+summary_info += {'vitastor support': config_host.has_key('CONFIG_VITASTOR')}
|
|
||||||
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
|
|
||||||
summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')}
|
|
||||||
summary_info += {'U2F support': u2f.found()}
|
|
@@ -24,4 +24,4 @@ rm fio
|
|||||||
mv fio-copy fio
|
mv fio-copy fio
|
||||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||||
tar --transform 's#^#vitastor-0.9.5/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.5$(rpm --eval '%dist').tar.gz *
|
tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *
|
||||||
|
@@ -22,7 +22,7 @@
|
|||||||
Name: qemu-kvm
|
Name: qemu-kvm
|
||||||
Version: 4.2.0
|
Version: 4.2.0
|
||||||
-Release: 29.vitastor%{?dist}.6
|
-Release: 29.vitastor%{?dist}.6
|
||||||
+Release: 34.vitastor%{?dist}.6
|
+Release: 32.vitastor%{?dist}.6
|
||||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||||
Epoch: 15
|
Epoch: 15
|
||||||
License: GPLv2 and GPLv2+ and CC-BY
|
License: GPLv2 and GPLv2+ and CC-BY
|
||||||
|
@@ -13,7 +13,7 @@
|
|||||||
Name: qemu-kvm
|
Name: qemu-kvm
|
||||||
Version: 4.2.0
|
Version: 4.2.0
|
||||||
-Release: 29%{?dist}.6
|
-Release: 29%{?dist}.6
|
||||||
+Release: 33.vitastor%{?dist}.6
|
+Release: 32.vitastor%{?dist}.6
|
||||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||||
Epoch: 15
|
Epoch: 15
|
||||||
License: GPLv2 and GPLv2+ and CC-BY
|
License: GPLv2 and GPLv2+ and CC-BY
|
||||||
|
@@ -1,103 +0,0 @@
|
|||||||
--- qemu-kvm-6.2.spec.orig 2023-07-18 13:52:57.636625440 +0000
|
|
||||||
+++ qemu-kvm-6.2.spec 2023-07-18 13:52:19.011683886 +0000
|
|
||||||
@@ -73,6 +73,7 @@ Requires: %{name}-hw-usbredir = %{epoch}
|
|
||||||
%endif \
|
|
||||||
Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \
|
|
||||||
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
|
||||||
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
|
||||||
Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release}
|
|
||||||
|
|
||||||
# Macro to properly setup RHEL/RHEV conflict handling
|
|
||||||
@@ -83,7 +84,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version
|
|
||||||
Summary: QEMU is a machine emulator and virtualizer
|
|
||||||
Name: qemu-kvm
|
|
||||||
Version: 6.2.0
|
|
||||||
-Release: 32%{?rcrel}%{?dist}
|
|
||||||
+Release: 32.vitastor%{?rcrel}%{?dist}
|
|
||||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
|
||||||
Epoch: 15
|
|
||||||
License: GPLv2 and GPLv2+ and CC-BY
|
|
||||||
@@ -122,6 +123,7 @@ Source37: tests_data_acpi_pc_SSDT.dimmpx
|
|
||||||
Source38: tests_data_acpi_q35_FACP.slic
|
|
||||||
Source39: tests_data_acpi_q35_SSDT.dimmpxm
|
|
||||||
Source40: tests_data_acpi_virt_SSDT.memhp
|
|
||||||
+Source41: qemu-vitastor.c
|
|
||||||
|
|
||||||
Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch
|
|
||||||
Patch0005: 0005-Initial-redhat-build.patch
|
|
||||||
@@ -652,6 +654,7 @@ Patch255: kvm-scsi-protect-req-aiocb-wit
|
|
||||||
Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
|
|
||||||
# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]
|
|
||||||
Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
|
|
||||||
+Patch258: qemu-6.2-vitastor.patch
|
|
||||||
|
|
||||||
BuildRequires: wget
|
|
||||||
BuildRequires: rpm-build
|
|
||||||
@@ -689,6 +692,7 @@ BuildRequires: libcurl-devel
|
|
||||||
BuildRequires: libssh-devel
|
|
||||||
BuildRequires: librados-devel
|
|
||||||
BuildRequires: librbd-devel
|
|
||||||
+BuildRequires: vitastor-client-devel
|
|
||||||
%if %{have_gluster}
|
|
||||||
# For gluster block driver
|
|
||||||
BuildRequires: glusterfs-api-devel
|
|
||||||
@@ -926,6 +930,14 @@ Install this package if you want to acce
|
|
||||||
using the rbd protocol.
|
|
||||||
|
|
||||||
|
|
||||||
+%package block-vitastor
|
|
||||||
+Summary: QEMU Vitastor block driver
|
|
||||||
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
|
||||||
+
|
|
||||||
+%description block-vitastor
|
|
||||||
+This package provides the additional Vitastor block driver for QEMU.
|
|
||||||
+
|
|
||||||
+
|
|
||||||
%package block-ssh
|
|
||||||
Summary: QEMU SSH block driver
|
|
||||||
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
|
||||||
@@ -979,6 +991,7 @@ This package provides usbredir support.
|
|
||||||
rm -fr slirp
|
|
||||||
mkdir slirp
|
|
||||||
%autopatch -p1
|
|
||||||
+cp %{SOURCE41} ./block/vitastor.c
|
|
||||||
|
|
||||||
%global qemu_kvm_build qemu_kvm_build
|
|
||||||
mkdir -p %{qemu_kvm_build}
|
|
||||||
@@ -994,7 +1007,7 @@ cp -f %{SOURCE40} tests/data/acpi/virt/S
|
|
||||||
# --build-id option is used for giving info to the debug packages.
|
|
||||||
buildldflags="VL_LDFLAGS=-Wl,--build-id"
|
|
||||||
|
|
||||||
-%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle
|
|
||||||
+%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle
|
|
||||||
|
|
||||||
%if 0%{have_gluster}
|
|
||||||
%global block_drivers_list %{block_drivers_list},gluster
|
|
||||||
@@ -1149,9 +1162,7 @@ pushd %{qemu_kvm_build}
|
|
||||||
--firmwarepath=%{_prefix}/share/qemu-firmware \
|
|
||||||
--meson="git" \
|
|
||||||
--target-list="%{buildarch}" \
|
|
||||||
- --block-drv-rw-whitelist=%{block_drivers_list} \
|
|
||||||
--audio-drv-list= \
|
|
||||||
- --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \
|
|
||||||
--with-coroutine=ucontext \
|
|
||||||
--with-git=git \
|
|
||||||
--tls-priority=@QEMU,SYSTEM \
|
|
||||||
@@ -1197,6 +1208,7 @@ pushd %{qemu_kvm_build}
|
|
||||||
%endif
|
|
||||||
--enable-pie \
|
|
||||||
--enable-rbd \
|
|
||||||
+ --enable-vitastor \
|
|
||||||
%if 0%{have_librdma}
|
|
||||||
--enable-rdma \
|
|
||||||
%endif
|
|
||||||
@@ -1794,6 +1806,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.
|
|
||||||
%files block-rbd
|
|
||||||
%{_libdir}/qemu-kvm/block-rbd.so
|
|
||||||
|
|
||||||
+%files block-vitastor
|
|
||||||
+%{_libdir}/qemu-kvm/block-vitastor.so
|
|
||||||
+
|
|
||||||
%files block-ssh
|
|
||||||
%{_libdir}/qemu-kvm/block-ssh.so
|
|
||||||
|
|
@@ -1,93 +0,0 @@
|
|||||||
--- qemu-kvm-7.2.spec.orig 2023-06-22 13:56:19.000000000 +0000
|
|
||||||
+++ qemu-kvm-7.2.spec 2023-07-18 07:55:22.347090196 +0000
|
|
||||||
@@ -100,8 +100,6 @@
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%global target_list %{kvm_target}-softmmu
|
|
||||||
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
|
|
||||||
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
|
|
||||||
%define qemudocdir %{_docdir}/%{name}
|
|
||||||
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
|
|
||||||
|
|
||||||
@@ -126,6 +124,7 @@ Requires: %{name}-device-usb-host = %{ep
|
|
||||||
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
|
|
||||||
%endif \
|
|
||||||
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
|
||||||
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
|
||||||
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
|
|
||||||
|
|
||||||
# Since SPICE is removed from RHEL-9, the following Obsoletes:
|
|
||||||
@@ -148,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
|
|
||||||
Summary: QEMU is a machine emulator and virtualizer
|
|
||||||
Name: qemu-kvm
|
|
||||||
Version: 7.2.0
|
|
||||||
-Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.1
|
|
||||||
+Release: 14.vitastor%{?rcrel}%{?dist}%{?cc_suffix}.1
|
|
||||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
|
||||||
# Epoch 15 used for RHEL 8
|
|
||||||
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
|
|
||||||
@@ -171,6 +170,7 @@ Source28: 95-kvm-memlock.conf
|
|
||||||
Source30: kvm-s390x.conf
|
|
||||||
Source31: kvm-x86.conf
|
|
||||||
Source36: README.tests
|
|
||||||
+Source37: qemu-vitastor.c
|
|
||||||
|
|
||||||
|
|
||||||
Patch0004: 0004-Initial-redhat-build.patch
|
|
||||||
@@ -418,6 +418,7 @@ Patch134: kvm-target-i386-Fix-BZHI-instr
|
|
||||||
Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
|
|
||||||
# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z]
|
|
||||||
Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
|
|
||||||
+Patch137: qemu-7.2-vitastor.patch
|
|
||||||
|
|
||||||
%if %{have_clang}
|
|
||||||
BuildRequires: clang
|
|
||||||
@@ -449,6 +450,7 @@ BuildRequires: libcurl-devel
|
|
||||||
%if %{have_block_rbd}
|
|
||||||
BuildRequires: librbd-devel
|
|
||||||
%endif
|
|
||||||
+BuildRequires: vitastor-client-devel
|
|
||||||
# We need both because the 'stap' binary is probed for by configure
|
|
||||||
BuildRequires: systemtap
|
|
||||||
BuildRequires: systemtap-sdt-devel
|
|
||||||
@@ -642,6 +644,14 @@ using the rbd protocol.
|
|
||||||
%endif
|
|
||||||
|
|
||||||
|
|
||||||
+%package block-vitastor
|
|
||||||
+Summary: QEMU Vitastor block driver
|
|
||||||
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
|
||||||
+
|
|
||||||
+%description block-vitastor
|
|
||||||
+This package provides the additional Vitastor block driver for QEMU.
|
|
||||||
+
|
|
||||||
+
|
|
||||||
%package audio-pa
|
|
||||||
Summary: QEMU PulseAudio audio driver
|
|
||||||
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
|
||||||
@@ -719,6 +729,7 @@ This package provides usbredir support.
|
|
||||||
%prep
|
|
||||||
%setup -q -n qemu-%{version}%{?rcstr}
|
|
||||||
%autopatch -p1
|
|
||||||
+cp %{SOURCE37} ./block/vitastor.c
|
|
||||||
|
|
||||||
%global qemu_kvm_build qemu_kvm_build
|
|
||||||
mkdir -p %{qemu_kvm_build}
|
|
||||||
@@ -946,6 +957,7 @@ run_configure \
|
|
||||||
%if %{have_block_rbd}
|
|
||||||
--enable-rbd \
|
|
||||||
%endif
|
|
||||||
+ --enable-vitastor \
|
|
||||||
%if %{have_librdma}
|
|
||||||
--enable-rdma \
|
|
||||||
%endif
|
|
||||||
@@ -1426,6 +1438,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
|
|
||||||
%files block-rbd
|
|
||||||
%{_libdir}/%{name}/block-rbd.so
|
|
||||||
%endif
|
|
||||||
+%files block-vitastor
|
|
||||||
+%{_libdir}/%{name}/block-vitastor.so
|
|
||||||
+
|
|
||||||
%files audio-pa
|
|
||||||
%{_libdir}/%{name}/audio-pa.so
|
|
||||||
|
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.9.5.el7.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.9.5
|
Version: 0.9.2
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.9.5.el7.tar.gz
|
Source0: vitastor-0.9.2.el7.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.9.5.el8.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.9.5
|
Version: 0.9.2
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.9.5.el8.tar.gz
|
Source0: vitastor-0.9.2.el8.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
|
@@ -18,7 +18,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.9.5.el9.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.9.5
|
Version: 0.9.2
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.9.5.el9.tar.gz
|
Source0: vitastor-0.9.2.el9.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
|
@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
|||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_definitions(-DVERSION="0.9.5")
|
add_definitions(-DVERSION="0.9.2")
|
||||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||||
if (${WITH_ASAN})
|
if (${WITH_ASAN})
|
||||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||||
@@ -56,6 +56,11 @@ if (ISAL_LIBRARIES)
|
|||||||
add_definitions(-DWITH_ISAL)
|
add_definitions(-DWITH_ISAL)
|
||||||
endif (ISAL_LIBRARIES)
|
endif (ISAL_LIBRARIES)
|
||||||
|
|
||||||
|
find_package(OpenSSL)
|
||||||
|
if (OPENSSL_FOUND)
|
||||||
|
add_definitions(-DWITH_OPENSSL)
|
||||||
|
endif (OPENSSL_FOUND)
|
||||||
|
|
||||||
add_custom_target(build_tests)
|
add_custom_target(build_tests)
|
||||||
add_custom_target(test
|
add_custom_target(test
|
||||||
COMMAND
|
COMMAND
|
||||||
|
@@ -714,15 +714,9 @@ resume_1:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (new_trim_pos < bs->journal.used_start
|
|
||||||
? (bs->journal.dirty_start >= bs->journal.used_start || bs->journal.dirty_start < new_trim_pos)
|
|
||||||
: (bs->journal.dirty_start >= bs->journal.used_start && bs->journal.dirty_start < new_trim_pos))
|
|
||||||
{
|
|
||||||
bs->journal.dirty_start = new_trim_pos;
|
|
||||||
}
|
|
||||||
bs->journal.used_start = new_trim_pos;
|
bs->journal.used_start = new_trim_pos;
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf("Journal trimmed to %08lx (next_free=%08lx dirty_start=%08lx)\n", bs->journal.used_start, bs->journal.next_free, bs->journal.dirty_start);
|
printf("Journal trimmed to %08lx (next_free=%08lx)\n", bs->journal.used_start, bs->journal.next_free);
|
||||||
#endif
|
#endif
|
||||||
if (bs->journal.flush_journal && !flusher->flush_queue.size())
|
if (bs->journal.flush_journal && !flusher->flush_queue.size())
|
||||||
{
|
{
|
||||||
|
@@ -103,7 +103,6 @@ public:
|
|||||||
journal_flusher_t(blockstore_impl_t *bs);
|
journal_flusher_t(blockstore_impl_t *bs);
|
||||||
~journal_flusher_t();
|
~journal_flusher_t();
|
||||||
void loop();
|
void loop();
|
||||||
bool is_trim_wanted() { return trim_wanted; }
|
|
||||||
bool is_active();
|
bool is_active();
|
||||||
void mark_trim_possible();
|
void mark_trim_possible();
|
||||||
void request_trim();
|
void request_trim();
|
||||||
|
@@ -218,7 +218,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
|||||||
auto used = --journal.used_sectors[dirty_it->second.journal_sector];
|
auto used = --journal.used_sectors[dirty_it->second.journal_sector];
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf(
|
printf(
|
||||||
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
|
"remove usage of journal offset %08lx by %lx:%lx v%lu (%d refs)\n", dirty_it->second.journal_sector,
|
||||||
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, used
|
dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version, used
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
@@ -661,13 +661,8 @@ void blockstore_impl_t::release_journal_sectors(blockstore_op_t *op)
|
|||||||
uint64_t s = PRIV(op)->min_flushed_journal_sector;
|
uint64_t s = PRIV(op)->min_flushed_journal_sector;
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
if (!journal.sector_info[s-1].dirty && journal.sector_info[s-1].flush_count == 0)
|
if (s != (1+journal.cur_sector) && journal.sector_info[s-1].flush_count == 0)
|
||||||
{
|
{
|
||||||
if (s == (1+journal.cur_sector))
|
|
||||||
{
|
|
||||||
// Forcibly move to the next sector and move dirty position
|
|
||||||
journal.in_sector_pos = journal.block_size;
|
|
||||||
}
|
|
||||||
// We know for sure that we won't write into this sector anymore
|
// We know for sure that we won't write into this sector anymore
|
||||||
uint64_t new_ds = journal.sector_info[s-1].offset + journal.block_size;
|
uint64_t new_ds = journal.sector_info[s-1].offset + journal.block_size;
|
||||||
if (new_ds >= journal.len)
|
if (new_ds >= journal.len)
|
||||||
|
@@ -56,15 +56,14 @@ struct image_lister_t
|
|||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto pool_it = parent->cli->st_cli.pool_config.find(INODE_POOL(ic.second.num));
|
auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(ic.second.num));
|
||||||
bool good_pool = pool_it != parent->cli->st_cli.pool_config.end();
|
|
||||||
auto item = json11::Json::object {
|
auto item = json11::Json::object {
|
||||||
{ "name", ic.second.name },
|
{ "name", ic.second.name },
|
||||||
{ "size", ic.second.size },
|
{ "size", ic.second.size },
|
||||||
{ "used_size", 0 },
|
{ "used_size", 0 },
|
||||||
{ "readonly", ic.second.readonly },
|
{ "readonly", ic.second.readonly },
|
||||||
{ "pool_id", (uint64_t)INODE_POOL(ic.second.num) },
|
{ "pool_id", (uint64_t)INODE_POOL(ic.second.num) },
|
||||||
{ "pool_name", good_pool ? pool_it->second.name : "? (ID:"+std::to_string(INODE_POOL(ic.second.num))+")" },
|
{ "pool_name", pool_cfg.name },
|
||||||
{ "inode_num", INODE_NO_POOL(ic.second.num) },
|
{ "inode_num", INODE_NO_POOL(ic.second.num) },
|
||||||
{ "inode_id", ic.second.num },
|
{ "inode_id", ic.second.num },
|
||||||
};
|
};
|
||||||
@@ -248,8 +247,6 @@ resume_1:
|
|||||||
if (state == 1)
|
if (state == 1)
|
||||||
goto resume_1;
|
goto resume_1;
|
||||||
get_list();
|
get_list();
|
||||||
if (state == 100)
|
|
||||||
return;
|
|
||||||
if (show_stats)
|
if (show_stats)
|
||||||
{
|
{
|
||||||
resume_1:
|
resume_1:
|
||||||
@@ -272,7 +269,7 @@ resume_1:
|
|||||||
{ "key", "name" },
|
{ "key", "name" },
|
||||||
{ "title", "NAME" },
|
{ "title", "NAME" },
|
||||||
});
|
});
|
||||||
if (list_pool_name == "")
|
if (!list_pool_id)
|
||||||
{
|
{
|
||||||
cols.push_back(json11::Json::object{
|
cols.push_back(json11::Json::object{
|
||||||
{ "key", "pool_name" },
|
{ "key", "pool_name" },
|
||||||
|
@@ -41,7 +41,7 @@ struct snap_merger_t
|
|||||||
int fsync_interval = 128;
|
int fsync_interval = 128;
|
||||||
|
|
||||||
// -- STATE --
|
// -- STATE --
|
||||||
inode_t target, to_num;
|
inode_t target;
|
||||||
int target_rank;
|
int target_rank;
|
||||||
bool inside_continue = false;
|
bool inside_continue = false;
|
||||||
int state = 0;
|
int state = 0;
|
||||||
@@ -98,7 +98,6 @@ struct snap_merger_t
|
|||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
to_num = to_cfg->num;
|
|
||||||
// Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them
|
// Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them
|
||||||
std::vector<inode_t> chain_list;
|
std::vector<inode_t> chain_list;
|
||||||
inode_config_t *cur = to_cfg;
|
inode_config_t *cur = to_cfg;
|
||||||
@@ -452,7 +451,7 @@ struct snap_merger_t
|
|||||||
{
|
{
|
||||||
cluster_op_t *op = &rwo->op;
|
cluster_op_t *op = &rwo->op;
|
||||||
op->opcode = OSD_OP_READ;
|
op->opcode = OSD_OP_READ;
|
||||||
op->inode = to_num;
|
op->inode = target;
|
||||||
op->offset = rwo->offset;
|
op->offset = rwo->offset;
|
||||||
op->len = target_block_size;
|
op->len = target_block_size;
|
||||||
op->iov.push_back(rwo->buf, target_block_size);
|
op->iov.push_back(rwo->buf, target_block_size);
|
||||||
@@ -484,7 +483,7 @@ struct snap_merger_t
|
|||||||
{
|
{
|
||||||
// write start->end
|
// write start->end
|
||||||
rwo->todo++;
|
rwo->todo++;
|
||||||
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas && to_num == target ? 1+rwo->op.version : 0);
|
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
|
||||||
rwo->start = rwo->end;
|
rwo->start = rwo->end;
|
||||||
if (use_cas)
|
if (use_cas)
|
||||||
{
|
{
|
||||||
@@ -503,7 +502,7 @@ struct snap_merger_t
|
|||||||
{
|
{
|
||||||
// write start->end
|
// write start->end
|
||||||
rwo->todo++;
|
rwo->todo++;
|
||||||
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas && to_num == target ? 1+rwo->op.version : 0);
|
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
|
||||||
rwo->start = rwo->end;
|
rwo->start = rwo->end;
|
||||||
if (use_cas)
|
if (use_cas)
|
||||||
{
|
{
|
||||||
@@ -533,7 +532,7 @@ struct snap_merger_t
|
|||||||
if (use_cas && subop->retval == -EINTR)
|
if (use_cas && subop->retval == -EINTR)
|
||||||
{
|
{
|
||||||
// CAS failure - reread and repeat optimistically
|
// CAS failure - reread and repeat optimistically
|
||||||
rwo->start = rwo->end = 0;
|
rwo->start = subop->offset - rwo->offset;
|
||||||
rwo_read(rwo);
|
rwo_read(rwo);
|
||||||
delete subop;
|
delete subop;
|
||||||
return;
|
return;
|
||||||
@@ -543,7 +542,7 @@ struct snap_merger_t
|
|||||||
rwo->error_read = false;
|
rwo->error_read = false;
|
||||||
}
|
}
|
||||||
// Increment CAS version
|
// Increment CAS version
|
||||||
rwo->op.version = subop->version;
|
rwo->op.version++;
|
||||||
if (use_cas)
|
if (use_cas)
|
||||||
next_write(rwo);
|
next_write(rwo);
|
||||||
else
|
else
|
||||||
|
@@ -65,9 +65,6 @@ struct snap_remover_t
|
|||||||
int current_child = 0;
|
int current_child = 0;
|
||||||
std::function<bool(cli_result_t &)> cb;
|
std::function<bool(cli_result_t &)> cb;
|
||||||
|
|
||||||
std::vector<std::string> rebased_images, deleted_images;
|
|
||||||
std::vector<uint64_t> deleted_ids;
|
|
||||||
std::string inverse_child_name, inverse_parent_name;
|
|
||||||
cli_result_t result;
|
cli_result_t result;
|
||||||
|
|
||||||
bool is_done()
|
bool is_done()
|
||||||
@@ -125,7 +122,6 @@ resume_1:
|
|||||||
{
|
{
|
||||||
if (merge_children[current_child] == inverse_child)
|
if (merge_children[current_child] == inverse_child)
|
||||||
continue;
|
continue;
|
||||||
rebased_images.push_back(parent->cli->st_cli.inode_config.at(merge_children[current_child]).name);
|
|
||||||
start_merge_child(merge_children[current_child], merge_children[current_child]);
|
start_merge_child(merge_children[current_child], merge_children[current_child]);
|
||||||
if (state == 100)
|
if (state == 100)
|
||||||
return;
|
return;
|
||||||
@@ -138,12 +134,9 @@ resume_2:
|
|||||||
cb = NULL;
|
cb = NULL;
|
||||||
if (result.err)
|
if (result.err)
|
||||||
{
|
{
|
||||||
result.data = my_result(result.data);
|
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (parent->progress)
|
|
||||||
printf("%s\n", result.text.c_str());
|
|
||||||
parent->change_parent(merge_children[current_child], new_parent, &result);
|
parent->change_parent(merge_children[current_child], new_parent, &result);
|
||||||
state = 3;
|
state = 3;
|
||||||
resume_3:
|
resume_3:
|
||||||
@@ -151,7 +144,6 @@ resume_3:
|
|||||||
return;
|
return;
|
||||||
if (result.err)
|
if (result.err)
|
||||||
{
|
{
|
||||||
result.data = my_result(result.data);
|
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -173,12 +165,9 @@ resume_4:
|
|||||||
cb = NULL;
|
cb = NULL;
|
||||||
if (result.err)
|
if (result.err)
|
||||||
{
|
{
|
||||||
result.data = my_result(result.data);
|
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (parent->progress)
|
|
||||||
printf("%s\n", result.text.c_str());
|
|
||||||
// Delete "inverse" child data
|
// Delete "inverse" child data
|
||||||
start_delete_source(inverse_child);
|
start_delete_source(inverse_child);
|
||||||
if (state == 100)
|
if (state == 100)
|
||||||
@@ -192,12 +181,9 @@ resume_5:
|
|||||||
cb = NULL;
|
cb = NULL;
|
||||||
if (result.err)
|
if (result.err)
|
||||||
{
|
{
|
||||||
result.data = my_result(result.data);
|
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (parent->progress)
|
|
||||||
printf("%s\n", result.text.c_str());
|
|
||||||
// Delete "inverse" child metadata, rename parent over it,
|
// Delete "inverse" child metadata, rename parent over it,
|
||||||
// and also change parent links of the previous "inverse" child
|
// and also change parent links of the previous "inverse" child
|
||||||
rename_inverse_parent();
|
rename_inverse_parent();
|
||||||
@@ -213,12 +199,6 @@ resume_6:
|
|||||||
{
|
{
|
||||||
if (chain_list[current_child] == inverse_parent)
|
if (chain_list[current_child] == inverse_parent)
|
||||||
continue;
|
continue;
|
||||||
{
|
|
||||||
auto parent_it = parent->cli->st_cli.inode_config.find(chain_list[current_child]);
|
|
||||||
if (parent_it != parent->cli->st_cli.inode_config.end())
|
|
||||||
deleted_images.push_back(parent_it->second.name);
|
|
||||||
deleted_ids.push_back(chain_list[current_child]);
|
|
||||||
}
|
|
||||||
start_delete_source(chain_list[current_child]);
|
start_delete_source(chain_list[current_child]);
|
||||||
resume_7:
|
resume_7:
|
||||||
while (!cb(result))
|
while (!cb(result))
|
||||||
@@ -229,12 +209,9 @@ resume_7:
|
|||||||
cb = NULL;
|
cb = NULL;
|
||||||
if (result.err)
|
if (result.err)
|
||||||
{
|
{
|
||||||
result.data = my_result(result.data);
|
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (parent->progress)
|
|
||||||
printf("%s\n", result.text.c_str());
|
|
||||||
delete_inode_config(chain_list[current_child]);
|
delete_inode_config(chain_list[current_child]);
|
||||||
if (state == 100)
|
if (state == 100)
|
||||||
return;
|
return;
|
||||||
@@ -244,26 +221,11 @@ resume_8:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
state = 100;
|
state = 100;
|
||||||
result = (cli_result_t){
|
|
||||||
.text = "",
|
|
||||||
.data = my_result(result.data),
|
|
||||||
};
|
|
||||||
resume_100:
|
resume_100:
|
||||||
// Done
|
// Done
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
json11::Json my_result(json11::Json src)
|
|
||||||
{
|
|
||||||
auto obj = src.object_items();
|
|
||||||
obj["deleted_ids"] = deleted_ids;
|
|
||||||
obj["deleted_images"] = deleted_images;
|
|
||||||
obj["rebased_images"] = rebased_images;
|
|
||||||
obj["renamed_from"] = inverse_parent_name;
|
|
||||||
obj["renamed_to"] = inverse_child_name;
|
|
||||||
return obj;
|
|
||||||
}
|
|
||||||
|
|
||||||
void get_merge_children()
|
void get_merge_children()
|
||||||
{
|
{
|
||||||
// Get all children of from..to
|
// Get all children of from..to
|
||||||
@@ -376,11 +338,7 @@ resume_100:
|
|||||||
}
|
}
|
||||||
for (auto inode_result: data["responses"].array_items())
|
for (auto inode_result: data["responses"].array_items())
|
||||||
{
|
{
|
||||||
if (inode_result["response_range"]["kvs"].array_items().size() == 0)
|
auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["kvs"][0]);
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["response_range"]["kvs"][0]);
|
|
||||||
pool_id_t pool_id = 0;
|
pool_id_t pool_id = 0;
|
||||||
inode_t inode = 0;
|
inode_t inode = 0;
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
@@ -419,7 +377,7 @@ resume_100:
|
|||||||
inode_t child = cp.first;
|
inode_t child = cp.first;
|
||||||
uint64_t child_used = inode_used[child];
|
uint64_t child_used = inode_used[child];
|
||||||
int rank = cp.second;
|
int rank = cp.second;
|
||||||
for (int i = chain_list.size()-1-rank; i < chain_list.size(); i++)
|
for (int i = chain_list.size()-rank; i < chain_list.size(); i++)
|
||||||
{
|
{
|
||||||
inode_t parent = chain_list[i];
|
inode_t parent = chain_list[i];
|
||||||
uint64_t parent_used = inode_used[parent];
|
uint64_t parent_used = inode_used[parent];
|
||||||
@@ -455,8 +413,8 @@ resume_100:
|
|||||||
}
|
}
|
||||||
inode_config_t *child_cfg = &child_it->second;
|
inode_config_t *child_cfg = &child_it->second;
|
||||||
inode_config_t *target_cfg = &target_it->second;
|
inode_config_t *target_cfg = &target_it->second;
|
||||||
inverse_child_name = child_cfg->name;
|
std::string child_name = child_cfg->name;
|
||||||
inverse_parent_name = target_cfg->name;
|
std::string target_name = target_cfg->name;
|
||||||
std::string child_cfg_key = base64_encode(
|
std::string child_cfg_key = base64_encode(
|
||||||
parent->cli->st_cli.etcd_prefix+
|
parent->cli->st_cli.etcd_prefix+
|
||||||
"/config/inode/"+std::to_string(INODE_POOL(inverse_child))+
|
"/config/inode/"+std::to_string(INODE_POOL(inverse_child))+
|
||||||
@@ -467,9 +425,6 @@ resume_100:
|
|||||||
"/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+
|
"/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+
|
||||||
"/"+std::to_string(INODE_NO_POOL(inverse_parent))
|
"/"+std::to_string(INODE_NO_POOL(inverse_parent))
|
||||||
);
|
);
|
||||||
std::string target_idx_key = base64_encode(
|
|
||||||
parent->cli->st_cli.etcd_prefix+"/index/image/"+inverse_parent_name
|
|
||||||
);
|
|
||||||
// Fill new configuration
|
// Fill new configuration
|
||||||
inode_config_t new_cfg = *child_cfg;
|
inode_config_t new_cfg = *child_cfg;
|
||||||
new_cfg.num = target_cfg->num;
|
new_cfg.num = target_cfg->num;
|
||||||
@@ -494,11 +449,6 @@ resume_100:
|
|||||||
{ "key", child_cfg_key },
|
{ "key", child_cfg_key },
|
||||||
} },
|
} },
|
||||||
},
|
},
|
||||||
json11::Json::object {
|
|
||||||
{ "request_delete_range", json11::Json::object {
|
|
||||||
{ "key", target_idx_key },
|
|
||||||
} },
|
|
||||||
},
|
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "request_put", json11::Json::object {
|
{ "request_put", json11::Json::object {
|
||||||
{ "key", target_cfg_key },
|
{ "key", target_cfg_key },
|
||||||
@@ -545,12 +495,12 @@ resume_100:
|
|||||||
parent->cli->st_cli.etcd_txn_slow(json11::Json::object {
|
parent->cli->st_cli.etcd_txn_slow(json11::Json::object {
|
||||||
{ "compare", cmp },
|
{ "compare", cmp },
|
||||||
{ "success", txn },
|
{ "success", txn },
|
||||||
}, [this](std::string err, json11::Json res)
|
}, [this, target_name, child_name](std::string err, json11::Json res)
|
||||||
{
|
{
|
||||||
parent->waiting--;
|
parent->waiting--;
|
||||||
if (err != "")
|
if (err != "")
|
||||||
{
|
{
|
||||||
result = (cli_result_t){ .err = EIO, .text = "Error renaming "+inverse_parent_name+" to "+inverse_child_name+": "+err };
|
result = (cli_result_t){ .err = EIO, .text = "Error renaming "+target_name+" to "+child_name+": "+err };
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -558,14 +508,14 @@ resume_100:
|
|||||||
{
|
{
|
||||||
result = (cli_result_t){
|
result = (cli_result_t){
|
||||||
.err = EAGAIN,
|
.err = EAGAIN,
|
||||||
.text = "Parent ("+inverse_parent_name+"), child ("+inverse_child_name+"), or one of its children"
|
.text = "Parent ("+target_name+"), child ("+child_name+"), or one of its children"
|
||||||
" configuration was modified during rename",
|
" configuration was modified during rename",
|
||||||
};
|
};
|
||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (parent->progress)
|
if (parent->progress)
|
||||||
printf("Layer %s renamed to %s\n", inverse_parent_name.c_str(), inverse_child_name.c_str());
|
printf("Layer %s renamed to %s\n", target_name.c_str(), child_name.c_str());
|
||||||
parent->ringloop->wakeup();
|
parent->ringloop->wakeup();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@@ -28,7 +28,6 @@ struct rm_inode_t
|
|||||||
cli_tool_t *parent = NULL;
|
cli_tool_t *parent = NULL;
|
||||||
inode_list_t *lister = NULL;
|
inode_list_t *lister = NULL;
|
||||||
std::vector<rm_pg_t*> lists;
|
std::vector<rm_pg_t*> lists;
|
||||||
std::vector<osd_num_t> inactive_osds;
|
|
||||||
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
|
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
|
||||||
uint64_t pgs_to_list = 0;
|
uint64_t pgs_to_list = 0;
|
||||||
bool lists_done = false;
|
bool lists_done = false;
|
||||||
@@ -87,16 +86,6 @@ struct rm_inode_t
|
|||||||
state = 100;
|
state = 100;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
inactive_osds = parent->cli->list_inode_get_inactive_osds(lister);
|
|
||||||
if (inactive_osds.size() && !parent->json_output)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Some data may remain after delete on OSDs which are currently down: ");
|
|
||||||
for (int i = 0; i < inactive_osds.size(); i++)
|
|
||||||
{
|
|
||||||
fprintf(stderr, i > 0 ? ", %lu" : "%lu", inactive_osds[i]);
|
|
||||||
}
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
}
|
|
||||||
pgs_to_list = parent->cli->list_pg_count(lister);
|
pgs_to_list = parent->cli->list_pg_count(lister);
|
||||||
parent->cli->list_inode_next(lister, parent->parallel_osds);
|
parent->cli->list_inode_next(lister, parent->parallel_osds);
|
||||||
}
|
}
|
||||||
@@ -178,33 +167,16 @@ struct rm_inode_t
|
|||||||
}
|
}
|
||||||
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
|
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
|
printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
|
||||||
total_prev_pct = total_done*1000/total_count;
|
total_prev_pct = total_done*1000/total_count;
|
||||||
}
|
}
|
||||||
if (lists_done && !lists.size())
|
if (lists_done && !lists.size())
|
||||||
{
|
{
|
||||||
if (parent->progress && total_count > 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "\n");
|
|
||||||
}
|
|
||||||
if (parent->progress && (total_done < total_count || inactive_osds.size() > 0))
|
|
||||||
{
|
|
||||||
fprintf(
|
|
||||||
stderr, "Warning: Pool:%u,ID:%lu inode data may not have been fully removed.\n"
|
|
||||||
" Use `vitastor-cli rm-data --pool %u --inode %lu` if you encounter it in listings.\n",
|
|
||||||
pool_id, INODE_NO_POOL(inode), pool_id, INODE_NO_POOL(inode)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
result = (cli_result_t){
|
result = (cli_result_t){
|
||||||
.err = error_count > 0 ? EIO : 0,
|
.err = error_count > 0 ? EIO : 0,
|
||||||
.text = error_count > 0 ? "Some blocks were not removed" : (
|
.text = error_count > 0 ? "Some blocks were not removed" : (
|
||||||
"Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
|
"Done, inode "+std::to_string(INODE_NO_POOL(inode))+" from pool "+
|
||||||
std::to_string(pool_id)+" removed"),
|
std::to_string(pool_id)+" removed"),
|
||||||
.data = json11::Json::object {
|
|
||||||
{ "removed_objects", total_done },
|
|
||||||
{ "total_objects", total_count },
|
|
||||||
{ "inactive_osds", inactive_osds },
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
state = 100;
|
state = 100;
|
||||||
}
|
}
|
||||||
|
@@ -1209,10 +1209,6 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
|||||||
copy_part_bitmap(op, part);
|
copy_part_bitmap(op, part);
|
||||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||||
}
|
}
|
||||||
else if (op->opcode == OSD_OP_WRITE)
|
|
||||||
{
|
|
||||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
|
||||||
}
|
|
||||||
if (op->inflight_count == 0)
|
if (op->inflight_count == 0)
|
||||||
{
|
{
|
||||||
if (op->opcode == OSD_OP_SYNC)
|
if (op->opcode == OSD_OP_SYNC)
|
||||||
|
@@ -130,7 +130,6 @@ public:
|
|||||||
inode_list_t *list_inode_start(inode_t inode,
|
inode_list_t *list_inode_start(inode_t inode,
|
||||||
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
|
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
|
||||||
int list_pg_count(inode_list_t *lst);
|
int list_pg_count(inode_list_t *lst);
|
||||||
const std::vector<osd_num_t> & list_inode_get_inactive_osds(inode_list_t *lst);
|
|
||||||
void list_inode_next(inode_list_t *lst, int next_pgs);
|
void list_inode_next(inode_list_t *lst, int next_pgs);
|
||||||
//inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; }
|
//inline uint32_t get_bs_bitmap_granularity() { return st_cli.global_bitmap_granularity; }
|
||||||
//inline uint64_t get_bs_block_size() { return st_cli.global_block_size; }
|
//inline uint64_t get_bs_block_size() { return st_cli.global_block_size; }
|
||||||
|
@@ -36,7 +36,6 @@ struct inode_list_t
|
|||||||
inode_t inode = 0;
|
inode_t inode = 0;
|
||||||
int done_pgs = 0;
|
int done_pgs = 0;
|
||||||
int want = 0;
|
int want = 0;
|
||||||
std::vector<osd_num_t> inactive_osds;
|
|
||||||
std::vector<inode_list_pg_t*> pgs;
|
std::vector<inode_list_pg_t*> pgs;
|
||||||
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
|
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
|
||||||
};
|
};
|
||||||
@@ -61,7 +60,6 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
|||||||
lst->inode = inode;
|
lst->inode = inode;
|
||||||
lst->callback = callback;
|
lst->callback = callback;
|
||||||
auto pool_cfg = st_cli.pool_config[pool_id];
|
auto pool_cfg = st_cli.pool_config[pool_id];
|
||||||
std::set<osd_num_t> inactive_osd_set;
|
|
||||||
for (auto & pg_item: pool_cfg.pg_config)
|
for (auto & pg_item: pool_cfg.pg_config)
|
||||||
{
|
{
|
||||||
auto & pg = pg_item.second;
|
auto & pg = pg_item.second;
|
||||||
@@ -108,18 +106,11 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
|||||||
}
|
}
|
||||||
for (osd_num_t peer_osd: all_peers)
|
for (osd_num_t peer_osd: all_peers)
|
||||||
{
|
{
|
||||||
if (st_cli.peer_states.find(peer_osd) != st_cli.peer_states.end())
|
r->list_osds.push_back((inode_list_osd_t){
|
||||||
{
|
.pg = r,
|
||||||
r->list_osds.push_back((inode_list_osd_t){
|
.osd_num = peer_osd,
|
||||||
.pg = r,
|
.sent = false,
|
||||||
.osd_num = peer_osd,
|
});
|
||||||
.sent = false,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
inactive_osd_set.insert(peer_osd);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -141,7 +132,6 @@ inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
|||||||
{
|
{
|
||||||
lst->pgs[i]->pos = i;
|
lst->pgs[i]->pos = i;
|
||||||
}
|
}
|
||||||
lst->inactive_osds.insert(lst->inactive_osds.end(), inactive_osd_set.begin(), inactive_osd_set.end());
|
|
||||||
lists.push_back(lst);
|
lists.push_back(lst);
|
||||||
return lst;
|
return lst;
|
||||||
}
|
}
|
||||||
@@ -151,11 +141,6 @@ int cluster_client_t::list_pg_count(inode_list_t *lst)
|
|||||||
return lst->pgs.size();
|
return lst->pgs.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<osd_num_t> & cluster_client_t::list_inode_get_inactive_osds(inode_list_t *lst)
|
|
||||||
{
|
|
||||||
return lst->inactive_osds;
|
|
||||||
}
|
|
||||||
|
|
||||||
void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
|
void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
|
||||||
{
|
{
|
||||||
if (next_pgs >= 0)
|
if (next_pgs >= 0)
|
||||||
|
@@ -99,16 +99,15 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
|||||||
if (options["journal_size"] == "")
|
if (options["journal_size"] == "")
|
||||||
{
|
{
|
||||||
if (options["journal_device"] == "")
|
if (options["journal_device"] == "")
|
||||||
options["journal_size"] = is_hdd ? "128M" : "32M";
|
options["journal_size"] = "32M";
|
||||||
else if (is_hdd)
|
else if (is_hdd)
|
||||||
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
|
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
|
||||||
}
|
}
|
||||||
bool is_hybrid = is_hdd && options["journal_device"] != "" && options["journal_device"] != options["data_device"];
|
|
||||||
if (is_hdd)
|
if (is_hdd)
|
||||||
{
|
{
|
||||||
if (options["block_size"] == "")
|
if (options["block_size"] == "")
|
||||||
options["block_size"] = "1M";
|
options["block_size"] = "1M";
|
||||||
if (is_hybrid && options["throttle_small_writes"] == "")
|
if (options["throttle_small_writes"] == "")
|
||||||
options["throttle_small_writes"] = "1";
|
options["throttle_small_writes"] = "1";
|
||||||
}
|
}
|
||||||
json11::Json::object sb;
|
json11::Json::object sb;
|
||||||
@@ -135,7 +134,7 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
|
|||||||
{ "meta_offset", 4096 + (dsk.meta_device == dsk.journal_device ? dsk.journal_len : 0) },
|
{ "meta_offset", 4096 + (dsk.meta_device == dsk.journal_device ? dsk.journal_len : 0) },
|
||||||
{ "data_offset", 4096 + (dsk.data_device == dsk.meta_device ? dsk.meta_len : 0) +
|
{ "data_offset", 4096 + (dsk.data_device == dsk.meta_device ? dsk.meta_len : 0) +
|
||||||
(dsk.data_device == dsk.journal_device ? dsk.journal_len : 0) },
|
(dsk.data_device == dsk.journal_device ? dsk.journal_len : 0) },
|
||||||
{ "journal_no_same_sector_overwrites", !is_hdd || is_hybrid },
|
{ "journal_no_same_sector_overwrites", true },
|
||||||
{ "journal_sector_buffer_count", 1024 },
|
{ "journal_sector_buffer_count", 1024 },
|
||||||
{ "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
|
{ "disable_data_fsync", json_is_true(options["disable_data_fsync"]) },
|
||||||
{ "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },
|
{ "disable_meta_fsync", json_is_true(options["disable_meta_fsync"]) },
|
||||||
|
@@ -187,30 +187,22 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
|
|||||||
check_addr = addr;
|
check_addr = addr;
|
||||||
if (pos == std::string::npos)
|
if (pos == std::string::npos)
|
||||||
addr += "/v3";
|
addr += "/v3";
|
||||||
bool local = false;
|
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < local_ips.size(); i++)
|
for (i = 0; i < local_ips.size(); i++)
|
||||||
{
|
{
|
||||||
if (local_ips[i] == check_addr)
|
if (local_ips[i] == check_addr)
|
||||||
{
|
{
|
||||||
local = true;
|
this->etcd_local.push_back(addr);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto & to = local ? this->etcd_local : this->etcd_addresses;
|
if (i >= local_ips.size())
|
||||||
for (i = 0; i < to.size(); i++)
|
this->etcd_addresses.push_back(addr);
|
||||||
{
|
|
||||||
if (to[i] == addr)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (i >= to.size())
|
|
||||||
to.push_back(addr);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void etcd_state_client_t::parse_config(const json11::Json & config)
|
void etcd_state_client_t::parse_config(const json11::Json & config)
|
||||||
{
|
{
|
||||||
this->etcd_local.clear();
|
|
||||||
this->etcd_addresses.clear();
|
this->etcd_addresses.clear();
|
||||||
if (config["etcd_address"].is_string())
|
if (config["etcd_address"].is_string())
|
||||||
{
|
{
|
||||||
@@ -357,7 +349,7 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||||||
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
watch_id == ETCD_OSD_STATE_WATCH_ID)
|
||||||
etcd_watches_initialised++;
|
etcd_watches_initialised++;
|
||||||
if (etcd_watches_initialised == 4 && this->log_level > 0)
|
if (etcd_watches_initialised == 4 && this->log_level > 0)
|
||||||
fprintf(stderr, "Successfully subscribed to etcd at %s\n", cur_addr.c_str());
|
fprintf(stderr, "Successfully subscribed to etcd at %s\n", selected_etcd_address.c_str());
|
||||||
}
|
}
|
||||||
if (data["result"]["canceled"].bool_value())
|
if (data["result"]["canceled"].bool_value())
|
||||||
{
|
{
|
||||||
@@ -368,17 +360,15 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||||||
// so we should restart from the beginning if we can
|
// so we should restart from the beginning if we can
|
||||||
if (on_reload_hook != NULL)
|
if (on_reload_hook != NULL)
|
||||||
{
|
{
|
||||||
// check to not trigger on_reload_hook multiple times
|
fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
|
||||||
if (etcd_watch_ws != NULL)
|
data["result"]["compact_revision"].uint64_value());
|
||||||
|
if (etcd_watch_ws)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Revisions before %lu were compacted by etcd, reloading state\n",
|
|
||||||
data["result"]["compact_revision"].uint64_value());
|
|
||||||
http_close(etcd_watch_ws);
|
http_close(etcd_watch_ws);
|
||||||
etcd_watch_ws = NULL;
|
etcd_watch_ws = NULL;
|
||||||
etcd_watch_revision = 0;
|
|
||||||
on_reload_hook();
|
|
||||||
}
|
}
|
||||||
return;
|
etcd_watch_revision = 0;
|
||||||
|
on_reload_hook();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -425,9 +415,13 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||||||
}
|
}
|
||||||
if (msg->eof)
|
if (msg->eof)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Disconnected from etcd %s\n", cur_addr.c_str());
|
|
||||||
if (cur_addr == selected_etcd_address)
|
if (cur_addr == selected_etcd_address)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Disconnected from etcd %s\n", selected_etcd_address.c_str());
|
||||||
selected_etcd_address = "";
|
selected_etcd_address = "";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
fprintf(stderr, "Disconnected from etcd\n");
|
||||||
if (etcd_watch_ws)
|
if (etcd_watch_ws)
|
||||||
{
|
{
|
||||||
http_close(etcd_watch_ws);
|
http_close(etcd_watch_ws);
|
||||||
@@ -444,7 +438,6 @@ void etcd_state_client_t::start_etcd_watcher()
|
|||||||
else if (etcd_watches_initialised > 0)
|
else if (etcd_watches_initialised > 0)
|
||||||
{
|
{
|
||||||
// Connection was live, retry immediately
|
// Connection was live, retry immediately
|
||||||
etcd_watches_initialised = 0;
|
|
||||||
start_etcd_watcher();
|
start_etcd_watcher();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -27,10 +27,19 @@ static void parse_http_headers(std::string & res, http_response_t *parsed);
|
|||||||
|
|
||||||
struct http_co_t
|
struct http_co_t
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_OPENSSL
|
||||||
|
static SSL_CTX *ssl_ctx = NULL;
|
||||||
|
SSL *ssl_cli = NULL;
|
||||||
|
BIO *ssl_rbio = NULL;
|
||||||
|
BIO *ssl_wbio = NULL;
|
||||||
|
std::vector<uint8_t> encrypted_out;
|
||||||
|
#endif
|
||||||
|
|
||||||
timerfd_manager_t *tfd;
|
timerfd_manager_t *tfd;
|
||||||
std::function<void(const http_response_t*)> response_callback;
|
std::function<void(const http_response_t*)> response_callback;
|
||||||
|
|
||||||
int request_timeout = 0;
|
int request_timeout = 0;
|
||||||
|
bool ssl = false;
|
||||||
std::string host;
|
std::string host;
|
||||||
std::string request;
|
std::string request;
|
||||||
std::string ws_outbox;
|
std::string ws_outbox;
|
||||||
@@ -46,7 +55,7 @@ struct http_co_t
|
|||||||
int timeout_id = -1;
|
int timeout_id = -1;
|
||||||
int epoll_events = 0;
|
int epoll_events = 0;
|
||||||
int sent = 0;
|
int sent = 0;
|
||||||
std::vector<char> rbuf;
|
std::vector<uint8_t> rbuf;
|
||||||
iovec read_iov, send_iov;
|
iovec read_iov, send_iov;
|
||||||
msghdr read_msg = { 0 }, send_msg = { 0 };
|
msghdr read_msg = { 0 }, send_msg = { 0 };
|
||||||
http_response_t parsed;
|
http_response_t parsed;
|
||||||
@@ -259,6 +268,12 @@ void http_response_t::parse_json_response(std::string & error, json11::Json & r)
|
|||||||
|
|
||||||
http_co_t::~http_co_t()
|
http_co_t::~http_co_t()
|
||||||
{
|
{
|
||||||
|
#ifdef WITH_OPENSSL
|
||||||
|
if (ssl_cli)
|
||||||
|
{
|
||||||
|
SSL_free(ssl_cli);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
close_connection();
|
close_connection();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -275,6 +290,16 @@ void http_co_t::close_connection()
|
|||||||
close(peer_fd);
|
close(peer_fd);
|
||||||
peer_fd = -1;
|
peer_fd = -1;
|
||||||
}
|
}
|
||||||
|
#ifdef WITH_OPENSSL
|
||||||
|
if (ssl_ctx)
|
||||||
|
{
|
||||||
|
// Frees context, client and bios at once
|
||||||
|
SSL_free(ssl_ctx);
|
||||||
|
ssl_rbio = NULL;
|
||||||
|
ssl_wbio = NULL;
|
||||||
|
ssl_cli = NULL;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
state = HTTP_CO_CLOSED;
|
state = HTTP_CO_CLOSED;
|
||||||
connected_host = "";
|
connected_host = "";
|
||||||
response = "";
|
response = "";
|
||||||
@@ -304,6 +329,27 @@ void http_co_t::start_connection()
|
|||||||
}
|
}
|
||||||
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||||
epoll_events = 0;
|
epoll_events = 0;
|
||||||
|
#ifdef WITH_OPENSSL
|
||||||
|
// https://wiki.openssl.org/index.php/Hostname_validation
|
||||||
|
if (ssl)
|
||||||
|
{
|
||||||
|
if (!ssl_ctx)
|
||||||
|
ssl_ctx = SSL_CTX_new(TLS_method());
|
||||||
|
ssl_rbio = BIO_new(BIO_s_mem());
|
||||||
|
ssl_wbio = BIO_new(BIO_s_mem());
|
||||||
|
ssl_cli = SSL_new(ssl_ctx);
|
||||||
|
if (!ssl_ctx || !ssl_cli || !ssl_rbio || !ssl_wbio)
|
||||||
|
{
|
||||||
|
parsed = { .error = std::string("openssl initialization failed: ")+ERR_get_error(NULL) };
|
||||||
|
response_callback(&parsed);
|
||||||
|
response_callback = NULL;
|
||||||
|
stackout();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
SSL_set_connect_state(ssl_cli);
|
||||||
|
SSL_set_bio(ssl_cli, ssl_rbio, ssl_wbio);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
// Finally call connect
|
// Finally call connect
|
||||||
int r = ::connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
int r = ::connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
||||||
if (r < 0 && errno != EINPROGRESS)
|
if (r < 0 && errno != EINPROGRESS)
|
||||||
@@ -432,11 +478,11 @@ void http_co_t::submit_read(bool check_timeout)
|
|||||||
stackin();
|
stackin();
|
||||||
int res;
|
int res;
|
||||||
again:
|
again:
|
||||||
if (rbuf.size() != READ_BUFFER_SIZE)
|
if (rbuf.capacity()-rbuf.size() < READ_BUFFER_SIZE)
|
||||||
{
|
{
|
||||||
rbuf.resize(READ_BUFFER_SIZE);
|
rbuf.reserve(rbuf.size() + READ_BUFFER_SIZE);
|
||||||
}
|
}
|
||||||
read_iov = { .iov_base = rbuf.data(), .iov_len = READ_BUFFER_SIZE };
|
read_iov = { .iov_base = rbuf.data()+rbuf.size(), .iov_len = READ_BUFFER_SIZE };
|
||||||
read_msg.msg_iov = &read_iov;
|
read_msg.msg_iov = &read_iov;
|
||||||
read_msg.msg_iovlen = 1;
|
read_msg.msg_iovlen = 1;
|
||||||
res = recvmsg(peer_fd, &read_msg, 0);
|
res = recvmsg(peer_fd, &read_msg, 0);
|
||||||
@@ -466,22 +512,177 @@ again:
|
|||||||
else if (res <= 0)
|
else if (res <= 0)
|
||||||
{
|
{
|
||||||
// < 0 means error, 0 means EOF
|
// < 0 means error, 0 means EOF
|
||||||
epoll_events = epoll_events & ~EPOLLIN;
|
on_read_error(res);
|
||||||
if (state == HTTP_CO_HEADERS_RECEIVED)
|
|
||||||
std::swap(parsed.body, response);
|
|
||||||
close_connection();
|
|
||||||
if (res < 0)
|
|
||||||
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
|
|
||||||
run_cb_and_clear();
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
response += std::string(rbuf.data(), res);
|
if (ssl)
|
||||||
|
handle_ssl_read(rbuf);
|
||||||
|
else
|
||||||
|
response += std::string((char*)rbuf.data(), res);
|
||||||
|
rbuf.resize(0);
|
||||||
handle_read();
|
handle_read();
|
||||||
}
|
}
|
||||||
stackout();
|
stackout();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void http_co_t::on_read_error(int res)
|
||||||
|
{
|
||||||
|
epoll_events = epoll_events & ~EPOLLIN;
|
||||||
|
if (state == HTTP_CO_HEADERS_RECEIVED)
|
||||||
|
std::swap(parsed.body, response);
|
||||||
|
close_connection();
|
||||||
|
if (res < 0)
|
||||||
|
parsed = { .error = std::string("recvmsg: ")+strerror(-res) };
|
||||||
|
run_cb_and_clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
int http_co_t::do_ssl_handshake()
|
||||||
|
{
|
||||||
|
stackin();
|
||||||
|
int r;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
r = SSL_do_handshake(ssl_cli);
|
||||||
|
if (r == SSL_ERROR_WANT_WRITE)
|
||||||
|
{
|
||||||
|
r = ssl_encrypt();
|
||||||
|
if (r >= 0)
|
||||||
|
submit_send();
|
||||||
|
else
|
||||||
|
{
|
||||||
|
r = -r;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (r == SSL_ERROR_WANT_READ || r == SSL_ERROR_NONE)
|
||||||
|
{
|
||||||
|
// OK or wait until we have more incoming data
|
||||||
|
r = 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stackout();
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enqueue outbound encrypted TLS data
|
||||||
|
int http_co_t::ssl_encrypt()
|
||||||
|
{
|
||||||
|
stackin();
|
||||||
|
int queued = 0;
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
if (encrypted_out.size() >= encrypted_out.capacity()/2)
|
||||||
|
encrypted_out.reserve(encrypted_out.size() < READ_BUFFER_SIZE ? encrypted_out.size() + READ_BUFFER_SIZE : 2*encrypted_out.size());
|
||||||
|
int r = BIO_read(ssl_wbio, encrypted_out.data()+encrypted_out.size(), encrypted_out.capacity()-encrypted_out.size());
|
||||||
|
if (r > 0)
|
||||||
|
{
|
||||||
|
queued += r;
|
||||||
|
encrypted_out.resize(encrypted_out.size()+r);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (!BIO_should_retry(ssl_wbio))
|
||||||
|
queued = r;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stackout();
|
||||||
|
return queued;
|
||||||
|
}
|
||||||
|
|
||||||
|
void http_co_t::handle_ssl_write()
|
||||||
|
{
|
||||||
|
stackin();
|
||||||
|
int r = 0;
|
||||||
|
while (sent < request.size())
|
||||||
|
{
|
||||||
|
if (!SSL_is_init_finished(ssl_cli))
|
||||||
|
{
|
||||||
|
if (do_ssl_handshake() != 0)
|
||||||
|
{
|
||||||
|
on_read_error(-EIO);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!SSL_is_init_finished(ssl_cli))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int n = SSL_write(ssl_cli, request.data()+sent, request.size()-sent);
|
||||||
|
if (n > 0)
|
||||||
|
sent += n;
|
||||||
|
else if (get_sslstatus(ssl_cli, n) == SSLSTATUS_FAIL)
|
||||||
|
{
|
||||||
|
on_read_error(-EIO);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
r = ssl_encrypt();
|
||||||
|
if (r >= 0)
|
||||||
|
submit_send();
|
||||||
|
else
|
||||||
|
{
|
||||||
|
on_read_error(-EIO);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stackout();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process incoming encrypted TLS data
|
||||||
|
void http_co_t::handle_ssl_read()
|
||||||
|
{
|
||||||
|
stackin();
|
||||||
|
int size = rbuf.size();
|
||||||
|
int done = 0;
|
||||||
|
while (done < size)
|
||||||
|
{
|
||||||
|
int n = BIO_write(ssl_rbio, rbuf.data()+done, size-done);
|
||||||
|
if (n > 0)
|
||||||
|
{
|
||||||
|
done += n;
|
||||||
|
}
|
||||||
|
if (n <= 0)
|
||||||
|
{
|
||||||
|
on_read_error(-EIO);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!SSL_is_init_finished(ssl_cli))
|
||||||
|
{
|
||||||
|
if (do_ssl_handshake() != 0)
|
||||||
|
{
|
||||||
|
on_read_error(-EIO);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!SSL_is_init_finished(ssl_cli))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if (response.capacity() - response.size() < READ_BUFFER_SIZE)
|
||||||
|
response.reserve(2*response.size() < response.size() + READ_BUFFER_SIZE ? response.size() + READ_BUFFER_SIZE : 2*response.size());
|
||||||
|
n = SSL_read(ssl_cli, response.data() + response.size(), READ_BUFFER_SIZE);
|
||||||
|
if (n <= 0)
|
||||||
|
{
|
||||||
|
n = SSL_get_error(ssl_cli, n);
|
||||||
|
if (n == SSL_ERROR_WANT_READ)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (n > 0);
|
||||||
|
}
|
||||||
|
if (done < size)
|
||||||
|
memmove(rbuf.data(), rbuf.data()+done, size-done);
|
||||||
|
rbuf.resize(size-done);
|
||||||
|
stackout();
|
||||||
|
}
|
||||||
|
|
||||||
bool http_co_t::handle_read()
|
bool http_co_t::handle_read()
|
||||||
{
|
{
|
||||||
stackin();
|
stackin();
|
||||||
|
@@ -9,10 +9,6 @@ osd_op_t::~osd_op_t()
|
|||||||
{
|
{
|
||||||
assert(!bs_op);
|
assert(!bs_op);
|
||||||
assert(!op_data);
|
assert(!op_data);
|
||||||
if (bitmap_buf)
|
|
||||||
{
|
|
||||||
free(bitmap_buf);
|
|
||||||
}
|
|
||||||
if (rmw_buf)
|
if (rmw_buf)
|
||||||
{
|
{
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
|
@@ -165,7 +165,6 @@ struct osd_op_t
|
|||||||
void *bitmap = NULL;
|
void *bitmap = NULL;
|
||||||
unsigned bitmap_len = 0;
|
unsigned bitmap_len = 0;
|
||||||
unsigned bmp_data = 0;
|
unsigned bmp_data = 0;
|
||||||
void *bitmap_buf = NULL;
|
|
||||||
void *rmw_buf = NULL;
|
void *rmw_buf = NULL;
|
||||||
osd_primary_op_data_t* op_data = NULL;
|
osd_primary_op_data_t* op_data = NULL;
|
||||||
std::function<void(osd_op_t*)> callback;
|
std::function<void(osd_op_t*)> callback;
|
||||||
|
@@ -369,7 +369,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
|||||||
op->buf = malloc_or_die(op->reply.hdr.retval);
|
op->buf = malloc_or_die(op->reply.hdr.retval);
|
||||||
cl->recv_list.push_back(op->buf, op->reply.hdr.retval);
|
cl->recv_list.push_back(op->buf, op->reply.hdr.retval);
|
||||||
}
|
}
|
||||||
else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.describe.result_bytes > 0)
|
else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.hdr.retval > 0)
|
||||||
{
|
{
|
||||||
delete cl->read_op;
|
delete cl->read_op;
|
||||||
cl->read_op = op;
|
cl->read_op = op;
|
||||||
|
@@ -84,12 +84,9 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
|
|||||||
{
|
{
|
||||||
for (int i = 0; i < cur_op->iov.count; i++)
|
for (int i = 0; i < cur_op->iov.count; i++)
|
||||||
{
|
{
|
||||||
if (cur_op->iov.buf[i].iov_len > 0)
|
assert(cur_op->iov.buf[i].iov_base);
|
||||||
{
|
to_send_list.push_back(cur_op->iov.buf[i]);
|
||||||
assert(cur_op->iov.buf[i].iov_base);
|
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
||||||
to_send_list.push_back(cur_op->iov.buf[i]);
|
|
||||||
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
|
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
|
||||||
|
@@ -149,7 +149,7 @@ public:
|
|||||||
" --dev_num N\n"
|
" --dev_num N\n"
|
||||||
" Use the specified device /dev/nbdN instead of automatic selection.\n"
|
" Use the specified device /dev/nbdN instead of automatic selection.\n"
|
||||||
" --foreground 1\n"
|
" --foreground 1\n"
|
||||||
" Stay in foreground, do not daemonize.\n",
|
" Stay in foreground, do not daemonize.n",
|
||||||
exe_name, exe_name, exe_name
|
exe_name, exe_name, exe_name
|
||||||
);
|
);
|
||||||
exit(0);
|
exit(0);
|
||||||
|
@@ -198,14 +198,13 @@ class osd_t
|
|||||||
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
|
void on_change_pg_history_hook(pool_id_t pool_id, pg_num_t pg_num);
|
||||||
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
|
void on_change_etcd_state_hook(std::map<std::string, etcd_kv_t> & changes);
|
||||||
void on_load_config_hook(json11::Json::object & changes);
|
void on_load_config_hook(json11::Json::object & changes);
|
||||||
void on_reload_config_hook(json11::Json::object & changes);
|
|
||||||
json11::Json on_load_pgs_checks_hook();
|
json11::Json on_load_pgs_checks_hook();
|
||||||
void on_load_pgs_hook(bool success);
|
void on_load_pgs_hook(bool success);
|
||||||
void bind_socket();
|
void bind_socket();
|
||||||
void acquire_lease();
|
void acquire_lease();
|
||||||
json11::Json get_osd_state();
|
json11::Json get_osd_state();
|
||||||
void create_osd_state();
|
void create_osd_state();
|
||||||
void renew_lease(bool reload);
|
void renew_lease();
|
||||||
void print_stats();
|
void print_stats();
|
||||||
void print_slow();
|
void print_slow();
|
||||||
void reset_stats();
|
void reset_stats();
|
||||||
|
@@ -70,7 +70,6 @@ void osd_t::init_cluster()
|
|||||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
||||||
st_cli.load_pgs_checks_hook = [this]() { return on_load_pgs_checks_hook(); };
|
st_cli.load_pgs_checks_hook = [this]() { return on_load_pgs_checks_hook(); };
|
||||||
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
|
st_cli.on_load_pgs_hook = [this](bool success) { on_load_pgs_hook(success); };
|
||||||
st_cli.on_reload_hook = [this]() { st_cli.load_global_config(); };
|
|
||||||
peering_state = OSD_LOADING_PGS;
|
peering_state = OSD_LOADING_PGS;
|
||||||
st_cli.load_global_config();
|
st_cli.load_global_config();
|
||||||
}
|
}
|
||||||
@@ -396,14 +395,6 @@ void osd_t::on_load_config_hook(json11::Json::object & global_config)
|
|||||||
parse_config(true);
|
parse_config(true);
|
||||||
bind_socket();
|
bind_socket();
|
||||||
acquire_lease();
|
acquire_lease();
|
||||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_reload_config_hook(cfg); };
|
|
||||||
}
|
|
||||||
|
|
||||||
void osd_t::on_reload_config_hook(json11::Json::object & global_config)
|
|
||||||
{
|
|
||||||
etcd_global_config = global_config;
|
|
||||||
parse_config(false);
|
|
||||||
renew_lease(true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Acquire lease
|
// Acquire lease
|
||||||
@@ -433,7 +424,7 @@ void osd_t::acquire_lease()
|
|||||||
);
|
);
|
||||||
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
|
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
|
||||||
{
|
{
|
||||||
renew_lease(false);
|
renew_lease();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -508,11 +499,11 @@ void osd_t::create_osd_state()
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Renew lease
|
// Renew lease
|
||||||
void osd_t::renew_lease(bool reload)
|
void osd_t::renew_lease()
|
||||||
{
|
{
|
||||||
st_cli.etcd_call("/lease/keepalive", json11::Json::object {
|
st_cli.etcd_call("/lease/keepalive", json11::Json::object {
|
||||||
{ "ID", etcd_lease_id }
|
{ "ID", etcd_lease_id }
|
||||||
}, st_cli.etcd_quick_timeout, 0, 0, [this, reload](std::string err, json11::Json data)
|
}, st_cli.etcd_quick_timeout, 0, 0, [this](std::string err, json11::Json data)
|
||||||
{
|
{
|
||||||
if (err == "" && data["result"]["TTL"].string_value() == "")
|
if (err == "" && data["result"]["TTL"].string_value() == "")
|
||||||
{
|
{
|
||||||
@@ -531,20 +522,15 @@ void osd_t::renew_lease(bool reload)
|
|||||||
force_stop(1);
|
force_stop(1);
|
||||||
}
|
}
|
||||||
// Retry
|
// Retry
|
||||||
tfd->set_timer(st_cli.etcd_quick_timeout, false, [this, reload](int timer_id)
|
tfd->set_timer(st_cli.etcd_quick_timeout, false, [this](int timer_id)
|
||||||
{
|
{
|
||||||
renew_lease(reload);
|
renew_lease();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
etcd_failed_attempts = 0;
|
etcd_failed_attempts = 0;
|
||||||
report_statistics();
|
report_statistics();
|
||||||
// Reload PGs
|
|
||||||
if (reload && run_primary)
|
|
||||||
{
|
|
||||||
st_cli.load_pgs();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -574,6 +560,7 @@ void osd_t::force_stop(int exitcode)
|
|||||||
|
|
||||||
json11::Json osd_t::on_load_pgs_checks_hook()
|
json11::Json osd_t::on_load_pgs_checks_hook()
|
||||||
{
|
{
|
||||||
|
assert(this->pgs.size() == 0);
|
||||||
json11::Json::array checks = {
|
json11::Json::array checks = {
|
||||||
json11::Json::object {
|
json11::Json::object {
|
||||||
{ "target", "LEASE" },
|
{ "target", "LEASE" },
|
||||||
|
@@ -220,7 +220,7 @@ struct __attribute__((__packed__)) osd_reply_rw_t
|
|||||||
// for reads: bitmap length
|
// for reads: bitmap length
|
||||||
uint32_t bitmap_len;
|
uint32_t bitmap_len;
|
||||||
uint32_t pad0;
|
uint32_t pad0;
|
||||||
// for reads and writes: object version
|
// for reads: object version
|
||||||
uint64_t version;
|
uint64_t version;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -87,7 +87,8 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
|||||||
// - op_data
|
// - op_data
|
||||||
1, sizeof(osd_primary_op_data_t) +
|
1, sizeof(osd_primary_op_data_t) +
|
||||||
// - stripes
|
// - stripes
|
||||||
stripe_count * sizeof(osd_rmw_stripe_t) +
|
// - resulting bitmap buffers
|
||||||
|
stripe_count * (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) +
|
||||||
chain_size * (
|
chain_size * (
|
||||||
// - copy of the chain
|
// - copy of the chain
|
||||||
sizeof(inode_t) +
|
sizeof(inode_t) +
|
||||||
@@ -109,12 +110,11 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
|||||||
op_data->pg_size = pg_it->second.pg_size;
|
op_data->pg_size = pg_it->second.pg_size;
|
||||||
cur_op->op_data = op_data;
|
cur_op->op_data = op_data;
|
||||||
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
|
split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes);
|
||||||
// Resulting bitmaps have to survive op_data and be freed with the op itself
|
// Allocate bitmaps along with stripes to avoid extra allocations and fragmentation
|
||||||
assert(!cur_op->bitmap_buf);
|
|
||||||
cur_op->bitmap_buf = calloc_or_die(1, clean_entry_bitmap_size * stripe_count);
|
|
||||||
for (int i = 0; i < stripe_count; i++)
|
for (int i = 0; i < stripe_count; i++)
|
||||||
{
|
{
|
||||||
op_data->stripes[i].bmp_buf = (uint8_t*)cur_op->bitmap_buf + clean_entry_bitmap_size * i;
|
op_data->stripes[i].bmp_buf = data_buf;
|
||||||
|
data_buf = (uint8_t*)data_buf + clean_entry_bitmap_size;
|
||||||
}
|
}
|
||||||
op_data->chain_size = chain_size;
|
op_data->chain_size = chain_size;
|
||||||
if (chain_size > 0)
|
if (chain_size > 0)
|
||||||
@@ -129,19 +129,16 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op)
|
|||||||
data_buf = (uint8_t*)data_buf + chain_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size);
|
data_buf = (uint8_t*)data_buf + chain_size * (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 0 : pg_it->second.pg_size);
|
||||||
// Copy chain
|
// Copy chain
|
||||||
int chain_num = 0;
|
int chain_num = 0;
|
||||||
op_data->read_chain[chain_num] = cur_op->req.rw.inode;
|
op_data->read_chain[chain_num++] = cur_op->req.rw.inode;
|
||||||
op_data->chain_states[chain_num] = NULL;
|
|
||||||
chain_num++;
|
|
||||||
auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode);
|
auto inode_it = st_cli.inode_config.find(cur_op->req.rw.inode);
|
||||||
while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id &&
|
while (inode_it != st_cli.inode_config.end() && inode_it->second.parent_id &&
|
||||||
INODE_POOL(inode_it->second.parent_id) == pg_it->second.pool_id &&
|
INODE_POOL(inode_it->second.parent_id) == pg_it->second.pool_id &&
|
||||||
// Check for loops
|
// Check for loops
|
||||||
inode_it->second.parent_id != cur_op->req.rw.inode)
|
inode_it->second.parent_id != cur_op->req.rw.inode)
|
||||||
{
|
{
|
||||||
op_data->read_chain[chain_num] = inode_it->second.parent_id;
|
op_data->read_chain[chain_num++] = inode_it->second.parent_id;
|
||||||
op_data->chain_states[chain_num] = NULL;
|
op_data->chain_states[chain_num++] = NULL;
|
||||||
inode_it = st_cli.inode_config.find(inode_it->second.parent_id);
|
inode_it = st_cli.inode_config.find(inode_it->second.parent_id);
|
||||||
chain_num++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pg_it->second.inflight++;
|
pg_it->second.inflight++;
|
||||||
@@ -647,6 +644,12 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
|
|||||||
else if (op_data->st == 4) goto resume_4;
|
else if (op_data->st == 4) goto resume_4;
|
||||||
else if (op_data->st == 5) goto resume_5;
|
else if (op_data->st == 5) goto resume_5;
|
||||||
assert(op_data->st == 0);
|
assert(op_data->st == 0);
|
||||||
|
// Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs
|
||||||
|
if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD))
|
||||||
|
{
|
||||||
|
finish_op(cur_op, -EBUSY);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (!check_write_queue(cur_op, pg))
|
if (!check_write_queue(cur_op, pg))
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
|
@@ -83,13 +83,11 @@ retry_1:
|
|||||||
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
||||||
op_data->stripes[0].read_start = 0;
|
op_data->stripes[0].read_start = 0;
|
||||||
op_data->stripes[0].read_end = bs_block_size;
|
op_data->stripes[0].read_end = bs_block_size;
|
||||||
assert(!cur_op->rmw_buf);
|
|
||||||
cur_op->rmw_buf = op_data->stripes[0].read_buf = memalign_or_die(MEM_ALIGNMENT, bs_block_size);
|
cur_op->rmw_buf = op_data->stripes[0].read_buf = memalign_or_die(MEM_ALIGNMENT, bs_block_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
assert(!cur_op->rmw_buf);
|
|
||||||
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
||||||
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size);
|
||||||
if (!cur_op->rmw_buf)
|
if (!cur_op->rmw_buf)
|
||||||
|
@@ -35,11 +35,6 @@
|
|||||||
#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
|
#define qdict_put_str(options, name, value) qdict_put_obj(options, name, QOBJECT(qstring_from_str(value)))
|
||||||
#define qobject_unref QDECREF
|
#define qobject_unref QDECREF
|
||||||
#endif
|
#endif
|
||||||
#if QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2 || QEMU_VERSION_MAJOR > 4
|
|
||||||
#include "sysemu/replay.h"
|
|
||||||
#else
|
|
||||||
#include "sysemu/sysemu.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "vitastor_c.h"
|
#include "vitastor_c.h"
|
||||||
|
|
||||||
@@ -53,13 +48,9 @@ void DSO_STAMP_FUN(void)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef struct VitastorFdData VitastorFdData;
|
|
||||||
|
|
||||||
typedef struct VitastorClient
|
typedef struct VitastorClient
|
||||||
{
|
{
|
||||||
void *proxy;
|
void *proxy;
|
||||||
int uring_eventfd;
|
|
||||||
|
|
||||||
void *watch;
|
void *watch;
|
||||||
char *config_path;
|
char *config_path;
|
||||||
char *etcd_host;
|
char *etcd_host;
|
||||||
@@ -76,24 +67,12 @@ typedef struct VitastorClient
|
|||||||
int rdma_gid_index;
|
int rdma_gid_index;
|
||||||
int rdma_mtu;
|
int rdma_mtu;
|
||||||
QemuMutex mutex;
|
QemuMutex mutex;
|
||||||
AioContext *ctx;
|
|
||||||
VitastorFdData **fds;
|
|
||||||
int fd_count, fd_alloc;
|
|
||||||
int bh_uring_scheduled;
|
|
||||||
|
|
||||||
uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
||||||
uint32_t last_bitmap_granularity;
|
uint32_t last_bitmap_granularity;
|
||||||
uint8_t *last_bitmap;
|
uint8_t *last_bitmap;
|
||||||
} VitastorClient;
|
} VitastorClient;
|
||||||
|
|
||||||
typedef struct VitastorFdData
|
|
||||||
{
|
|
||||||
VitastorClient *cli;
|
|
||||||
int fd;
|
|
||||||
IOHandler *fd_read, *fd_write;
|
|
||||||
void *opaque;
|
|
||||||
} VitastorFdData;
|
|
||||||
|
|
||||||
typedef struct VitastorRPC
|
typedef struct VitastorRPC
|
||||||
{
|
{
|
||||||
BlockDriverState *bs;
|
BlockDriverState *bs;
|
||||||
@@ -104,21 +83,10 @@ typedef struct VitastorRPC
|
|||||||
uint64_t inode, offset, len;
|
uint64_t inode, offset, len;
|
||||||
uint32_t bitmap_granularity;
|
uint32_t bitmap_granularity;
|
||||||
uint8_t *bitmap;
|
uint8_t *bitmap;
|
||||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
|
||||||
QEMUBH *bh;
|
|
||||||
#endif
|
|
||||||
} VitastorRPC;
|
} VitastorRPC;
|
||||||
|
|
||||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
|
||||||
typedef struct VitastorBH
|
|
||||||
{
|
|
||||||
VitastorClient *cli;
|
|
||||||
QEMUBH *bh;
|
|
||||||
} VitastorBH;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
||||||
static void vitastor_co_generic_cb(void *opaque, long retval);
|
static void vitastor_co_generic_bh_cb(void *opaque, long retval);
|
||||||
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
|
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version);
|
||||||
static void vitastor_close(BlockDriverState *bs);
|
static void vitastor_close(BlockDriverState *bs);
|
||||||
|
|
||||||
@@ -234,54 +202,6 @@ out:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
|
||||||
static void vitastor_uring_handler(void *opaque)
|
|
||||||
{
|
|
||||||
VitastorClient *client = (VitastorClient*)opaque;
|
|
||||||
qemu_mutex_lock(&client->mutex);
|
|
||||||
client->bh_uring_scheduled = 0;
|
|
||||||
vitastor_c_uring_handle_events(client->proxy);
|
|
||||||
qemu_mutex_unlock(&client->mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 8
|
|
||||||
static void vitastor_bh_uring_handler(void *opaque)
|
|
||||||
{
|
|
||||||
VitastorBH *vbh = opaque;
|
|
||||||
vitastor_bh_handler(vbh->cli);
|
|
||||||
qemu_bh_delete(vbh->bh);
|
|
||||||
free(vbh);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void vitastor_schedule_uring_handler(VitastorClient *client)
|
|
||||||
{
|
|
||||||
void *opaque = client;
|
|
||||||
if (client->uring_eventfd >= 0 && !client->bh_uring_scheduled)
|
|
||||||
{
|
|
||||||
client->bh_uring_scheduled = 1;
|
|
||||||
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
|
||||||
replay_bh_schedule_oneshot_event(client->ctx, vitastor_uring_handler, opaque);
|
|
||||||
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
|
||||||
aio_bh_schedule_oneshot(client->ctx, vitastor_uring_handler, opaque);
|
|
||||||
#else
|
|
||||||
VitastorBH *vbh = (VitastorBH*)malloc(sizeof(VitastorBH));
|
|
||||||
vbh->cli = client;
|
|
||||||
#if QEMU_VERSION_MAJOR >= 2
|
|
||||||
vbh->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_bh_uring_handler, vbh);
|
|
||||||
#else
|
|
||||||
vbh->bh = qemu_bh_new(vitastor_bh_uring_handler, vbh);
|
|
||||||
#endif
|
|
||||||
qemu_bh_schedule(vbh->bh);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
static void vitastor_schedule_uring_handler(VitastorClient *client)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||||
{
|
{
|
||||||
BlockDriverState *bs = task->bs;
|
BlockDriverState *bs = task->bs;
|
||||||
@@ -289,8 +209,7 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
|||||||
task->co = qemu_coroutine_self();
|
task->co = qemu_coroutine_self();
|
||||||
|
|
||||||
qemu_mutex_lock(&client->mutex);
|
qemu_mutex_lock(&client->mutex);
|
||||||
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_cb, task);
|
vitastor_c_watch_inode(client->proxy, client->image, vitastor_co_generic_bh_cb, task);
|
||||||
vitastor_schedule_uring_handler(client);
|
|
||||||
qemu_mutex_unlock(&client->mutex);
|
qemu_mutex_unlock(&client->mutex);
|
||||||
|
|
||||||
while (!task->complete)
|
while (!task->complete)
|
||||||
@@ -299,32 +218,14 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vitastor_aio_fd_read(void *fddv)
|
// FIXME: Fix thread safety of the driver - now it segfaults when iothread is enabled in QEMU
|
||||||
{
|
static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
|
||||||
VitastorFdData *fdd = (VitastorFdData*)fddv;
|
|
||||||
qemu_mutex_lock(&fdd->cli->mutex);
|
|
||||||
fdd->fd_read(fdd->opaque);
|
|
||||||
vitastor_schedule_uring_handler(fdd->cli);
|
|
||||||
qemu_mutex_unlock(&fdd->cli->mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void vitastor_aio_fd_write(void *fddv)
|
|
||||||
{
|
|
||||||
VitastorFdData *fdd = (VitastorFdData*)fddv;
|
|
||||||
qemu_mutex_lock(&fdd->cli->mutex);
|
|
||||||
fdd->fd_write(fdd->opaque);
|
|
||||||
vitastor_schedule_uring_handler(fdd->cli);
|
|
||||||
qemu_mutex_unlock(&fdd->cli->mutex);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_read, IOHandler *fd_write, void *opaque)
|
|
||||||
{
|
{
|
||||||
aio_set_fd_handler(ctx, fd,
|
aio_set_fd_handler(ctx, fd,
|
||||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
||||||
0 /*is_external*/,
|
0 /*is_external*/,
|
||||||
#endif
|
#endif
|
||||||
fd_read,
|
fd_read, fd_write,
|
||||||
fd_write,
|
|
||||||
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
||||||
NULL /*io_flush*/,
|
NULL /*io_flush*/,
|
||||||
#endif
|
#endif
|
||||||
@@ -337,53 +238,6 @@ static void universal_aio_set_fd_handler(AioContext *ctx, int fd, IOHandler *fd_
|
|||||||
opaque);
|
opaque);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vitastor_aio_set_fd_handler(void *vcli, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
|
|
||||||
{
|
|
||||||
VitastorClient *client = (VitastorClient*)vcli;
|
|
||||||
VitastorFdData *fdd = NULL;
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < client->fd_count; i++)
|
|
||||||
{
|
|
||||||
if (client->fds[i]->fd == fd)
|
|
||||||
{
|
|
||||||
if (fd_read || fd_write)
|
|
||||||
{
|
|
||||||
fdd = client->fds[i];
|
|
||||||
fdd->opaque = opaque;
|
|
||||||
fdd->fd_read = fd_read;
|
|
||||||
fdd->fd_write = fd_write;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (int j = i+1; j < client->fd_count; j++)
|
|
||||||
client->fds[j-1] = client->fds[j];
|
|
||||||
client->fd_count--;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((fd_read || fd_write) && !fdd)
|
|
||||||
{
|
|
||||||
fdd = (VitastorFdData*)malloc(sizeof(VitastorFdData));
|
|
||||||
fdd->cli = client;
|
|
||||||
fdd->fd = fd;
|
|
||||||
fdd->fd_read = fd_read;
|
|
||||||
fdd->fd_write = fd_write;
|
|
||||||
fdd->opaque = opaque;
|
|
||||||
if (client->fd_count >= client->fd_alloc)
|
|
||||||
{
|
|
||||||
client->fd_alloc = client->fd_alloc*2;
|
|
||||||
if (client->fd_alloc < 16)
|
|
||||||
client->fd_alloc = 16;
|
|
||||||
client->fds = (VitastorFdData**)realloc(client->fds, sizeof(VitastorFdData*) * client->fd_alloc);
|
|
||||||
}
|
|
||||||
client->fds[client->fd_count++] = fdd;
|
|
||||||
}
|
|
||||||
universal_aio_set_fd_handler(
|
|
||||||
client->ctx, fd, fd_read ? vitastor_aio_fd_read : NULL, fd_write ? vitastor_aio_fd_write : NULL, fdd
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||||
{
|
{
|
||||||
VitastorRPC task;
|
VitastorRPC task;
|
||||||
@@ -401,36 +255,10 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|||||||
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
||||||
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
||||||
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
||||||
client->ctx = bdrv_get_aio_context(bs);
|
client->proxy = vitastor_c_create_qemu(
|
||||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||||
client->proxy = vitastor_c_create_qemu_uring(
|
|
||||||
vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
|
||||||
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||||
);
|
);
|
||||||
if (!client->proxy)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "vitastor: failed to create io_uring: %s - I/O will be slower\n", strerror(errno));
|
|
||||||
client->uring_eventfd = -1;
|
|
||||||
#endif
|
|
||||||
client->proxy = vitastor_c_create_qemu(
|
|
||||||
vitastor_aio_set_fd_handler, client, client->config_path, client->etcd_host, client->etcd_prefix,
|
|
||||||
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
|
||||||
);
|
|
||||||
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 2
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
client->uring_eventfd = vitastor_c_uring_register_eventfd(client->proxy);
|
|
||||||
if (client->uring_eventfd < 0)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "vitastor: failed to create io_uring eventfd: %s\n", strerror(errno));
|
|
||||||
error_setg(errp, "failed to create io_uring eventfd");
|
|
||||||
vitastor_close(bs);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
universal_aio_set_fd_handler(client->ctx, client->uring_eventfd, vitastor_uring_handler, NULL, client);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||||
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
||||||
// Get image metadata (size and readonly flag) or just wait until the client is ready
|
// Get image metadata (size and readonly flag) or just wait until the client is ready
|
||||||
@@ -510,12 +338,6 @@ static void vitastor_close(BlockDriverState *bs)
|
|||||||
{
|
{
|
||||||
VitastorClient *client = bs->opaque;
|
VitastorClient *client = bs->opaque;
|
||||||
vitastor_c_destroy(client->proxy);
|
vitastor_c_destroy(client->proxy);
|
||||||
if (client->fds)
|
|
||||||
{
|
|
||||||
free(client->fds);
|
|
||||||
client->fds = NULL;
|
|
||||||
client->fd_alloc = client->fd_count = 0;
|
|
||||||
}
|
|
||||||
qemu_mutex_destroy(&client->mutex);
|
qemu_mutex_destroy(&client->mutex);
|
||||||
if (client->config_path)
|
if (client->config_path)
|
||||||
g_free(client->config_path);
|
g_free(client->config_path);
|
||||||
@@ -632,44 +454,25 @@ static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task)
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vitastor_co_generic_bh_cb(void *opaque)
|
static void vitastor_co_generic_bh_cb(void *opaque, long retval)
|
||||||
{
|
{
|
||||||
VitastorRPC *task = opaque;
|
VitastorRPC *task = opaque;
|
||||||
|
task->ret = retval;
|
||||||
task->complete = 1;
|
task->complete = 1;
|
||||||
if (qemu_coroutine_self() != task->co)
|
if (qemu_coroutine_self() != task->co)
|
||||||
{
|
{
|
||||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||||
aio_co_wake(task->co);
|
aio_co_wake(task->co);
|
||||||
#else
|
#else
|
||||||
#if QEMU_VERSION_MAJOR == 2
|
|
||||||
qemu_bh_delete(task->bh);
|
|
||||||
#endif
|
|
||||||
qemu_coroutine_enter(task->co, NULL);
|
qemu_coroutine_enter(task->co, NULL);
|
||||||
qemu_aio_release(task);
|
qemu_aio_release(task);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vitastor_co_generic_cb(void *opaque, long retval)
|
|
||||||
{
|
|
||||||
VitastorRPC *task = opaque;
|
|
||||||
task->ret = retval;
|
|
||||||
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
|
||||||
replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
|
||||||
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
|
||||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
|
||||||
#elif QEMU_VERSION_MAJOR >= 2
|
|
||||||
task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
|
||||||
qemu_bh_schedule(task->bh);
|
|
||||||
#else
|
|
||||||
task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
|
||||||
qemu_bh_schedule(task->bh);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
|
static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
|
||||||
{
|
{
|
||||||
vitastor_co_generic_cb(opaque, retval);
|
vitastor_co_generic_bh_cb(opaque, retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
||||||
@@ -688,7 +491,6 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
|||||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||||
qemu_mutex_lock(&client->mutex);
|
qemu_mutex_lock(&client->mutex);
|
||||||
vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
|
vitastor_c_read(client->proxy, inode, offset, bytes, iov->iov, iov->niov, vitastor_co_read_cb, &task);
|
||||||
vitastor_schedule_uring_handler(client);
|
|
||||||
qemu_mutex_unlock(&client->mutex);
|
qemu_mutex_unlock(&client->mutex);
|
||||||
|
|
||||||
while (!task.complete)
|
while (!task.complete)
|
||||||
@@ -721,8 +523,7 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
|||||||
|
|
||||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||||
qemu_mutex_lock(&client->mutex);
|
qemu_mutex_lock(&client->mutex);
|
||||||
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_cb, &task);
|
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
||||||
vitastor_schedule_uring_handler(client);
|
|
||||||
qemu_mutex_unlock(&client->mutex);
|
qemu_mutex_unlock(&client->mutex);
|
||||||
|
|
||||||
while (!task.complete)
|
while (!task.complete)
|
||||||
@@ -740,6 +541,7 @@ static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitma
|
|||||||
VitastorRPC *task = opaque;
|
VitastorRPC *task = opaque;
|
||||||
VitastorClient *client = task->bs->opaque;
|
VitastorClient *client = task->bs->opaque;
|
||||||
task->ret = retval;
|
task->ret = retval;
|
||||||
|
task->complete = 1;
|
||||||
if (retval >= 0)
|
if (retval >= 0)
|
||||||
{
|
{
|
||||||
task->bitmap = bitmap;
|
task->bitmap = bitmap;
|
||||||
@@ -751,17 +553,15 @@ static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitma
|
|||||||
client->last_bitmap = bitmap;
|
client->last_bitmap = bitmap;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if QEMU_VERSION_MAJOR > 4 || QEMU_VERSION_MAJOR == 4 && QEMU_VERSION_MINOR >= 2
|
if (qemu_coroutine_self() != task->co)
|
||||||
replay_bh_schedule_oneshot_event(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
{
|
||||||
#elif QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 8
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||||
aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
aio_co_wake(task->co);
|
||||||
#elif QEMU_VERSION_MAJOR >= 2
|
|
||||||
task->bh = aio_bh_new(bdrv_get_aio_context(task->bs), vitastor_co_generic_bh_cb, opaque);
|
|
||||||
qemu_bh_schedule(task->bh);
|
|
||||||
#else
|
#else
|
||||||
task->bh = qemu_bh_new(vitastor_co_generic_bh_cb, opaque);
|
qemu_coroutine_enter(task->co, NULL);
|
||||||
qemu_bh_schedule(task->bh);
|
qemu_aio_release(task);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int coroutine_fn vitastor_co_block_status(
|
static int coroutine_fn vitastor_co_block_status(
|
||||||
@@ -802,7 +602,6 @@ static int coroutine_fn vitastor_co_block_status(
|
|||||||
task.bitmap = client->last_bitmap = NULL;
|
task.bitmap = client->last_bitmap = NULL;
|
||||||
qemu_mutex_lock(&client->mutex);
|
qemu_mutex_lock(&client->mutex);
|
||||||
vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
|
||||||
vitastor_schedule_uring_handler(client);
|
|
||||||
qemu_mutex_unlock(&client->mutex);
|
qemu_mutex_unlock(&client->mutex);
|
||||||
while (!task.complete)
|
while (!task.complete)
|
||||||
{
|
{
|
||||||
@@ -888,8 +687,7 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
|
|||||||
vitastor_co_init_task(bs, &task);
|
vitastor_co_init_task(bs, &task);
|
||||||
|
|
||||||
qemu_mutex_lock(&client->mutex);
|
qemu_mutex_lock(&client->mutex);
|
||||||
vitastor_c_sync(client->proxy, vitastor_co_generic_cb, &task);
|
vitastor_c_sync(client->proxy, vitastor_co_generic_bh_cb, &task);
|
||||||
vitastor_schedule_uring_handler(client);
|
|
||||||
qemu_mutex_unlock(&client->mutex);
|
qemu_mutex_unlock(&client->mutex);
|
||||||
|
|
||||||
while (!task.complete)
|
while (!task.complete)
|
||||||
|
@@ -2,12 +2,9 @@
|
|||||||
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include <sys/eventfd.h>
|
|
||||||
|
|
||||||
#include "ringloop.h"
|
#include "ringloop.h"
|
||||||
|
|
||||||
ring_loop_t::ring_loop_t(int qd)
|
ring_loop_t::ring_loop_t(int qd)
|
||||||
@@ -35,10 +32,6 @@ ring_loop_t::~ring_loop_t()
|
|||||||
free(free_ring_data);
|
free(free_ring_data);
|
||||||
free(ring_datas);
|
free(ring_datas);
|
||||||
io_uring_queue_exit(&ring);
|
io_uring_queue_exit(&ring);
|
||||||
if (ring_eventfd)
|
|
||||||
{
|
|
||||||
close(ring_eventfd);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ring_loop_t::register_consumer(ring_consumer_t *consumer)
|
void ring_loop_t::register_consumer(ring_consumer_t *consumer)
|
||||||
@@ -66,16 +59,6 @@ void ring_loop_t::unregister_consumer(ring_consumer_t *consumer)
|
|||||||
|
|
||||||
void ring_loop_t::loop()
|
void ring_loop_t::loop()
|
||||||
{
|
{
|
||||||
if (ring_eventfd >= 0)
|
|
||||||
{
|
|
||||||
// Reset eventfd counter
|
|
||||||
uint64_t ctr = 0;
|
|
||||||
int r = read(ring_eventfd, &ctr, 8);
|
|
||||||
if (r < 0 && errno != EAGAIN && errno != EINTR)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "Error resetting eventfd: %s\n", strerror(errno));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
struct io_uring_cqe *cqe;
|
struct io_uring_cqe *cqe;
|
||||||
while (!io_uring_peek_cqe(&ring, &cqe))
|
while (!io_uring_peek_cqe(&ring, &cqe))
|
||||||
{
|
{
|
||||||
@@ -94,7 +77,7 @@ void ring_loop_t::loop()
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Warning: empty callback in SQE\n");
|
printf("Warning: empty callback in SQE\n");
|
||||||
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
||||||
}
|
}
|
||||||
io_uring_cqe_seen(&ring, cqe);
|
io_uring_cqe_seen(&ring, cqe);
|
||||||
@@ -144,24 +127,3 @@ int ring_loop_t::sqes_left()
|
|||||||
}
|
}
|
||||||
return left;
|
return left;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ring_loop_t::register_eventfd()
|
|
||||||
{
|
|
||||||
if (ring_eventfd >= 0)
|
|
||||||
{
|
|
||||||
return ring_eventfd;
|
|
||||||
}
|
|
||||||
ring_eventfd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
|
|
||||||
if (ring_eventfd < 0)
|
|
||||||
{
|
|
||||||
return -errno;
|
|
||||||
}
|
|
||||||
int r = io_uring_register_eventfd(&ring, ring_eventfd);
|
|
||||||
if (r < 0)
|
|
||||||
{
|
|
||||||
close(ring_eventfd);
|
|
||||||
ring_eventfd = -1;
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
return ring_eventfd;
|
|
||||||
}
|
|
||||||
|
@@ -126,13 +126,11 @@ class ring_loop_t
|
|||||||
unsigned free_ring_data_ptr;
|
unsigned free_ring_data_ptr;
|
||||||
bool loop_again;
|
bool loop_again;
|
||||||
struct io_uring ring;
|
struct io_uring ring;
|
||||||
int ring_eventfd = -1;
|
|
||||||
public:
|
public:
|
||||||
ring_loop_t(int qd);
|
ring_loop_t(int qd);
|
||||||
~ring_loop_t();
|
~ring_loop_t();
|
||||||
void register_consumer(ring_consumer_t *consumer);
|
void register_consumer(ring_consumer_t *consumer);
|
||||||
void unregister_consumer(ring_consumer_t *consumer);
|
void unregister_consumer(ring_consumer_t *consumer);
|
||||||
int register_eventfd();
|
|
||||||
|
|
||||||
inline struct io_uring_sqe* get_sqe()
|
inline struct io_uring_sqe* get_sqe()
|
||||||
{
|
{
|
||||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
|||||||
|
|
||||||
Name: Vitastor
|
Name: Vitastor
|
||||||
Description: Vitastor client library
|
Description: Vitastor client library
|
||||||
Version: 0.9.5
|
Version: 0.9.2
|
||||||
Libs: -L${libdir} -lvitastor_client
|
Libs: -L${libdir} -lvitastor_client
|
||||||
Cflags: -I${includedir}
|
Cflags: -I${includedir}
|
||||||
|
|
||||||
|
@@ -5,7 +5,6 @@
|
|||||||
// Also acts as a C-C++ proxy for the QEMU driver (QEMU headers don't compile with g++)
|
// Also acts as a C-C++ proxy for the QEMU driver (QEMU headers don't compile with g++)
|
||||||
|
|
||||||
#include <sys/epoll.h>
|
#include <sys/epoll.h>
|
||||||
#include <sys/eventfd.h>
|
|
||||||
|
|
||||||
#include "ringloop.h"
|
#include "ringloop.h"
|
||||||
#include "epoll_manager.h"
|
#include "epoll_manager.h"
|
||||||
@@ -26,7 +25,6 @@ struct vitastor_c
|
|||||||
epoll_manager_t *epmgr = NULL;
|
epoll_manager_t *epmgr = NULL;
|
||||||
timerfd_manager_t *tfd = NULL;
|
timerfd_manager_t *tfd = NULL;
|
||||||
cluster_client_t *cli = NULL;
|
cluster_client_t *cli = NULL;
|
||||||
int uring_eventfd = -1;
|
|
||||||
|
|
||||||
QEMUSetFDHandler *aio_set_fd_handler = NULL;
|
QEMUSetFDHandler *aio_set_fd_handler = NULL;
|
||||||
void *aio_ctx = NULL;
|
void *aio_ctx = NULL;
|
||||||
@@ -72,8 +70,14 @@ static void vitastor_c_write_handler(void *opaque)
|
|||||||
data->callback(data->fd, EPOLLOUT);
|
data->callback(data->fd, EPOLLOUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
static vitastor_c *vitastor_c_create_qemu_common(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context)
|
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
||||||
|
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||||
|
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
||||||
{
|
{
|
||||||
|
json11::Json cfg_json = vitastor_c_common_config(
|
||||||
|
config_path, etcd_host, etcd_prefix, use_rdma,
|
||||||
|
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
||||||
|
);
|
||||||
vitastor_c *self = new vitastor_c;
|
vitastor_c *self = new vitastor_c;
|
||||||
self->aio_set_fd_handler = aio_set_fd_handler;
|
self->aio_set_fd_handler = aio_set_fd_handler;
|
||||||
self->aio_ctx = aio_context;
|
self->aio_ctx = aio_context;
|
||||||
@@ -91,77 +95,24 @@ static vitastor_c *vitastor_c_create_qemu_common(QEMUSetFDHandler *aio_set_fd_ha
|
|||||||
self->aio_set_fd_handler(self->aio_ctx, fd, false, NULL, NULL, NULL, NULL);
|
self->aio_set_fd_handler(self->aio_ctx, fd, false, NULL, NULL, NULL, NULL);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
|
||||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
|
||||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
|
||||||
{
|
|
||||||
json11::Json cfg_json = vitastor_c_common_config(
|
|
||||||
config_path, etcd_host, etcd_prefix, use_rdma,
|
|
||||||
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
|
||||||
);
|
|
||||||
auto self = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
|
|
||||||
self->cli = new cluster_client_t(NULL, self->tfd, cfg_json);
|
self->cli = new cluster_client_t(NULL, self->tfd, cfg_json);
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
|
||||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
|
||||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
|
||||||
{
|
|
||||||
ring_loop_t *ringloop = NULL;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
ringloop = new ring_loop_t(512);
|
|
||||||
}
|
|
||||||
catch (std::exception & e)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
json11::Json cfg_json = vitastor_c_common_config(
|
|
||||||
config_path, etcd_host, etcd_prefix, use_rdma,
|
|
||||||
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
|
||||||
);
|
|
||||||
auto self = vitastor_c_create_qemu_common(aio_set_fd_handler, aio_context);
|
|
||||||
self->ringloop = ringloop;
|
|
||||||
self->cli = new cluster_client_t(self->ringloop, self->tfd, cfg_json);
|
|
||||||
return self;
|
|
||||||
}
|
|
||||||
|
|
||||||
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
|
||||||
{
|
{
|
||||||
ring_loop_t *ringloop = NULL;
|
|
||||||
try
|
|
||||||
{
|
|
||||||
ringloop = new ring_loop_t(512);
|
|
||||||
}
|
|
||||||
catch (std::exception & e)
|
|
||||||
{
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
json11::Json cfg_json = vitastor_c_common_config(
|
json11::Json cfg_json = vitastor_c_common_config(
|
||||||
config_path, etcd_host, etcd_prefix, use_rdma,
|
config_path, etcd_host, etcd_prefix, use_rdma,
|
||||||
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
rdma_device, rdma_port_num, rdma_gid_index, rdma_mtu, log_level
|
||||||
);
|
);
|
||||||
vitastor_c *self = new vitastor_c;
|
vitastor_c *self = new vitastor_c;
|
||||||
self->ringloop = ringloop;
|
self->ringloop = new ring_loop_t(512);
|
||||||
self->epmgr = new epoll_manager_t(self->ringloop);
|
self->epmgr = new epoll_manager_t(self->ringloop);
|
||||||
self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json);
|
self->cli = new cluster_client_t(self->ringloop, self->epmgr->tfd, cfg_json);
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
int vitastor_c_uring_register_eventfd(vitastor_c *client)
|
|
||||||
{
|
|
||||||
if (!client->ringloop)
|
|
||||||
{
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
return client->ringloop->register_eventfd();
|
|
||||||
}
|
|
||||||
|
|
||||||
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len)
|
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len)
|
||||||
{
|
{
|
||||||
json11::Json::object cfg;
|
json11::Json::object cfg;
|
||||||
@@ -215,11 +166,6 @@ void vitastor_c_uring_wait_events(vitastor_c *client)
|
|||||||
client->ringloop->wait();
|
client->ringloop->wait();
|
||||||
}
|
}
|
||||||
|
|
||||||
int vitastor_c_uring_has_work(vitastor_c *client)
|
|
||||||
{
|
|
||||||
return client->ringloop->has_work();
|
|
||||||
}
|
|
||||||
|
|
||||||
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
||||||
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque)
|
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque)
|
||||||
{
|
{
|
||||||
|
@@ -7,7 +7,7 @@
|
|||||||
#define VITASTOR_QEMU_PROXY_H
|
#define VITASTOR_QEMU_PROXY_H
|
||||||
|
|
||||||
// C API wrapper version
|
// C API wrapper version
|
||||||
#define VITASTOR_C_API_VERSION 2
|
#define VITASTOR_C_API_VERSION 1
|
||||||
|
|
||||||
#ifndef POOL_ID_BITS
|
#ifndef POOL_ID_BITS
|
||||||
#define POOL_ID_BITS 16
|
#define POOL_ID_BITS 16
|
||||||
@@ -34,19 +34,14 @@ typedef void QEMUSetFDHandler(void *ctx, int fd, int is_external, IOHandler *fd_
|
|||||||
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
||||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
||||||
vitastor_c *vitastor_c_create_qemu_uring(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
|
|
||||||
const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
|
||||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
|
||||||
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
|
||||||
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
|
||||||
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);
|
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);
|
||||||
void vitastor_c_destroy(vitastor_c *client);
|
void vitastor_c_destroy(vitastor_c *client);
|
||||||
int vitastor_c_is_ready(vitastor_c *client);
|
int vitastor_c_is_ready(vitastor_c *client);
|
||||||
int vitastor_c_uring_register_eventfd(vitastor_c *client);
|
|
||||||
void vitastor_c_uring_wait_ready(vitastor_c *client);
|
void vitastor_c_uring_wait_ready(vitastor_c *client);
|
||||||
void vitastor_c_uring_handle_events(vitastor_c *client);
|
void vitastor_c_uring_handle_events(vitastor_c *client);
|
||||||
void vitastor_c_uring_wait_events(vitastor_c *client);
|
void vitastor_c_uring_wait_events(vitastor_c *client);
|
||||||
int vitastor_c_uring_has_work(vitastor_c *client);
|
|
||||||
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
||||||
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
|
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
|
||||||
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
|
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
|
||||||
|
@@ -36,6 +36,9 @@ for i in $(seq 1 $OSD_COUNT); do
|
|||||||
start_osd $i
|
start_osd $i
|
||||||
done
|
done
|
||||||
|
|
||||||
|
cd mon
|
||||||
|
npm install
|
||||||
|
cd ..
|
||||||
(while true; do node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 || true; done) &>./testdata/mon.log &
|
(while true; do node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 || true; done) &>./testdata/mon.log &
|
||||||
MON_PID=$!
|
MON_PID=$!
|
||||||
|
|
||||||
@@ -82,9 +85,7 @@ wait_up()
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
if [[ $OSD_COUNT -gt 0 ]]; then
|
wait_up 60
|
||||||
wait_up 60
|
|
||||||
fi
|
|
||||||
|
|
||||||
try_reweight()
|
try_reweight()
|
||||||
{
|
{
|
||||||
|
@@ -16,35 +16,29 @@ SCHEME=ec ./test_change_pg_count.sh
|
|||||||
|
|
||||||
./test_etcd_fail.sh
|
./test_etcd_fail.sh
|
||||||
|
|
||||||
|
./test_failure_domain.sh
|
||||||
|
|
||||||
./test_interrupted_rebalance.sh
|
./test_interrupted_rebalance.sh
|
||||||
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||||
SCHEME=ec ./test_interrupted_rebalance.sh
|
SCHEME=ec ./test_interrupted_rebalance.sh
|
||||||
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
|
||||||
|
|
||||||
./test_failure_domain.sh
|
|
||||||
|
|
||||||
./test_snapshot.sh
|
|
||||||
SCHEME=ec ./test_snapshot.sh
|
|
||||||
|
|
||||||
./test_minsize_1.sh
|
./test_minsize_1.sh
|
||||||
|
|
||||||
./test_move_reappear.sh
|
./test_move_reappear.sh
|
||||||
|
|
||||||
./test_rm.sh
|
|
||||||
|
|
||||||
./test_snapshot_chain.sh
|
|
||||||
SCHEME=ec ./test_snapshot_chain.sh
|
|
||||||
|
|
||||||
./test_snapshot_down.sh
|
|
||||||
SCHEME=ec ./test_snapshot_down.sh
|
|
||||||
|
|
||||||
./test_splitbrain.sh
|
|
||||||
|
|
||||||
./test_rebalance_verify.sh
|
./test_rebalance_verify.sh
|
||||||
IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
||||||
SCHEME=ec ./test_rebalance_verify.sh
|
SCHEME=ec ./test_rebalance_verify.sh
|
||||||
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
|
||||||
|
|
||||||
|
./test_rm.sh
|
||||||
|
|
||||||
|
./test_snapshot.sh
|
||||||
|
SCHEME=ec ./test_snapshot.sh
|
||||||
|
|
||||||
|
./test_splitbrain.sh
|
||||||
|
|
||||||
./test_write.sh
|
./test_write.sh
|
||||||
SCHEME=xor ./test_write.sh
|
SCHEME=xor ./test_write.sh
|
||||||
|
|
||||||
|
@@ -15,6 +15,9 @@ $ETCDCTL put /vitastor/osd/stats/7 '{"host":"host4","size":1073741824,"time":"'$
|
|||||||
$ETCDCTL put /vitastor/osd/stats/8 '{"host":"host4","size":1073741824,"time":"'$TIME'"}'
|
$ETCDCTL put /vitastor/osd/stats/8 '{"host":"host4","size":1073741824,"time":"'$TIME'"}'
|
||||||
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":4,"failure_domain":"rack"}}'
|
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":2,"pg_minsize":1,"pg_count":4,"failure_domain":"rack"}}'
|
||||||
|
|
||||||
|
cd mon
|
||||||
|
npm install
|
||||||
|
cd ..
|
||||||
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
node mon/mon-main.js --etcd_url $ETCD_URL --etcd_prefix "/vitastor" &>./testdata/mon.log &
|
||||||
MON_PID=$!
|
MON_PID=$!
|
||||||
|
|
||||||
|
@@ -1,48 +0,0 @@
|
|||||||
#!/bin/bash -ex
|
|
||||||
|
|
||||||
. `dirname $0`/run_3osds.sh
|
|
||||||
check_qemu
|
|
||||||
|
|
||||||
# Test multiple snapshots
|
|
||||||
|
|
||||||
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 32M testchain
|
|
||||||
|
|
||||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
|
||||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
|
||||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
|
||||||
|
|
||||||
for i in {1..10}; do
|
|
||||||
# Create a snapshot
|
|
||||||
build/src/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@$i
|
|
||||||
# Check that the new snapshot is see-through
|
|
||||||
qemu-img convert -p \
|
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
|
||||||
-O raw ./testdata/check.bin
|
|
||||||
cmp ./testdata/check.bin ./testdata/mirror.bin
|
|
||||||
# Write something to it
|
|
||||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
|
||||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -rw=randwrite \
|
|
||||||
-randrepeat=$((i <= 2)) -buffer_pattern=0x$((10+i))$((10+i))$((10+i))$((10+i)) \
|
|
||||||
-etcd=$ETCD_URL -image=testchain -number_ios=1024 -mirror_file=./testdata/mirror.bin
|
|
||||||
# Check the new content
|
|
||||||
qemu-img convert -p \
|
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
|
||||||
-O raw ./testdata/layer1.bin
|
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
|
||||||
done
|
|
||||||
|
|
||||||
build/src/vitastor-cli --etcd_address $ETCD_URL rm testchain@1 testchain@9
|
|
||||||
|
|
||||||
# Check the final image
|
|
||||||
qemu-img convert -p \
|
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
|
||||||
-O raw ./testdata/layer1.bin
|
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
|
||||||
|
|
||||||
# Check the last remaining snapshot
|
|
||||||
qemu-img convert -p \
|
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain@10" \
|
|
||||||
-O raw ./testdata/layer0.bin
|
|
||||||
cmp ./testdata/layer0.bin ./testdata/check.bin
|
|
||||||
|
|
||||||
format_green OK
|
|
@@ -1,37 +0,0 @@
|
|||||||
#!/bin/bash -ex
|
|
||||||
|
|
||||||
. `dirname $0`/run_3osds.sh
|
|
||||||
check_qemu
|
|
||||||
|
|
||||||
# Test merge to child (without "inverse rename" optimisation)
|
|
||||||
|
|
||||||
build/src/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain
|
|
||||||
|
|
||||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
|
||||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
|
|
||||||
-etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin
|
|
||||||
|
|
||||||
# Create a snapshot
|
|
||||||
build/src/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0
|
|
||||||
|
|
||||||
# Write something to it
|
|
||||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
|
||||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \
|
|
||||||
-randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin
|
|
||||||
|
|
||||||
# Check the new content
|
|
||||||
qemu-img convert -p \
|
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
|
||||||
-O raw ./testdata/layer1.bin
|
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
|
||||||
|
|
||||||
# Merge
|
|
||||||
build/src/vitastor-cli --etcd_address $ETCD_URL rm testchain@0
|
|
||||||
|
|
||||||
# Check the final image
|
|
||||||
qemu-img convert -p \
|
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \
|
|
||||||
-O raw ./testdata/layer1.bin
|
|
||||||
cmp ./testdata/layer1.bin ./testdata/mirror.bin
|
|
||||||
|
|
||||||
format_green OK
|
|
Reference in New Issue
Block a user