Compare commits

..

1 Commits

196 changed files with 1854 additions and 12511 deletions

View File

@@ -1,36 +0,0 @@
FROM node:16-bullseye
WORKDIR /root
ADD ./docker/vitastor.gpg /etc/apt/trusted.gpg.d
RUN echo 'deb http://deb.debian.org/debian bullseye-backports main' >> /etc/apt/sources.list; \
echo 'deb http://vitastor.io/debian bullseye main' >> /etc/apt/sources.list; \
echo >> /etc/apt/preferences; \
echo 'Package: *' >> /etc/apt/preferences; \
echo 'Pin: release a=bullseye-backports' >> /etc/apt/preferences; \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
echo >> /etc/apt/preferences; \
echo 'Package: *' >> /etc/apt/preferences; \
echo 'Pin: origin "vitastor.io"' >> /etc/apt/preferences; \
echo 'Pin-Priority: 1000' >> /etc/apt/preferences; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
RUN apt-get -y install jq lp-solve sudo
RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
RUN set -ex; \
mkdir qemu-build; \
cd qemu-build; \
dpkg-source -x /root/qemu*.dsc; \
cd qemu*/; \
debian/rules configure-qemu || debian/rules b/configure-stamp; \
cd b/qemu; \
make -j8 config-poison.h || true; \
make -j8 qapi/qapi-builtin-types.h

View File

@@ -1,19 +0,0 @@
FROM git.yourcmc.ru/vitalif/vitastor/buildenv
ADD . /root/vitastor
RUN set -e -x; \
mkdir -p /root/fio-build/; \
cd /root/fio-build/; \
dpkg-source -x /root/fio*.dsc; \
cd /root/vitastor; \
ln -s /root/fio-build/fio-*/ ./fio; \
ln -s /root/qemu-build/qemu-*/ ./qemu; \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
cd mon; \
npm install; \
cd ..; \
mkdir build; \
cd build; \
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
make -j16

View File

@@ -1,840 +0,0 @@
name: Test
on:
push:
branches:
- '*'
paths:
- '.gitea/**'
- 'src/**'
- 'mon/**'
- 'json11'
- 'cpp-btree'
- 'tests/**'
env:
BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
OSD_ARGS: '--etcd_quick_timeout 2000'
concurrency:
group: ci-${{ github.ref }}
cancel-in-progress: true
jobs:
buildenv:
runs-on: ubuntu-latest
container: git.yourcmc.ru/vitalif/gitea-ci-dind
steps:
- uses: actions/checkout@v3
- name: Build and push
run: |
set -ex
if ! docker manifest inspect $BUILDENV_IMAGE >/dev/null; then
docker build -t $BUILDENV_IMAGE -f .gitea/workflows/buildenv.Dockerfile .
docker login git.yourcmc.ru -u vitalif -p "${{secrets.TOKEN}}"
docker push $BUILDENV_IMAGE
fi
build:
runs-on: ubuntu-latest
needs: buildenv
container: git.yourcmc.ru/vitalif/gitea-ci-dind
steps:
- uses: actions/checkout@v3
with:
submodules: true
- name: Build and push
run: |
set -ex
if ! docker manifest inspect $TEST_IMAGE:$GITHUB_SHA >/dev/null; then
docker build -t $TEST_IMAGE:$GITHUB_SHA -f .gitea/workflows/test.Dockerfile .
docker login git.yourcmc.ru -u vitalif -p "${{secrets.TOKEN}}"
docker push $TEST_IMAGE:$GITHUB_SHA
fi
make_test:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
# leak sanitizer sometimes crashes
- run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test
test_add_osd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_add_osd.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_cas:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_cas.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_size:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_size.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_create_nomaxid:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_create_nomaxid.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_etcd_fail:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_etcd_fail.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_failure_domain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_failure_domain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_minsize_1:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_minsize_1.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_move_reappear:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_move_reappear.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_splitbrain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_splitbrain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_no_same:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write_no_same.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_pg_size_2:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: PG_SIZE=2 /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_32k_dmj:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_32k_dj:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: TEST_NAME=csum_32k_dj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_32k:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: TEST_NAME=csum_32k OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_4k_dmj:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_4k_dj:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_csum_4k:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_zero_osd_2:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: ZERO_OSD=2 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_3:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=3 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done

View File

@@ -1,75 +0,0 @@
#!/usr/bin/perl
use strict;
for my $line (<>)
{
if ($line =~ /\.\/(test_[^\.]+)/s)
{
chomp $line;
my $base_name = $1;
my $test_name = $base_name;
my $timeout = 3;
if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_add_osd' ||
$test_name eq 'test_interrupted_rebalance' || $test_name eq 'test_rebalance_verify')
{
$timeout = 10;
}
while ($line =~ /([^\s=]+)=(\S+)/gs)
{
if ($1 eq 'TEST_NAME')
{
$test_name = $base_name.'_'.$2;
last;
}
elsif ($1 eq 'SCHEME' && $2 eq 'ec')
{
$test_name .= '_ec';
}
elsif ($1 eq 'SCHEME' && $2 eq 'xor')
{
$test_name .= '_xor';
}
elsif ($1 eq 'IMMEDIATE_COMMIT')
{
$test_name .= '_imm';
}
else
{
$test_name .= '_'.lc($1).'_'.$2;
}
}
$line =~ s!\./test_!/root/vitastor/tests/test_!;
# Gitea CI doesn't support artifacts yet, lol
#- name: Upload results
# uses: actions/upload-artifact\@v3
# if: always()
# with:
# name: ${test_name}_result
# path: |
# /root/vitastor/testdata
# !/root/vitastor/testdata/*.bin
# retention-days: 5
print <<"EOF"
$test_name:
runs-on: ubuntu-latest
needs: build
container: \${{env.TEST_IMAGE}}:\${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: $timeout
run: $line
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- \$i --------"
cat \$i
echo ""
done
EOF
;
}
}

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "0.9.3")
set(VERSION "0.8.6")
add_subdirectory(src)

View File

@@ -15,7 +15,7 @@ Vitastor архитектурно похож на Ceph, что означает
и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
избыточности - репликацией или с произвольными кодами коррекции ошибок.
Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.

View File

@@ -14,8 +14,8 @@ Vitastor is architecturally similar to Ceph which means strong consistency,
primary-replication, symmetric clustering and automatic data distribution over any
number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network,
supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
or internal systems of public clouds.

View File

@@ -1,4 +1,4 @@
VERSION ?= v0.9.3
VERSION ?= v0.8.6
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v0.9.3
image: vitalif/vitastor-csi:v0.8.6
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -116,7 +116,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v0.9.3
image: vitalif/vitastor-csi:v0.8.6
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -4,10 +4,25 @@ go 1.15
require (
github.com/container-storage-interface/spec v1.4.0
github.com/coreos/bbolt v0.0.0-00010101000000-000000000000 // indirect
github.com/coreos/etcd v3.3.25+incompatible // indirect
github.com/coreos/go-semver v0.3.0 // indirect
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
github.com/gorilla/websocket v1.4.2 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/jonboulle/clockwork v0.2.2 // indirect
github.com/kubernetes-csi/csi-lib-utils v0.9.1
github.com/soheilhy/cmux v0.1.5 // indirect
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
go.etcd.io/bbolt v0.0.0-00010101000000-000000000000 // indirect
go.etcd.io/etcd v3.3.25+incompatible
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/grpc v1.33.1
k8s.io/klog v1.0.0
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10

View File

@@ -31,11 +31,14 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
@@ -43,12 +46,25 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn
github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
github.com/coreos/bbolt v1.3.5 h1:XFv7xaq7701j8ZSEzR28VohFYSlyakMyqNMU5FQH6Ac=
github.com/coreos/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
github.com/coreos/etcd v3.3.25+incompatible h1:0GQEw6h3YnuOVdtwygkIfJ+Omx0tZ8/QkVyXI4LkbeY=
github.com/coreos/etcd v3.3.25+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
@@ -57,6 +73,7 @@ github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
@@ -71,10 +88,14 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
@@ -92,6 +113,7 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@@ -105,24 +127,38 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ=
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@@ -135,11 +171,14 @@ github.com/kubernetes-csi/csi-lib-utils v0.9.1 h1:sGq6ifVujfMSkfTsMZip44Ttv8SDXv
github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M=
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
@@ -149,28 +188,38 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.7.1 h1:NTGy1Ja9pByO+xAeH/qiWnLrKtr3hJPNjaVUwnjpdpA=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
@@ -182,11 +231,24 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.etcd.io/etcd v3.3.25+incompatible h1:V1RzkZJj9LqsJRy+TUBgpWSbZXITLB819lstuTFoZOY=
go.etcd.io/etcd v3.3.25+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@@ -194,6 +256,7 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -213,6 +276,8 @@ golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCc
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -226,20 +291,26 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -255,9 +326,11 @@ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4 h1:5/PjkGUjvEU5Gl6BxmvKRPpqo2uNMv4rcHBMwzk/st8=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -268,6 +341,7 @@ golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -286,10 +360,14 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -310,6 +388,8 @@ google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
@@ -335,6 +415,7 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
@@ -363,4 +444,5 @@ k8s.io/utils v0.0.0-20210305010621-2afb4311ab10/go.mod h1:jPW/WVKK9YHAvNhRxK0md/
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q=
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "0.9.3"
vitastorCSIDriverVersion = "0.8.6"
)
// Config struct fills the parameters of request or user input

View File

@@ -1,58 +0,0 @@
exit
git clone https://git.yourcmc.ru/vitalif/pve-qemu .
# bookworm
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control
# bullseye
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget
wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control
# buster
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control

View File

@@ -1,7 +0,0 @@
#!/bin/bash
cat < vitastor.Dockerfile > ../Dockerfile
cd ..
mkdir -p packages
sudo podman build --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
rm Dockerfile

4
debian/changelog vendored
View File

@@ -1,10 +1,10 @@
vitastor (0.9.3-1) unstable; urgency=medium
vitastor (0.8.6-1) unstable; urgency=medium
* Bugfixes
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
vitastor (0.9.3-1) unstable; urgency=medium
vitastor (0.8.6-1) unstable; urgency=medium
* Implement NFS proxy
* Add documentation

View File

@@ -1,4 +1,4 @@
# Build patched QEMU for Debian inside a container
# Build patched QEMU for Debian Buster or Bullseye/Sid inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
ARG REL=
@@ -15,19 +15,17 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y install qemu fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep qemu
# To build a custom version
#RUN cp /root/packages/qemu-orig/* /root
RUN apt-get --download-only source qemu
ADD patches /root/vitastor/patches
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch patches/qemu-6.1-vitastor.patch src/qemu_driver.c /root/vitastor/patches/
RUN set -e; \
apt-get install -y wget; \
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
@@ -39,16 +37,25 @@ RUN set -e; \
rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \
dpkg-source -x /root/qemu*.dsc; \
QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*(\d+\.\d+).*!$1!'); \
D=$(ls -d qemu*/); \
cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch ./qemu-*/debian/patches; \
echo qemu-$QEMU_VER-vitastor.patch >> $D/debian/patches/series; \
if ls -d /root/packages/qemu-$REL/qemu-5.0*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-5.0*); \
cp /root/vitastor/patches/qemu-5.0-vitastor.patch $D/debian/patches; \
echo qemu-5.0-vitastor.patch >> $D/debian/patches/series; \
elif ls /root/packages/qemu-$REL/qemu-6.1*; then \
D=$(ls -d /root/packages/qemu-$REL/qemu-6.1*); \
cp /root/vitastor/patches/qemu-6.1-vitastor.patch $D/debian/patches; \
echo qemu-6.1-vitastor.patch >> $D/debian/patches/series; \
else \
cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \
echo qemu-5.1-vitastor.patch >> $P/series; \
fi; \
cd /root/packages/qemu-$REL/qemu-*/; \
quilt push -a; \
quilt add block/vitastor.c; \
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
cp /root/vitastor/patches/qemu_driver.c block/vitastor.c; \
quilt refresh; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor3; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
rm -rf /root/packages/qemu-$REL/qemu-*/

View File

@@ -1,4 +1,4 @@
# Build Vitastor packages for Debian inside a container
# Build Vitastor packages for Debian Buster or Bullseye/Sid inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
ARG REL=
@@ -15,12 +15,11 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
fi; \
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
RUN apt-get update
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y install fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep fio
RUN apt-get --download-only source fio
RUN apt-get update && apt-get -y install libjerasure-dev cmake libibverbs-dev libisal-dev
@@ -35,8 +34,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.9.3; \
cd vitastor-0.9.3; \
cp -r /root/vitastor vitastor-0.8.6; \
cd vitastor-0.8.6; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -49,8 +48,8 @@ RUN set -e -x; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.3.orig.tar.xz vitastor-0.9.3; \
cd vitastor-0.9.3; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.6.orig.tar.xz vitastor-0.8.6; \
cd vitastor-0.8.6; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

Binary file not shown.

View File

@@ -17,16 +17,14 @@ Configuration parameters can be set in 3 places:
- Configuration file (`/etc/vitastor/vitastor.conf` or other path)
- etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
connection parameters should obviously be set in the configuration file.
- Command line of Vitastor components: OSD (when you run it without vitastor-disk),
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
doesn't allow to set all variables directly, but it allows to override the
configuration file and set everything you need inside it.
- OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain
primarily disk layout parameters of specific OSDs. In fact, these parameters
are automatically passed into the command line of vitastor-osd process, so
they have the same "status" as command-line parameters.
- Command line of Vitastor components: OSD, mon, fio and QEMU options,
OpenStack/Proxmox/etc configuration. The latter doesn't allow to set all
variables directly, but it allows to override the configuration file and
set everything you need inside it.
In the future, additional configuration methods may be added:
- OSD superblock which will, by design, contain parameters related to the disk
layout and to one specific OSD.
- OSD-specific keys in etcd like `/vitastor/config/osd/<number>`.
## Parameter Reference

View File

@@ -19,17 +19,14 @@
- Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
задаваться там, кроме, естественно, самих параметров соединения с etcd,
которые должны задаваться в файле конфигурации
- В командной строке компонентов Vitastor: OSD (при ручном запуске без vitastor-disk),
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
как правило, не включают полный набор параметров напрямую, но позволяют
определить путь к файлу конфигурации и задать любые параметры в нём.
- В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры,
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
при запуске OSD эти параметры автоматически передаются в командную строку
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
командной строки OSD.
- В командной строке компонентов Vitastor: OSD, монитора, опциях fio и QEMU,
настроек OpenStack, Proxmox и т.п. Последние, как правило, не включают полный
набор параметров напрямую, но разрешают определить путь к файлу конфигурации
и задать любые параметры в нём.
В будущем также могут быть добавлены другие способы конфигурации:
- Суперблок OSD, в котором будут храниться параметры OSD, связанные с дисковым
форматом и с этим конкретным OSD.
- OSD-специфичные ключи в etcd типа `/vitastor/config/osd/<номер>`.
## Список параметров

View File

@@ -25,16 +25,11 @@ running if required parameters are specified.
## etcd_address
- Type: string or array of strings
- Can be changed online: yes
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Note that https is not supported for etcd connections yet.
etcd connection endpoints can be changed online by updating global
configuration in etcd itself - this allows to switch the cluster to new
etcd addresses without downtime.
## etcd_prefix
- Type: string
@@ -47,6 +42,5 @@ example, use a single etcd cluster for multiple Vitastor clusters.
- Type: integer
- Default: 0
- Can be changed online: yes
Log level. Raise if you want more verbose output.

View File

@@ -24,14 +24,10 @@
## etcd_address
- Тип: строка или массив строк
- Можно менять на лету: да
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
самом etcd - это позволяет переключить кластер на новые etcd без остановки.
## etcd_prefix
- Тип: строка
@@ -45,6 +41,5 @@
- Тип: целое число
- Значение по умолчанию: 0
- Можно менять на лету: да
Уровень логгирования. Повысьте, если хотите более подробный вывод.

View File

@@ -33,13 +33,12 @@ Size of objects (data blocks) into which all physical and virtual drives
in Vitastor, affects memory usage, write amplification and I/O load
distribution effectiveness.
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
it's possible to use 1 MB for SSD too - it will lower memory usage, but
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
it's possible to use 4 MB for SSD too - it will lower memory usage, but
may increase average WA and reduce linear performance.
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
544 MB per 1 TB of used disk space with the default 128 KB block size.
With 1 MB it's 8 times lower.
## bitmap_granularity

View File

@@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
настроек, влияет на потребление памяти, объём избыточной записи (write
amplification) и эффективность распределения нагрузки по OSD.
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
это понизит использование памяти, но ухудшит распределение нагрузки и в
среднем увеличит WA.
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
стандартном 128 КБ блоке.
## bitmap_granularity

View File

@@ -24,8 +24,6 @@ initialization and can't be changed after it without losing data.
- [disable_journal_fsync](#disable_journal_fsync)
- [disable_device_lock](#disable_device_lock)
- [disk_alignment](#disk_alignment)
- [data_csum_type](#data_csum_type)
- [csum_block_size](#csum_block_size)
## data_device
@@ -176,42 +174,3 @@ Intel Optane (probably, not tested yet).
Clients don't need to be aware of disk_alignment, so it's not required to
put a modified value into etcd key /vitastor/config/global.
## data_csum_type
- Type: string
- Default: none
Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
enable data checksums.
## csum_block_size
- Type: integer
- Default: 4096
Checksum calculation block size.
Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
(which is usually 4 KB).
Checksums increase metadata size by 4 bytes per each csum_block_size of data.
Checksums are always a compromise:
1. You either sacrifice +1 GB RAM per 1 TB of data
2. Or you raise csum_block_size, for example, to 32k and sacrifice
50% random write iops due to checksum read-modify-write
3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
sacrifice 50% random read iops due to checksum reads
Option 1 (default) is recommended for all-flash setups because these usually
have enough RAM.
Option 2 is recommended for HDD-only setups. HDD-only setups usually do NOT
have enough RAM for the default 4 KB csum_block_size.
Option 3 is recommended for SSD+HDD setups (because metadata SSDs will handle
extra reads without any performance drop) and also *maybe* for NVMe all-flash
setups when you don't have enough RAM (because NVMe drives have plenty
of read iops to spare). You may also consider enabling
[cached_read_meta](osd.en.md#cached_read_meta) in this case.

View File

@@ -25,8 +25,6 @@
- [disable_journal_fsync](#disable_journal_fsync)
- [disable_device_lock](#disable_device_lock)
- [disk_alignment](#disk_alignment)
- [data_csum_type](#data_csum_type)
- [csum_block_size](#csum_block_size)
## data_device
@@ -185,52 +183,3 @@ journal_block_size и meta_block_size. Однако единственные SSD
Клиентам не обязательно знать про disk_alignment, так что помещать значение
этого параметра в etcd в /vitastor/config/global не нужно.
## data_csum_type
- Тип: строка
- Значение по умолчанию: none
Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
Следует понимать, что контрольные суммы в зависимости от размера блока их
расчёта либо увеличивают потребление памяти, либо снижают производительность.
Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
## csum_block_size
- Тип: целое число
- Значение по умолчанию: 4096
Размер блока расчёта контрольных сумм.
Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
(который обычно равен 4 КБ).
Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
csum_block_size данных.
Контрольные суммы - это всегда компромисс:
1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
новых контрольных сумм
3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
с диска
Вариант 1 (при настройках по умолчанию) рекомендуется для SSD (All-Flash)
кластеров, потому что памяти в них обычно хватает.
Вариант 2 рекомендуется для кластеров на одних жёстких дисках (без SSD
под метаданные). На 4 кб блок контрольной суммы памяти в таких кластерах
обычно НЕ хватает.
Вариант 3 рекомендуется для гибридных кластеров (SSD+HDD), потому что
скорости SSD под метаданными хватит, чтобы обработать дополнительные чтения
без снижения производительности. Также вариант 3 *может* рекомендоваться
для All-Flash кластеров на основе NVMe-дисков, когда памяти НЕ достаточно,
потому что NVMe-диски имеют огромный запас производительности по чтению.
В таких случаях, возможно, также имеет смысл включать параметр
[cached_read_meta](osd.ru.md#cached_read_meta).

View File

@@ -153,7 +153,6 @@ operations.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
Interval before attempting to reconnect to an unavailable OSD.
@@ -162,7 +161,6 @@ Interval before attempting to reconnect to an unavailable OSD.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
Timeout for OSD connection attempts.
@@ -171,7 +169,6 @@ Timeout for OSD connection attempts.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
OSD connection inactivity time after which clients and other OSDs send
keepalive requests to check state of the connection.
@@ -181,7 +178,6 @@ keepalive requests to check state of the connection.
- Type: seconds
- Default: 5
- Minimum: 1
- Can be changed online: yes
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
within this time, the connection to it is dropped and a reconnection attempt
@@ -192,7 +188,6 @@ is scheduled.
- Type: milliseconds
- Default: 500
- Minimum: 50
- Can be changed online: yes
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
@@ -202,7 +197,6 @@ the time for the clients to wait before re-attempting such I/O requests.
- Type: integer
- Default: 5
- Can be changed online: yes
Maximum number of attempts for etcd requests which can't be retried
indefinitely.
@@ -211,7 +205,6 @@ indefinitely.
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
Timeout for etcd requests which should complete quickly, like lease refresh.
@@ -219,7 +212,6 @@ Timeout for etcd requests which should complete quickly, like lease refresh.
- Type: milliseconds
- Default: 5000
- Can be changed online: yes
Timeout for etcd requests which are allowed to wait for some time.
@@ -227,7 +219,6 @@ Timeout for etcd requests which are allowed to wait for some time.
- Type: seconds
- Default: max(30, etcd_report_interval*2)
- Can be changed online: yes
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
etcd_report_interval to guarantee that keepalive actually works.
@@ -236,7 +227,6 @@ etcd_report_interval to guarantee that keepalive actually works.
- Type: seconds
- Default: 30
- Can be changed online: yes
etcd websocket ping interval required to keep the connection alive and
detect disconnections quickly.
@@ -245,7 +235,6 @@ detect disconnections quickly.
- Type: integer
- Default: 33554432
- Can be changed online: yes
Without immediate_commit=all this parameter sets the limit of "dirty"
(not committed by fsync) data allowed by the client before forcing an

View File

@@ -161,7 +161,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Время ожидания перед повторной попыткой соединиться с недоступным OSD.
@@ -170,7 +169,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Максимальное время ожидания попытки соединения с OSD.
@@ -179,7 +177,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Время неактивности соединения с OSD, после которого клиенты или другие OSD
посылают запрос проверки состояния соединения.
@@ -189,7 +186,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: секунды
- Значение по умолчанию: 5
- Минимальное значение: 1
- Можно менять на лету: да
Максимальное время ожидания ответа на запрос проверки состояния соединения.
Если OSD не отвечает за это время, соединение отключается и производится
@@ -200,7 +196,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: миллисекунды
- Значение по умолчанию: 500
- Минимальное значение: 50
- Можно менять на лету: да
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
@@ -212,7 +207,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: целое число
- Значение по умолчанию: 5
- Можно менять на лету: да
Максимальное число попыток выполнения запросов к etcd для тех запросов,
которые нельзя повторять бесконечно.
@@ -221,7 +215,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
Максимальное время выполнения запросов к etcd, которые должны завершаться
быстро, таких, как обновление резервации (lease).
@@ -230,7 +223,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: миллисекунды
- Значение по умолчанию: 5000
- Можно менять на лету: да
Максимальное время выполнения запросов к etcd, для которых не обязательно
гарантировать быстрое выполнение.
@@ -239,7 +231,6 @@ OSD в любом случае согласовывают реальное зн
- Тип: секунды
- Значение по умолчанию: max(30, etcd_report_interval*2)
- Можно менять на лету: да
Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
etcd_report_interval, чтобы keepalive гарантированно работал.
@@ -248,7 +239,6 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
- Тип: секунды
- Значение по умолчанию: 30
- Можно менять на лету: да
Интервал проверки живости вебсокет-подключений к etcd.
@@ -256,7 +246,6 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
- Тип: целое число
- Значение по умолчанию: 33554432
- Можно менять на лету: да
При работе без immediate_commit=all - это лимит объёма "грязных" (не
зафиксированных fsync-ом) данных, при достижении которого клиент будет

View File

@@ -7,8 +7,7 @@
# Runtime OSD Parameters
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
initialization and can be changed - either with an OSD restart or, for some of
them, even without restarting by updating configuration in etcd.
initialization and can be changed with an OSD restart.
- [etcd_report_interval](#etcd_report_interval)
- [run_primary](#run_primary)
@@ -31,9 +30,6 @@ them, even without restarting by updating configuration in etcd.
- [max_flusher_count](#max_flusher_count)
- [inmemory_metadata](#inmemory_metadata)
- [inmemory_journal](#inmemory_journal)
- [cached_read_data](#cached_read_data)
- [cached_read_meta](#cached_read_meta)
- [cached_read_journal](#cached_read_journal)
- [journal_sector_buffer_count](#journal_sector_buffer_count)
- [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
- [throttle_small_writes](#throttle_small_writes)
@@ -42,14 +38,6 @@ them, even without restarting by updating configuration in etcd.
- [throttle_target_parallelism](#throttle_target_parallelism)
- [throttle_threshold_us](#throttle_threshold_us)
- [osd_memlock](#osd_memlock)
- [auto_scrub](#auto_scrub)
- [no_scrub](#no_scrub)
- [scrub_interval](#scrub_interval)
- [scrub_queue_depth](#scrub_queue_depth)
- [scrub_sleep](#scrub_sleep)
- [scrub_list_limit](#scrub_list_limit)
- [scrub_find_best](#scrub_find_best)
- [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
## etcd_report_interval
@@ -103,7 +91,6 @@ OSD by hand.
- Type: seconds
- Default: 5
- Can be changed online: yes
Time interval at which automatic fsyncs/flushes are issued by each OSD when
the immediate_commit mode if disabled. fsyncs are required because without
@@ -116,7 +103,6 @@ issue fsyncs at all.
- Type: integer
- Default: 128
- Can be changed online: yes
Same as autosync_interval, but sets the maximum number of uncommitted write
operations before issuing an fsync operation internally.
@@ -125,7 +111,6 @@ operations before issuing an fsync operation internally.
- Type: integer
- Default: 4
- Can be changed online: yes
Maximum recovery operations per one primary OSD at any given moment of time.
Currently it's the only parameter available to tune the speed or recovery
@@ -135,7 +120,6 @@ and rebalancing, but it's planned to implement more.
- Type: integer
- Default: 128
- Can be changed online: yes
Number of recovery operations before switching to recovery of the next PG.
The idea is to mix all PGs during recovery for more even space and load
@@ -146,7 +130,6 @@ Degraded PGs are anyway scanned first.
- Type: integer
- Default: 16
- Can be changed online: yes
Maximum number of recovery operations before issuing an additional fsync.
@@ -162,7 +145,6 @@ the underlying device. This may be useful for recovery purposes.
- Type: boolean
- Default: false
- Can be changed online: yes
Disable automatic background recovery of objects. Note that it doesn't
affect implicit recovery of objects happening during writes - a write is
@@ -172,7 +154,6 @@ always made to a full set of at least pg_minsize OSDs.
- Type: boolean
- Default: false
- Can be changed online: yes
Disable background movement of data between different OSDs. Disabling it
means that PGs in the `has_misplaced` state will be left in it indefinitely.
@@ -181,7 +162,6 @@ means that PGs in the `has_misplaced` state will be left in it indefinitely.
- Type: seconds
- Default: 3
- Can be changed online: yes
Time interval at which OSDs print simple human-readable operation
statistics on stdout.
@@ -190,7 +170,6 @@ statistics on stdout.
- Type: seconds
- Default: 10
- Can be changed online: yes
Time interval at which OSDs dump slow or stuck operations on stdout, if
they're any. Also it's the time after which an operation is considered
@@ -200,7 +179,6 @@ they're any. Also it's the time after which an operation is considered
- Type: seconds
- Default: 60
- Can be changed online: yes
Number of seconds after which a deleted inode is removed from OSD statistics.
@@ -208,7 +186,6 @@ Number of seconds after which a deleted inode is removed from OSD statistics.
- Type: integer
- Default: 128
- Can be changed online: yes
Parallel client write operation limit per one OSD. Operations that exceed
this limit are pushed to a temporary queue instead of being executed
@@ -218,7 +195,6 @@ immediately.
- Type: integer
- Default: 1
- Can be changed online: yes
Flusher is a micro-thread that moves data from the journal to the data
area of the device. Their number is auto-tuned between minimum and maximum.
@@ -228,7 +204,6 @@ Minimum number is set by this parameter.
- Type: integer
- Default: 256
- Can be changed online: yes
Maximum number of journal flushers (see above min_flusher_count).
@@ -258,46 +233,6 @@ is typically very small because it's sufficient to have 16-32 MB journal
for SSD OSDs. However, in theory it's possible that you'll want to turn it
off for hybrid (HDD+SSD) OSDs with large journals on quick devices.
## cached_read_data
- Type: boolean
- Default: false
Read data through Linux page cache, i.e. use a file descriptor opened without
O_DIRECT for data reads. May improve read performance for frequently accessed
data if it fits in RAM. Memory in page cache is shared by all processes and
not accounted in OSD memory consumption.
## cached_read_meta
- Type: boolean
- Default: false
Read metadata through Linux page cache. May be beneficial when checksums
are enabled and [inmemory_metadata](#inmemory_metadata) is disabled, because
in this case metadata blocks are read from disk to verify checksums on every
read request and caching them may reduce this extra read load.
Absolutely pointless to enable with enabled inmemory_metadata because all
metadata is kept in memory anyway, and likely pointless without checksums,
because in that case, metadata blocks are read from disk only during journal
flushing.
If the same device is used for data and metadata, enabling [cached_read_data](#cached_read_data)
also enables this parameter, given that it isn't turned off explicitly.
## cached_read_journal
- Type: boolean
- Default: false
Read buffered data from journal through Linux page cache. Does not have sense
without disabling [inmemory_journal](#inmemory_journal), which, again, is
enabled by default.
If the same device is used for metadata and journal, enabling [cached_read_meta](#cached_read_meta)
also enables this parameter, given that it isn't turned off explicitly.
## journal_sector_buffer_count
- Type: integer
@@ -325,7 +260,6 @@ Most (99%) other SSDs don't need this option.
- Type: boolean
- Default: false
- Can be changed online: yes
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
with fast journal/metadata devices and slow data devices. The idea is that
@@ -343,7 +277,6 @@ fills up.
- Type: integer
- Default: 100
- Can be changed online: yes
Target maximum number of throttled operations per second under the condition
of full journal. Set it to approximate random write iops of your data devices
@@ -353,7 +286,6 @@ of full journal. Set it to approximate random write iops of your data devices
- Type: integer
- Default: 100
- Can be changed online: yes
Target maximum bandwidth in MB/s of throttled operations per second under
the condition of full journal. Set it to approximate linear write
@@ -363,7 +295,6 @@ performance of your data devices (HDDs).
- Type: integer
- Default: 1
- Can be changed online: yes
Target maximum parallelism of throttled operations under the condition of
full journal. Set it to approximate internal parallelism of your data
@@ -373,7 +304,6 @@ devices (1 for HDDs, 4-8 for SSDs).
- Type: microseconds
- Default: 50
- Can be changed online: yes
Minimal computed delay to be applied to throttled operations. Usually
doesn't need to be changed.
@@ -383,103 +313,4 @@ doesn't need to be changed.
- Type: boolean
- Default: false
Lock all OSD memory to prevent it from being unloaded into swap with
mlockall(). Requires sufficient ulimit -l (max locked memory).
## auto_scrub
- Type: boolean
- Default: false
- Can be changed online: yes
Data scrubbing is the process of background verification of copies to find
and repair corrupted blocks. It's not run automatically by default since
it's a new feature. Set this parameter to true to enable automatic scrubs.
This parameter makes OSDs automatically schedule data scrubbing of clean PGs
every `scrub_interval` (see below). You can also start/schedule scrubbing
manually by setting `next_scrub` JSON key to the desired UNIX time of the
next scrub in `/pg/history/...` values in etcd.
## no_scrub
- Type: boolean
- Default: false
- Can be changed online: yes
Temporarily disable scrubbing and stop running scrubs.
## scrub_interval
- Type: string
- Default: 30d
- Can be changed online: yes
Default automatic scrubbing interval for all pools. Numbers without suffix
are treated as seconds, possible unit suffixes include 's' (seconds),
'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
## scrub_queue_depth
- Type: integer
- Default: 1
- Can be changed online: yes
Number of parallel scrubbing operations per one OSD.
## scrub_sleep
- Type: milliseconds
- Default: 0
- Can be changed online: yes
Additional interval between two consecutive scrubbing operations on one OSD.
Can be used to slow down scrubbing if it affects user load too much.
## scrub_list_limit
- Type: integer
- Default: 1000
- Can be changed online: yes
Number of objects to list in one listing operation during scrub.
## scrub_find_best
- Type: boolean
- Default: true
- Can be changed online: yes
Find and automatically restore best versions of objects with unmatched
copies. In replicated setups, the best version is the version with most
matching replicas. In EC setups, the best version is the subset of data
and parity chunks without mismatches.
The hypothetical situation where you might want to disable it is when
you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
corrupt an object in the same way (for example, zero it out) and only
1 HDD will remain good. In this case disabling scrub_find_best may help
you to recover the data! See also scrub_ec_max_bruteforce below.
## scrub_ec_max_bruteforce
- Type: integer
- Default: 100
- Can be changed online: yes
Vitastor can locate corrupted chunks in EC setups with more than 1 parity
chunk by brute-forcing all possible error locations. This configuration
value limits the maximum number of checked combinations. You can try to
increase it if you have EC N+K setup with N and K large enough for
combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
than the default 100.
If there are too many possible combinations or if multiple combinations give
correct results then objects are marked inconsistent and aren't recovered
automatically.
In replicated setups bruteforcing isn't needed, Vitastor just assumes that
the variant with most available equal copies is correct. For example, if
you have 3 replicas and 1 of them differs, this one is considered to be
corrupted. But if there is no "best" version with more copies than all
others have then the object is also marked as inconsistent.
Lock all OSD memory to prevent it from being unloaded into swap with mlockall(). Requires sufficient ulimit -l (max locked memory).

View File

@@ -8,8 +8,7 @@
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
изменения конфигурации в etcd.
момент с перезапуском OSD.
- [etcd_report_interval](#etcd_report_interval)
- [run_primary](#run_primary)
@@ -32,9 +31,6 @@
- [max_flusher_count](#max_flusher_count)
- [inmemory_metadata](#inmemory_metadata)
- [inmemory_journal](#inmemory_journal)
- [cached_read_data](#cached_read_data)
- [cached_read_meta](#cached_read_meta)
- [cached_read_journal](#cached_read_journal)
- [journal_sector_buffer_count](#journal_sector_buffer_count)
- [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
- [throttle_small_writes](#throttle_small_writes)
@@ -43,14 +39,6 @@
- [throttle_target_parallelism](#throttle_target_parallelism)
- [throttle_threshold_us](#throttle_threshold_us)
- [osd_memlock](#osd_memlock)
- [auto_scrub](#auto_scrub)
- [no_scrub](#no_scrub)
- [scrub_interval](#scrub_interval)
- [scrub_queue_depth](#scrub_queue_depth)
- [scrub_sleep](#scrub_sleep)
- [scrub_list_limit](#scrub_list_limit)
- [scrub_find_best](#scrub_find_best)
- [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
## etcd_report_interval
@@ -105,7 +93,6 @@ RUNNING), подходящий под заданную маску. Также н
- Тип: секунды
- Значение по умолчанию: 5
- Можно менять на лету: да
Временной интервал отправки автоматических fsync-ов (операций очистки кэша)
каждым OSD для случая, когда режим immediate_commit отключён. fsync-и нужны
@@ -118,7 +105,6 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: целое число
- Значение по умолчанию: 128
- Можно менять на лету: да
Аналогично autosync_interval, но задаёт не временной интервал, а
максимальное количество незафиксированных операций записи перед
@@ -128,7 +114,6 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: целое число
- Значение по умолчанию: 4
- Можно менять на лету: да
Максимальное число операций восстановления на одном первичном OSD в любой
момент времени. На данный момент единственный параметр, который можно менять
@@ -139,7 +124,6 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: целое число
- Значение по умолчанию: 128
- Можно менять на лету: да
Число операций восстановления перед переключением на восстановление другой PG.
Идея заключается в том, чтобы восстанавливать все PG одновременно для более
@@ -151,7 +135,6 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: целое число
- Значение по умолчанию: 16
- Можно менять на лету: да
Максимальное число операций восстановления перед дополнительным fsync.
@@ -167,7 +150,6 @@ OSD, чтобы успевать очищать журнал - без них OSD
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Отключить автоматическое фоновое восстановление объектов. Обратите внимание,
что эта опция не отключает восстановление объектов, происходящее при
@@ -178,7 +160,6 @@ OSD.
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Отключить фоновое перемещение объектов между разными OSD. Отключение
означает, что PG, находящиеся в состоянии `has_misplaced`, будут оставлены
@@ -188,7 +169,6 @@ OSD.
- Тип: секунды
- Значение по умолчанию: 3
- Можно менять на лету: да
Временной интервал, с которым OSD печатают простую человекочитаемую
статистику выполнения операций в стандартный вывод.
@@ -197,7 +177,6 @@ OSD.
- Тип: секунды
- Значение по умолчанию: 10
- Можно менять на лету: да
Временной интервал, с которым OSD выводят в стандартный вывод список
медленных или зависших операций, если таковые имеются. Также время, при
@@ -207,7 +186,6 @@ OSD.
- Тип: секунды
- Значение по умолчанию: 60
- Можно менять на лету: да
Число секунд, через которое удалённые инод удаляется и из статистики OSD.
@@ -215,7 +193,6 @@ OSD.
- Тип: целое число
- Значение по умолчанию: 128
- Можно менять на лету: да
Максимальное число одновременных клиентских операций записи на один OSD.
Операции, превышающие этот лимит, не исполняются сразу, а сохраняются во
@@ -225,7 +202,6 @@ OSD.
- Тип: целое число
- Значение по умолчанию: 1
- Можно менять на лету: да
Flusher - это микро-поток (корутина), которая копирует данные из журнала в
основную область устройства данных. Их число настраивается динамически между
@@ -235,7 +211,6 @@ Flusher - это микро-поток (корутина), которая коп
- Тип: целое число
- Значение по умолчанию: 256
- Можно менять на лету: да
Максимальное число микро-потоков очистки журнала (см. выше min_flusher_count).
@@ -266,51 +241,6 @@ Flusher - это микро-поток (корутина), которая коп
параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
журналами, расположенными на быстром по сравнению с HDD устройстве.
## cached_read_data
- Тип: булево (да/нет)
- Значение по умолчанию: false
Читать данные через системный кэш Linux (page cache), то есть, использовать
для чтения данных файловый дескриптор, открытый без флага O_DIRECT. Может
улучшить производительность чтения для часто используемых данных, если они
помещаются в память. Память кэша разделяется между всеми процессами в
системе и не учитывается в потреблении памяти процессом OSD.
## cached_read_meta
- Тип: булево (да/нет)
- Значение по умолчанию: false
Читать метаданные через системный кэш Linux. Может быть полезно, когда
включены контрольные суммы, а параметр [inmemory_metadata](#inmemory_metadata)
отключён, так как в этом случае блоки метаданных читаются с диска при каждом
запросе чтения для проверки контрольных сумм и их кэширование может снизить
дополнительную нагрузку на диск.
Абсолютно бессмысленно включать данный параметр, если параметр
inmemory_metadata включён (по умолчанию это так), и также вероятно
бессмысленно включать его, если не включены контрольные суммы, так как в
этом случае блоки метаданных читаются с диска только во время сброса
журнала.
Если одно и то же устройство используется для данных и метаданных, включение
[cached_read_data](#cached_read_data) также включает данный параметр, при
условии, что он не отключён явным образом.
## cached_read_journal
- Тип: булево (да/нет)
- Значение по умолчанию: false
Читать буферизованные в журнале данные через системный кэш Linux. Не имеет
смысла без отключения параметра [inmemory_journal](#inmemory_journal),
который, опять же, по умолчанию включён.
Если одно и то же устройство используется для метаданных и журнала,
включение [cached_read_meta](#cached_read_meta) также включает данный
параметр, при условии, что он не отключён явным образом.
## journal_sector_buffer_count
- Тип: целое число
@@ -340,7 +270,6 @@ inmemory_metadata включён (по умолчанию это так), и т
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Разрешить мягкое ограничение скорости журналируемой записи. Полезно для
гибридных OSD с быстрыми устройствами метаданных и медленными устройствами
@@ -359,7 +288,6 @@ inmemory_metadata включён (по умолчанию это так), и т
- Тип: целое число
- Значение по умолчанию: 100
- Можно менять на лету: да
Расчётное максимальное число ограничиваемых операций в секунду при условии
отсутствия свободного места в журнале. Устанавливайте приблизительно равным
@@ -370,7 +298,6 @@ inmemory_metadata включён (по умолчанию это так), и т
- Тип: целое число
- Значение по умолчанию: 100
- Можно менять на лету: да
Расчётный максимальный размер в МБ/с ограничиваемых операций в секунду при
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
@@ -381,7 +308,6 @@ inmemory_metadata включён (по умолчанию это так), и т
- Тип: целое число
- Значение по умолчанию: 1
- Можно менять на лету: да
Расчётный максимальный параллелизм ограничиваемых операций в секунду при
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
@@ -392,7 +318,6 @@ inmemory_metadata включён (по умолчанию это так), и т
- Тип: микросекунды
- Значение по умолчанию: 50
- Можно менять на лету: да
Минимальная применимая к ограничиваемым операциям задержка. Обычно не
требует изменений.
@@ -402,113 +327,4 @@ inmemory_metadata включён (по умолчанию это так), и т
- Тип: булево (да/нет)
- Значение по умолчанию: false
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
заблокированной памяти).
## auto_scrub
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
запускаются автоматически, так как являются новой функцией. Чтобы включить
автоматическое планирование скрабов, установите данный параметр в true.
Включённый параметр заставляет OSD автоматически планировать фоновую
проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
запустить или запланировать проверку вручную, установив значение ключа JSON
`next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
желаемой проверки.
## no_scrub
- Тип: булево (да/нет)
- Значение по умолчанию: false
- Можно менять на лету: да
Временно отключить и остановить запущенные скрабы.
## scrub_interval
- Тип: строка
- Значение по умолчанию: 30d
- Можно менять на лету: да
Интервал автоматической фоновой проверки по умолчанию для всех пулов.
Значения без указанной единицы измерения считаются в секундах, допустимые
символы единиц измерения в конце: 's' (секунды),
'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяца) или 'y' (годы).
## scrub_queue_depth
- Тип: целое число
- Значение по умолчанию: 1
- Можно менять на лету: да
Число параллельных операций фоновой проверки на один OSD.
## scrub_sleep
- Тип: миллисекунды
- Значение по умолчанию: 0
- Можно менять на лету: да
Дополнительный интервал ожидания после фоновой проверки каждого объекта на
одном OSD. Может использоваться для замедления скраба, если он слишком
сильно влияет на пользовательскую нагрузку.
## scrub_list_limit
- Тип: целое число
- Значение по умолчанию: 1000
- Можно менять на лету: да
Размер загружаемых за одну операцию списков объектов в процессе фоновой
проверки.
## scrub_find_best
- Тип: булево (да/нет)
- Значение по умолчанию: true
- Можно менять на лету: да
Находить и автоматически восстанавливать "лучшие версии" объектов с
несовпадающими копиями/частями. При использовании репликации "лучшая"
версия - версия, доступная в большем числе экземпляров, чем другие. При
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
частей данных и чётности, полностью соответствующих друг другу.
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
незаметно и одинаково повредить данные одного и того же объекта, например,
занулив его, и только 1 диск останется неповреждённым. В этой ситуации
отключение этого параметра поможет вам восстановить данные! Смотрите также
описание следующего параметра - scrub_ec_max_bruteforce.
## scrub_ec_max_bruteforce
- Тип: целое число
- Значение по умолчанию: 100
- Можно менять на лету: да
Vitastor старается определить повреждённые части объектов при использовании
EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
всех возможных комбинаций ошибочных частей. Данное значение конфигурации
ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
его, если используете схему кодирования EC N+K с N и K, достаточно большими
для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
было больше, чем стандартное значение 100.
Если возможных комбинаций слишком много или если корректная комбинаций не
определяется однозначно, объекты помечаются неконсистентными (inconsistent)
и не восстанавливаются автоматически.
При использовании репликации перебор не нужен, Vitastor просто предполагает,
что вариант объекта с наибольшим количеством одинаковых копий корректен.
Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
считается некорректной. Однако, если "лучшую" версию с числом доступных
копий большим, чем у всех других версий, найти невозможно, то объект тоже
маркируется неконсистентным.
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку в пространство подкачки. Требует достаточного значения ulimit -l (лимита заблокированной памяти).

View File

@@ -40,7 +40,6 @@ Parameters:
- [root_node](#root_node)
- [osd_tags](#osd_tags)
- [primary_affinity_tags](#primary_affinity_tags)
- [scrub_interval](#scrub_interval)
Examples:
@@ -273,13 +272,6 @@ Specifies OSD tags to prefer putting primary OSDs in this pool to.
Note that for EC/XOR pools Vitastor always prefers to put primary OSD on one
of the OSDs containing a data chunk for a PG.
## scrub_interval
- Type: time interval (number + unit s/m/h/d/M/y)
Automatic scrubbing interval for this pool. Overrides
[global scrub_interval setting](osd.en.md#scrub_interval).
# Examples
## Replicated pool

View File

@@ -39,7 +39,6 @@
- [root_node](#root_node)
- [osd_tags](#osd_tags)
- [primary_affinity_tags](#primary_affinity_tags)
- [scrub_interval](#scrub_interval)
Примеры:
@@ -277,13 +276,6 @@ PG в Vitastor эферемерны, то есть вы можете менят
для PG этого пула. Имейте в виду, что для EC-пулов Vitastor также всегда
предпочитает помещать первичный OSD на один из OSD с данными, а не с чётностью.
## scrub_interval
- Тип: временной интервал (число + единица измерения s/m/h/d/M/y)
Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
# Примеры
## Реплицированный пул

View File

@@ -11,21 +11,13 @@
- name: etcd_address
type: string or array of strings
type_ru: строка или массив строк
online: true
info: |
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Note that https is not supported for etcd connections yet.
etcd connection endpoints can be changed online by updating global
configuration in etcd itself - this allows to switch the cluster to new
etcd addresses without downtime.
info_ru: |
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
самом etcd - это позволяет переключить кластер на новые etcd без остановки.
- name: etcd_prefix
type: string
default: "/vitastor"
@@ -39,6 +31,5 @@
- name: log_level
type: int
default: 0
online: true
info: Log level. Raise if you want more verbose output.
info_ru: Уровень логгирования. Повысьте, если хотите более подробный вывод.

View File

@@ -1,145 +0,0 @@
#!/usr/bin/nodejs
const fsp = require('fs').promises;
run(process.argv).catch(console.error);
async function run(argv)
{
if (argv.length < 3)
{
console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
return;
}
const index_file = await fsp.realpath(argv[2]);
const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
text = text.split(re);
let included = {};
let heading = 0, heading_name = '', m;
for (let i = 0; i < text.length; i++)
{
if (text[i].substr(0, 2) == '{{')
{
// Inclusion
let incfile = text[i].substr(2, text[i].length-4);
let section = null;
let indent = heading;
incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
let inc_heading = section;
incfile = rel2abs(index_file, incfile);
let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
inc = inc.trim().replace(/^[\s\S]+?\n#/, '#'); // remove until the first header
inc = inc.split(re);
const indent_str = new Array(indent+1).join('#');
let section_start = -1, section_end = -1;
for (let j = 0; j < inc.length; j++)
{
if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
{
if (!inc_heading)
{
inc_heading = m[3].trim();
}
if (section)
{
if (m[3].trim() == section)
section_start = j;
else if (section_start >= 0)
{
section_end = j;
break;
}
}
inc[j] = m[1] + indent_str + m[2] + m[3];
}
else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
{
const abs_m2 = rel2abs(incfile, m[2]);
const rel_m = abs2rel(__filename, abs_m2);
if (rel_m.substr(0, 9) == '../../../') // outside docs
inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
else
inc[j] = m[1] + abs_m2 + m[3];
}
}
if (section)
{
inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
}
if (inc.length)
{
if (!inc_heading)
inc_heading = heading_name||'';
included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
inc[0] = inc[0].replace(/^\s+/, '');
inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
}
text.splice(i, 1, ...inc);
i = i + inc.length - 1;
}
else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
{
// Heading
heading = m[1].length;
heading_name = m[2].trim();
}
}
for (let i = 0; i < text.length; i++)
{
if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
{
const p = m[2].indexOf('#');
if (included[m[2]])
{
text[i] = m[1]+included[m[2]]+m[3];
}
else if (p >= 0 && included[m[2].substr(0, p)])
{
text[i] = m[1]+m[2].substr(p)+m[3];
}
}
}
console.log(text.join(''));
}
function rel2abs(ref, rel)
{
rel = [ ...ref.replace(/^(.*)\/[^\/]+$/, '$1').split(/\/+/), ...rel.split(/\/+/) ];
return killdots(rel).join('/');
}
function abs2rel(ref, abs)
{
ref = ref.split(/\/+/);
abs = abs.split(/\/+/);
while (ref.length > 1 && ref[0] == abs[0])
{
ref.shift();
abs.shift();
}
for (let i = 1; i < ref.length; i++)
{
abs.unshift('..');
}
return killdots(abs).join('/');
}
function killdots(rel)
{
for (let i = 0; i < rel.length; i++)
{
if (rel[i] == '.')
{
rel.splice(i, 1);
i--;
}
else if (i >= 1 && rel[i] == '..' && rel[i-1] != '..')
{
rel.splice(i-1, 2);
i -= 2;
}
}
return rel;
}

View File

@@ -1,65 +0,0 @@
# Vitastor
{{../../../README.md#The Idea}}
{{../../../README.md#Talks and presentations}}
{{../../intro/features.en.md}}
{{../../intro/quickstart.en.md}}
{{../../intro/architecture.en.md}}
## Installation
{{../../installation/packages.en.md}}
{{../../installation/proxmox.en.md}}
{{../../installation/openstack.en.md}}
{{../../installation/kubernetes.en.md}}
{{../../installation/source.en.md}}
{{../../config.en.md|indent=1}}
{{../../config/common.en.md|indent=2}}
{{../../config/network.en.md|indent=2}}
{{../../config/layout-cluster.en.md|indent=2}}
{{../../config/layout-osd.en.md|indent=2}}
{{../../config/osd.en.md|indent=2}}
{{../../config/monitor.en.md|indent=2}}
{{../../config/pool.en.md|indent=2}}
{{../../config/inode.en.md|indent=2}}
## Usage
{{../../usage/cli.en.md}}
{{../../usage/disk.en.md}}
{{../../usage/fio.en.md}}
{{../../usage/nbd.en.md}}
{{../../usage/qemu.en.md}}
{{../../usage/nfs.en.md}}
## Performance
{{../../performance/understanding.en.md}}
{{../../performance/theoretical.en.md}}
{{../../performance/comparison1.en.md}}
{{../../intro/author.en.md|indent=1}}

View File

@@ -1,65 +0,0 @@
# Vitastor
{{../../../README-ru.md#Идея|indent=0}}
{{../../../README-ru.md#Презентации и записи докладов|indent=0}}
{{../../intro/features.ru.md}}
{{../../intro/quickstart.ru.md}}
{{../../intro/architecture.ru.md}}
## Установка
{{../../installation/packages.ru.md}}
{{../../installation/proxmox.ru.md}}
{{../../installation/openstack.ru.md}}
{{../../installation/kubernetes.ru.md}}
{{../../installation/source.ru.md}}
{{../../config.ru.md|indent=1}}
{{../../config/common.ru.md|indent=2}}
{{../../config/network.ru.md|indent=2}}
{{../../config/layout-cluster.ru.md|indent=2}}
{{../../config/layout-osd.ru.md|indent=2}}
{{../../config/osd.ru.md|indent=2}}
{{../../config/monitor.ru.md|indent=2}}
{{../../config/pool.ru.md|indent=2}}
{{../../config/inode.ru.md|indent=2}}
## Использование
{{../../usage/cli.ru.md}}
{{../../usage/disk.ru.md}}
{{../../usage/fio.ru.md}}
{{../../usage/nbd.ru.md}}
{{../../usage/qemu.ru.md}}
{{../../usage/nfs.ru.md}}
## Производительность
{{../../performance/understanding.ru.md}}
{{../../performance/theoretical.ru.md}}
{{../../performance/comparison1.ru.md}}
{{../../intro/author.ru.md|indent=1}}

View File

@@ -7,27 +7,26 @@
in Vitastor, affects memory usage, write amplification and I/O load
distribution effectiveness.
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
it's possible to use 1 MB for SSD too - it will lower memory usage, but
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
it's possible to use 4 MB for SSD too - it will lower memory usage, but
may increase average WA and reduce linear performance.
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
544 MB per 1 TB of used disk space with the default 128 KB block size.
With 1 MB it's 8 times lower.
info_ru: |
Размер объектов (блоков данных), на которые делятся физические и виртуальные
диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
настроек, влияет на потребление памяти, объём избыточной записи (write
amplification) и эффективность распределения нагрузки по OSD.
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
это понизит использование памяти, но ухудшит распределение нагрузки и в
среднем увеличит WA.
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
стандартном 128 КБ блоке.
- name: bitmap_granularity
type: int
default: 4096

View File

@@ -204,77 +204,3 @@
Клиентам не обязательно знать про disk_alignment, так что помещать значение
этого параметра в etcd в /vitastor/config/global не нужно.
- name: data_csum_type
type: string
default: none
info: |
Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
enable data checksums.
info_ru: |
Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
Следует понимать, что контрольные суммы в зависимости от размера блока их
расчёта либо увеличивают потребление памяти, либо снижают производительность.
Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
- name: csum_block_size
type: int
default: 4096
info: |
Checksum calculation block size.
Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
(which is usually 4 KB).
Checksums increase metadata size by 4 bytes per each csum_block_size of data.
Checksums are always a compromise:
1. You either sacrifice +1 GB RAM per 1 TB of data
2. Or you raise csum_block_size, for example, to 32k and sacrifice
50% random write iops due to checksum read-modify-write
3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
sacrifice 50% random read iops due to checksum reads
Option 1 (default) is recommended for all-flash setups because these usually
have enough RAM.
Option 2 is recommended for HDD-only setups. HDD-only setups usually do NOT
have enough RAM for the default 4 KB csum_block_size.
Option 3 is recommended for SSD+HDD setups (because metadata SSDs will handle
extra reads without any performance drop) and also *maybe* for NVMe all-flash
setups when you don't have enough RAM (because NVMe drives have plenty
of read iops to spare). You may also consider enabling
[cached_read_meta](osd.en.md#cached_read_meta) in this case.
info_ru: |
Размер блока расчёта контрольных сумм.
Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
(который обычно равен 4 КБ).
Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
csum_block_size данных.
Контрольные суммы - это всегда компромисс:
1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
новых контрольных сумм
3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
с диска
Вариант 1 (при настройках по умолчанию) рекомендуется для SSD (All-Flash)
кластеров, потому что памяти в них обычно хватает.
Вариант 2 рекомендуется для кластеров на одних жёстких дисках (без SSD
под метаданные). На 4 кб блок контрольной суммы памяти в таких кластерах
обычно НЕ хватает.
Вариант 3 рекомендуется для гибридных кластеров (SSD+HDD), потому что
скорости SSD под метаданными хватит, чтобы обработать дополнительные чтения
без снижения производительности. Также вариант 3 *может* рекомендоваться
для All-Flash кластеров на основе NVMe-дисков, когда памяти НЕ достаточно,
потому что NVMe-диски имеют огромный запас производительности по чтению.
В таких случаях, возможно, также имеет смысл включать параметр
[cached_read_meta](osd.ru.md#cached_read_meta).

View File

@@ -14,7 +14,6 @@ const L = {
toc_config: '[Configuration](../config.en.md)',
toc_usage: 'Usage',
toc_performance: 'Performance',
online: 'Can be changed online: yes',
},
ru: {
Documentation: 'Документация',
@@ -29,7 +28,6 @@ const L = {
toc_config: '[Конфигурация](../config.ru.md)',
toc_usage: 'Использование',
toc_performance: 'Производительность',
online: 'Можно менять на лету: да',
},
};
const types = {
@@ -72,8 +70,6 @@ for (const file of params_files)
out += `- ${L[lang]['Default'] || 'Default'}: ${c.default}\n`;
if (c.min !== undefined)
out += `- ${L[lang]['Minimum'] || 'Minimum'}: ${c.min}\n`;
if (c.online)
out += `- ${L[lang]['online'] || 'Can be changed online: yes'}\n`;
out += `\n`+(c["info_"+lang] || c["info"]).replace(/\s+$/, '');
}
const head = fs.readFileSync(__dirname+'/'+file+'.'+lang+'.md', { encoding: 'utf-8' });

View File

@@ -164,21 +164,18 @@
type: sec
min: 1
default: 5
online: true
info: Interval before attempting to reconnect to an unavailable OSD.
info_ru: Время ожидания перед повторной попыткой соединиться с недоступным OSD.
- name: peer_connect_timeout
type: sec
min: 1
default: 5
online: true
info: Timeout for OSD connection attempts.
info_ru: Максимальное время ожидания попытки соединения с OSD.
- name: osd_idle_timeout
type: sec
min: 1
default: 5
online: true
info: |
OSD connection inactivity time after which clients and other OSDs send
keepalive requests to check state of the connection.
@@ -189,7 +186,6 @@
type: sec
min: 1
default: 5
online: true
info: |
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
within this time, the connection to it is dropped and a reconnection attempt
@@ -202,7 +198,6 @@
type: ms
min: 50
default: 500
online: true
info: |
OSDs respond to clients with a special error code when they receive I/O
requests for a PG that's not synchronized and started. This parameter sets
@@ -216,7 +211,6 @@
- name: max_etcd_attempts
type: int
default: 5
online: true
info: |
Maximum number of attempts for etcd requests which can't be retried
indefinitely.
@@ -226,7 +220,6 @@
- name: etcd_quick_timeout
type: ms
default: 1000
online: true
info: |
Timeout for etcd requests which should complete quickly, like lease refresh.
info_ru: |
@@ -235,7 +228,6 @@
- name: etcd_slow_timeout
type: ms
default: 5000
online: true
info: Timeout for etcd requests which are allowed to wait for some time.
info_ru: |
Максимальное время выполнения запросов к etcd, для которых не обязательно
@@ -243,7 +235,6 @@
- name: etcd_keepalive_timeout
type: sec
default: max(30, etcd_report_interval*2)
online: true
info: |
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
etcd_report_interval to guarantee that keepalive actually works.
@@ -253,7 +244,6 @@
- name: etcd_ws_keepalive_timeout
type: sec
default: 30
online: true
info: |
etcd websocket ping interval required to keep the connection alive and
detect disconnections quickly.
@@ -262,7 +252,6 @@
- name: client_dirty_limit
type: int
default: 33554432
online: true
info: |
Without immediate_commit=all this parameter sets the limit of "dirty"
(not committed by fsync) data allowed by the client before forcing an

View File

@@ -1,5 +1,4 @@
# Runtime OSD Parameters
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
initialization and can be changed - either with an OSD restart or, for some of
them, even without restarting by updating configuration in etcd.
initialization and can be changed with an OSD restart.

View File

@@ -2,5 +2,4 @@
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
изменения конфигурации в etcd.
момент с перезапуском OSD.

View File

@@ -66,7 +66,6 @@
- name: autosync_interval
type: sec
default: 5
online: true
info: |
Time interval at which automatic fsyncs/flushes are issued by each OSD when
the immediate_commit mode if disabled. fsyncs are required because without
@@ -84,7 +83,6 @@
- name: autosync_writes
type: int
default: 128
online: true
info: |
Same as autosync_interval, but sets the maximum number of uncommitted write
operations before issuing an fsync operation internally.
@@ -95,7 +93,6 @@
- name: recovery_queue_depth
type: int
default: 4
online: true
info: |
Maximum recovery operations per one primary OSD at any given moment of time.
Currently it's the only parameter available to tune the speed or recovery
@@ -108,7 +105,6 @@
- name: recovery_pg_switch
type: int
default: 128
online: true
info: |
Number of recovery operations before switching to recovery of the next PG.
The idea is to mix all PGs during recovery for more even space and load
@@ -123,7 +119,6 @@
- name: recovery_sync_batch
type: int
default: 16
online: true
info: Maximum number of recovery operations before issuing an additional fsync.
info_ru: Максимальное число операций восстановления перед дополнительным fsync.
- name: readonly
@@ -138,7 +133,6 @@
- name: no_recovery
type: bool
default: false
online: true
info: |
Disable automatic background recovery of objects. Note that it doesn't
affect implicit recovery of objects happening during writes - a write is
@@ -151,7 +145,6 @@
- name: no_rebalance
type: bool
default: false
online: true
info: |
Disable background movement of data between different OSDs. Disabling it
means that PGs in the `has_misplaced` state will be left in it indefinitely.
@@ -162,7 +155,6 @@
- name: print_stats_interval
type: sec
default: 3
online: true
info: |
Time interval at which OSDs print simple human-readable operation
statistics on stdout.
@@ -172,7 +164,6 @@
- name: slow_log_interval
type: sec
default: 10
online: true
info: |
Time interval at which OSDs dump slow or stuck operations on stdout, if
they're any. Also it's the time after which an operation is considered
@@ -184,7 +175,6 @@
- name: inode_vanish_time
type: sec
default: 60
online: true
info: |
Number of seconds after which a deleted inode is removed from OSD statistics.
info_ru: |
@@ -192,7 +182,6 @@
- name: max_write_iodepth
type: int
default: 128
online: true
info: |
Parallel client write operation limit per one OSD. Operations that exceed
this limit are pushed to a temporary queue instead of being executed
@@ -204,7 +193,6 @@
- name: min_flusher_count
type: int
default: 1
online: true
info: |
Flusher is a micro-thread that moves data from the journal to the data
area of the device. Their number is auto-tuned between minimum and maximum.
@@ -216,7 +204,6 @@
- name: max_flusher_count
type: int
default: 256
online: true
info: |
Maximum number of journal flushers (see above min_flusher_count).
info_ru: |
@@ -260,70 +247,6 @@
достаточно 16- или 32-мегабайтного журнала. Однако в теории отключение
параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
журналами, расположенными на быстром по сравнению с HDD устройстве.
- name: cached_read_data
type: bool
default: false
info: |
Read data through Linux page cache, i.e. use a file descriptor opened without
O_DIRECT for data reads. May improve read performance for frequently accessed
data if it fits in RAM. Memory in page cache is shared by all processes and
not accounted in OSD memory consumption.
info_ru: |
Читать данные через системный кэш Linux (page cache), то есть, использовать
для чтения данных файловый дескриптор, открытый без флага O_DIRECT. Может
улучшить производительность чтения для часто используемых данных, если они
помещаются в память. Память кэша разделяется между всеми процессами в
системе и не учитывается в потреблении памяти процессом OSD.
- name: cached_read_meta
type: bool
default: false
info: |
Read metadata through Linux page cache. May be beneficial when checksums
are enabled and [inmemory_metadata](#inmemory_metadata) is disabled, because
in this case metadata blocks are read from disk to verify checksums on every
read request and caching them may reduce this extra read load.
Absolutely pointless to enable with enabled inmemory_metadata because all
metadata is kept in memory anyway, and likely pointless without checksums,
because in that case, metadata blocks are read from disk only during journal
flushing.
If the same device is used for data and metadata, enabling [cached_read_data](#cached_read_data)
also enables this parameter, given that it isn't turned off explicitly.
info_ru: |
Читать метаданные через системный кэш Linux. Может быть полезно, когда
включены контрольные суммы, а параметр [inmemory_metadata](#inmemory_metadata)
отключён, так как в этом случае блоки метаданных читаются с диска при каждом
запросе чтения для проверки контрольных сумм и их кэширование может снизить
дополнительную нагрузку на диск.
Абсолютно бессмысленно включать данный параметр, если параметр
inmemory_metadata включён (по умолчанию это так), и также вероятно
бессмысленно включать его, если не включены контрольные суммы, так как в
этом случае блоки метаданных читаются с диска только во время сброса
журнала.
Если одно и то же устройство используется для данных и метаданных, включение
[cached_read_data](#cached_read_data) также включает данный параметр, при
условии, что он не отключён явным образом.
- name: cached_read_journal
type: bool
default: false
info: |
Read buffered data from journal through Linux page cache. Does not have sense
without disabling [inmemory_journal](#inmemory_journal), which, again, is
enabled by default.
If the same device is used for metadata and journal, enabling [cached_read_meta](#cached_read_meta)
also enables this parameter, given that it isn't turned off explicitly.
info_ru: |
Читать буферизованные в журнале данные через системный кэш Linux. Не имеет
смысла без отключения параметра [inmemory_journal](#inmemory_journal),
который, опять же, по умолчанию включён.
Если одно и то же устройство используется для метаданных и журнала,
включение [cached_read_meta](#cached_read_meta) также включает данный
параметр, при условии, что он не отключён явным образом.
- name: journal_sector_buffer_count
type: int
default: 32
@@ -361,7 +284,6 @@
- name: throttle_small_writes
type: bool
default: false
online: true
info: |
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
with fast journal/metadata devices and slow data devices. The idea is that
@@ -390,7 +312,6 @@
- name: throttle_target_iops
type: int
default: 100
online: true
info: |
Target maximum number of throttled operations per second under the condition
of full journal. Set it to approximate random write iops of your data devices
@@ -403,7 +324,6 @@
- name: throttle_target_mbs
type: int
default: 100
online: true
info: |
Target maximum bandwidth in MB/s of throttled operations per second under
the condition of full journal. Set it to approximate linear write
@@ -416,7 +336,6 @@
- name: throttle_target_parallelism
type: int
default: 1
online: true
info: |
Target maximum parallelism of throttled operations under the condition of
full journal. Set it to approximate internal parallelism of your data
@@ -429,7 +348,6 @@
- name: throttle_threshold_us
type: us
default: 50
online: true
info: |
Minimal computed delay to be applied to throttled operations. Usually
doesn't need to be changed.
@@ -439,151 +357,10 @@
- name: osd_memlock
type: bool
default: false
info: |
info: >
Lock all OSD memory to prevent it from being unloaded into swap with
mlockall(). Requires sufficient ulimit -l (max locked memory).
info_ru: |
info_ru: >
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
заблокированной памяти).
- name: auto_scrub
type: bool
default: false
online: true
info: |
Data scrubbing is the process of background verification of copies to find
and repair corrupted blocks. It's not run automatically by default since
it's a new feature. Set this parameter to true to enable automatic scrubs.
This parameter makes OSDs automatically schedule data scrubbing of clean PGs
every `scrub_interval` (see below). You can also start/schedule scrubbing
manually by setting `next_scrub` JSON key to the desired UNIX time of the
next scrub in `/pg/history/...` values in etcd.
info_ru: |
Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
запускаются автоматически, так как являются новой функцией. Чтобы включить
автоматическое планирование скрабов, установите данный параметр в true.
Включённый параметр заставляет OSD автоматически планировать фоновую
проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
запустить или запланировать проверку вручную, установив значение ключа JSON
`next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
желаемой проверки.
- name: no_scrub
type: bool
default: false
online: true
info: |
Temporarily disable scrubbing and stop running scrubs.
info_ru: |
Временно отключить и остановить запущенные скрабы.
- name: scrub_interval
type: string
default: 30d
online: true
info: |
Default automatic scrubbing interval for all pools. Numbers without suffix
are treated as seconds, possible unit suffixes include 's' (seconds),
'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
info_ru: |
Интервал автоматической фоновой проверки по умолчанию для всех пулов.
Значения без указанной единицы измерения считаются в секундах, допустимые
символы единиц измерения в конце: 's' (секунды),
'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяца) или 'y' (годы).
- name: scrub_queue_depth
type: int
default: 1
online: true
info: |
Number of parallel scrubbing operations per one OSD.
info_ru: |
Число параллельных операций фоновой проверки на один OSD.
- name: scrub_sleep
type: ms
default: 0
online: true
info: |
Additional interval between two consecutive scrubbing operations on one OSD.
Can be used to slow down scrubbing if it affects user load too much.
info_ru: |
Дополнительный интервал ожидания после фоновой проверки каждого объекта на
одном OSD. Может использоваться для замедления скраба, если он слишком
сильно влияет на пользовательскую нагрузку.
- name: scrub_list_limit
type: int
default: 1000
online: true
info: |
Number of objects to list in one listing operation during scrub.
info_ru: |
Размер загружаемых за одну операцию списков объектов в процессе фоновой
проверки.
- name: scrub_find_best
type: bool
default: true
online: true
info: |
Find and automatically restore best versions of objects with unmatched
copies. In replicated setups, the best version is the version with most
matching replicas. In EC setups, the best version is the subset of data
and parity chunks without mismatches.
The hypothetical situation where you might want to disable it is when
you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
corrupt an object in the same way (for example, zero it out) and only
1 HDD will remain good. In this case disabling scrub_find_best may help
you to recover the data! See also scrub_ec_max_bruteforce below.
info_ru: |
Находить и автоматически восстанавливать "лучшие версии" объектов с
несовпадающими копиями/частями. При использовании репликации "лучшая"
версия - версия, доступная в большем числе экземпляров, чем другие. При
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
частей данных и чётности, полностью соответствующих друг другу.
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
незаметно и одинаково повредить данные одного и того же объекта, например,
занулив его, и только 1 диск останется неповреждённым. В этой ситуации
отключение этого параметра поможет вам восстановить данные! Смотрите также
описание следующего параметра - scrub_ec_max_bruteforce.
- name: scrub_ec_max_bruteforce
type: int
default: 100
online: true
info: |
Vitastor can locate corrupted chunks in EC setups with more than 1 parity
chunk by brute-forcing all possible error locations. This configuration
value limits the maximum number of checked combinations. You can try to
increase it if you have EC N+K setup with N and K large enough for
combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
than the default 100.
If there are too many possible combinations or if multiple combinations give
correct results then objects are marked inconsistent and aren't recovered
automatically.
In replicated setups bruteforcing isn't needed, Vitastor just assumes that
the variant with most available equal copies is correct. For example, if
you have 3 replicas and 1 of them differs, this one is considered to be
corrupted. But if there is no "best" version with more copies than all
others have then the object is also marked as inconsistent.
info_ru: |
Vitastor старается определить повреждённые части объектов при использовании
EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
всех возможных комбинаций ошибочных частей. Данное значение конфигурации
ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
его, если используете схему кодирования EC N+K с N и K, достаточно большими
для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
было больше, чем стандартное значение 100.
Если возможных комбинаций слишком много или если корректная комбинаций не
определяется однозначно, объекты помечаются неконсистентными (inconsistent)
и не восстанавливаются автоматически.
При использовании репликации перебор не нужен, Vitastor просто предполагает,
что вариант объекта с наибольшим количеством одинаковых копий корректен.
Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
считается некорректной. Однако, если "лучшую" версию с числом доступных
копий большим, чем у всех других версий, найти невозможно, то объект тоже
маркируется неконсистентным.

View File

@@ -8,13 +8,13 @@
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
Для установки возьмите манифесты из директории [csi/deploy/](../csi/deploy/), поместите
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../csi/deploy/001-csi-config-map.yaml),
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../csi/deploy/009-storage-class.yaml)
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
```
for i in ./???-*.yaml; do kubectl apply -f $i; done
```
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../../csi/deploy/example-pvc.yaml).
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../csi/deploy/example-pvc.yaml).

View File

@@ -36,5 +36,5 @@ vitastor_pool_id = 1
image_upload_use_cinder_backend = True
```
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
To put Glance images in Vitastor, use [https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](volume-backed images),
although the support has not been verified yet.

View File

@@ -36,5 +36,5 @@ image_upload_use_cinder_backend = True
```
Чтобы помещать в Vitastor Glance-образы, нужно использовать
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
[https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](образы на основе томов Cinder),
однако, поддержка этой функции ещё не проверялась.

View File

@@ -11,8 +11,7 @@
- Trust Vitastor package signing key:
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
- Add Vitastor package repository to your /etc/apt/sources.list:
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- For Debian 10 (Buster) also enable backports repository:
`deb http://deb.debian.org/debian buster-backports main`
@@ -23,16 +22,13 @@
- Add Vitastor package repository:
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
- AlmaLinux 9 and other RHEL 9 clones (Rocky, Oracle...): `dnf install https://vitastor.io/rpms/centos/9/vitastor-release.rpm`
- Enable EPEL: `yum/dnf install epel-release`
- Enable additional CentOS repositories:
- CentOS 7: `yum install centos-release-scl`
- CentOS 8: `dnf install centos-release-advanced-virtualization`
- RHEL 9 clones: not required
- Enable elrepo-kernel:
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
- RHEL 9 clones: `dnf install https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm`
- Install packages: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
## Installation requirements
@@ -46,10 +42,3 @@
- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs,
for example [#12402](https://github.com/etcd-io/etcd/pull/12402).
- node.js 10 or newer
## Version archive
All previous Vitastor and other components (QEMU, etcd...) package builds
can be found here:
https://vitastor.io/archive/

View File

@@ -11,8 +11,7 @@
- Добавьте ключ репозитория Vitastor:
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
- Для Debian 10 (Buster) также включите репозиторий backports:
`deb http://deb.debian.org/debian buster-backports main`
@@ -23,16 +22,13 @@
- Добавьте в систему репозиторий Vitastor:
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
- AlmaLinux 9 и другие клоны RHEL 9 (Rocky, Oracle...): `dnf install https://vitastor.io/rpms/centos/9/vitastor-release.rpm`
- Включите EPEL: `yum/dnf install epel-release`
- Включите дополнительные репозитории CentOS:
- CentOS 7: `yum install centos-release-scl`
- CentOS 8: `dnf install centos-release-advanced-virtualization`
- Клоны RHEL 9: не нужно
- Включите elrepo-kernel:
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
- Клоны RHEL 9: `dnf install https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm`
- Установите пакеты: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
## Установочные требования
@@ -45,10 +41,3 @@
- etcd 3.4.15 или новее. Более старые версии не будут работать из-за разных багов,
например, [#12402](https://github.com/etcd-io/etcd/pull/12402).
- node.js 10 или новее
## Архив предыдущих версий
Все предыдущие сборки пакетов Vitastor и других компонентов, таких, как QEMU
и etcd, можно скачать по следующей ссылке:
https://vitastor.io/archive/

View File

@@ -6,10 +6,10 @@
# Proxmox VE
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.0 are supported):
To enable Vitastor support in Proxmox Virtual Environment (6.4-7.3 are supported):
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
bookworm for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
buster for 6.4, bullseye for 7.3, pve7.1 for 7.1, pve7.2 for 7.2
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
- Define storage in `/etc/pve/storage.cfg` (see below)
- Block network access from VMs to Vitastor network (to OSDs and etcd),
@@ -35,5 +35,5 @@ vitastor: vitastor
vitastor_nbd 0
```
\* Note: you can also manually copy [patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) to Proxmox hosts
\* Note: you can also manually copy [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to Proxmox hosts
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.

View File

@@ -1,15 +1,15 @@
[Документация](../../README-ru.md#документация) → Установка → Proxmox VE
[Документация](../../README-ru.md#документация) → Установка → Proxmox
-----
[Read in English](proxmox.en.md)
# Proxmox VE
# Proxmox
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.0):
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.3):
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
bookworm для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
buster для 6.4, bullseye для 7.3, pve7.1 для 7.1, pve7.2 для 7.2
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
@@ -35,5 +35,5 @@ vitastor: vitastor
```
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
[patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) на хосты Proxmox как
[patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) на хосты Proxmox как
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.

View File

@@ -44,7 +44,7 @@
depends linearly on drive capacity and data store block size which is 128 KB by default.
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled.
clusters, where it's beneficial to increase it, [inmemory_journal](docs/config/osd.en.md#inmemory_journal) can be disabled.
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
it's possible to create a good copy-on-write storage, but it's much harder and makes performance
less deterministic, so CoW isn't used in Vitastor.

View File

@@ -156,7 +156,7 @@
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal).
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../docs/config/osd.ru.md#inmemory_journal).
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.

View File

@@ -29,14 +29,12 @@
- Snapshots and copy-on-write image clones
- [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
- [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
- [Scrubbing without checksums](../config/osd.en.md#auto_scrub) (verification of copies)
- [Checksums](../config/layout-osd.en.md#data_csum_type)
## Plugins and tools
- [Debian and CentOS packages](../installation/packages.en.md)
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
- [Disk management CLI (vitastor-disk)](docs/usage/disk.en.md)
- Generic user-space client library
- [Native QEMU driver](../usage/qemu.en.md)
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
@@ -56,6 +54,8 @@ The following features are planned for the future:
- iSCSI proxy
- Multi-threaded client
- Faster failover
- Scrubbing without checksums (verification of replicas)
- Checksums
- Tiered storage (SSD caching)
- NVDIMM support
- Compression (possibly)

View File

@@ -13,7 +13,7 @@
## Серверные функции
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
- [Производительность](../performance/comparison1.ru.md) ;-D
- [Производительность](../comparison1.ru.md) ;-D
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
@@ -31,14 +31,12 @@
- Снапшоты и copy-on-write клоны
- [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
- [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
- [Фоновая проверка целостности без контрольных сумм](../config/osd.ru.md#auto_scrub) (сверка копий)
- [Контрольные суммы](../config/layout-osd.ru.md#data_csum_type)
## Драйверы и инструменты
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
- [Инструмент управления дисками (vitastor-disk)](docs/usage/disk.ru.md)
- Общая пользовательская клиентская библиотека для работы с кластером
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
@@ -56,6 +54,8 @@
- iSCSI-прокси
- Многопоточный клиент
- Более быстрое переключение при отказах
- Фоновая проверка целостности без контрольных сумм (сверка реплик)
- Контрольные суммы
- Поддержка SSD-кэширования (tiered storage)
- Поддержка NVDIMM
- Возможно, сжатие

View File

@@ -7,7 +7,6 @@
# Quick Start
- [Preparation](#preparation)
- [Recommended drives](#recommended-drives)
- [Configure monitors](#configure-monitors)
- [Configure OSDs](#configure-osds)
- [Create a pool](#create-a-pool)
@@ -20,20 +19,10 @@
- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
[here](../config/layout-cluster.en.md#immediate_commit).
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Install Vitastor packages](../installation/packages.en.md).
## Recommended drives
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
## Configure monitors
On the monitor hosts:
@@ -56,10 +45,7 @@ On the monitor hosts:
}
```
- Initialize OSDs:
- SSD-only or HDD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
Add `--disable_data_fsync off` to leave disk write cache enabled if you use
desktop SSDs without capacitors. Do NOT add `--disable_data_fsync off` if you
use HDDs or SSD+HDD.
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
Pass all your devices (HDD and SSD) to this script &mdash; it will partition disks and initialize journals on its own.
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
@@ -67,9 +53,7 @@ On the monitor hosts:
but some free unpartitioned space must be available because the script creates new partitions for journals.
- You can change OSD configuration in units or in `vitastor.conf`.
Check [Configuration Reference](../config.en.md) for parameter descriptions.
- If all your drives have capacitors, and even if not, but if you ran `vitastor-disk`
without `--disable_data_fsync off` at the first step, then put the following
setting into etcd: \
- If all your drives have capacitors, create global configuration in etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
- Start all OSDs: `systemctl start vitastor.target`
@@ -86,15 +70,11 @@ For EC pools the configuration should look like the following:
```
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}'
```
After you do this, one of the monitors will configure PGs and OSDs will start them.
If you use HDDs you should also add `"block_size": 1048576` to pool configuration.
The other option is to add it into /vitastor/config/global, in this case it will
apply to all pools by default.
## Check cluster status
`vitastor-cli status`

View File

@@ -7,7 +7,6 @@
# Быстрый старт
- [Подготовка](#подготовка)
- [Рекомендуемые диски](#рекомендуемые-диски)
- [Настройте мониторы](#настройте-мониторы)
- [Настройте OSD](#настройте-osd)
- [Создайте пул](#создайте-пул)
@@ -20,20 +19,10 @@
- Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
- Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
- Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
- [Установите пакеты Vitastor](../installation/packages.ru.md).
## Рекомендуемые диски
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
## Настройте мониторы
На хостах, выделенных под мониторы:
@@ -56,10 +45,7 @@
}
```
- Инициализуйте OSD:
- Только SSD или только HDD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
Если вы используете десктопные SSD без конденсаторов, добавьте опцию `--disable_data_fsync off`,
чтобы оставить кэш записи диска включённым. НЕ добавляйте эту опцию, если используете
жёсткие диски (HDD).
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
@@ -68,11 +54,8 @@
для журналов, на SSD должно быть доступно свободное нераспределённое место.
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
смотрите в [справке по конфигурации](../config.ru.md).
- Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
в глобальную конфигурацию в etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
- Если все ваши диски - серверные с конденсаторами, пропишите это в глобальную конфигурацию в etcd: \
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
- Запустите все OSD: `systemctl start vitastor.target`
## Создайте пул
@@ -88,15 +71,11 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
```
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}'
```
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.
Также эту опцию можно добавить в /vitastor/config/global, в этом случае она будет
применяться ко всем пулам по умолчанию.
## Проверьте состояние кластера
`vitastor-cli status`

View File

@@ -20,8 +20,6 @@ It supports the following commands:
- [flatten](#flatten)
- [rm-data](#rm-data)
- [merge-data](#merge-data)
- [describe](#describe)
- [fix](#fix)
- [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd)
@@ -176,51 +174,6 @@ Merge layer data without changing metadata. Merge `<from>`..`<to>` to `<target>`
`<to>` must be a child of `<from>` and `<target>` may be one of the layers between
`<from>` and `<to>`, including `<from>` and `<to>`.
## describe
`vitastor-cli describe [--osds <osds>] [--object-state <states>] [--pool <pool>]
[--inode <ino>] [--min-inode <ino>] [--max-inode <ino>]
[--min-offset <offset>] [--max-offset <offset>]`
Describe unclean object locations in the cluster.
```
--osds <osds>
Only list objects from primary OSD(s) <osds>.
--object-state <states>
Only list objects in given state(s). State(s) may include:
degraded, misplaced, incomplete, corrupted, inconsistent.
--pool <pool name or number>
Only list objects in the given pool.
--inode, --min-inode, --max-inode
Restrict listing to specific inode numbers.
--min-offset, --max-offset
Restrict listing to specific offsets inside inodes.
```
## fix
`vitastor-cli fix [--objects <objects>] [--bad-osds <osds>] [--part <part>] [--check no]`
Fix inconsistent objects in the cluster by deleting some copies.
```
--objects <objects>
Objects to fix, either in plain text or JSON format. If not specified,
object list will be read from STDIN in one of the same formats.
Plain text format: 0x<inode>:0x<stripe> <any delimiter> 0x<inode>:0x<stripe> ...
JSON format: [{"inode":"0x...","stripe":"0x..."},...]
--bad-osds <osds>
Remove inconsistent copies/parts of objects from these OSDs, effectively
marking them bad and allowing Vitastor to recover objects from other copies.
--part <number>
Only remove EC part <number> (from 0 to pg_size-1), required for extreme
edge cases where one OSD has multiple parts of a EC object.
--check no
Do not recheck that requested objects are actually inconsistent,
delete requested copies/parts anyway.
```
## alloc-osd
`vitastor-cli alloc-osd`

View File

@@ -184,59 +184,6 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
в целевой образ `<target>`. `<to>` должен быть дочерним образом `<from>`, а `<target>`
должен быть одним из слоёв между `<from>` и `<to>`, включая сами `<from>` и `<to>`.
## describe
`vitastor-cli describe [--osds <osds>] [--object-state <состояния>] [--pool <пул>]
[--inode <номер>] [--min-inode <номер>] [--max-inode <номер>]
[--min-offset <смещение>] [--max-offset <смещение>]`
Описать состояние "грязных" объектов в кластере, то есть таких объектов, копии
или части которых хранятся на наборе OSD, не равном целевому.
```
--osds <osds>
Перечислять только объекты с первичных OSD из списка <osds>.
--object-state <состояния>
Перечислять только объекты в указанных состояниях. Возможные состояния
объектов:
- degraded - деградированная избыточность
- misplaced - перемещённый
- incomplete - нечитаемый из-за потери большего числа частей, чем допустимо
- corrupted - с одной или более повреждённой частью
- inconsistent - неконсистентный, с неоднозначным расхождением копий/частей
--pool <имя или ID пула>
Перечислять только объекты из заданного пула.
--inode, --min-inode, --max-inode
Перечислять только объекты из указанных номеров инодов (образов).
--min-offset, --max-offset
Перечислять только объекты с заданных смещений внутри образов.
```
## fix
`vitastor-cli fix [--objects <объекты>] [--bad-osds <osds>] [--part <номер>] [--check no]`
Исправить неконсистентные (неоднозначные) объекты путём удаления части копий.
```
--objects <объекты>
Объекты для исправления - в простом текстовом или JSON формате. Если опция
не указана, список объектов читается со стандартного ввода в тех же форматах.
Простой формат: 0x<инод>:0x<смещение> <любой разделитель> 0x<инод>:0x<смещение> ...
Формат JSON: [{"inode":"0x<инод>","stripe":"0x<смещение>"},...]
--bad-osds <osds>
Удалить неконсистентные копии/части объектов с данных OSD, таким образом
признавая потерю этих копий и позволяя Vitastor-у восстановить объекты из
других копий.
--part <номер>
Удалить только части EC с заданным номером (от 0 до pg_size-1). Нужно только
в редких граничных случаях, когда один и тот же OSD содержит несколько частей
одного EC-объекта.
--check no
Не перепроверять, что заданные объекты действительно в неконсистентном
состоянии и просто удалять заданные части.
```
## alloc-osd
`vitastor-cli alloc-osd`

View File

@@ -86,8 +86,6 @@ Options (both modes):
--journal_size 1G/32M Set journal size (area or partition size)
--block_size 1M/128k Set blockstore object size
--bitmap_granularity 4k Set bitmap granularity
--data_csum_type none Set data checksum type (crc32c or none)
--csum_block_size 4k Set data checksum block size
--data_device_block 4k Override data device block size
--meta_device_block 4k Override metadata device block size
--journal_device_block 4k Override journal device block size
@@ -102,9 +100,8 @@ checks the device cache status on start and tries to disable cache for SATA/SAS
If it doesn't succeed it issues a warning in the system log.
You can also pass other OSD options here as arguments and they'll be persisted
in the superblock: cached_read_data, cached_read_meta, cached_read_journal,
inmemory_metadata, inmemory_journal, max_write_iodepth,
min_flusher_count, max_flusher_count, journal_sector_buffer_count,
to the superblock: max_write_iodepth, max_write_iodepth, min_flusher_count,
max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,
journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,
throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
See [Runtime OSD Parameters](../config/osd.en.md) for details.
@@ -252,9 +249,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
```
--object_size 128k Set blockstore block size
--bitmap_granularity 4k Set bitmap granularity
--journal_size 16M Set journal size
--data_csum_type none Set data checksum type (crc32c or none)
--csum_block_size 4k Set data checksum block size
--journal_size 32M Set journal size
--device_block_size 4k Set device block size
--journal_offset 0 Set journal offset
--device_size 0 Set device size

View File

@@ -87,8 +87,6 @@ vitastor-disk - инструмент командной строки для уп
--journal_size 1G/32M Задать размер журнала (области или раздела журнала)
--block_size 1M/128k Задать размер объекта хранилища
--bitmap_granularity 4k Задать гранулярность битовых карт
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
--data_device_block 4k Задать размер блока устройства данных
--meta_device_block 4k Задать размер блока метаданных
--journal_device_block 4k Задать размер блока журнала
@@ -103,9 +101,8 @@ vitastor-disk - инструмент командной строки для уп
это не удаётся, в системный журнал выводится предупреждение.
Вы можете передать данной команде и некоторые другие опции OSD в качестве аргументов
и они тоже будут сохранены в суперблок: cached_read_data, cached_read_meta,
cached_read_journal, inmemory_metadata, inmemory_journal, max_write_iodepth,
min_flusher_count, max_flusher_count, journal_sector_buffer_count,
и они тоже будут сохранены в суперблок: max_write_iodepth, max_write_iodepth, min_flusher_count,
max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,
journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,
throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
Читайте об этих параметрах подробнее в разделе [Изменяемые параметры OSD](../config/osd.ru.md).
@@ -257,9 +254,7 @@ OSD отключены fsync-и.
```
--object_size 128k Размер блока хранилища
--bitmap_granularity 4k Гранулярность битовых карт
--journal_size 16M Размер журнала
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
--journal_size 32M Размер журнала
--device_block_size 4k Размер блока устройства
--journal_offset 0 Смещение журнала
--device_size 0 Размер устройства

View File

@@ -13,8 +13,6 @@ remains decent (see an example [here](../performance/comparison1.en.md#vitastor-
Vitastor Kubernetes CSI driver is based on NBD.
See also [VDUSE](qemu.en.md#vduse).
## Map image
To create a local block device for a Vitastor image run:
@@ -27,23 +25,6 @@ It will output a block device name like /dev/nbd0 which you can then use as a no
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
Additional options for map command:
* `--nbd_timeout 30` \
Timeout for I/O operations in seconds after exceeding which the kernel stops
the device. You can set it to 0 to disable the timeout, but beware that you
won't be able to stop the device at all if vitastor-nbd process dies.
* `--nbd_max_devices 64 --nbd_max_part 3` \
Options for the `nbd` kernel module when modprobing it (`nbds_max` and `max_part`).
note that maximum allowed (nbds_max)*(1+max_part) is 256.
* `--logfile /path/to/log/file.txt` \
Write log messages to the specified file instead of dropping them (in background mode)
or printing them to the standard output (in foreground mode).
* `--dev_num N` \
Use the specified device /dev/nbdN instead of automatic selection.
* `--foreground 1` \
Stay in foreground, do not daemonize.
## Unmap image
To unmap the device run:
@@ -51,27 +32,3 @@ To unmap the device run:
```
vitastor-nbd unmap /dev/nbd0
```
## List mapped images
```
vitastor-nbd ls [--json]
```
Example output (normal format):
```
/dev/nbd0
image: bench
pid: 584536
/dev/nbd1
image: bench1
pid: 584546
```
Example output (JSON format):
```
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
```

View File

@@ -16,8 +16,6 @@ NBD немного снижает производительность из-за
CSI-драйвер Kubernetes Vitastor основан на NBD.
Смотрите также [VDUSE](qemu.ru.md#vduse).
## Подключить устройство
Чтобы создать локальное блочное устройство для образа, выполните команду:
@@ -32,27 +30,6 @@ vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
Дополнительные опции для команды подключения NBD-устройства:
* `--nbd_timeout 30` \
Максимальное время выполнения любой операции чтения/записи в секундах, при
превышении которого ядро остановит NBD-устройство. Вы можете установить опцию
в 0, чтобы отключить ограничение времени, но имейте в виду, что в этом случае
вы вообще не сможете отключить NBD-устройство при нештатном завершении процесса
vitastor-nbd.
* `--nbd_max_devices 64 --nbd_max_part 3` \
Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd
(`nbds_max` и `max_part`). Имейте в виду, что (nbds_max)*(1+max_part)
обычно не должно превышать 256.
* `--logfile /path/to/log/file.txt` \
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
при фоновом режиме запуска или печати на стандартный вывод при запуске
в консоли с `--foreground 1`.
* `--dev_num N` \
Использовать заданное устройство `/dev/nbdN` вместо автоматического подбора.
* `--foreground 1` \
Не уводить процесс в фоновый режим.
## Отключить устройство
Для отключения устройства выполните:
@@ -60,27 +37,3 @@ vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg
```
vitastor-nbd unmap /dev/nbd0
```
## Вывести подключённые устройства
```
vitastor-nbd ls [--json]
```
Пример вывода в обычном формате:
```
/dev/nbd0
image: bench
pid: 584536
/dev/nbd1
image: bench1
pid: 584546
```
Пример вывода в JSON-формате:
```
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
```

View File

@@ -29,7 +29,7 @@ vitastor-nfs [--etcd_address ADDR] [ДРУГИЕ ОПЦИИ]
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
--pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
--pool <POOL> исползовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
--foreground 1 не уходить в фон после запуска
```

View File

@@ -83,43 +83,3 @@ qemu-img rebase -u -b '' testimg.qcow2
This can be used for backups. Just note that exporting an image that is currently being written to
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
on a live VM.
## VDUSE
Linux kernel, starting with version 5.15, supports a new interface for attaching virtual disks
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
exporting QEMU block devices over this protocol using qemu-storage-daemon.
VDUSE has the same problem as other FUSE-like interfaces in Linux: if a userspace process hangs,
for example, if it loses connectivity with Vitastor cluster - active processes doing I/O may
hang in the D state (uninterruptible sleep) and you won't be able to kill them even with kill -9.
In this case reboot will be the only way to remove VDUSE devices from system.
On the other hand, VDUSE is faster than [NBD](nbd.en.md), so you may prefer to use it if
performance is important for you. Approximate performance numbers:
direct fio benchmark - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
To try VDUSE you need at least Linux 5.15, built with VDUSE support
(CONFIG_VIRTIO_VDPA=m and CONFIG_VDPA_USER=m). Debian Linux kernels have these options
disabled by now, so if you want to try it on Debian, use a kernel from Ubuntu
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) or Proxmox.
Commands to attach Vitastor image as a VDUSE device:
```
modprobe vduse virtio-vdpa
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
vdpa dev add name test1 mgmtdev vduse
```
After running these commands /dev/vda device will appear in the system and you'll be able to
use it as a normal disk.
To remove the device:
```
vdpa dev del test1
kill <qemu-storage-daemon_process_PID>
```

View File

@@ -87,43 +87,3 @@ qemu-img rebase -u -b '' testimg.qcow2
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
## VDUSE
В Linux, начиная с версии ядра 5.15, доступен новый интерфейс для подключения виртуальных дисков
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
VDUSE страдает общей проблемой FUSE-подобных интерфейсов в Linux: если пользовательский процесс
подвиснет, например, если будет потеряна связь с кластером Vitastor - читающие/пишущие в кластер
процессы могут "залипнуть" в состоянии D (непрерываемый сон) и их будет невозможно убить даже
через kill -9. В этом случае удалить из системы устройство можно только перезагрузившись.
С другой стороны, VDUSE быстрее по сравнению с [NBD](nbd.ru.md), поэтому его может
быть предпочтительно использовать там, где производительность важнее. Порядок показателей:
прямое тестирование через fio - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
Чтобы использовать VDUSE, вам нужно ядро Linux версии хотя бы 5.15, собранное с поддержкой
VDUSE (CONFIG_VIRTIO_VDPA=m и CONFIG_VDPA_USER=m). В ядрах в Debian Linux поддержка пока
отключена - если хотите попробовать эту функцию на Debian, поставьте ядро из Ubuntu
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) или из Proxmox.
Команды для подключения виртуального диска через VDUSE:
```
modprobe vduse virtio-vdpa
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
vdpa dev add name test1 mgmtdev vduse
```
После этого в системе появится устройство /dev/vda, которое можно будет использовать как
обычный диск.
Для удаления устройства из системы:
```
vdpa dev del test1
kill <PID_процесса_qemu-storage-daemon>
```

View File

@@ -43,16 +43,16 @@ function finish_pg_history(merged_history)
merged_history.all_peers = Object.values(merged_history.all_peers);
}
function scale_pg_count(prev_pgs, real_prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
{
const old_pg_count = real_prev_pgs.length;
const old_pg_count = prev_pgs.length;
// Add all possibly intersecting PGs to the history of new PGs
if (!(new_pg_count % old_pg_count))
{
// New PG count is a multiple of old PG count
for (let i = 0; i < new_pg_count; i++)
{
add_pg_history(new_pg_history, i, real_prev_pgs, prev_pg_history, i % old_pg_count);
add_pg_history(new_pg_history, i, prev_pgs, prev_pg_history, i % old_pg_count);
finish_pg_history(new_pg_history[i]);
}
}
@@ -64,7 +64,7 @@ function scale_pg_count(prev_pgs, real_prev_pgs, prev_pg_history, new_pg_history
{
for (let j = 0; j < mul; j++)
{
add_pg_history(new_pg_history, i, real_prev_pgs, prev_pg_history, i+j*new_pg_count);
add_pg_history(new_pg_history, i, prev_pgs, prev_pg_history, i+j*new_pg_count);
}
finish_pg_history(new_pg_history[i]);
}
@@ -76,7 +76,7 @@ function scale_pg_count(prev_pgs, real_prev_pgs, prev_pg_history, new_pg_history
let merged_history = {};
for (let i = 0; i < old_pg_count; i++)
{
add_pg_history(merged_history, 1, real_prev_pgs, prev_pg_history, i);
add_pg_history(merged_history, 1, prev_pgs, prev_pg_history, i);
}
finish_pg_history(merged_history[1]);
for (let i = 0; i < new_pg_count; i++)
@@ -90,15 +90,15 @@ function scale_pg_count(prev_pgs, real_prev_pgs, prev_pg_history, new_pg_history
new_pg_history[i] = null;
}
// Just for the lp_solve optimizer - pick a "previous" PG for each "new" one
if (prev_pgs.length < new_pg_count)
if (old_pg_count < new_pg_count)
{
for (let i = prev_pgs.length; i < new_pg_count; i++)
for (let i = old_pg_count; i < new_pg_count; i++)
{
prev_pgs[i] = prev_pgs[i % prev_pgs.length];
prev_pgs[i] = prev_pgs[i % old_pg_count];
}
}
else if (prev_pgs.length > new_pg_count)
else if (old_pg_count > new_pg_count)
{
prev_pgs.splice(new_pg_count, prev_pgs.length-new_pg_count);
prev_pgs.splice(new_pg_count, old_pg_count-new_pg_count);
}
}

View File

@@ -63,9 +63,8 @@ Wants=network-online.target local-fs.target time-sync.target
[Service]
Restart=always
Environment=GOGC=50
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
--snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
ExecStart=/usr/local/bin/etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
--advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\

View File

@@ -51,9 +51,8 @@ const etcd_tree = {
// THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
// etcd connection
config_path: "/etc/vitastor/vitastor.conf",
etcd_prefix: "/vitastor",
// etcd connection - configurable online
etcd_address: "10.0.115.10:2379/v3",
etcd_prefix: "/vitastor",
// mon
etcd_mon_ttl: 30, // min: 10
etcd_mon_timeout: 1000, // ms. min: 0
@@ -74,12 +73,11 @@ const etcd_tree = {
rdma_max_send: 8,
rdma_max_recv: 16,
rdma_max_msg: 132096,
log_level: 0,
block_size: 131072,
disk_alignment: 4096,
bitmap_granularity: 4096,
immediate_commit: false, // 'all' or 'small'
// client and osd - configurable online
log_level: 0,
client_dirty_limit: 33554432,
peer_connect_interval: 5, // seconds. min: 1
peer_connect_timeout: 5, // seconds. min: 1
@@ -97,28 +95,18 @@ const etcd_tree = {
osd_network: null, // "192.168.7.0/24" or an array of masks
bind_address: "0.0.0.0",
bind_port: 0,
readonly: false,
osd_memlock: false,
// osd - configurable online
autosync_interval: 5,
autosync_writes: 128,
client_queue_depth: 128, // unused
recovery_queue_depth: 4,
recovery_pg_switch: 128,
recovery_sync_batch: 16,
readonly: false,
no_recovery: false,
no_rebalance: false,
print_stats_interval: 3,
slow_log_interval: 10,
inode_vanish_time: 60,
auto_scrub: false,
no_scrub: false,
scrub_interval: '30d', // 1s/1m/1h/1d
scrub_queue_depth: 1,
scrub_sleep: 0, // milliseconds
scrub_list_limit: 1000, // objects to list on one scrub iteration
scrub_find_best: true,
scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterators
osd_memlock: false,
// blockstore - fixed in superblock
block_size,
disk_alignment,
@@ -137,15 +125,14 @@ const etcd_tree = {
meta_offset,
disable_meta_fsync,
disable_device_lock,
// blockstore - configurable offline
// blockstore - configurable
max_write_iodepth,
min_flusher_count: 1,
max_flusher_count: 256,
inmemory_metadata,
inmemory_journal,
journal_sector_buffer_count,
journal_no_same_sector_overwrites,
// blockstore - configurable online
max_write_iodepth,
min_flusher_count: 1,
max_flusher_count: 256,
throttle_small_writes: false,
throttle_target_iops: 100,
throttle_target_mbs: 100,
@@ -181,8 +168,6 @@ const etcd_tree = {
osd_tags?: 'nvme' | [ 'nvme', ... ],
// prefer to put primary on OSD with these tags
primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
// scrub interval
scrub_interval?: '30d',
},
...
}, */
@@ -278,7 +263,7 @@ const etcd_tree = {
primary: osd_num_t,
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
"has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
"has_invalid"|"left_on_dead")[],
}
}, */
},
@@ -300,7 +285,6 @@ const etcd_tree = {
osd_sets: osd_num_t[][],
all_peers: osd_num_t[],
epoch: uint64_t,
next_scrub: uint64_t,
},
}, */
},
@@ -391,7 +375,6 @@ class Mon
this.etcd_start_timeout = (config.etcd_start_timeout || 5) * 1000;
this.state = JSON.parse(JSON.stringify(this.constructor.etcd_tree));
this.signals_set = false;
this.stat_time = Date.now();
this.ws = null;
this.ws_alive = false;
this.ws_keepalive_timer = null;
@@ -861,7 +844,7 @@ class Mon
}
for (const node_id in tree)
{
if (node_id === '' || tree[node_id].level === 'osd' && (!tree[node_id].size || tree[node_id].size <= 0))
if (node_id === '')
{
continue;
}
@@ -969,9 +952,9 @@ class Mon
return alive_set[this.rng() % alive_set.length];
}
save_new_pgs_txn(save_to, request, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
save_new_pgs_txn(request, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
{
const aff_osds = this.get_affinity_osds(this.state.config.pools[pool_id] || {}, up_osds, osd_tree);
const aff_osds = this.get_affinity_osds(this.state.config.pools[pool_id], up_osds, osd_tree);
const pg_items = {};
this.reset_rng();
new_pgs.map((osd_set, i) =>
@@ -1022,14 +1005,14 @@ class Mon
});
}
}
save_to.items = save_to.items || {};
this.state.config.pgs.items = this.state.config.pgs.items || {};
if (!new_pgs.length)
{
delete save_to.items[pool_id];
delete this.state.config.pgs.items[pool_id];
}
else
{
save_to.items[pool_id] = pg_items;
this.state.config.pgs.items[pool_id] = pg_items;
}
}
@@ -1173,7 +1156,6 @@ class Mon
if (this.state.config.pgs.hash != tree_hash)
{
// Something has changed
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
const etcd_request = { compare: [], success: [] };
for (const pool_id in (this.state.config.pgs||{}).items||{})
{
@@ -1194,7 +1176,7 @@ class Mon
etcd_request.success.push({ requestDeleteRange: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
} });
this.save_new_pgs_txn(new_config_pgs, etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
}
}
for (const pool_id in this.state.config.pools)
@@ -1248,7 +1230,7 @@ class Mon
return;
}
const new_pg_history = [];
PGUtil.scale_pg_count(prev_pgs, real_prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
PGUtil.scale_pg_count(prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
pg_history = new_pg_history;
}
for (const pg of prev_pgs)
@@ -1301,15 +1283,14 @@ class Mon
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(this.state.pool.stats[pool_id])),
} });
this.save_new_pgs_txn(new_config_pgs, etcd_request, pool_id, up_osds, osd_tree, real_prev_pgs, optimize_result.int_pgs, pg_history);
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, real_prev_pgs, optimize_result.int_pgs, pg_history);
}
new_config_pgs.hash = tree_hash;
await this.save_pg_config(new_config_pgs, etcd_request);
this.state.config.pgs.hash = tree_hash;
await this.save_pg_config(etcd_request);
}
else
{
// Nothing changed, but we still want to recheck the distribution of primaries
let new_config_pgs;
let changed = false;
for (const pool_id in this.state.config.pools)
{
@@ -1329,35 +1310,31 @@ class Mon
const new_primary = this.pick_primary(pool_id, pg_cfg.osd_set, up_osds, aff_osds);
if (pg_cfg.primary != new_primary)
{
if (!new_config_pgs)
{
new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
}
console.log(
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
);
changed = true;
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
pg_cfg.primary = new_primary;
}
}
}
}
if (changed)
{
await this.save_pg_config(new_config_pgs);
await this.save_pg_config();
}
}
}
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
async save_pg_config(etcd_request = { compare: [], success: [] })
{
etcd_request.compare.push(
{ key: b64(this.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
{ key: b64(this.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
);
etcd_request.success.push(
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(this.state.config.pgs)) } },
);
const res = await this.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
if (!res.succeeded)
@@ -1411,75 +1388,65 @@ class Mon
}
}
derive_osd_stats(st, prev)
{
const zero_stats = { op: { bps: 0n, iops: 0n, lat: 0n }, subop: { iops: 0n, lat: 0n }, recovery: { bps: 0n, iops: 0n } };
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
if (!st || !st.time || prev && (prev.time || this.stat_time/1000) >= st.time)
{
return diff;
}
const timediff = BigInt(st.time*1000 - (prev && prev.time*1000 || this.stat_time));
for (const op in st.op_stats||{})
{
const pr = prev && prev.op_stats && prev.op_stats[op];
let c = st.op_stats[op];
c = { bytes: BigInt(c.bytes||0), usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.op_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.subop_stats||{})
{
const pr = prev && prev.subop_stats && prev.subop_stats[op];
let c = st.subop_stats[op];
c = { usec: BigInt(c.usec||0), count: BigInt(c.count||0) };
const us = c.usec - BigInt(pr && pr.usec||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.subop_stats[op] = { ...c, iops: n*1000n/timediff, lat: us/n };
}
for (const op in st.recovery_stats||{})
{
const pr = prev && prev.recovery_stats && prev.recovery_stats[op];
let c = st.recovery_stats[op];
c = { bytes: BigInt(c.bytes||0), count: BigInt(c.count||0) };
const b = c.bytes - BigInt(pr && pr.bytes||0);
const n = c.count - BigInt(pr && pr.count||0);
if (n > 0)
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
}
return diff;
}
sum_op_stats(timestamp, prev_stats)
{
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
if (!prev_stats || prev_stats.timestamp >= timestamp)
{
return sum_diff;
}
const tm = BigInt(timestamp - (prev_stats.timestamp || 0));
// Sum derived values instead of deriving summed
const op_stats = {}, subop_stats = {}, recovery_stats = {};
for (const osd in this.state.osd.stats)
{
const derived = this.derive_osd_stats(this.state.osd.stats[osd],
this.prev_stats && this.prev_stats.osd_stats && this.prev_stats.osd_stats[osd]);
for (const type in derived)
const st = this.state.osd.stats[osd]||{};
for (const op in st.op_stats||{})
{
for (const op in derived[type])
{
for (const k in derived[type][op])
{
sum_diff[type][op] = sum_diff[type][op] || {};
sum_diff[type][op][k] = (sum_diff[type][op][k] || 0n) + derived[type][op][k];
}
}
op_stats[op] = op_stats[op] || { count: 0n, usec: 0n, bytes: 0n };
op_stats[op].count += BigInt(st.op_stats[op].count||0);
op_stats[op].usec += BigInt(st.op_stats[op].usec||0);
op_stats[op].bytes += BigInt(st.op_stats[op].bytes||0);
}
for (const op in st.subop_stats||{})
{
subop_stats[op] = subop_stats[op] || { count: 0n, usec: 0n };
subop_stats[op].count += BigInt(st.subop_stats[op].count||0);
subop_stats[op].usec += BigInt(st.subop_stats[op].usec||0);
}
for (const op in st.recovery_stats||{})
{
recovery_stats[op] = recovery_stats[op] || { count: 0n, bytes: 0n };
recovery_stats[op].count += BigInt(st.recovery_stats[op].count||0);
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
}
}
return sum_diff;
if (prev_stats && prev_stats.timestamp >= timestamp)
{
prev_stats = null;
}
const tm = prev_stats ? BigInt(timestamp - prev_stats.timestamp) : 0;
for (const op in op_stats)
{
if (prev_stats && prev_stats.op_stats && prev_stats.op_stats[op])
{
op_stats[op].bps = (op_stats[op].bytes - prev_stats.op_stats[op].bytes) * 1000n / tm;
op_stats[op].iops = (op_stats[op].count - prev_stats.op_stats[op].count) * 1000n / tm;
op_stats[op].lat = (op_stats[op].usec - prev_stats.op_stats[op].usec)
/ ((op_stats[op].count - prev_stats.op_stats[op].count) || 1n);
}
}
for (const op in subop_stats)
{
if (prev_stats && prev_stats.subop_stats && prev_stats.subop_stats[op])
{
subop_stats[op].iops = (subop_stats[op].count - prev_stats.subop_stats[op].count) * 1000n / tm;
subop_stats[op].lat = (subop_stats[op].usec - prev_stats.subop_stats[op].usec)
/ ((subop_stats[op].count - prev_stats.subop_stats[op].count) || 1n);
}
}
for (const op in recovery_stats)
{
if (prev_stats && prev_stats.recovery_stats && prev_stats.recovery_stats[op])
{
recovery_stats[op].bps = (recovery_stats[op].bytes - prev_stats.recovery_stats[op].bytes) * 1000n / tm;
recovery_stats[op].iops = (recovery_stats[op].count - prev_stats.recovery_stats[op].count) * 1000n / tm;
}
}
return { op_stats, subop_stats, recovery_stats };
}
sum_object_counts()
@@ -1608,7 +1575,7 @@ class Mon
}
}
}
return { inode_stats, seen_pools };
return inode_stats;
}
serialize_bigints(obj)
@@ -1634,12 +1601,11 @@ class Mon
const timestamp = Date.now();
const { object_counts, object_bytes } = this.sum_object_counts();
let stats = this.sum_op_stats(timestamp, this.prev_stats);
let { inode_stats, seen_pools } = this.sum_inode_stats(
let inode_stats = this.sum_inode_stats(
this.prev_stats ? this.prev_stats.inode_stats : null,
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
);
this.prev_stats = { timestamp, inode_stats, osd_stats: { ...this.state.osd.stats } };
this.stat_time = Date.now();
this.prev_stats = { timestamp, ...stats, inode_stats };
stats.object_counts = object_counts;
stats.object_bytes = object_bytes;
stats = this.serialize_bigints(stats);
@@ -1669,22 +1635,12 @@ class Mon
}
for (const pool_id in this.state.pool.stats)
{
if (!seen_pools[pool_id])
{
txn.push({ requestDeleteRange: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
} });
delete this.state.pool.stats[pool_id];
}
else
{
const pool_stats = { ...this.state.pool.stats[pool_id] };
this.serialize_bigints(pool_stats);
txn.push({ requestPut: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(pool_stats)),
} });
}
const pool_stats = { ...this.state.pool.stats[pool_id] };
this.serialize_bigints(pool_stats);
txn.push({ requestPut: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(pool_stats)),
} });
}
if (txn.length)
{
@@ -1765,14 +1721,13 @@ class Mon
else if (key_parts[0] === 'osd' && key_parts[1] === 'stats')
{
// Recheck OSD tree on OSD addition/deletion
const osd_num = key_parts[2];
if ((!old) != (!kv.value) || old && kv.value && old.size != kv.value.size)
{
this.schedule_recheck();
}
// Recheck PGs <osd_out_time> after last OSD statistics report
this.schedule_next_recheck_at(
!this.state.osd.stats[osd_num] ? 0 : this.state.osd.stats[osd_num].time+this.config.osd_out_time
!this.state.osd.stats[key[2]] ? 0 : this.state.osd.stats[key[2]].time+this.config.osd_out_time
);
}
}
@@ -1858,7 +1813,6 @@ function POST(url, body, timeout)
clearTimeout(timer_id);
let res_body = '';
res.setEncoding('utf8');
res.on('error', (error) => ok({ error }));
res.on('data', chunk => { res_body += chunk; });
res.on('end', () =>
{
@@ -1878,8 +1832,6 @@ function POST(url, body, timeout)
}
});
});
req.on('error', (error) => ok({ error }));
req.on('close', () => ok({ error: new Error('Connection closed prematurely') }));
req.write(body_text);
req.end();
});

View File

@@ -15,4 +15,4 @@ StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target
WantedBy=vitastor.target

View File

@@ -388,6 +388,8 @@ sub unmap_volume
my ($class, $storeid, $scfg, $volname, $snapname) = @_;
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
return 1 if !$scfg->{vitastor_nbd};
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
$name .= '@'.$snapname if $snapname;
@@ -411,7 +413,7 @@ sub activate_volume
sub deactivate_volume
{
my ($class, $storeid, $scfg, $volname, $snapname, $cache) = @_;
$class->unmap_volume($storeid, $scfg, $volname, $snapname) if $scfg->{vitastor_nbd};
$class->unmap_volume($storeid, $scfg, $volname, $snapname);
return 1;
}

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '0.9.3'
VERSION = '0.8.6'
LOG = logging.getLogger(__name__)

View File

@@ -1,644 +0,0 @@
commit e6f935157944279c2c0634915c3c00feeec748c9
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
Date: Mon Jun 19 00:58:19 2023 +0300
Add Vitastor support
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
index aaad4a3..5f5daa8 100644
--- a/include/libvirt/libvirt-storage.h
+++ b/include/libvirt/libvirt-storage.h
@@ -326,6 +326,7 @@ typedef enum {
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
} virConnectListAllStoragePoolsFlags;
int virConnectListAllStoragePools(virConnectPtr conn,
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 45965fa..b7c23d3 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7103,7 +7103,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
src->configFile = virXPathString("string(./config/@file)", ctxt);
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
src->query = virXMLPropString(node, "query");
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
@@ -30121,6 +30122,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
case VIR_STORAGE_POOL_MPATH:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 5a9bf20..05058b8 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -494,6 +494,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_RBD:
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
}
}
- /* internal snapshots and config files are currently supported only with rbd: */
+ /* internal snapshots are currently supported only with rbd: */
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
if (src->snapshot) {
@@ -550,11 +551,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
"only with 'rbd' disks"));
return -1;
}
-
+ }
+ /* config files are currently supported only with rbd and vitastor: */
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
if (src->configFile) {
virReportError(VIR_ERR_XML_ERROR, "%s",
_("<config> element is currently supported "
- "only with 'rbd' disks"));
+ "only with 'rbd' and 'vitastor' disks"));
return -1;
}
}
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index 6cb0a20..8bf7de9 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -1972,6 +1972,35 @@
</element>
</define>
+ <define name="diskSourceNetworkProtocolVitastor">
+ <element name="source">
+ <interleave>
+ <attribute name="protocol">
+ <value>vitastor</value>
+ </attribute>
+ <ref name="diskSourceCommon"/>
+ <optional>
+ <attribute name="name"/>
+ </optional>
+ <optional>
+ <attribute name="query"/>
+ </optional>
+ <zeroOrMore>
+ <ref name="diskSourceNetworkHost"/>
+ </zeroOrMore>
+ <optional>
+ <element name="config">
+ <attribute name="file">
+ <ref name="absFilePath"/>
+ </attribute>
+ <empty/>
+ </element>
+ </optional>
+ <empty/>
+ </interleave>
+ </element>
+ </define>
+
<define name="diskSourceNetworkProtocolISCSI">
<element name="source">
<attribute name="protocol">
@@ -2264,6 +2293,7 @@
<ref name="diskSourceNetworkProtocolSimple"/>
<ref name="diskSourceNetworkProtocolVxHS"/>
<ref name="diskSourceNetworkProtocolNFS"/>
+ <ref name="diskSourceNetworkProtocolVitastor"/>
</choice>
</define>
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
index f5a9636..8339bc4 100644
--- a/src/conf/storage_conf.c
+++ b/src/conf/storage_conf.c
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
"logical", "disk", "iscsi",
"iscsi-direct", "scsi", "mpath",
"rbd", "sheepdog", "gluster",
- "zfs", "vstorage",
+ "zfs", "vstorage", "vitastor",
);
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
.formatToString = virStorageFileFormatTypeToString,
}
},
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
+ .poolOptions = {
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
+ VIR_STORAGE_POOL_SOURCE_NAME),
+ },
+ .volOptions = {
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
+ .formatFromString = virStorageVolumeFormatFromString,
+ .formatToString = virStorageFileFormatTypeToString,
+ }
+ },
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
.poolOptions = {
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
@@ -542,6 +554,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
_("element 'name' is mandatory for RBD pool"));
return -1;
}
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
+ virReportError(VIR_ERR_XML_ERROR, "%s",
+ _("element 'name' is mandatory for Vitastor pool"));
+ return -1;
+ }
if (options->formatFromString) {
g_autofree char *format = NULL;
@@ -1132,6 +1149,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
* files, so they don't have a target */
if (def->type != VIR_STORAGE_POOL_RBD &&
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
def->type != VIR_STORAGE_POOL_GLUSTER &&
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
index fc67957..720c07e 100644
--- a/src/conf/storage_conf.h
+++ b/src/conf/storage_conf.h
@@ -103,6 +103,7 @@ typedef enum {
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
VIR_STORAGE_POOL_ZFS, /* ZFS */
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
VIR_STORAGE_POOL_LAST,
} virStoragePoolType;
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
index cecd7e8..d7b79a4 100644
--- a/src/conf/storage_source_conf.c
+++ b/src/conf/storage_source_conf.c
@@ -87,6 +87,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
"ssh",
"vxhs",
"nfs",
+ "vitastor",
);
@@ -1286,6 +1287,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
return 24007;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_RBD:
/* we don't provide a default for RBD */
return 0;
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
index 14a6825..eb4acac 100644
--- a/src/conf/storage_source_conf.h
+++ b/src/conf/storage_source_conf.h
@@ -128,6 +128,7 @@ typedef enum {
VIR_STORAGE_NET_PROTOCOL_SSH,
VIR_STORAGE_NET_PROTOCOL_VXHS,
VIR_STORAGE_NET_PROTOCOL_NFS,
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
VIR_STORAGE_NET_PROTOCOL_LAST
} virStorageNetProtocol;
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
index e6c187e..035b423 100644
--- a/src/conf/virstorageobj.c
+++ b/src/conf/virstorageobj.c
@@ -1433,6 +1433,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
return 1;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_RBD:
case VIR_STORAGE_POOL_LAST:
@@ -1918,6 +1919,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
index 8490034..ab2cdaa 100644
--- a/src/libvirt-storage.c
+++ b/src/libvirt-storage.c
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
index 17ac880..59711b5 100644
--- a/src/libxl/libxl_conf.c
+++ b/src/libxl/libxl_conf.c
@@ -970,6 +970,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
index 6919325..55ffc32 100644
--- a/src/libxl/xen_xl.c
+++ b/src/libxl/xen_xl.c
@@ -1445,6 +1445,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
case VIR_STORAGE_NET_PROTOCOL_SSH:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
virReportError(VIR_ERR_NO_SUPPORT,
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
index e865aa1..40162af 100644
--- a/src/qemu/qemu_block.c
+++ b/src/qemu/qemu_block.c
@@ -604,6 +604,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
}
+static virJSONValue *
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
+{
+ virJSONValue *ret = NULL;
+ virStorageNetHostDef *host;
+ size_t i;
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+ g_autofree char *etcd = NULL;
+
+ for (i = 0; i < src->nhosts; i++) {
+ host = src->hosts + i;
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
+ return NULL;
+ }
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
+ }
+ if (src->nhosts > 0) {
+ etcd = virBufferContentAndReset(&buf);
+ }
+
+ if (virJSONValueObjectAdd(&ret,
+ "S:etcd-host", etcd,
+ "S:etcd-prefix", src->query,
+ "S:config-path", src->configFile,
+ "s:image", src->path,
+ NULL) < 0)
+ return NULL;
+
+ return ret;
+}
+
+
static virJSONValue *
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
{
@@ -917,6 +949,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
return NULL;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
+ return NULL;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
@@ -1860,6 +1898,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_VXHS:
case VIR_STORAGE_NET_PROTOCOL_NFS:
case VIR_STORAGE_NET_PROTOCOL_SSH:
@@ -2242,6 +2281,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ driver = "vitastor";
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
driver = "sheepdog";
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 2eb5653..60ee82d 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -4958,7 +4958,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
if (src->query &&
(actualType != VIR_STORAGE_TYPE_NETWORK ||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("query is supported only with HTTP(S) protocols"));
return -1;
@@ -10129,6 +10130,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
break;
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
index b841680..a6be771 100644
--- a/src/qemu/qemu_snapshot.c
+++ b/src/qemu/qemu_snapshot.c
@@ -373,6 +373,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
@@ -578,6 +579,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
case VIR_STORAGE_NET_PROTOCOL_NONE:
case VIR_STORAGE_NET_PROTOCOL_NBD:
case VIR_STORAGE_NET_PROTOCOL_RBD:
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
index d90c1c9..e853457 100644
--- a/src/storage/storage_driver.c
+++ b/src/storage/storage_driver.c
@@ -1627,6 +1627,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_SHEEPDOG:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_LAST:
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
index e48ae72..2017ccc 100644
--- a/src/storage_file/storage_source_backingstore.c
+++ b/src/storage_file/storage_source_backingstore.c
@@ -284,6 +284,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
}
+static int
+virStorageSourceParseVitastorColonString(const char *colonstr,
+ virStorageSource *src)
+{
+ char *p, *e, *next;
+ g_autofree char *options = NULL;
+
+ /* optionally skip the "vitastor:" prefix if provided */
+ if (STRPREFIX(colonstr, "vitastor:"))
+ colonstr += strlen("vitastor:");
+
+ options = g_strdup(colonstr);
+
+ p = options;
+ while (*p) {
+ /* find : delimiter or end of string */
+ for (e = p; *e && *e != ':'; ++e) {
+ if (*e == '\\') {
+ e++;
+ if (*e == '\0')
+ break;
+ }
+ }
+ if (*e == '\0') {
+ next = e; /* last kv pair */
+ } else {
+ next = e + 1;
+ *e = '\0';
+ }
+
+ if (STRPREFIX(p, "image=")) {
+ src->path = g_strdup(p + strlen("image="));
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
+ src->query = g_strdup(p + strlen("etcd-prefix="));
+ } else if (STRPREFIX(p, "config-path=")) {
+ src->configFile = g_strdup(p + strlen("config-path="));
+ } else if (STRPREFIX(p, "etcd-host=")) {
+ char *h, *sep;
+
+ h = p + strlen("etcd-host=");
+ while (h < e) {
+ for (sep = h; sep < e; ++sep) {
+ if (*sep == '\\' && (sep[1] == ',' ||
+ sep[1] == ';' ||
+ sep[1] == ' ')) {
+ *sep = '\0';
+ sep += 2;
+ break;
+ }
+ }
+
+ if (virStorageSourceRBDAddHost(src, h) < 0)
+ return -1;
+
+ h = sep;
+ }
+ }
+
+ p = next;
+ }
+
+ if (!src->path) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
static int
virStorageSourceParseNBDColonString(const char *nbdstr,
virStorageSource *src)
@@ -396,6 +465,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
return -1;
break;
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
+ return -1;
+ break;
+
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
case VIR_STORAGE_NET_PROTOCOL_LAST:
case VIR_STORAGE_NET_PROTOCOL_NONE:
@@ -984,6 +1058,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
return 0;
}
+static int
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
+ virJSONValue *json,
+ const char *jsonstr G_GNUC_UNUSED,
+ int opaque G_GNUC_UNUSED)
+{
+ const char *filename;
+ const char *image = virJSONValueObjectGetString(json, "image");
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
+ size_t nservers;
+ size_t i;
+
+ src->type = VIR_STORAGE_TYPE_NETWORK;
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
+
+ /* legacy syntax passed via 'filename' option */
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
+ return virStorageSourceParseVitastorColonString(filename, src);
+
+ if (!image) {
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
+ _("missing image name in Vitastor backing volume "
+ "JSON specification"));
+ return -1;
+ }
+
+ src->path = g_strdup(image);
+ src->configFile = g_strdup(conf);
+ src->query = g_strdup(etcd_prefix);
+
+ if (servers) {
+ nservers = virJSONValueArraySize(servers);
+
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
+ src->nhosts = nservers;
+
+ for (i = 0; i < nservers; i++) {
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
+ virJSONValueArrayGet(servers, i)) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static int
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
virJSONValue *json,
@@ -1162,6 +1284,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
index bd6f063..cce34e1 100644
--- a/src/test/test_driver.c
+++ b/src/test/test_driver.c
@@ -7338,6 +7338,7 @@ testStorageVolumeTypeForPool(int pooltype)
case VIR_STORAGE_POOL_ISCSI_DIRECT:
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_RBD:
+ case VIR_STORAGE_POOL_VITASTOR:
return VIR_STORAGE_VOL_NETWORK;
case VIR_STORAGE_POOL_LOGICAL:
case VIR_STORAGE_POOL_DISK:
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
index eee75af..8bd0a57 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='no'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
index 805950a..852df0d 100644
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
@@ -204,4 +204,11 @@
</enum>
</volOptions>
</pool>
+ <pool type='vitastor' supported='yes'>
+ <volOptions>
+ <defaultFormat type='raw'/>
+ <enum name='targetFormatType'>
+ </enum>
+ </volOptions>
+ </pool>
</storagepoolCapabilities>
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
index e8e40d6..db55fe5 100644
--- a/tests/storagepoolxml2argvtest.c
+++ b/tests/storagepoolxml2argvtest.c
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
case VIR_STORAGE_POOL_GLUSTER:
case VIR_STORAGE_POOL_ZFS:
case VIR_STORAGE_POOL_VSTORAGE:
+ case VIR_STORAGE_POOL_VITASTOR:
case VIR_STORAGE_POOL_LAST:
default:
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
index 8a98c6a..4b1bbd4 100644
--- a/tools/virsh-pool.c
+++ b/tools/virsh-pool.c
@@ -1221,6 +1221,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
case VIR_STORAGE_POOL_VSTORAGE:
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
break;
+ case VIR_STORAGE_POOL_VITASTOR:
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
+ break;
case VIR_STORAGE_POOL_LAST:
break;
}

View File

@@ -1,169 +0,0 @@
Index: pve-qemu-kvm-7.2.0/block/meson.build
===================================================================
--- pve-qemu-kvm-7.2.0.orig/block/meson.build
+++ pve-qemu-kvm-7.2.0/block/meson.build
@@ -113,6 +113,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-7.2.0/meson.build
===================================================================
--- pve-qemu-kvm-7.2.0.orig/meson.build
+++ pve-qemu-kvm-7.2.0/meson.build
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1865,6 +1885,7 @@ config_host_data.set('CONFIG_NUMA', numa
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3957,6 +3978,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: pve-qemu-kvm-7.2.0/meson_options.txt
===================================================================
--- pve-qemu-kvm-7.2.0.orig/meson_options.txt
+++ pve-qemu-kvm-7.2.0/meson_options.txt
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: pve-qemu-kvm-7.2.0/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-7.2.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-7.2.0/qapi/block-core.json
@@ -3213,7 +3213,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4223,6 +4223,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -4671,6 +4693,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5072,6 +5095,17 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5269,6 +5303,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
===================================================================
--- pve-qemu-kvm-7.2.0.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \

View File

@@ -1,190 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index 382bec0e7d..af6207dbce 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -114,6 +114,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index c44d05a13f..ebedb42843 100644
--- a/meson.build
+++ b/meson.build
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1882,6 +1902,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -4020,6 +4041,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..c4ac55c283 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c05ad0c07e..f5eb701604 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3308,7 +3308,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4338,6 +4338,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4787,6 +4809,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5187,6 +5210,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5385,6 +5419,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 6e8983f39c..1b0b9fcf3e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -32,7 +32,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..95914e6ebc 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -144,6 +144,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -392,6 +393,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -1,169 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index deb73ca389..e269f599a1 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -78,6 +78,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 96de1a6ef9..2e3994777d 100644
--- a/meson.build
+++ b/meson.build
@@ -838,6 +838,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1455,6 +1475,7 @@ config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3412,6 +3433,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
diff --git a/meson_options.txt b/meson_options.txt
index e392323732..5b56007475 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -121,6 +121,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1d3dd9cb48..88453405e5 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2930,7 +2930,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3864,6 +3864,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4259,6 +4281,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4647,6 +4670,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -4846,6 +4880,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 7a17ff4218..cdddbf32aa 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -69,6 +69,7 @@ meson_options_help() {
printf "%s\n" ' oss OSS sound support'
printf "%s\n" ' pa PulseAudio sound support'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' sdl SDL user interface'
printf "%s\n" ' sdl-image SDL Image support for icons'
printf "%s\n" ' seccomp seccomp support'
@@ -210,6 +211,8 @@ _meson_option_parse() {
--disable-pa) printf "%s" -Dpa=disabled ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-sdl) printf "%s" -Dsdl=enabled ;;
--disable-sdl) printf "%s" -Dsdl=disabled ;;
--enable-sdl-image) printf "%s" -Dsdl_image=enabled ;;

View File

@@ -1,190 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index 0b2a60c99b..d923713804 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -98,6 +98,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 861de93c4f..272f72af11 100644
--- a/meson.build
+++ b/meson.build
@@ -884,6 +884,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1546,6 +1566,7 @@ config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
@@ -3709,6 +3730,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index 52b11cead4..d8d0868174 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -149,6 +149,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('gtk', type : 'feature', value : 'auto',
description: 'GTK+ user interface')
option('sdl', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index beeb91952a..1c98dc0e12 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2929,7 +2929,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3863,6 +3863,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4277,6 +4299,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4665,6 +4688,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -4864,6 +4898,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 9850dd4444..72b1287520 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -181,6 +181,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 1e26f4571e..370898d48c 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -98,6 +98,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
printf "%s\n" ' sdl-image SDL Image support for icons'
@@ -289,6 +290,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;
--disable-replication) printf "%s" -Dreplication=disabled ;;
--enable-rng-none) printf "%s" -Drng_none=true ;;

View File

@@ -1,190 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index 60bc305597..89a042216f 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -98,6 +98,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 20fddbd707..600db4e2fb 100644
--- a/meson.build
+++ b/meson.build
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1799,6 +1819,7 @@ config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3954,6 +3975,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index e58e158396..9747b38fd0 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..5a4900b322 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2955,7 +2955,7 @@
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
##
# @BlockdevOptionsFile:
@@ -3883,6 +3883,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4327,6 +4349,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'vmdk': 'BlockdevOptionsGenericCOWFormat',
'vpc': 'BlockdevOptionsGenericFormat',
'vvfat': 'BlockdevOptionsVVFAT'
@@ -4717,6 +4740,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -4915,6 +4949,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index a7f92aff90..53dc55be2e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 359b04e0e6..f5b85ba78c 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -135,6 +135,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -370,6 +371,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -1,190 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index b7c68b83a3..95d8a6f15d 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -100,6 +100,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 5c6b5a1c75..f31f73612e 100644
--- a/meson.build
+++ b/meson.build
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1861,6 +1881,7 @@ config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -3945,6 +3966,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index 4b749ca549..6b37bd6b77 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 95ac4fa634..7a240827e4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2959,7 +2959,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -3957,6 +3957,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4405,6 +4427,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -4804,6 +4827,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5002,6 +5036,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index a7f92aff90..53dc55be2e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -31,7 +31,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index aa6e30ea91..c45d21c40f 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -135,6 +135,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -376,6 +377,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -1,190 +0,0 @@
diff --git a/block/meson.build b/block/meson.build
index 382bec0e7d..af6207dbce 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -101,6 +101,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index c44d05a13f..ebedb42843 100644
--- a/meson.build
+++ b/meson.build
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'), kwargs: static_kwargs)
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -1878,6 +1898,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_SDL', sdl.found())
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
@@ -4002,6 +4023,7 @@ if spice_protocol.found()
summary_info += {' spice server support': spice}
endif
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
diff --git a/meson_options.txt b/meson_options.txt
index fc9447d267..c4ac55c283 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c05ad0c07e..f5eb701604 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3054,7 +3054,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4073,6 +4073,28 @@
'*key-secret': 'str',
'*server': ['InetSocketAddressBase'] } }
+##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
##
# @ReplicationMode:
#
@@ -4521,6 +4543,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -4920,6 +4943,17 @@
'*cluster-size' : 'size',
'*encrypt' : 'RbdEncryptionCreateOptions' } }
+##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
##
# @BlockdevVmdkSubformat:
#
@@ -5118,6 +5152,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
index 6e8983f39c..1b0b9fcf3e 100755
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -32,7 +32,7 @@
--with-git=meson \
--with-git-submodules=update \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -179,6 +179,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 009fab1515..95914e6ebc 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -142,6 +142,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' sdl SDL user interface'
@@ -388,6 +389,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-replication) printf "%s" -Dreplication=enabled ;;

View File

@@ -7,12 +7,13 @@ set -e
VITASTOR=$(dirname $0)
VITASTOR=$(realpath "$VITASTOR/..")
EL=$(rpm --eval '%dist')
if [ "$EL" = ".el8" ]; then
if [ -d /opt/rh/gcc-toolset-9 ]; then
# CentOS 8
EL=8
. /opt/rh/gcc-toolset-9/enable
elif [ "$EL" = ".el7" ]; then
else
# CentOS 7
EL=7
. /opt/rh/devtoolset-9/enable
fi
cd ~/rpmbuild/SPECS
@@ -24,4 +25,4 @@ rm fio
mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.9.3/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.3$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-0.8.6/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.6$(rpm --eval '%dist').tar.gz *

View File

@@ -1,93 +0,0 @@
--- qemu-kvm.spec.orig 2023-02-28 08:04:06.000000000 +0000
+++ qemu-kvm.spec 2023-04-27 22:29:18.094878829 +0000
@@ -100,8 +100,6 @@
%endif
%global target_list %{kvm_target}-softmmu
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
%define qemudocdir %{_docdir}/%{name}
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
@@ -129,6 +127,7 @@ Requires: %{name}-device-usb-host = %{ep
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
%endif \
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
# Since SPICE is removed from RHEL-9, the following Obsoletes:
@@ -151,7 +150,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 7.0.0
-Release: 13%{?rcrel}%{?dist}%{?cc_suffix}.2
+Release: 13.vitastor%{?rcrel}%{?dist}%{?cc_suffix}
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -174,6 +173,7 @@ Source28: 95-kvm-memlock.conf
Source30: kvm-s390x.conf
Source31: kvm-x86.conf
Source36: README.tests
+Source37: qemu-vitastor.c
Patch0004: 0004-Initial-redhat-build.patch
@@ -498,6 +498,7 @@ Patch171: kvm-i386-do-kvm_put_msr_featur
Patch172: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch
# For bz#2168221 - while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set [rhel-9.1.0.z]
Patch173: kvm-migration-Read-state-once.patch
+Patch174: qemu-7.0-vitastor.patch
# Source-git patches
@@ -531,6 +532,7 @@ BuildRequires: libcurl-devel
%if %{have_block_rbd}
BuildRequires: librbd-devel
%endif
+BuildRequires: vitastor-client-devel
# We need both because the 'stap' binary is probed for by configure
BuildRequires: systemtap
BuildRequires: systemtap-sdt-devel
@@ -718,6 +720,14 @@ using the rbd protocol.
%endif
+%package block-vitastor
+Summary: QEMU Vitastor block driver
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
+
+%description block-vitastor
+This package provides the additional Vitastor block driver for QEMU.
+
+
%package audio-pa
Summary: QEMU PulseAudio audio driver
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
@@ -811,6 +821,7 @@ This package provides usbredir support.
%prep
%setup -q -n qemu-%{version}%{?rcstr}
%autopatch -p1
+cp %{SOURCE37} ./block/vitastor.c
%global qemu_kvm_build qemu_kvm_build
mkdir -p %{qemu_kvm_build}
@@ -1032,6 +1043,7 @@ run_configure \
%if %{have_block_rbd}
--enable-rbd \
%endif
+ --enable-vitastor \
%if %{have_librdma}
--enable-rdma \
%endif
@@ -1511,6 +1523,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
%files block-rbd
%{_libdir}/%{name}/block-rbd.so
%endif
+%files block-vitastor
+%{_libdir}/%{name}/block-vitastor.so
+
%files audio-pa
%{_libdir}/%{name}/audio-pa.so

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.3.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.8.6.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.9.3
Version: 0.8.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.3.el7.tar.gz
Source0: vitastor-0.8.6.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.3.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.8.6.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.9.3
Version: 0.8.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.3.el8.tar.gz
Source0: vitastor-0.8.6.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -1,28 +0,0 @@
# Build packages for AlmaLinux 9 inside a container
# cd ..; podman build -t vitastor-el9 -v `pwd`/packages:/root/packages -f rpm/vitastor-el9.Dockerfile .
FROM almalinux:9
WORKDIR /root
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
RUN dnf -y install epel-release dnf-plugins-core
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake
RUN dnf download --source fio
RUN rpm --nomd5 -i fio*.src.rpm
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec
ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.9.3.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \
mkdir -p /root/packages/vitastor-el9; \
rm -rf /root/packages/vitastor-el9/*; \
cp ~/rpmbuild/RPMS/*/vitastor* /root/packages/vitastor-el9/; \
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el9/

View File

@@ -1,158 +0,0 @@
Name: vitastor
Version: 0.9.3
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.9.3.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
BuildRequires: gcc-c++
BuildRequires: nodejs >= 10
BuildRequires: jerasure-devel
BuildRequires: libisa-l-devel
BuildRequires: gf-complete-devel
BuildRequires: rdma-core-devel
BuildRequires: cmake
Requires: vitastor-osd = %{version}-%{release}
Requires: vitastor-mon = %{version}-%{release}
Requires: vitastor-client = %{version}-%{release}
Requires: vitastor-client-devel = %{version}-%{release}
Requires: vitastor-fio = %{version}-%{release}
%description
Vitastor is a small, simple and fast clustered block storage (storage for VM drives),
architecturally similar to Ceph which means strong consistency, primary-replication,
symmetric clustering and automatic data distribution over any number of drives of any
size with configurable redundancy (replication or erasure codes/XOR).
%package -n vitastor-osd
Summary: Vitastor - OSD
Requires: vitastor-client = %{version}-%{release}
Requires: util-linux
Requires: parted
%description -n vitastor-osd
Vitastor object storage daemon, i.e. server program that stores data.
%package -n vitastor-mon
Summary: Vitastor - monitor
Requires: nodejs >= 10
Requires: lpsolve
%description -n vitastor-mon
Vitastor monitor, i.e. server program responsible for watching cluster state and
scheduling cluster-level operations.
%package -n vitastor-client
Summary: Vitastor - client
%description -n vitastor-client
Vitastor client library and command-line interface.
%package -n vitastor-client-devel
Summary: Vitastor - development files
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
%description -n vitastor-client-devel
Vitastor library headers for development.
%package -n vitastor-fio
Summary: Vitastor - fio drivers
Group: Development/Libraries
Requires: vitastor-client = %{version}-%{release}
Requires: fio = 3.27-8.el9
%description -n vitastor-fio
Vitastor fio drivers for benchmarking.
%prep
%setup -q
%build
%cmake
%cmake_build
%install
rm -rf $RPM_BUILD_ROOT
%cmake_install
cd mon
npm install
cd ..
mkdir -p %buildroot/usr/lib/vitastor
cp -r mon %buildroot/usr/lib/vitastor
mkdir -p %buildroot/lib/systemd/system
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
mkdir -p %buildroot/lib/udev/rules.d
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
%files
%doc GPL-2.0.txt VNPL-1.1.txt README.md README-ru.md
%files -n vitastor-osd
%_bindir/vitastor-osd
%_bindir/vitastor-disk
%_bindir/vitastor-dump-journal
/lib/systemd/system/vitastor-osd@.service
/lib/systemd/system/vitastor.target
/lib/udev/rules.d/90-vitastor.rules
%pre -n vitastor-osd
groupadd -r -f vitastor 2>/dev/null ||:
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
install -o vitastor -g vitastor -d /var/log/vitastor
mkdir -p /etc/vitastor
%files -n vitastor-mon
/usr/lib/vitastor/mon
/lib/systemd/system/vitastor-mon.service
%pre -n vitastor-mon
groupadd -r -f vitastor 2>/dev/null ||:
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
mkdir -p /etc/vitastor
%files -n vitastor-client
%_bindir/vitastor-nbd
%_bindir/vitastor-nfs
%_bindir/vitastor-cli
%_bindir/vitastor-rm
%_bindir/vita
%_libdir/libvitastor_blk.so*
%_libdir/libvitastor_client.so*
%files -n vitastor-client-devel
%_includedir/vitastor_c.h
%_libdir/pkgconfig
%files -n vitastor-fio
%_libdir/libfio_vitastor.so
%_libdir/libfio_vitastor_blk.so
%_libdir/libfio_vitastor_sec.so
%changelog

View File

@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="0.9.3")
add_definitions(-DVERSION="0.8.6")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
if (${WITH_ASAN})
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
@@ -111,7 +111,7 @@ target_compile_options(vitastor_common PUBLIC -fPIC)
add_executable(vitastor-osd
osd_main.cpp osd.cpp osd_secondary.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
osd_cluster.cpp osd_rmw.cpp osd_scrub.cpp osd_primary_describe.cpp
osd_cluster.cpp osd_rmw.cpp
)
target_link_libraries(vitastor-osd
vitastor_common
@@ -141,8 +141,6 @@ add_library(vitastor_client SHARED
cli_common.cpp
cli_alloc_osd.cpp
cli_status.cpp
cli_describe.cpp
cli_fix.cpp
cli_df.cpp
cli_ls.cpp
cli_create.cpp
@@ -301,7 +299,7 @@ add_executable(test_cluster_client
EXCLUDE_FROM_ALL
test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
etcd_state_client.cpp timerfd_manager.cpp ../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mock)

View File

@@ -143,83 +143,34 @@ uint64_t allocator::get_free_count()
return free;
}
// FIXME: Move to utils?
void bitmap_set(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
{
if (start == 0 && len == 32*bitmap_granularity)
*((uint32_t*)bitmap) = UINT32_MAX;
else if (start == 0 && len == 64*bitmap_granularity)
*((uint64_t*)bitmap) = UINT64_MAX;
else
if (start == 0)
{
unsigned bit_start = start / bitmap_granularity;
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
while (bit_start < bit_end)
if (len == 32*bitmap_granularity)
{
if (!(bit_start & 7) && bit_end >= bit_start+8)
{
((uint8_t*)bitmap)[bit_start / 8] = UINT8_MAX;
bit_start += 8;
}
else
{
((uint8_t*)bitmap)[bit_start / 8] |= 1 << (bit_start % 8);
bit_start++;
}
*((uint32_t*)bitmap) = UINT32_MAX;
return;
}
else if (len == 64*bitmap_granularity)
{
*((uint64_t*)bitmap) = UINT64_MAX;
return;
}
}
unsigned bit_start = start / bitmap_granularity;
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
while (bit_start < bit_end)
{
if (!(bit_start & 7) && bit_end >= bit_start+8)
{
((uint8_t*)bitmap)[bit_start / 8] = UINT8_MAX;
bit_start += 8;
}
else
{
((uint8_t*)bitmap)[bit_start / 8] |= 1 << (bit_start % 8);
bit_start++;
}
}
}
void bitmap_clear(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
{
if (start == 0 && len == 32*bitmap_granularity)
*((uint32_t*)bitmap) = 0;
else if (start == 0 && len == 64*bitmap_granularity)
*((uint64_t*)bitmap) = 0;
else
{
unsigned bit_start = start / bitmap_granularity;
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
while (bit_start < bit_end)
{
if (!(bit_start & 7) && bit_end >= bit_start+8)
{
((uint8_t*)bitmap)[bit_start / 8] = 0;
bit_start += 8;
}
else
{
((uint8_t*)bitmap)[bit_start / 8] &= (0xFF ^ (1 << (bit_start % 8)));
bit_start++;
}
}
}
}
bool bitmap_check(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
{
bool r = false;
if (start == 0 && len == 32*bitmap_granularity)
r = !!*((uint32_t*)bitmap);
else if (start == 0 && len == 64*bitmap_granularity)
r = !!*((uint64_t*)bitmap);
else
{
unsigned bit_start = start / bitmap_granularity;
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
while (bit_start < bit_end)
{
if (!(bit_start & 7) && bit_end >= bit_start+8)
{
r = r || !!((uint8_t*)bitmap)[bit_start / 8];
bit_start += 8;
}
else
{
r = r || (((uint8_t*)bitmap)[bit_start / 8] & (1 << (bit_start % 8)));
bit_start++;
}
}
}
return r;
}

View File

@@ -23,5 +23,3 @@ public:
};
void bitmap_set(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
void bitmap_clear(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
bool bitmap_check(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);

View File

@@ -13,11 +13,6 @@ blockstore_t::~blockstore_t()
delete impl;
}
void blockstore_t::parse_config(blockstore_config_t & config)
{
impl->parse_config(config, false);
}
void blockstore_t::loop()
{
impl->loop();

View File

@@ -73,11 +73,7 @@ Input:
write request is copied into the metadata area bitwise and stored there.
Output:
- retval = number of bytes actually read/written or negative error number
-EINVAL = invalid input parameters
-ENOENT = requested object/version does not exist for reads
-ENOSPC = no space left in the store for writes
-EDOM = checksum error.
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
- version = the version actually read or written
## BS_OP_DELETE
@@ -111,7 +107,7 @@ Input:
- buf = pre-allocated obj_ver_id array <len> units long
Output:
- retval = 0 or negative error number (-ENOENT if no such version for stabilize)
- retval = 0 or negative error number (-EINVAL, -ENOENT if no such version or -EBUSY if not synced)
## BS_OP_SYNC_STAB_ALL
@@ -126,14 +122,11 @@ Output:
Get a list of all objects in this Blockstore.
Input:
- pg_alignment = PG alignment
- pg_count = PG count or 0 to list all objects
- pg_number = PG number
- list_stable_limit = max number of clean objects in the reply
it's guaranteed that dirty objects are returned from the same interval,
i.e. from (min_oid .. min(max_oid, max(returned stable OIDs)))
- min_oid = min inode/stripe or 0 to list all objects
- max_oid = max inode/stripe or 0 to list all objects
- oid.stripe = PG alignment
- len = PG count or 0 to list all objects
- offset = PG number
- oid.inode = min inode number or 0 to list all inodes
- version = max inode number or 0 to list all inodes
Output:
- retval = total obj_ver_id count
@@ -150,27 +143,10 @@ struct blockstore_op_t
uint64_t opcode;
// finish callback
std::function<void (blockstore_op_t*)> callback;
union __attribute__((__packed__))
{
// R/W
struct __attribute__((__packed__))
{
object_id oid;
uint64_t version;
uint32_t offset;
uint32_t len;
};
// List
struct __attribute__((__packed__))
{
object_id min_oid;
object_id max_oid;
uint32_t pg_alignment;
uint32_t pg_count;
uint32_t pg_number;
uint32_t list_stable_limit;
};
};
object_id oid;
uint64_t version;
uint32_t offset;
uint32_t len;
void *buf;
void *bitmap;
int retval;
@@ -189,9 +165,6 @@ public:
blockstore_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
~blockstore_t();
// Update configuration
void parse_config(blockstore_config_t & config);
// Event loop
void loop();

View File

@@ -40,31 +40,10 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
data_block_size = parse_size(config["block_size"]);
journal_device = config["journal_device"];
journal_offset = parse_size(config["journal_offset"]);
disk_alignment = parse_size(config["disk_alignment"]);
journal_block_size = parse_size(config["journal_block_size"]);
meta_block_size = parse_size(config["meta_block_size"]);
bitmap_granularity = parse_size(config["bitmap_granularity"]);
meta_format = stoull_full(config["meta_format"]);
cached_read_data = config["cached_read_data"] == "true" || config["cached_read_data"] == "yes" || config["cached_read_data"] == "1";
cached_read_meta = cached_read_data && (meta_device == data_device || meta_device == "") &&
config.find("cached_read_meta") == config.end() ||
config["cached_read_meta"] == "true" || config["cached_read_meta"] == "yes" || config["cached_read_meta"] == "1";
cached_read_journal = cached_read_meta && (journal_device == meta_device || journal_device == "") &&
config.find("cached_read_journal") == config.end() ||
config["cached_read_journal"] == "true" || config["cached_read_journal"] == "yes" || config["cached_read_journal"] == "1";
if (config["data_csum_type"] == "crc32c")
{
data_csum_type = BLOCKSTORE_CSUM_CRC32C;
}
else if (config["data_csum_type"] == "" || config["data_csum_type"] == "none")
{
data_csum_type = BLOCKSTORE_CSUM_NONE;
}
else
{
throw std::runtime_error("data_csum_type="+config["data_csum_type"]+" is unsupported, only \"crc32c\" and \"none\" are supported");
}
csum_block_size = parse_size(config["csum_block_size"]);
disk_alignment = strtoull(config["disk_alignment"].c_str(), NULL, 10);
journal_block_size = strtoull(config["journal_block_size"].c_str(), NULL, 10);
meta_block_size = strtoull(config["meta_block_size"].c_str(), NULL, 10);
bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10);
// Validate
if (!data_block_size)
{
@@ -112,23 +91,7 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
}
if (data_block_size % bitmap_granularity)
{
throw std::runtime_error("Data block size must be a multiple of sparse write tracking granularity");
}
if (!data_csum_type)
{
csum_block_size = 0;
}
else if (!csum_block_size)
{
csum_block_size = bitmap_granularity;
}
if (csum_block_size && (csum_block_size % bitmap_granularity))
{
throw std::runtime_error("Checksum block size must be a multiple of sparse write tracking granularity");
}
if (csum_block_size && (data_block_size % csum_block_size))
{
throw std::runtime_error("Checksum block size must be a divisor of data block size");
throw std::runtime_error("Block size must be a multiple of sparse write tracking granularity");
}
if (meta_device == "")
{
@@ -147,9 +110,7 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
}
clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
clean_dyn_size = clean_entry_bitmap_size*2 + (csum_block_size
? data_block_size/csum_block_size*(data_csum_type & 0xFF) : 0);
clean_entry_size = sizeof(clean_disk_entry) + clean_dyn_size + 4 /*entry_csum*/;
clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
}
void blockstore_disk_t::calc_lengths(bool skip_meta_check)
@@ -199,25 +160,6 @@ void blockstore_disk_t::calc_lengths(bool skip_meta_check)
// required metadata size
block_count = data_len / data_block_size;
meta_len = (1 + (block_count - 1 + meta_block_size / clean_entry_size) / (meta_block_size / clean_entry_size)) * meta_block_size;
if (meta_format == BLOCKSTORE_META_FORMAT_V1 ||
!meta_format && !skip_meta_check && meta_area_size < meta_len && !data_csum_type)
{
uint64_t clean_entry_v0_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
uint64_t meta_v0_len = (1 + (block_count - 1 + meta_block_size / clean_entry_v0_size)
/ (meta_block_size / clean_entry_v0_size)) * meta_block_size;
if (meta_format == BLOCKSTORE_META_FORMAT_V1 || meta_area_size >= meta_v0_len)
{
// Old metadata fits.
printf("Warning: Using old metadata format without checksums because the new format doesn't fit into provided area\n");
clean_entry_size = clean_entry_v0_size;
meta_len = meta_v0_len;
meta_format = BLOCKSTORE_META_FORMAT_V1;
}
else
meta_format = BLOCKSTORE_META_FORMAT_V2;
}
else
meta_format = BLOCKSTORE_META_FORMAT_V2;
if (!skip_meta_check && meta_area_size < meta_len)
{
throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes");
@@ -295,18 +237,6 @@ void blockstore_disk_t::open_data()
{
throw std::runtime_error(std::string("Failed to lock data device: ") + strerror(errno));
}
if (cached_read_data)
{
read_data_fd = open(data_device.c_str(), O_RDWR);
if (read_data_fd == -1)
{
throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno)));
}
}
else
{
read_data_fd = data_fd;
}
}
void blockstore_disk_t::open_meta()
@@ -327,18 +257,6 @@ void blockstore_disk_t::open_meta()
{
throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno));
}
if (cached_read_meta)
{
read_meta_fd = open(meta_device.c_str(), O_RDWR);
if (read_meta_fd == -1)
{
throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
}
}
else
{
read_meta_fd = meta_fd;
}
}
else
{
@@ -357,22 +275,6 @@ void blockstore_disk_t::open_meta()
") is not a multiple of data device sector size ("+std::to_string(meta_device_sect)+")"
);
}
if (!cached_read_meta)
{
read_meta_fd = meta_fd;
}
else if (meta_device == data_device && cached_read_data)
{
read_meta_fd = read_data_fd;
}
else
{
read_meta_fd = open(meta_device.c_str(), O_RDWR);
if (read_meta_fd == -1)
{
throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
}
}
}
void blockstore_disk_t::open_journal()
@@ -407,26 +309,6 @@ void blockstore_disk_t::open_journal()
") is not a multiple of journal device sector size ("+std::to_string(journal_device_sect)+")"
);
}
if (!cached_read_journal)
{
read_journal_fd = journal_fd;
}
else if (journal_device == meta_device && cached_read_meta)
{
read_journal_fd = read_meta_fd;
}
else if (journal_device == data_device && cached_read_data)
{
read_journal_fd = read_data_fd;
}
else
{
read_journal_fd = open(journal_device.c_str(), O_RDWR);
if (read_journal_fd == -1)
{
throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno)));
}
}
}
void blockstore_disk_t::close_all()
@@ -437,12 +319,5 @@ void blockstore_disk_t::close_all()
close(meta_fd);
if (journal_fd >= 0 && journal_fd != meta_fd)
close(journal_fd);
if (read_data_fd >= 0 && read_data_fd != data_fd)
close(read_data_fd);
if (read_meta_fd >= 0 && read_meta_fd != meta_fd)
close(read_meta_fd);
if (read_journal_fd >= 0 && read_journal_fd != journal_fd)
close(read_journal_fd);
data_fd = meta_fd = journal_fd = -1;
read_data_fd = read_meta_fd = read_journal_fd = -1;
}

View File

@@ -8,10 +8,6 @@
#include <string>
#include <map>
#define BLOCKSTORE_CSUM_NONE 0
// Lower byte of checksum type is its length
#define BLOCKSTORE_CSUM_CRC32C 0x104
struct blockstore_disk_t
{
std::string data_device, meta_device, journal_device;
@@ -25,24 +21,17 @@ struct blockstore_disk_t
uint64_t meta_block_size = 4096;
// Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
uint64_t bitmap_granularity = 4096;
// Data checksum type, BLOCKSTORE_CSUM_NONE or BLOCKSTORE_CSUM_CRC32C
uint32_t data_csum_type = BLOCKSTORE_CSUM_NONE;
// Checksum block size, must be a multiple of bitmap_granularity
uint32_t csum_block_size = 4096;
// By default, Blockstore locks all opened devices exclusively. This option can be used to disable locking
bool disable_flock = false;
// Use linux page cache for reads. If enabled, separate buffered FDs will be opened for reading
bool cached_read_data = false, cached_read_meta = false, cached_read_journal = false;
int meta_fd = -1, data_fd = -1, journal_fd = -1;
int read_meta_fd = -1, read_data_fd = -1, read_journal_fd = -1;
uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len, meta_format = 0;
uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len;
uint64_t data_offset, data_device_sect, data_device_size, data_len;
uint64_t journal_offset, journal_device_sect, journal_device_size, journal_len;
uint32_t block_order;
uint64_t block_count;
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, clean_dyn_size = 0;
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;
void parse_config(std::map<std::string, std::string> & config);
void open_data();
@@ -50,13 +39,4 @@ struct blockstore_disk_t
void open_journal();
void calc_lengths(bool skip_meta_check = false);
void close_all();
inline uint64_t dirty_dyn_size(uint64_t offset, uint64_t len)
{
// Checksums may be partial if write is not aligned with csum_block_size
return clean_entry_bitmap_size + (csum_block_size && len > 0
? ((offset+len+csum_block_size-1)/csum_block_size - offset/csum_block_size)
* (data_csum_type & 0xFF)
: 0);
}
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,22 +1,10 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#define COPY_BUF_JOURNAL 1
#define COPY_BUF_DATA 2
#define COPY_BUF_ZERO 4
#define COPY_BUF_CSUM_FILL 8
#define COPY_BUF_COALESCED 16
#define COPY_BUF_META_BLOCK 32
#define COPY_BUF_JOURNALED_BIG 64
struct copy_buffer_t
{
int copy_flags;
uint64_t offset, len, disk_offset;
uint64_t journal_sector; // only for reads: sector+1 if used and !journal.inmemory, otherwise 0
uint64_t offset, len;
void *buf;
uint8_t *csum_buf;
int *dyn_data;
};
struct meta_sector_t
@@ -49,7 +37,7 @@ class journal_flusher_co
{
blockstore_impl_t *bs;
journal_flusher_t *flusher;
int wait_state, wait_count, wait_journal_count;
int wait_state, wait_count;
struct io_uring_sqe *sqe;
struct ring_data_t *data;
@@ -58,39 +46,28 @@ class journal_flusher_co
obj_ver_id cur;
std::map<obj_ver_id, dirty_entry>::iterator dirty_it, dirty_start, dirty_end;
std::map<object_id, uint64_t>::iterator repeat_it;
std::function<void(ring_data_t*)> simple_callback_r, simple_callback_rj, simple_callback_w;
std::function<void(ring_data_t*)> simple_callback_r, simple_callback_w;
bool skip_copy, has_delete, has_writes;
std::vector<copy_buffer_t> v;
std::vector<copy_buffer_t>::iterator it;
int i;
bool fill_incomplete, cleared_incomplete;
int read_to_fill_incomplete;
int copy_count;
uint64_t clean_loc, clean_ver, old_clean_loc, old_clean_ver;
uint64_t clean_loc, old_clean_loc;
flusher_meta_write_t meta_old, meta_new;
bool clean_init_bitmap;
uint64_t clean_bitmap_offset, clean_bitmap_len;
uint8_t *clean_init_dyn_ptr;
uint8_t *new_clean_bitmap;
void *new_clean_bitmap;
uint64_t new_trim_pos;
// local: scan_dirty()
uint64_t offset, end_offset, submit_offset, submit_len;
friend class journal_flusher_t;
void scan_dirty();
bool read_dirty(int wait_base);
bool modify_meta_do_reads(int wait_base);
bool wait_meta_reads(int wait_base);
bool scan_dirty(int wait_base);
bool modify_meta_read(uint64_t meta_loc, flusher_meta_write_t &wr, int wait_base);
bool clear_incomplete_csum_block_bits(int wait_base);
void calc_block_checksums(uint32_t *new_data_csums, bool skip_overwrites);
void update_metadata_entry();
bool write_meta_block(flusher_meta_write_t & meta_block, int wait_base);
void update_clean_db();
void free_data_blocks();
bool fsync_batch(bool fsync_meta, int wait_base);
bool trim_journal(int wait_base);
void free_buffers();
public:
journal_flusher_co();
bool loop();
@@ -118,16 +95,14 @@ class journal_flusher_t
std::map<uint64_t, meta_sector_t> meta_sectors;
std::deque<object_id> flush_queue;
std::map<object_id, uint64_t> flush_versions; // FIXME: consider unordered_map?
std::map<object_id, uint64_t> flush_versions;
bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
bool try_find_other(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
public:
journal_flusher_t(blockstore_impl_t *bs);
~journal_flusher_t();
void loop();
bool is_trim_wanted() { return trim_wanted; }
bool is_active();
void mark_trim_possible();
void request_trim();
@@ -136,5 +111,4 @@ public:
void unshift_flush(obj_ver_id oid, bool force);
void remove_flush(object_id oid);
void dump_diagnostics();
bool is_mutated(uint64_t clean_loc);
};

View File

@@ -11,9 +11,8 @@ blockstore_impl_t::blockstore_impl_t(blockstore_config_t & config, ring_loop_t *
ring_consumer.loop = [this]() { loop(); };
ringloop->register_consumer(&ring_consumer);
initialized = 0;
parse_config(config, true);
parse_config(config);
zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.data_block_size);
alloc_dyn_data = dsk.clean_dyn_size > sizeof(void*) || dsk.csum_block_size > 0;
try
{
dsk.open_data();
@@ -39,8 +38,8 @@ blockstore_impl_t::~blockstore_impl_t()
dsk.close_all();
if (metadata_buffer)
free(metadata_buffer);
if (clean_bitmaps)
free(clean_bitmaps);
if (clean_bitmap)
free(clean_bitmap);
}
bool blockstore_impl_t::is_started()
@@ -172,7 +171,7 @@ void blockstore_impl_t::loop()
// Can't submit SYNC before previous writes
continue;
}
wr_st = continue_sync(op);
wr_st = continue_sync(op, false);
if (wr_st != 2)
{
has_writes = wr_st > 0 ? 1 : 2;
@@ -308,18 +307,6 @@ void blockstore_impl_t::check_wait(blockstore_op_t *op)
}
PRIV(op)->wait_for = 0;
}
else if (PRIV(op)->wait_for == WAIT_FREE)
{
if (!data_alloc->get_free_count() && big_to_flush > 0)
{
#ifdef BLOCKSTORE_DEBUG
printf("Still waiting for free space on the data device\n");
#endif
return;
}
flusher->release_trim();
PRIV(op)->wait_for = 0;
}
else
{
throw std::runtime_error("BUG: op->wait_for value is unexpected");
@@ -384,18 +371,13 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
return;
}
init_op(op);
submit_queue.push_back(op);
ringloop->wakeup();
}
void blockstore_impl_t::init_op(blockstore_op_t *op)
{
// Call constructor without allocating memory. We'll call destructor before returning op back
new ((void*)op->private_data) blockstore_op_private_t;
PRIV(op)->wait_for = 0;
PRIV(op)->op_state = 0;
PRIV(op)->pending_ops = 0;
submit_queue.push_back(op);
ringloop->wakeup();
}
static bool replace_stable(object_id oid, uint64_t version, int search_start, int search_end, obj_ver_id* list)
@@ -463,11 +445,11 @@ void blockstore_impl_t::reshard_clean_db(pool_id_t pool, uint32_t pg_count, uint
void blockstore_impl_t::process_list(blockstore_op_t *op)
{
uint32_t list_pg = op->pg_number+1;
uint32_t pg_count = op->pg_count;
uint64_t pg_stripe_size = op->pg_alignment;
uint64_t min_inode = op->min_oid.inode;
uint64_t max_inode = op->max_oid.inode;
uint32_t list_pg = op->offset+1;
uint32_t pg_count = op->len;
uint64_t pg_stripe_size = op->oid.stripe;
uint64_t min_inode = op->oid.inode;
uint64_t max_inode = op->version;
// Check PG
if (pg_count != 0 && (pg_stripe_size < MIN_DATA_BLOCK_SIZE || list_pg > pg_count))
{
@@ -514,13 +496,7 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
stable_alloc += clean_db.size();
}
}
if (op->list_stable_limit > 0)
{
stable_alloc = op->list_stable_limit;
if (stable_alloc > 1024*1024)
stable_alloc = 1024*1024;
}
if (stable_alloc < 32768)
else
{
stable_alloc = 32768;
}
@@ -531,22 +507,22 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
FINISH_OP(op);
return;
}
auto max_oid = op->max_oid;
bool limited = false;
pool_pg_id_t last_shard_id = 0;
for (auto shard_it = clean_db_shards.lower_bound(first_shard);
shard_it != clean_db_shards.end() && shard_it->first <= last_shard;
shard_it++)
{
auto & clean_db = shard_it->second;
auto clean_it = clean_db.begin(), clean_end = clean_db.end();
if (op->min_oid.inode != 0 || op->min_oid.stripe != 0)
if ((min_inode != 0 || max_inode != 0) && min_inode <= max_inode)
{
clean_it = clean_db.lower_bound(op->min_oid);
}
if ((max_oid.inode != 0 || max_oid.stripe != 0) && !(max_oid < op->min_oid))
{
clean_end = clean_db.upper_bound(max_oid);
clean_it = clean_db.lower_bound({
.inode = min_inode,
.stripe = 0,
});
clean_end = clean_db.upper_bound({
.inode = max_inode,
.stripe = UINT64_MAX,
});
}
for (; clean_it != clean_end; clean_it++)
{
@@ -565,29 +541,11 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
.oid = clean_it->first,
.version = clean_it->second.version,
};
if (op->list_stable_limit > 0 && stable_count >= op->list_stable_limit)
{
if (!limited)
{
limited = true;
max_oid = stable[stable_count-1].oid;
}
break;
}
}
if (op->list_stable_limit > 0)
{
// To maintain the order, we have to include objects in the same range from other shards
if (last_shard_id != 0 && last_shard_id != shard_it->first)
std::sort(stable, stable+stable_count);
if (stable_count > op->list_stable_limit)
stable_count = op->list_stable_limit;
}
last_shard_id = shard_it->first;
}
if (op->list_stable_limit == 0 && first_shard != last_shard)
if (first_shard != last_shard)
{
// If that's not a per-PG listing, sort clean entries (already sorted if list_stable_limit != 0)
// If that's not a per-PG listing, sort clean entries
std::sort(stable, stable+stable_count);
}
int clean_stable_count = stable_count;
@@ -596,17 +554,20 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
obj_ver_id *unstable = NULL;
{
auto dirty_it = dirty_db.begin(), dirty_end = dirty_db.end();
if (op->min_oid.inode != 0 || op->min_oid.stripe != 0)
if ((min_inode != 0 || max_inode != 0) && min_inode <= max_inode)
{
dirty_it = dirty_db.lower_bound({
.oid = op->min_oid,
.oid = {
.inode = min_inode,
.stripe = 0,
},
.version = 0,
});
}
if ((max_oid.inode != 0 || max_oid.stripe != 0) && !(max_oid < op->min_oid))
{
dirty_end = dirty_db.upper_bound({
.oid = max_oid,
.oid = {
.inode = max_inode,
.stripe = UINT64_MAX,
},
.version = UINT64_MAX,
});
}
@@ -650,11 +611,6 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
stable[stable_count++] = dirty_it->first;
}
}
if (op->list_stable_limit > 0 && stable_count >= op->list_stable_limit)
{
// Stop here
break;
}
}
else
{

View File

@@ -93,10 +93,11 @@
// "VITAstor"
#define BLOCKSTORE_META_MAGIC_V1 0x726F747341544956l
#define BLOCKSTORE_META_FORMAT_V1 1
#define BLOCKSTORE_META_FORMAT_V2 2
#define BLOCKSTORE_META_VERSION_V1 1
// metadata header (superblock)
// FIXME: After adding the OSD superblock, add a key to metadata
// and journal headers to check if they belong to the same OSD
struct __attribute__((__packed__)) blockstore_meta_header_v1_t
{
uint64_t zero;
@@ -107,29 +108,14 @@ struct __attribute__((__packed__)) blockstore_meta_header_v1_t
uint32_t bitmap_granularity;
};
struct __attribute__((__packed__)) blockstore_meta_header_v2_t
{
uint64_t zero;
uint64_t magic;
uint64_t version;
uint32_t meta_block_size;
uint32_t data_block_size;
uint32_t bitmap_granularity;
uint32_t data_csum_type;
uint32_t csum_block_size;
uint32_t header_csum;
};
// 32 bytes = 24 bytes + block bitmap (4 bytes by default) + external attributes (also bitmap, 4 bytes by default)
// per "clean" entry on disk with fixed metadata tables
// FIXME: maybe add crc32's to metadata
struct __attribute__((__packed__)) clean_disk_entry
{
object_id oid;
uint64_t version;
uint8_t bitmap[];
// Two more fields come after bitmap in metadata version 2:
// uint32_t data_csum[];
// uint32_t entry_csum;
};
// 32 = 16 + 16 bytes per "clean" entry in memory (object_id => clean_entry)
@@ -139,7 +125,7 @@ struct __attribute__((__packed__)) clean_entry
uint64_t location;
};
// 64 = 24 + 40 bytes per dirty entry in memory (obj_ver_id => dirty_entry). Plus checksums
// 64 = 24 + 40 bytes per dirty entry in memory (obj_ver_id => dirty_entry)
struct __attribute__((__packed__)) dirty_entry
{
uint32_t state;
@@ -148,7 +134,7 @@ struct __attribute__((__packed__)) dirty_entry
uint32_t offset; // data offset within object (stripe)
uint32_t len; // data length
uint64_t journal_sector; // journal sector used for this entry
void* dyn_data; // dynamic data: external bitmap and data block checksums. may be a pointer to the in-memory journal
void* bitmap; // either external bitmap itself when it fits, or a pointer to it when it doesn't
};
// - Sync must be submitted after previous writes/deletes (not before!)
@@ -174,26 +160,13 @@ struct __attribute__((__packed__)) dirty_entry
#define WAIT_JOURNAL 3
// Suspend operation until the next journal sector buffer is free
#define WAIT_JOURNAL_BUFFER 4
// Suspend operation until there is some free space on the data device
#define WAIT_FREE 5
struct used_clean_obj_t
struct fulfill_read_t
{
int refs;
bool was_freed; // was freed by a parallel flush?
bool was_changed; // was changed by a parallel flush?
uint64_t offset, len;
uint64_t journal_sector; // sector+1 if used and !journal.inmemory, otherwise 0
};
// https://github.com/algorithm-ninja/cpp-btree
// https://github.com/greg7mdp/sparsepp/ was used previously, but it was TERRIBLY slow after resizing
// with sparsepp, random reads dropped to ~700 iops very fast with just as much as ~32k objects in the DB
typedef btree::btree_map<object_id, clean_entry> blockstore_clean_db_t;
typedef std::map<obj_ver_id, dirty_entry> blockstore_dirty_db_t;
#include "blockstore_init.h"
#include "blockstore_flush.h"
#define PRIV(op) ((blockstore_op_private_t*)(op)->private_data)
#define FINISH_OP(op) PRIV(op)->~blockstore_op_private_t(); std::function<void (blockstore_op_t*)>(op->callback)(op)
@@ -206,8 +179,7 @@ struct blockstore_op_private_t
int op_state;
// Read
uint64_t clean_block_used;
std::vector<copy_buffer_t> read_vec;
std::vector<fulfill_read_t> read_vec;
// Sync, write
int min_flushed_journal_sector, max_flushed_journal_sector;
@@ -223,6 +195,16 @@ struct blockstore_op_private_t
int sync_small_checked, sync_big_checked;
};
// https://github.com/algorithm-ninja/cpp-btree
// https://github.com/greg7mdp/sparsepp/ was used previously, but it was TERRIBLY slow after resizing
// with sparsepp, random reads dropped to ~700 iops very fast with just as much as ~32k objects in the DB
typedef btree::btree_map<object_id, clean_entry> blockstore_clean_db_t;
typedef std::map<obj_ver_id, dirty_entry> blockstore_dirty_db_t;
#include "blockstore_init.h"
#include "blockstore_flush.h"
typedef uint32_t pool_id_t;
typedef uint64_t pool_pg_id_t;
@@ -234,11 +216,6 @@ struct pool_shard_settings_t
uint32_t pg_stripe_size;
};
#define STAB_SPLIT_DONE 1
#define STAB_SPLIT_WAIT 2
#define STAB_SPLIT_SYNC 3
#define STAB_SPLIT_TODO 4
class blockstore_impl_t
{
blockstore_disk_t dsk;
@@ -269,7 +246,7 @@ class blockstore_impl_t
std::map<pool_id_t, pool_shard_settings_t> clean_db_settings;
std::map<pool_pg_id_t, blockstore_clean_db_t> clean_db_shards;
uint8_t *clean_bitmaps = NULL;
uint8_t *clean_bitmap = NULL;
blockstore_dirty_db_t dirty_db;
std::vector<blockstore_op_t*> submit_queue;
std::vector<obj_ver_id> unsynced_big_writes, unsynced_small_writes;
@@ -281,12 +258,7 @@ class blockstore_impl_t
struct journal_t journal;
journal_flusher_t *flusher;
int big_to_flush = 0;
int write_iodepth = 0;
bool alloc_dyn_data = false;
// clean data blocks referenced by read operations
std::map<uint64_t, used_clean_obj_t> used_clean_objects;
bool live = false, queue_stall = false;
ring_loop_t *ringloop;
@@ -305,6 +277,7 @@ class blockstore_impl_t
friend class journal_flusher_t;
friend class journal_flusher_co;
void parse_config(blockstore_config_t & config);
void calc_lengths();
void open_data();
void open_meta();
@@ -326,34 +299,11 @@ class blockstore_impl_t
blockstore_init_journal* journal_init_reader;
void check_wait(blockstore_op_t *op);
void init_op(blockstore_op_t *op);
// Read
int dequeue_read(blockstore_op_t *read_op);
void find_holes(std::vector<copy_buffer_t> & read_vec, uint32_t item_start, uint32_t item_end,
std::function<int(int, bool, uint32_t, uint32_t)> callback);
int fulfill_read(blockstore_op_t *read_op,
uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
uint32_t item_state, uint64_t item_version, uint64_t item_location,
uint64_t journal_sector, uint8_t *csum, int *dyn_data);
bool fulfill_clean_read(blockstore_op_t *read_op, uint64_t & fulfilled,
uint8_t *clean_entry_bitmap, int *dyn_data,
uint32_t item_start, uint32_t item_end, uint64_t clean_loc, uint64_t clean_ver);
int fill_partial_checksum_blocks(std::vector<copy_buffer_t> & rv, uint64_t & fulfilled,
uint8_t *clean_entry_bitmap, int *dyn_data, bool from_journal, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end);
int pad_journal_read(std::vector<copy_buffer_t> & rv, copy_buffer_t & cp,
uint64_t dirty_offset, uint64_t dirty_end, uint64_t dirty_loc, uint8_t *csum_ptr, int *dyn_data,
uint64_t offset, uint64_t submit_len, uint64_t & blk_begin, uint64_t & blk_end, uint8_t* & blk_buf);
bool read_range_fulfilled(std::vector<copy_buffer_t> & rv, uint64_t & fulfilled, uint8_t *read_buf,
uint8_t *clean_entry_bitmap, uint32_t item_start, uint32_t item_end);
bool read_checksum_block(blockstore_op_t *op, int rv_pos, uint64_t &fulfilled, uint64_t clean_loc);
uint8_t* read_clean_meta_block(blockstore_op_t *read_op, uint64_t clean_loc, int rv_pos);
bool verify_padded_checksums(uint8_t *clean_entry_bitmap, uint8_t *csum_buf, uint32_t offset,
iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb);
bool verify_journal_checksums(uint8_t *csums, uint32_t offset,
iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb);
bool verify_clean_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, uint8_t *dyn_data, bool from_journal,
iovec *iov, int n_iov, std::function<void(uint32_t, uint32_t, uint32_t)> bad_block_cb);
int fulfill_read(blockstore_op_t *read_op, uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint64_t journal_sector);
int fulfill_read_push(blockstore_op_t *op, void *buf, uint64_t offset, uint64_t len,
uint32_t item_state, uint64_t item_version);
void handle_read_event(ring_data_t *data, blockstore_op_t *op);
@@ -368,7 +318,7 @@ class blockstore_impl_t
void handle_write_event(ring_data_t *data, blockstore_op_t *op);
// Sync
int continue_sync(blockstore_op_t *op);
int continue_sync(blockstore_op_t *op, bool queue_has_in_progress_sync);
void ack_sync(blockstore_op_t *op);
// Stabilize
@@ -376,15 +326,12 @@ class blockstore_impl_t
int continue_stable(blockstore_op_t *op);
void mark_stable(const obj_ver_id & ov, bool forget_dirty = false);
void stabilize_object(object_id oid, uint64_t max_ver);
blockstore_op_t* selective_sync(blockstore_op_t *op);
int split_stab_op(blockstore_op_t *op, std::function<int(obj_ver_id v)> decider);
// Rollback
int dequeue_rollback(blockstore_op_t *op);
int continue_rollback(blockstore_op_t *op);
void mark_rolled_back(const obj_ver_id & ov);
void erase_dirty(blockstore_dirty_db_t::iterator dirty_start, blockstore_dirty_db_t::iterator dirty_end, uint64_t clean_loc);
void free_dirty_dyn_data(dirty_entry & e);
// List
void process_list(blockstore_op_t *op);
@@ -394,8 +341,6 @@ public:
blockstore_impl_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
~blockstore_impl_t();
void parse_config(blockstore_config_t & config, bool init);
// Event loop
void loop();

View File

@@ -65,7 +65,7 @@ int blockstore_init_meta::loop()
GET_SQE();
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
my_uring_prep_readv(sqe, bs->dsk.read_meta_fd, &data->iov, 1, bs->dsk.meta_offset);
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
bs->ringloop->submit();
submitted++;
resume_1:
@@ -77,20 +77,13 @@ resume_1:
if (iszero((uint64_t*)metadata_buffer, bs->dsk.meta_block_size / sizeof(uint64_t)))
{
{
blockstore_meta_header_v2_t *hdr = (blockstore_meta_header_v2_t *)metadata_buffer;
blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)metadata_buffer;
hdr->zero = 0;
hdr->magic = BLOCKSTORE_META_MAGIC_V1;
hdr->version = bs->dsk.meta_format;
hdr->version = BLOCKSTORE_META_VERSION_V1;
hdr->meta_block_size = bs->dsk.meta_block_size;
hdr->data_block_size = bs->dsk.data_block_size;
hdr->bitmap_granularity = bs->dsk.bitmap_granularity;
if (bs->dsk.meta_format >= BLOCKSTORE_META_FORMAT_V2)
{
hdr->data_csum_type = bs->dsk.data_csum_type;
hdr->csum_block_size = bs->dsk.csum_block_size;
hdr->header_csum = 0;
hdr->header_csum = crc32c(0, hdr, sizeof(*hdr));
}
}
if (bs->readonly)
{
@@ -116,62 +109,28 @@ resume_1:
}
else
{
blockstore_meta_header_v2_t *hdr = (blockstore_meta_header_v2_t *)metadata_buffer;
if (hdr->zero != 0 || hdr->magic != BLOCKSTORE_META_MAGIC_V1 || hdr->version < BLOCKSTORE_META_FORMAT_V1)
blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)metadata_buffer;
if (hdr->zero != 0 ||
hdr->magic != BLOCKSTORE_META_MAGIC_V1 ||
hdr->version != BLOCKSTORE_META_VERSION_V1)
{
printf(
"Metadata is corrupt or too old (pre-0.6.x).\n"
" If this is a new OSD, please zero out the metadata area before starting it.\n"
" If you need to upgrade from 0.5.x, convert metadata with vitastor-disk.\n"
);
exit(1);
}
if (hdr->version == BLOCKSTORE_META_FORMAT_V2)
{
uint32_t csum = hdr->header_csum;
hdr->header_csum = 0;
if (crc32c(0, hdr, sizeof(*hdr)) != csum)
{
printf("Metadata header is corrupt (checksum mismatch).\n");
exit(1);
}
hdr->header_csum = csum;
bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V2;
}
else if (hdr->version == BLOCKSTORE_META_FORMAT_V1)
{
hdr->data_csum_type = 0;
hdr->csum_block_size = 0;
hdr->header_csum = 0;
// Enable compatibility mode - entries without checksums
bs->dsk.clean_entry_size = sizeof(clean_disk_entry) + bs->dsk.clean_entry_bitmap_size*2;
bs->dsk.meta_len = (1 + (bs->dsk.block_count - 1 + bs->dsk.meta_block_size / bs->dsk.clean_entry_size)
/ (bs->dsk.meta_block_size / bs->dsk.clean_entry_size)) * bs->dsk.meta_block_size;
bs->dsk.meta_format = BLOCKSTORE_META_FORMAT_V1;
printf("Warning: Starting with metadata in the old format without checksums, as stored on disk\n");
}
else if (hdr->version > BLOCKSTORE_META_FORMAT_V2)
{
printf(
"Metadata format is too new for me (stored version is %lu, max supported %u).\n",
hdr->version, BLOCKSTORE_META_FORMAT_V2
"Metadata is corrupt or old version.\n"
" If this is a new OSD please zero out the metadata area before starting it.\n"
" If you need to upgrade from 0.5.x please request it via the issue tracker.\n"
);
exit(1);
}
if (hdr->meta_block_size != bs->dsk.meta_block_size ||
hdr->data_block_size != bs->dsk.data_block_size ||
hdr->bitmap_granularity != bs->dsk.bitmap_granularity ||
hdr->data_csum_type != bs->dsk.data_csum_type ||
hdr->csum_block_size != bs->dsk.csum_block_size)
hdr->bitmap_granularity != bs->dsk.bitmap_granularity)
{
printf(
"Configuration stored in metadata superblock"
" (meta_block_size=%u, data_block_size=%u, bitmap_granularity=%u, data_csum_type=%u, csum_block_size=%u)"
" differs from OSD configuration (%lu/%u/%lu, %u/%u).\n",
" (meta_block_size=%u, data_block_size=%u, bitmap_granularity=%u)"
" differs from OSD configuration (%lu/%u/%lu).\n",
hdr->meta_block_size, hdr->data_block_size, hdr->bitmap_granularity,
hdr->data_csum_type, hdr->csum_block_size,
bs->dsk.meta_block_size, bs->dsk.data_block_size, bs->dsk.bitmap_granularity,
bs->dsk.data_csum_type, bs->dsk.csum_block_size
bs->dsk.meta_block_size, bs->dsk.data_block_size, bs->dsk.bitmap_granularity
);
exit(1);
}
@@ -202,7 +161,7 @@ resume_2:
data->iov = { bufs[i].buf, bufs[i].size };
data->callback = [this, i](ring_data_t *data) { handle_event(data, i); };
if (!zero_on_init)
my_uring_prep_readv(sqe, bs->dsk.read_meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
else
{
// Fill metadata with zeroes
@@ -259,7 +218,7 @@ resume_2:
GET_SQE();
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
my_uring_prep_readv(sqe, bs->dsk.read_meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
submitted++;
resume_5:
if (submitted > 0)
@@ -320,22 +279,12 @@ bool blockstore_init_meta::handle_meta_block(uint8_t *buf, uint64_t entries_per_
for (uint64_t i = 0; i < max_i; i++)
{
clean_disk_entry *entry = (clean_disk_entry*)(buf + i*bs->dsk.clean_entry_size);
if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
{
memcpy(bs->clean_bitmap + (done_cnt+i)*2*bs->dsk.clean_entry_bitmap_size, &entry->bitmap, 2*bs->dsk.clean_entry_bitmap_size);
}
if (entry->oid.inode > 0)
{
if (bs->dsk.meta_format >= BLOCKSTORE_META_FORMAT_V2)
{
// Check entry crc32
uint32_t *entry_csum = (uint32_t*)((uint8_t*)entry + bs->dsk.clean_entry_size - 4);
if (*entry_csum != crc32c(0, entry, bs->dsk.clean_entry_size - 4))
{
printf("Metadata entry %lu is corrupt (checksum mismatch), skipping\n", done_cnt+i);
continue;
}
}
if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
{
memcpy(bs->clean_bitmaps + (done_cnt+i) * 2 * bs->dsk.clean_entry_bitmap_size, &entry->bitmap, 2 * bs->dsk.clean_entry_bitmap_size);
}
auto & clean_db = bs->clean_db_shard(entry->oid);
auto clean_it = clean_db.find(entry->oid);
if (clean_it == clean_db.end() || clean_it->second.version < entry->version)
@@ -467,7 +416,7 @@ int blockstore_init_journal::loop()
data = ((ring_data_t*)sqe->user_data);
data->iov = { submitted_buf, bs->journal.block_size };
data->callback = simple_callback;
my_uring_prep_readv(sqe, bs->dsk.read_journal_fd, &data->iov, 1, bs->journal.offset);
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
bs->ringloop->submit();
wait_count = 1;
resume_1:
@@ -491,9 +440,7 @@ resume_1:
.size = sizeof(journal_entry_start),
.reserved = 0,
.journal_start = bs->journal.block_size,
.version = JOURNAL_VERSION_V2,
.data_csum_type = bs->dsk.data_csum_type,
.csum_block_size = bs->dsk.csum_block_size,
.version = JOURNAL_VERSION,
};
((journal_entry_start*)submitted_buf)->crc32 = je_crc32((journal_entry*)submitted_buf);
if (bs->readonly)
@@ -545,36 +492,18 @@ resume_1:
if (je_start->magic != JOURNAL_MAGIC ||
je_start->type != JE_START ||
je_crc32((journal_entry*)je_start) != je_start->crc32 ||
je_start->size != JE_START_V0_SIZE && je_start->size != JE_START_V1_SIZE && je_start->size != JE_START_V2_SIZE)
je_start->size != sizeof(journal_entry_start) && je_start->size != JE_START_LEGACY_SIZE)
{
// Entry is corrupt
fprintf(stderr, "First entry of the journal is corrupt or unsupported\n");
fprintf(stderr, "First entry of the journal is corrupt\n");
exit(1);
}
if (je_start->size == JE_START_V0_SIZE ||
(je_start->version != JOURNAL_VERSION_V1 || je_start->size != JE_START_V1_SIZE) &&
(je_start->version != JOURNAL_VERSION_V2 || je_start->size != JE_START_V2_SIZE))
if (je_start->size == JE_START_LEGACY_SIZE || je_start->version != JOURNAL_VERSION)
{
fprintf(
stderr, "The code only supports journal versions 2 and 1, but it is %lu on disk."
" Please use vitastor-disk to rewrite the journal\n",
je_start->size == JE_START_V0_SIZE ? 0 : je_start->version
);
exit(1);
}
if (je_start->version == JOURNAL_VERSION_V1)
{
je_start->data_csum_type = 0;
je_start->csum_block_size = 0;
}
if (je_start->data_csum_type != bs->dsk.data_csum_type ||
je_start->csum_block_size != bs->dsk.csum_block_size)
{
printf(
"Configuration stored in journal superblock (data_csum_type=%u, csum_block_size=%u)"
" differs from OSD configuration (%u/%u).\n",
je_start->data_csum_type, je_start->csum_block_size,
bs->dsk.data_csum_type, bs->dsk.csum_block_size
stderr, "The code only supports journal version %d, but it is %lu on disk."
" Please use the previous version to flush the journal before upgrading OSD\n",
JOURNAL_VERSION, je_start->size == JE_START_LEGACY_SIZE ? 0 : je_start->version
);
exit(1);
}
@@ -607,7 +536,7 @@ resume_1:
end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE,
};
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
my_uring_prep_readv(sqe, bs->dsk.read_journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
bs->ringloop->submit();
}
while (done.size() > 0)
@@ -776,14 +705,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
snprintf(err, 1024, "BUG: calculated journal data offset (%08lx) != stored journal data offset (%08lx)", location, je->small_write.data_offset);
throw std::runtime_error(err);
}
small_write_data.clear();
uint32_t data_crc32 = 0;
if (location >= done_pos && location+je->small_write.len <= done_pos+len)
{
// data is within this buffer
small_write_data.push_back((iovec){
.iov_base = (uint8_t*)buf + location - done_pos,
.iov_len = je->small_write.len,
});
data_crc32 = crc32c(0, (uint8_t*)buf + location - done_pos, je->small_write.len);
}
else
{
@@ -798,10 +724,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
? location+je->small_write.len : done[i].pos+done[i].len);
uint64_t part_begin = (location < done[i].pos ? done[i].pos : location);
covered += part_end - part_begin;
small_write_data.push_back((iovec){
.iov_base = (uint8_t*)done[i].buf + part_begin - done[i].pos,
.iov_len = part_end - part_begin,
});
data_crc32 = crc32c(data_crc32, (uint8_t*)done[i].buf + part_begin - done[i].pos, part_end - part_begin);
}
}
if (covered < je->small_write.len)
@@ -811,102 +734,12 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
return 2;
}
}
bool data_csum_valid = true;
if (!bs->dsk.csum_block_size)
{
uint32_t data_crc32 = 0;
for (auto & sd: small_write_data)
{
data_crc32 = crc32c(data_crc32, sd.iov_base, sd.iov_len);
}
data_csum_valid = data_crc32 == je->small_write.crc32_data;
if (!data_csum_valid)
{
printf(
"Journal entry data is corrupt for small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u - data crc32 %x != %x\n",
je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version,
je->small_write.offset, je->small_write.len,
data_crc32, je->small_write.crc32_data
);
}
}
else if (je->small_write.len > 0)
{
// FIXME: deduplicate with disk_tool_journal.cpp
// like in enqueue_write()
uint32_t start = je->small_write.offset / bs->dsk.csum_block_size;
uint32_t end = (je->small_write.offset+je->small_write.len-1) / bs->dsk.csum_block_size;
uint32_t data_csum_size = (end-start+1) * (bs->dsk.data_csum_type & 0xFF);
uint32_t required_size = sizeof(journal_entry_small_write) + bs->dsk.clean_entry_bitmap_size + data_csum_size;
if (je->size != required_size)
{
printf(
"Journal entry data has invalid size for small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u - should be %u bytes but is %u bytes\n",
je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version,
je->small_write.offset, je->small_write.len,
required_size, je->size
);
data_csum_valid = false;
}
else
{
int sd_num = 0;
size_t sd_pos = 0;
uint32_t *block_csums = (uint32_t*)((uint8_t*)je + sizeof(journal_entry_small_write) + bs->dsk.clean_entry_bitmap_size);
for (uint32_t pos = start; pos <= end; pos++, block_csums++)
{
size_t block_left = (pos == start
? (start == end
? je->small_write.len
: bs->dsk.csum_block_size - je->small_write.offset%bs->dsk.csum_block_size)
: (pos < end
? bs->dsk.csum_block_size
: (je->small_write.offset + je->small_write.len)%bs->dsk.csum_block_size));
if (pos > start && pos == end && block_left == 0)
{
// full last block
block_left = bs->dsk.csum_block_size;
}
uint32_t block_crc32 = 0;
while (block_left > 0)
{
assert(sd_num < small_write_data.size());
if (small_write_data[sd_num].iov_len >= sd_pos+block_left)
{
block_crc32 = crc32c(block_crc32, (uint8_t*)small_write_data[sd_num].iov_base+sd_pos, block_left);
sd_pos += block_left;
break;
}
else
{
block_crc32 = crc32c(block_crc32, (uint8_t*)small_write_data[sd_num].iov_base+sd_pos, small_write_data[sd_num].iov_len-sd_pos);
block_left -= (small_write_data[sd_num].iov_len-sd_pos);
sd_pos = 0;
sd_num++;
}
}
if (block_crc32 != *block_csums)
{
printf(
"Journal entry data is corrupt for small_write%s oid=%lx:%lx ver=%lu offset=%u len=%u - block %u crc32 %x != %x\n",
je->type == JE_SMALL_WRITE_INSTANT ? "_instant" : "",
je->small_write.oid.inode, je->small_write.oid.stripe, je->small_write.version,
je->small_write.offset, je->small_write.len,
pos, block_crc32, *block_csums
);
data_csum_valid = false;
break;
}
}
}
}
if (!data_csum_valid)
if (data_crc32 != je->small_write.crc32_data)
{
// journal entry is corrupt, stop here
// interesting thing is that we must clear the corrupt entry if we're not readonly,
// because we don't write next entries in the same journal block
printf("Journal entry data is corrupt (data crc32 %x != %x)\n", data_crc32, je->small_write.crc32_data);
memset((uint8_t*)buf + proc_pos - done_pos + pos, 0, bs->journal.block_size - pos);
bs->journal.next_free = prev_free;
init_write_buf = (uint8_t*)buf + proc_pos - done_pos;
@@ -922,14 +755,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.oid = je->small_write.oid,
.version = je->small_write.version,
};
uint64_t dyn_size = bs->dsk.dirty_dyn_size(je->small_write.offset, je->small_write.len);
void *dyn = NULL;
void *dyn_from = (uint8_t*)je + sizeof(journal_entry_small_write);
if (!bs->alloc_dyn_data)
void *bmp = NULL;
void *bmp_from = (uint8_t*)je + sizeof(journal_entry_small_write);
if (bs->dsk.clean_entry_bitmap_size <= sizeof(void*))
{
// Bitmap without checksum is only 4 bytes for 128k objects, save it inline
// It can even contain 4 byte bitmap + 4 byte CRC32 for 4 kb writes :)
memcpy(&dyn, dyn_from, dyn_size);
memcpy(&bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
}
else
{
@@ -937,9 +767,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
// allocations for entry bitmaps. This can only be fixed by using
// a patched map with dynamic entry size, but not the btree_map,
// because it doesn't keep iterators valid all the time.
dyn = malloc_or_die(dyn_size+sizeof(int));
*((int*)dyn) = 1;
memcpy((uint8_t*)dyn+sizeof(int), dyn_from, dyn_size);
bmp = malloc_or_die(bs->dsk.clean_entry_bitmap_size);
memcpy(bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
}
bs->dirty_db.emplace(ov, (dirty_entry){
.state = (BS_ST_SMALL_WRITE | BS_ST_SYNCED),
@@ -948,7 +777,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.offset = je->small_write.offset,
.len = je->small_write.len,
.journal_sector = proc_pos,
.dyn_data = dyn,
.bitmap = bmp,
});
bs->journal.used_sectors[proc_pos]++;
#ifdef BLOCKSTORE_DEBUG
@@ -1007,13 +836,11 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.oid = je->big_write.oid,
.version = je->big_write.version,
};
uint64_t dyn_size = bs->dsk.dirty_dyn_size(je->big_write.offset, je->big_write.len);
void *dyn = NULL;
void *dyn_from = (uint8_t*)je + sizeof(journal_entry_big_write);
if (!bs->alloc_dyn_data)
void *bmp = NULL;
void *bmp_from = (uint8_t*)je + sizeof(journal_entry_big_write);
if (bs->dsk.clean_entry_bitmap_size <= sizeof(void*))
{
// Bitmap without checksum is only 4 bytes for 128k objects, save it inline
memcpy(&dyn, dyn_from, dyn_size);
memcpy(&bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
}
else
{
@@ -1021,9 +848,8 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
// allocations for entry bitmaps. This can only be fixed by using
// a patched map with dynamic entry size, but not the btree_map,
// because it doesn't keep iterators valid all the time.
dyn = malloc_or_die(dyn_size+sizeof(int));
*((int*)dyn) = 1;
memcpy((uint8_t*)dyn+sizeof(int), dyn_from, dyn_size);
bmp = malloc_or_die(bs->dsk.clean_entry_bitmap_size);
memcpy(bmp, bmp_from, bs->dsk.clean_entry_bitmap_size);
}
auto dirty_it = bs->dirty_db.emplace(ov, (dirty_entry){
.state = (BS_ST_BIG_WRITE | BS_ST_SYNCED),
@@ -1032,7 +858,7 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
.offset = je->big_write.offset,
.len = je->big_write.len,
.journal_sector = proc_pos,
.dyn_data = dyn,
.bitmap = bmp,
}).first;
if (bs->data_alloc->get(je->big_write.location >> bs->dsk.block_order))
{

Some files were not shown because too many files have changed in this diff Show More