Compare commits
138 Commits
etcd-hide-
...
v0.9.0
Author | SHA1 | Date | |
---|---|---|---|
3b4cf29e65 | |||
eeaba11ebd | |||
aea567cfbd | |||
ce02f47de6 | |||
5fd3208616 | |||
5997b76535 | |||
f1961157f0 | |||
88c1ba0790 | |||
b5bd611683 | |||
fa90b5a4e7 | |||
8d40ad99a6 | |||
3475772b07 | |||
25fcedf6e7 | |||
6ca20aa194 | |||
4bfd994341 | |||
59e959dcbb | |||
a9581f0739 | |||
105a405b0a | |||
d55d7d5326 | |||
0e5d0e02a9 | |||
0439981a66 | |||
6648f6bb6e | |||
281be547eb | |||
0c78dd7178 | |||
3c924397e7 | |||
c3bd26193d | |||
43b77d7619 | |||
a6d846863b | |||
8dc427b43c | |||
bf2112653b | |||
0538a484b3 | |||
97720fa6b4 | |||
e60e352df6 | |||
98077a1712 | |||
1c7d53996d | |||
2ca07b1ea7 | |||
022176aa98 | |||
120e3fa7bc | |||
629999f789 | |||
93eca11ba2 | |||
5a9e1ede52 | |||
1c9a188600 | |||
de3e609166 | |||
11481170f5 | |||
e69d459d43 | |||
da82754baa | |||
d356aca030 | |||
04a273d213 | |||
6442010f93 | |||
6f4dc16c59 | |||
ce4a8067b5 | |||
e431ecb715 | |||
8cac795445 | |||
a409598b16 | |||
f4c6765522 | |||
ad2916068a | |||
321cb435a6 | |||
cfcf4f4355 | |||
e0fb17bfee | |||
5b9031fecc | |||
5da1d8e1b5 | |||
44f86f1999 | |||
2d9a80c6f6 | |||
5e295e346e | |||
d9c0898b7c | |||
04cfb48361 | |||
ab615849d6 | |||
38be9a49c0 | |||
7d6bf84a3e | |||
41a40a4123 | |||
b94587ef0e | |||
2a2f4f6738 | |||
c768a9015f | |||
0d9e10cf96 | |||
b74ccb613c | |||
5052174918 | |||
eec9cf5575 | |||
a04dab0840 | |||
160863f707 | |||
2f16c32eb4 | |||
2877cd0adb | |||
480509f5b9 | |||
46462da45e | |||
024c8658f6 | |||
7e958afeda | |||
2f5e769a29 | |||
28d5e53c6c | |||
d9f55f11d8 | |||
3237014608 | |||
baaf8f6f44 | |||
1d83fdcd17 | |||
0ddd787c38 | |||
6eff3a60a5 | |||
888a6975ab | |||
cd1e890bd4 | |||
0fbf4c6a08 | |||
d06ed2b0e7 | |||
3bbc46543d | |||
2fb0c85618 | |||
d81a6c04fc | |||
7b35801647 | |||
f3228d5c07 | |||
18366f5055 | |||
851507c147 | |||
9aaad28488 | |||
dd57d086fe | |||
8810eae8fb | |||
c1365f46c9 | |||
14d6acbcba | |||
1e307069bc | |||
c3e80abad7 | |||
138ffe4032 | |||
8139a34e97 | |||
4ab630b44d | |||
2c8241b7db | |||
36a7dd3671 | |||
936122bbcf | |||
1a1ba0d1e7 | |||
3d09c9cec7 | |||
3d08a1ad6c | |||
499881d81c | |||
aba93b951b | |||
d125fb1f30 | |||
9d3fd72298 | |||
8b552a01f9 | |||
0385b2f9e8 | |||
749c837045 | |||
98001d845b | |||
c96bcae74b | |||
9f4e34a8cc | |||
81fc8bb94c | |||
bc465c16de | |||
8763e9211c | |||
9e1a80bd17 | |||
3e280f2f08 | |||
fe87b4076b | |||
a38957c1a7 | |||
137309cf29 |
36
.gitea/workflows/buildenv.Dockerfile
Normal file
36
.gitea/workflows/buildenv.Dockerfile
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Build environment image for the CI workflow: node:16 on Debian bullseye with
# etcd, QEMU, fio and their build dependencies installed, plus fio/QEMU source
# packages downloaded into /root and a pre-configured QEMU source tree.
FROM node:16-bullseye

WORKDIR /root

# Key file for the vitastor.io APT repository added below.
# COPY (not ADD) because this is a plain local file; the trailing slash makes
# the destination unambiguously a directory.
COPY ./docker/vitastor.gpg /etc/apt/trusted.gpg.d/

# Configure APT:
#  - add the bullseye-backports and vitastor.io repositories,
#  - pin backports at priority 500 and vitastor.io at priority 1000,
#  - duplicate every "deb" entry as "deb-src" (needed for build-dep / source),
#  - do not install Recommends/Suggests.
# "set -e" makes the layer fail as soon as any step fails instead of silently
# continuing with a half-written configuration.
RUN set -e; \
    echo 'deb http://deb.debian.org/debian bullseye-backports main' >> /etc/apt/sources.list; \
    echo 'deb http://vitastor.io/debian bullseye main' >> /etc/apt/sources.list; \
    echo >> /etc/apt/preferences; \
    echo 'Package: *' >> /etc/apt/preferences; \
    echo 'Pin: release a=bullseye-backports' >> /etc/apt/preferences; \
    echo 'Pin-Priority: 500' >> /etc/apt/preferences; \
    echo >> /etc/apt/preferences; \
    echo 'Package: *' >> /etc/apt/preferences; \
    echo 'Pin: origin "vitastor.io"' >> /etc/apt/preferences; \
    echo 'Pin-Priority: 1000' >> /etc/apt/preferences; \
    grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
    echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
    echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf

# "apt-get update" runs in the same layer as the first install so that a cached
# (stale) package index is never combined with a fresh install step.
RUN set -e; \
    apt-get update; \
    apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
        liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev

# Build dependencies for fio and for exactly the installed QEMU version.
RUN apt-get -y build-dep fio qemu=$(dpkg -s qemu-system-x86 | grep ^Version: | awk '{print $2}')

RUN apt-get -y install jq lp-solve sudo

# Download (but do not unpack) the matching source packages into /root.
RUN apt-get --download-only source fio qemu=$(dpkg -s qemu-system-x86 | grep ^Version: | awk '{print $2}')

# Unpack the QEMU source package and run its configure step.
# The configure target is tried under two names (configure-qemu, then
# b/configure-stamp); config-poison.h is built on a best-effort basis, while
# qapi/qapi-builtin-types.h must build successfully.
RUN set -ex; \
    mkdir qemu-build; \
    cd qemu-build; \
    dpkg-source -x /root/qemu*.dsc; \
    cd qemu*/; \
    debian/rules configure-qemu || debian/rules b/configure-stamp; \
    cd b/qemu; \
    make -j8 config-poison.h || true; \
    make -j8 qapi/qapi-builtin-types.h
|
16
.gitea/workflows/test.Dockerfile
Normal file
16
.gitea/workflows/test.Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Test image: builds the checked-out source tree (the Docker build context)
# with ASAN and QEMU support on top of the buildenv image.
FROM git.yourcmc.ru/vitalif/vitastor/buildenv

# COPY (not ADD): the build context is a plain directory, no archive
# extraction or URL fetching is wanted.
COPY . /root/vitastor

# Steps:
#  - unpack the fio source package found in /root and link it and the QEMU
#    tree from /root/qemu-build into the source tree as ./fio and ./qemu,
#  - provide linux/raw.h from ./debian/raw.h if the system headers lack it,
#  - configure and build in ./build.
RUN set -e -x; \
    mkdir -p /root/fio-build/; \
    cd /root/fio-build/; \
    dpkg-source -x /root/fio*.dsc; \
    cd /root/vitastor; \
    ln -s /root/fio-build/fio-*/ ./fio; \
    ln -s /root/qemu-build/qemu-*/ ./qemu; \
    [ -e /usr/include/linux/raw.h ] || cp ./debian/raw.h /usr/include/linux/raw.h; \
    mkdir build; \
    cd build; \
    cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
    make -j16
|
660
.gitea/workflows/test.yml
Normal file
660
.gitea/workflows/test.yml
Normal file
@@ -0,0 +1,660 @@
|
|||||||
|
# CI workflow: build the build-environment image (only if it is missing from
# the registry), build a per-commit test image, then run "make test" and every
# integration test script in its own job inside that image.
#
# NOTE(review): the test_* jobs below appear to follow the template printed by
# .gitea/workflows/tests-to-yaml.pl - keep the two in sync when editing.
name: Test

on:
  push:
    branches:
      - '*'
    paths:
      - '.gitea/**'
      - 'src/**'
      - 'mon/**'
      - 'json11'
      - 'cpp-btree'
      - 'tests/**'

env:
  BUILDENV_IMAGE: git.yourcmc.ru/vitalif/vitastor/buildenv
  TEST_IMAGE: git.yourcmc.ru/vitalif/vitastor/test
  OSD_ARGS: '--etcd_quick_timeout 2000'

# Only one run per ref; a newer push cancels the run in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

jobs:

  buildenv:
    runs-on: ubuntu-latest
    container: git.yourcmc.ru/vitalif/gitea-ci-dind
    steps:
      - uses: actions/checkout@v3

      - name: Build and push
        env:
          # Passed via the environment and stdin so the token never appears
          # on a process command line.
          REGISTRY_TOKEN: ${{ secrets.TOKEN }}
        run: |
          set -ex
          if ! docker manifest inspect "$BUILDENV_IMAGE" >/dev/null; then
            docker build -t "$BUILDENV_IMAGE" -f .gitea/workflows/buildenv.Dockerfile .
            printf '%s' "$REGISTRY_TOKEN" | docker login git.yourcmc.ru -u vitalif --password-stdin
            docker push "$BUILDENV_IMAGE"
          fi

  build:
    runs-on: ubuntu-latest
    needs: buildenv
    container: git.yourcmc.ru/vitalif/gitea-ci-dind
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: true

      - name: Build and push
        env:
          # Passed via the environment and stdin so the token never appears
          # on a process command line.
          REGISTRY_TOKEN: ${{ secrets.TOKEN }}
        run: |
          set -ex
          if ! docker manifest inspect "$TEST_IMAGE:$GITHUB_SHA" >/dev/null; then
            docker build -t "$TEST_IMAGE:$GITHUB_SHA" -f .gitea/workflows/test.Dockerfile .
            printf '%s' "$REGISTRY_TOKEN" | docker login git.yourcmc.ru -u vitalif --password-stdin
            docker push "$TEST_IMAGE:$GITHUB_SHA"
          fi

  make_test:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      # leak sanitizer sometimes crashes
      - run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test

  test_add_osd:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_add_osd.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_cas:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_cas.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_change_pg_count:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_change_pg_count.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_change_pg_count_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=ec /root/vitastor/tests/test_change_pg_count.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_change_pg_size:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_change_pg_size.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_create_nomaxid:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_create_nomaxid.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_etcd_fail:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: /root/vitastor/tests/test_etcd_fail.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_failure_domain:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_failure_domain.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_interrupted_rebalance:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: /root/vitastor/tests/test_interrupted_rebalance.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_interrupted_rebalance_imm:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_interrupted_rebalance_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: SCHEME=ec /root/vitastor/tests/test_interrupted_rebalance.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_interrupted_rebalance_ec_imm:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_minsize_1:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_minsize_1.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_move_reappear:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_move_reappear.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_rebalance_verify:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_rebalance_verify.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_rebalance_verify_imm:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_rebalance_verify_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=ec /root/vitastor/tests/test_rebalance_verify.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_rebalance_verify_ec_imm:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_rm:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_rm.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_snapshot:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_snapshot.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_snapshot_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_splitbrain:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_splitbrain.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_write:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_write.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_write_xor:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=xor /root/vitastor/tests/test_write.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_write_no_same:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_write_no_same.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_heal_pg_size_2:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: PG_SIZE=2 /root/vitastor/tests/test_heal.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_heal_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 10
        run: SCHEME=ec /root/vitastor/tests/test_heal.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_scrub:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: /root/vitastor/tests/test_scrub.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_scrub_zero_osd_2:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: ZERO_OSD=2 /root/vitastor/tests/test_scrub.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_scrub_xor:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=xor /root/vitastor/tests/test_scrub.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_scrub_pg_size_3:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: PG_SIZE=3 /root/vitastor/tests/test_scrub.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec /root/vitastor/tests/test_scrub.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done

  test_scrub_ec:
    runs-on: ubuntu-latest
    needs: build
    container: ${{env.TEST_IMAGE}}:${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: 3
        run: SCHEME=ec /root/vitastor/tests/test_scrub.sh
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- $i --------"
            cat "$i"
            echo ""
          done
|
||||||
|
|
68
.gitea/workflows/tests-to-yaml.pl
Executable file
68
.gitea/workflows/tests-to-yaml.pl
Executable file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/perl
# Generates CI job definitions (YAML) for the test workflow.
#
# Reads lines from the files named on the command line, or from STDIN.
# Every line containing "./test_<name>" yields one job printed to STDOUT;
# all other lines are skipped. Each NAME=VALUE pair on such a line adds a
# suffix to the job name:
#   SCHEME=ec          -> _ec
#   SCHEME=xor         -> _xor
#   IMMEDIATE_COMMIT=* -> _imm
#   anything else      -> _<lowercased name>_<value>
# In the emitted "run:" command, "./test_" is rewritten to the absolute path
# "/root/vitastor/tests/test_".

use strict;
use warnings;

# Tests (by base script name, before suffixes) that get a 10 minute timeout
# instead of the default 3 minutes.
my %long_running = map { $_ => 1 } qw(test_etcd_fail test_heal test_interrupted_rebalance);

# Read line by line instead of slurping the whole input into a list first.
while (my $line = <>)
{
    next if $line !~ /\.\/(test_[^\.]+)/s;
    my $test_name = $1;
    chomp $line;
    # The timeout is chosen from the base name, before any suffix is appended
    my $timeout = $long_running{$test_name} ? 10 : 3;
    while ($line =~ /([^\s=]+)=(\S+)/gs)
    {
        # Copy the captures: $1/$2 are overwritten by the next successful match
        my ($var, $value) = ($1, $2);
        if ($var eq 'SCHEME' && $value eq 'ec')
        {
            $test_name .= '_ec';
        }
        elsif ($var eq 'SCHEME' && $value eq 'xor')
        {
            $test_name .= '_xor';
        }
        elsif ($var eq 'IMMEDIATE_COMMIT')
        {
            $test_name .= '_imm';
        }
        else
        {
            $test_name .= '_'.lc($var).'_'.$value;
        }
    }
    $line =~ s!\./test_!/root/vitastor/tests/test_!;
    # Gitea CI doesn't support artifacts yet, lol
    #- name: Upload results
    #  uses: actions/upload-artifact\@v3
    #  if: always()
    #  with:
    #    name: ${test_name}_result
    #    path: |
    #      /root/vitastor/testdata
    #      !/root/vitastor/testdata/*.bin
    #    retention-days: 5
    # "\$" keeps Perl from interpolating; the text must reach the YAML verbatim.
    # "\$i" is quoted in the generated shell so paths with spaces stay intact.
    print <<"EOF";
  $test_name:
    runs-on: ubuntu-latest
    needs: build
    container: \${{env.TEST_IMAGE}}:\${{github.sha}}
    steps:
      - name: Run test
        id: test
        timeout-minutes: $timeout
        run: $line
      - name: Print logs
        if: always() && steps.test.outcome == 'failure'
        run: |
          for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
            echo "-------- \$i --------"
            cat "\$i"
            echo ""
          done

EOF
}
|
@@ -1,7 +1,7 @@
|
|||||||
cmake_minimum_required(VERSION 2.8)
|
cmake_minimum_required(VERSION 2.8.12)
|
||||||
|
|
||||||
project(vitastor)
|
project(vitastor)
|
||||||
|
|
||||||
set(VERSION "0.8.3")
|
set(VERSION "0.9.0")
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@@ -48,9 +48,9 @@ Vitastor, составлены для того, чтобы убедиться,
|
|||||||
интерфейс (прокси), опять же, без открытия в свободный публичный доступ как
|
интерфейс (прокси), опять же, без открытия в свободный публичный доступ как
|
||||||
самой программы, так и прокси.
|
самой программы, так и прокси.
|
||||||
|
|
||||||
Сетевая Публичная Лицензия Vitastor разработана специально чтобы
|
Сетевая Публичная Лицензия Vitastor разработана специально, чтобы
|
||||||
гарантировать, что в таких случаях и модифицированная версия программы, и
|
гарантировать, что в таких случаях и модифицированная версия программы, и
|
||||||
прокси оставались доступными сообществу. Для этого лицензия требует от
|
прокси останутся доступными сообществу. Для этого лицензия требует от
|
||||||
операторов сетевых серверов предоставлять исходный код оригинальной программы,
|
операторов сетевых серверов предоставлять исходный код оригинальной программы,
|
||||||
а также всех других программ, взаимодействующих с ней на их серверах,
|
а также всех других программ, взаимодействующих с ней на их серверах,
|
||||||
пользователям этих серверов, на условиях свободных лицензий. Таким образом,
|
пользователям этих серверов, на условиях свободных лицензий. Таким образом,
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
VERSION ?= v0.8.3
|
VERSION ?= v0.9.0
|
||||||
|
|
||||||
all: build push
|
all: build push
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ spec:
|
|||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
allowPrivilegeEscalation: true
|
allowPrivilegeEscalation: true
|
||||||
image: vitalif/vitastor-csi:v0.8.3
|
image: vitalif/vitastor-csi:v0.9.0
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -116,7 +116,7 @@ spec:
|
|||||||
privileged: true
|
privileged: true
|
||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
image: vitalif/vitastor-csi:v0.8.3
|
image: vitalif/vitastor-csi:v0.9.0
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
17
csi/go.mod
17
csi/go.mod
@@ -4,25 +4,10 @@ go 1.15
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/container-storage-interface/spec v1.4.0
|
github.com/container-storage-interface/spec v1.4.0
|
||||||
github.com/coreos/bbolt v0.0.0-00010101000000-000000000000 // indirect
|
|
||||||
github.com/coreos/etcd v3.3.25+incompatible // indirect
|
|
||||||
github.com/coreos/go-semver v0.3.0 // indirect
|
|
||||||
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf // indirect
|
|
||||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f // indirect
|
|
||||||
github.com/dustin/go-humanize v1.0.0 // indirect
|
|
||||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
|
||||||
github.com/gorilla/websocket v1.4.2 // indirect
|
|
||||||
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect
|
|
||||||
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
|
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
|
|
||||||
github.com/jonboulle/clockwork v0.2.2 // indirect
|
|
||||||
github.com/kubernetes-csi/csi-lib-utils v0.9.1
|
github.com/kubernetes-csi/csi-lib-utils v0.9.1
|
||||||
github.com/soheilhy/cmux v0.1.5 // indirect
|
|
||||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect
|
|
||||||
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect
|
|
||||||
go.etcd.io/bbolt v0.0.0-00010101000000-000000000000 // indirect
|
|
||||||
go.etcd.io/etcd v3.3.25+incompatible
|
|
||||||
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
|
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
|
||||||
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||||
google.golang.org/grpc v1.33.1
|
google.golang.org/grpc v1.33.1
|
||||||
k8s.io/klog v1.0.0
|
k8s.io/klog v1.0.0
|
||||||
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10
|
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10
|
||||||
|
82
csi/go.sum
82
csi/go.sum
@@ -31,14 +31,11 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuy
|
|||||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||||
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
|
||||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||||
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
||||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
|
||||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
|
github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
|
||||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||||
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
|
|
||||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||||
@@ -46,25 +43,12 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn
|
|||||||
github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
|
github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
|
||||||
github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
|
github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
|
||||||
github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
|
github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
|
||||||
github.com/coreos/bbolt v1.3.5 h1:XFv7xaq7701j8ZSEzR28VohFYSlyakMyqNMU5FQH6Ac=
|
|
||||||
github.com/coreos/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
|
|
||||||
github.com/coreos/etcd v3.3.25+incompatible h1:0GQEw6h3YnuOVdtwygkIfJ+Omx0tZ8/QkVyXI4LkbeY=
|
|
||||||
github.com/coreos/etcd v3.3.25+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
|
|
||||||
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
|
|
||||||
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
|
|
||||||
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
|
|
||||||
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
|
|
||||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
|
|
||||||
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
|
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
|
|
||||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
|
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
|
||||||
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
|
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
|
||||||
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
|
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
|
||||||
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
|
|
||||||
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
|
|
||||||
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
|
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
|
||||||
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
|
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
|
||||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||||
@@ -73,7 +57,6 @@ github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLi
|
|||||||
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
|
||||||
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
|
||||||
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
||||||
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
|
|
||||||
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
|
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
|
||||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||||
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||||
@@ -88,14 +71,10 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA
|
|||||||
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
|
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
|
||||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||||
github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
|
|
||||||
github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
|
github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
|
||||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
|
||||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
|
||||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
|
||||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||||
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||||
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
|
|
||||||
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||||
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||||
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||||
@@ -113,7 +92,6 @@ github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QD
|
|||||||
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
|
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
|
||||||
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
||||||
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||||
github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
|
|
||||||
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
|
||||||
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
|
||||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||||
@@ -127,38 +105,24 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI
|
|||||||
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
|
||||||
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
|
||||||
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
|
||||||
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
|
|
||||||
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
|
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
|
||||||
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
|
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
|
||||||
github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg=
|
github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg=
|
||||||
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
|
|
||||||
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
|
||||||
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
|
||||||
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
|
|
||||||
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
|
|
||||||
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
|
|
||||||
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
|
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
|
||||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
|
||||||
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||||
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
|
||||||
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
|
||||||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||||
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
|
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
|
||||||
github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ=
|
|
||||||
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
|
|
||||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||||
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
|
|
||||||
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
|
||||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
|
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||||
@@ -171,14 +135,11 @@ github.com/kubernetes-csi/csi-lib-utils v0.9.1 h1:sGq6ifVujfMSkfTsMZip44Ttv8SDXv
|
|||||||
github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M=
|
github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M=
|
||||||
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
|
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||||
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
|
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
|
|
||||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||||
@@ -188,38 +149,28 @@ github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+W
|
|||||||
github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
|
||||||
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
|
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
|
||||||
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
|
||||||
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
|
|
||||||
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
|
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
|
||||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
|
||||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||||
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
|
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
|
||||||
github.com/prometheus/client_golang v1.7.1 h1:NTGy1Ja9pByO+xAeH/qiWnLrKtr3hJPNjaVUwnjpdpA=
|
|
||||||
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
|
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
|
||||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||||
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||||
github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
|
|
||||||
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||||
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
||||||
github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
|
|
||||||
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
|
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
|
||||||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||||
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
||||||
github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
|
|
||||||
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
|
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
|
||||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
|
||||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
||||||
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
|
|
||||||
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
|
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
|
||||||
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
|
|
||||||
github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0=
|
|
||||||
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
|
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
|
||||||
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||||
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||||
@@ -231,24 +182,11 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV
|
|||||||
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
|
||||||
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
|
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
|
||||||
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
|
||||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA=
|
|
||||||
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
|
|
||||||
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
|
|
||||||
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
|
|
||||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
|
||||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
|
||||||
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
|
|
||||||
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
|
|
||||||
go.etcd.io/etcd v3.3.25+incompatible h1:V1RzkZJj9LqsJRy+TUBgpWSbZXITLB819lstuTFoZOY=
|
|
||||||
go.etcd.io/etcd v3.3.25+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI=
|
|
||||||
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
|
||||||
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
|
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
|
||||||
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
|
||||||
go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU=
|
|
||||||
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
|
||||||
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
|
|
||||||
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
|
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
|
||||||
go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM=
|
|
||||||
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
|
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
|
||||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
@@ -256,7 +194,6 @@ golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8U
|
|||||||
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||||
golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
|
|
||||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||||
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
|
||||||
@@ -276,8 +213,6 @@ golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCc
|
|||||||
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
|
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
|
||||||
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
|
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
|
||||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|
||||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
|
||||||
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
@@ -291,26 +226,20 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
|
|||||||
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
|
||||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
|
||||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||||
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
|
||||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
|
||||||
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
|
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
|
||||||
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||||
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||||
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||||
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
||||||
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
|
|
||||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
|
||||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
@@ -326,11 +255,9 @@ golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7w
|
|||||||
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
|
||||||
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4 h1:5/PjkGUjvEU5Gl6BxmvKRPpqo2uNMv4rcHBMwzk/st8=
|
|
||||||
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
|
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
|
||||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
@@ -341,7 +268,6 @@ golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
|||||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||||
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
|
|
||||||
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
@@ -360,14 +286,10 @@ golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgw
|
|||||||
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
|
||||||
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||||
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
|
||||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
|
||||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
|
|
||||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
|
||||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||||
@@ -388,8 +310,6 @@ google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98
|
|||||||
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
|
||||||
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
|
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
|
||||||
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
|
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
|
||||||
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
|
||||||
google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
|
|
||||||
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
|
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
|
||||||
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
|
||||||
google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
|
google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
|
||||||
@@ -415,7 +335,6 @@ gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
|
|||||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
|
||||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
|
||||||
@@ -444,5 +363,4 @@ k8s.io/utils v0.0.0-20210305010621-2afb4311ab10/go.mod h1:jPW/WVKK9YHAvNhRxK0md/
|
|||||||
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
|
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
|
||||||
sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw=
|
sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw=
|
||||||
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
|
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
|
||||||
sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q=
|
|
||||||
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
|
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=
|
||||||
|
@@ -5,7 +5,7 @@ package vitastor
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
vitastorCSIDriverName = "csi.vitastor.io"
|
vitastorCSIDriverName = "csi.vitastor.io"
|
||||||
vitastorCSIDriverVersion = "0.8.3"
|
vitastorCSIDriverVersion = "0.9.0"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Config struct fills the parameters of request or user input
|
// Config struct fills the parameters of request or user input
|
||||||
|
@@ -6,11 +6,11 @@ package vitastor
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"bytes"
|
"bytes"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
"fmt"
|
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
@@ -21,8 +21,6 @@ import (
|
|||||||
"google.golang.org/grpc/codes"
|
"google.golang.org/grpc/codes"
|
||||||
"google.golang.org/grpc/status"
|
"google.golang.org/grpc/status"
|
||||||
|
|
||||||
"go.etcd.io/etcd/clientv3"
|
|
||||||
|
|
||||||
"github.com/container-storage-interface/spec/lib/go/csi"
|
"github.com/container-storage-interface/spec/lib/go/csi"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -114,6 +112,34 @@ func GetConnectionParams(params map[string]string) (map[string]string, []string,
|
|||||||
return ctxVars, etcdUrl, etcdPrefix
|
return ctxVars, etcdUrl, etcdPrefix
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func invokeCLI(ctxVars map[string]string, args []string) ([]byte, error)
|
||||||
|
{
|
||||||
|
if (ctxVars["etcdUrl"] != "")
|
||||||
|
{
|
||||||
|
args = append(args, "--etcd_address", ctxVars["etcdUrl"])
|
||||||
|
}
|
||||||
|
if (ctxVars["etcdPrefix"] != "")
|
||||||
|
{
|
||||||
|
args = append(args, "--etcd_prefix", ctxVars["etcdPrefix"])
|
||||||
|
}
|
||||||
|
if (ctxVars["configPath"] != "")
|
||||||
|
{
|
||||||
|
args = append(args, "--config_path", ctxVars["configPath"])
|
||||||
|
}
|
||||||
|
c := exec.Command("/usr/bin/vitastor-cli", args...)
|
||||||
|
var stdout, stderr bytes.Buffer
|
||||||
|
c.Stdout = &stdout
|
||||||
|
c.Stderr = &stderr
|
||||||
|
err := c.Run()
|
||||||
|
stderrStr := string(stderr.Bytes())
|
||||||
|
if (err != nil)
|
||||||
|
{
|
||||||
|
klog.Errorf("vitastor-cli %s failed: %s, status %s\n", strings.Join(args, " "), stderrStr, err)
|
||||||
|
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
|
||||||
|
}
|
||||||
|
return stdout.Bytes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
// Create the volume
|
// Create the volume
|
||||||
func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error)
|
func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error)
|
||||||
{
|
{
|
||||||
@@ -146,128 +172,41 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
|
|||||||
volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
|
volSize = ((capRange.GetRequiredBytes() + MB - 1) / MB) * MB
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: The following should PROBABLY be implemented externally in a management tool
|
ctxVars, etcdUrl, _ := GetConnectionParams(req.Parameters)
|
||||||
|
|
||||||
ctxVars, etcdUrl, etcdPrefix := GetConnectionParams(req.Parameters)
|
|
||||||
if (len(etcdUrl) == 0)
|
if (len(etcdUrl) == 0)
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
|
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Connect to etcd
|
// Create image using vitastor-cli
|
||||||
cli, err := clientv3.New(clientv3.Config{
|
_, err := invokeCLI(ctxVars, []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", fmt.Sprintf("%v", poolId) })
|
||||||
DialTimeout: ETCD_TIMEOUT,
|
|
||||||
Endpoints: etcdUrl,
|
|
||||||
})
|
|
||||||
if (err != nil)
|
if (err != nil)
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
|
if (strings.Index(err.Error(), "already exists") > 0)
|
||||||
}
|
|
||||||
defer cli.Close()
|
|
||||||
|
|
||||||
var imageId uint64 = 0
|
|
||||||
for
|
|
||||||
{
|
|
||||||
// Check if the image exists
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
|
stat, err := invokeCLI(ctxVars, []string{ "ls", "--json", volName })
|
||||||
}
|
|
||||||
if (len(resp.Kvs) > 0)
|
|
||||||
{
|
|
||||||
kv := resp.Kvs[0]
|
|
||||||
var v InodeIndex
|
|
||||||
err := json.Unmarshal(kv.Value, &v)
|
|
||||||
if (err != nil)
|
if (err != nil)
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
|
return nil, err
|
||||||
}
|
}
|
||||||
poolId = v.PoolId
|
var inodeCfg []InodeConfig
|
||||||
imageId = v.Id
|
err = json.Unmarshal(stat, &inodeCfg)
|
||||||
inodeCfgKey := fmt.Sprintf("/config/inode/%d/%d", poolId, imageId)
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
resp, err := cli.Get(ctx, etcdPrefix+inodeCfgKey)
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
if (err != nil)
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
|
return nil, status.Error(codes.Internal, "Invalid JSON in vitastor-cli ls: "+err.Error())
|
||||||
}
|
}
|
||||||
if (len(resp.Kvs) == 0)
|
if (len(inodeCfg) == 0)
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "missing "+inodeCfgKey+" key in etcd")
|
return nil, status.Error(codes.Internal, "vitastor-cli create said that image already exists, but ls can't find it")
|
||||||
}
|
}
|
||||||
var inodeCfg InodeConfig
|
if (inodeCfg[0].Size < uint64(volSize))
|
||||||
err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
if (inodeCfg.Size < uint64(volSize))
|
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "image "+volName+" is already created, but size is less than expected")
|
return nil, status.Error(codes.Internal, "image "+volName+" is already created, but size is less than expected")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Find a free ID
|
return nil, err
|
||||||
// Create image metadata in a transaction verifying that the image doesn't exist yet AND ID is still free
|
|
||||||
maxIdKey := fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
resp, err := cli.Get(ctx, maxIdKey)
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
var modRev int64
|
|
||||||
var nextId uint64
|
|
||||||
if (len(resp.Kvs) > 0)
|
|
||||||
{
|
|
||||||
var err error
|
|
||||||
nextId, err = strconv.ParseUint(string(resp.Kvs[0].Value), 10, 64)
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, maxIdKey+" contains invalid ID")
|
|
||||||
}
|
|
||||||
modRev = resp.Kvs[0].ModRevision
|
|
||||||
nextId++
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
nextId = 1
|
|
||||||
}
|
|
||||||
inodeIdxJson, _ := json.Marshal(InodeIndex{
|
|
||||||
Id: nextId,
|
|
||||||
PoolId: poolId,
|
|
||||||
})
|
|
||||||
inodeCfgJson, _ := json.Marshal(InodeConfig{
|
|
||||||
Name: volName,
|
|
||||||
Size: uint64(volSize),
|
|
||||||
})
|
|
||||||
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
txnResp, err := cli.Txn(ctx).If(
|
|
||||||
clientv3.Compare(clientv3.ModRevision(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId)), "=", modRev),
|
|
||||||
clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)), "=", 0),
|
|
||||||
clientv3.Compare(clientv3.CreateRevision(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId)), "=", 0),
|
|
||||||
).Then(
|
|
||||||
clientv3.OpPut(fmt.Sprintf("%s/index/maxid/%d", etcdPrefix, poolId), fmt.Sprintf("%d", nextId)),
|
|
||||||
clientv3.OpPut(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName), string(inodeIdxJson)),
|
|
||||||
clientv3.OpPut(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, poolId, nextId), string(inodeCfgJson)),
|
|
||||||
).Commit()
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "failed to commit transaction in etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
if (txnResp.Succeeded)
|
|
||||||
{
|
|
||||||
imageId = nextId
|
|
||||||
break
|
|
||||||
}
|
|
||||||
// Start over if the transaction fails
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -299,97 +238,12 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
|
|||||||
}
|
}
|
||||||
volName := ctxVars["name"]
|
volName := ctxVars["name"]
|
||||||
|
|
||||||
_, etcdUrl, etcdPrefix := GetConnectionParams(ctxVars)
|
ctxVars, _, _ = GetConnectionParams(ctxVars)
|
||||||
if (len(etcdUrl) == 0)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
|
|
||||||
}
|
|
||||||
|
|
||||||
cli, err := clientv3.New(clientv3.Config{
|
_, err = invokeCLI(ctxVars, []string{ "rm", volName })
|
||||||
DialTimeout: ETCD_TIMEOUT,
|
|
||||||
Endpoints: etcdUrl,
|
|
||||||
})
|
|
||||||
if (err != nil)
|
if (err != nil)
|
||||||
{
|
{
|
||||||
return nil, status.Error(codes.Internal, "failed to connect to etcd at "+strings.Join(etcdUrl, ",")+": "+err.Error())
|
return nil, err
|
||||||
}
|
|
||||||
defer cli.Close()
|
|
||||||
|
|
||||||
// Find inode by name
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
resp, err := cli.Get(ctx, etcdPrefix+"/index/image/"+volName)
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
if (len(resp.Kvs) == 0)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
|
|
||||||
}
|
|
||||||
var idx InodeIndex
|
|
||||||
err = json.Unmarshal(resp.Kvs[0].Value, &idx)
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "invalid /index/image/"+volName+" key in etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get inode config
|
|
||||||
inodeCfgKey := fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)
|
|
||||||
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
resp, err = cli.Get(ctx, inodeCfgKey)
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "failed to read key from etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
if (len(resp.Kvs) == 0)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.NotFound, "volume "+volName+" does not exist")
|
|
||||||
}
|
|
||||||
var inodeCfg InodeConfig
|
|
||||||
err = json.Unmarshal(resp.Kvs[0].Value, &inodeCfg)
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete inode data by invoking vitastor-cli
|
|
||||||
args := []string{
|
|
||||||
"rm-data", "--etcd_address", strings.Join(etcdUrl, ","),
|
|
||||||
"--pool", fmt.Sprintf("%d", idx.PoolId),
|
|
||||||
"--inode", fmt.Sprintf("%d", idx.Id),
|
|
||||||
}
|
|
||||||
if (ctxVars["configPath"] != "")
|
|
||||||
{
|
|
||||||
args = append(args, "--config_path", ctxVars["configPath"])
|
|
||||||
}
|
|
||||||
c := exec.Command("/usr/bin/vitastor-cli", args...)
|
|
||||||
var stderr bytes.Buffer
|
|
||||||
c.Stdout = nil
|
|
||||||
c.Stderr = &stderr
|
|
||||||
err = c.Run()
|
|
||||||
stderrStr := string(stderr.Bytes())
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
klog.Errorf("vitastor-cli rm-data failed: %s, status %s\n", stderrStr, err)
|
|
||||||
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete inode config in etcd
|
|
||||||
ctx, cancel = context.WithTimeout(context.Background(), ETCD_TIMEOUT)
|
|
||||||
txnResp, err := cli.Txn(ctx).Then(
|
|
||||||
clientv3.OpDelete(fmt.Sprintf("%s/index/image/%s", etcdPrefix, volName)),
|
|
||||||
clientv3.OpDelete(fmt.Sprintf("%s/config/inode/%d/%d", etcdPrefix, idx.PoolId, idx.Id)),
|
|
||||||
).Commit()
|
|
||||||
cancel()
|
|
||||||
if (err != nil)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "failed to delete keys in etcd: "+err.Error())
|
|
||||||
}
|
|
||||||
if (!txnResp.Succeeded)
|
|
||||||
{
|
|
||||||
return nil, status.Error(codes.Internal, "failed to delete keys in etcd: transaction failed")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &csi.DeleteVolumeResponse{}, nil
|
return &csi.DeleteVolumeResponse{}, nil
|
||||||
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@@ -1,10 +1,10 @@
|
|||||||
vitastor (0.8.3-1) unstable; urgency=medium
|
vitastor (0.9.0-1) unstable; urgency=medium
|
||||||
|
|
||||||
* Bugfixes
|
* Bugfixes
|
||||||
|
|
||||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||||
|
|
||||||
vitastor (0.8.3-1) unstable; urgency=medium
|
vitastor (0.9.0-1) unstable; urgency=medium
|
||||||
|
|
||||||
* Implement NFS proxy
|
* Implement NFS proxy
|
||||||
* Add documentation
|
* Add documentation
|
||||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -34,8 +34,8 @@ RUN set -e -x; \
|
|||||||
mkdir -p /root/packages/vitastor-$REL; \
|
mkdir -p /root/packages/vitastor-$REL; \
|
||||||
rm -rf /root/packages/vitastor-$REL/*; \
|
rm -rf /root/packages/vitastor-$REL/*; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
cp -r /root/vitastor vitastor-0.8.3; \
|
cp -r /root/vitastor vitastor-0.9.0; \
|
||||||
cd vitastor-0.8.3; \
|
cd vitastor-0.9.0; \
|
||||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||||
@@ -48,8 +48,8 @@ RUN set -e -x; \
|
|||||||
rm -rf a b; \
|
rm -rf a b; \
|
||||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.3.orig.tar.xz vitastor-0.8.3; \
|
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.0.orig.tar.xz vitastor-0.9.0; \
|
||||||
cd vitastor-0.8.3; \
|
cd vitastor-0.9.0; \
|
||||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||||
|
Binary file not shown.
@@ -17,14 +17,16 @@ Configuration parameters can be set in 3 places:
|
|||||||
- Configuration file (`/etc/vitastor/vitastor.conf` or other path)
|
- Configuration file (`/etc/vitastor/vitastor.conf` or other path)
|
||||||
- etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
|
- etcd key `/vitastor/config/global`. Most variables can be set there, but etcd
|
||||||
connection parameters should obviously be set in the configuration file.
|
connection parameters should obviously be set in the configuration file.
|
||||||
- Command line of Vitastor components: OSD, mon, fio and QEMU options,
|
- Command line of Vitastor components: OSD (when you run it without vitastor-disk),
|
||||||
OpenStack/Proxmox/etc configuration. The latter doesn't allow to set all
|
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
|
||||||
variables directly, but it allows to override the configuration file and
|
doesn't allow to set all variables directly, but it allows to override the
|
||||||
set everything you need inside it.
|
configuration file and set everything you need inside it.
|
||||||
|
- OSD superblocks created by [vitastor-disk](../usage/disk.en.md) contain
|
||||||
|
primarily disk layout parameters of specific OSDs. In fact, these parameters
|
||||||
|
are automatically passed into the command line of vitastor-osd process, so
|
||||||
|
they have the same "status" as command-line parameters.
|
||||||
|
|
||||||
In the future, additional configuration methods may be added:
|
In the future, additional configuration methods may be added:
|
||||||
- OSD superblock which will, by design, contain parameters related to the disk
|
|
||||||
layout and to one specific OSD.
|
|
||||||
- OSD-specific keys in etcd like `/vitastor/config/osd/<number>`.
|
- OSD-specific keys in etcd like `/vitastor/config/osd/<number>`.
|
||||||
|
|
||||||
## Parameter Reference
|
## Parameter Reference
|
||||||
|
@@ -19,14 +19,17 @@
|
|||||||
- Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
|
- Ключе в etcd `/vitastor/config/global`. Большая часть параметров может
|
||||||
задаваться там, кроме, естественно, самих параметров соединения с etcd,
|
задаваться там, кроме, естественно, самих параметров соединения с etcd,
|
||||||
которые должны задаваться в файле конфигурации
|
которые должны задаваться в файле конфигурации
|
||||||
- В командной строке компонентов Vitastor: OSD, монитора, опциях fio и QEMU,
|
- В командной строке компонентов Vitastor: OSD (при ручном запуске без vitastor-disk),
|
||||||
настроек OpenStack, Proxmox и т.п. Последние, как правило, не включают полный
|
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
|
||||||
набор параметров напрямую, но разрешают определить путь к файлу конфигурации
|
как правило, не включают полный набор параметров напрямую, но позволяют
|
||||||
и задать любые параметры в нём.
|
определить путь к файлу конфигурации и задать любые параметры в нём.
|
||||||
|
- В суперблоке OSD, записываемом [vitastor-disk](../usage/disk.ru.md) - параметры,
|
||||||
|
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
|
||||||
|
при запуске OSD эти параметры автоматически передаются в командную строку
|
||||||
|
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
|
||||||
|
командной строки OSD.
|
||||||
|
|
||||||
В будущем также могут быть добавлены другие способы конфигурации:
|
В будущем также могут быть добавлены другие способы конфигурации:
|
||||||
- Суперблок OSD, в котором будут храниться параметры OSD, связанные с дисковым
|
|
||||||
форматом и с этим конкретным OSD.
|
|
||||||
- OSD-специфичные ключи в etcd типа `/vitastor/config/osd/<номер>`.
|
- OSD-специфичные ключи в etcd типа `/vitastor/config/osd/<номер>`.
|
||||||
|
|
||||||
## Список параметров
|
## Список параметров
|
||||||
|
@@ -25,11 +25,16 @@ running if required parameters are specified.
|
|||||||
## etcd_address
|
## etcd_address
|
||||||
|
|
||||||
- Type: string or array of strings
|
- Type: string or array of strings
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
|
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
|
||||||
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
||||||
Note that https is not supported for etcd connections yet.
|
Note that https is not supported for etcd connections yet.
|
||||||
|
|
||||||
|
etcd connection endpoints can be changed online by updating global
|
||||||
|
configuration in etcd itself - this allows switching the cluster to new
|
||||||
|
etcd addresses without downtime.
|
||||||
|
|
||||||
## etcd_prefix
|
## etcd_prefix
|
||||||
|
|
||||||
- Type: string
|
- Type: string
|
||||||
@@ -42,5 +47,6 @@ example, use a single etcd cluster for multiple Vitastor clusters.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 0
|
- Default: 0
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Log level. Raise if you want more verbose output.
|
Log level. Raise if you want more verbose output.
|
||||||
|
@@ -24,10 +24,14 @@
|
|||||||
## etcd_address
|
## etcd_address
|
||||||
|
|
||||||
- Тип: строка или массив строк
|
- Тип: строка или массив строк
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
|
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
|
||||||
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
||||||
|
|
||||||
|
Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
|
||||||
|
самом etcd - это позволяет переключить кластер на новые etcd без остановки.
|
||||||
|
|
||||||
## etcd_prefix
|
## etcd_prefix
|
||||||
|
|
||||||
- Тип: строка
|
- Тип: строка
|
||||||
@@ -41,5 +45,6 @@
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 0
|
- Значение по умолчанию: 0
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Уровень логирования. Повысьте, если хотите более подробный вывод.
|
Уровень логирования. Повысьте, если хотите более подробный вывод.
|
||||||
|
@@ -19,6 +19,7 @@ between clients, OSDs and etcd.
|
|||||||
- [rdma_max_sge](#rdma_max_sge)
|
- [rdma_max_sge](#rdma_max_sge)
|
||||||
- [rdma_max_msg](#rdma_max_msg)
|
- [rdma_max_msg](#rdma_max_msg)
|
||||||
- [rdma_max_recv](#rdma_max_recv)
|
- [rdma_max_recv](#rdma_max_recv)
|
||||||
|
- [rdma_max_send](#rdma_max_send)
|
||||||
- [peer_connect_interval](#peer_connect_interval)
|
- [peer_connect_interval](#peer_connect_interval)
|
||||||
- [peer_connect_timeout](#peer_connect_timeout)
|
- [peer_connect_timeout](#peer_connect_timeout)
|
||||||
- [osd_idle_timeout](#osd_idle_timeout)
|
- [osd_idle_timeout](#osd_idle_timeout)
|
||||||
@@ -74,6 +75,12 @@ to work. For example, Mellanox ConnectX-3 and older adapters don't have
|
|||||||
Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
|
Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
|
||||||
root to list available RDMA devices and their features.
|
root to list available RDMA devices and their features.
|
||||||
|
|
||||||
|
Remember that you also have to configure your network switches if you use
|
||||||
|
RoCE/RoCEv2, otherwise you may experience unstable performance. Refer to
|
||||||
|
the manual of your network vendor for details about setting up the switch
|
||||||
|
for RoCEv2 correctly. Usually it means setting up Lossless Ethernet with
|
||||||
|
PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
|
||||||
|
|
||||||
## rdma_port_num
|
## rdma_port_num
|
||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
@@ -116,26 +123,37 @@ required to change this parameter.
|
|||||||
## rdma_max_msg
|
## rdma_max_msg
|
||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 1048576
|
- Default: 132096
|
||||||
|
|
||||||
Maximum size of a single RDMA send or receive operation in bytes.
|
Maximum size of a single RDMA send or receive operation in bytes.
|
||||||
|
|
||||||
## rdma_max_recv
|
## rdma_max_recv
|
||||||
|
|
||||||
|
- Type: integer
|
||||||
|
- Default: 16
|
||||||
|
|
||||||
|
Maximum number of RDMA receive buffers per connection (RDMA requires
|
||||||
|
preallocated buffers to receive data). Each buffer is `rdma_max_msg` bytes
|
||||||
|
in size. So this setting directly affects memory usage: a single Vitastor
|
||||||
|
RDMA client uses `rdma_max_recv * rdma_max_msg * OSD_COUNT` bytes of memory.
|
||||||
|
Default is roughly 2 MB * number of OSDs.
|
||||||
|
|
||||||
|
## rdma_max_send
|
||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 8
|
- Default: 8
|
||||||
|
|
||||||
Maximum number of parallel RDMA receive operations. Note that this number
|
Maximum number of outstanding RDMA send operations per connection. Should be
|
||||||
of receive buffers `rdma_max_msg` in size are allocated for each client,
|
less than `rdma_max_recv` so the receiving side doesn't run out of buffers.
|
||||||
so this setting actually affects memory usage. This is because RDMA receive
|
Doesn't affect memory usage - additional memory isn't allocated for send
|
||||||
operations are (sadly) still not zero-copy in Vitastor. It may be fixed in
|
operations.
|
||||||
later versions.
|
|
||||||
|
|
||||||
## peer_connect_interval
|
## peer_connect_interval
|
||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 5
|
- Default: 5
|
||||||
- Minimum: 1
|
- Minimum: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Interval before attempting to reconnect to an unavailable OSD.
|
Interval before attempting to reconnect to an unavailable OSD.
|
||||||
|
|
||||||
@@ -144,6 +162,7 @@ Interval before attempting to reconnect to an unavailable OSD.
|
|||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 5
|
- Default: 5
|
||||||
- Minimum: 1
|
- Minimum: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Timeout for OSD connection attempts.
|
Timeout for OSD connection attempts.
|
||||||
|
|
||||||
@@ -152,6 +171,7 @@ Timeout for OSD connection attempts.
|
|||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 5
|
- Default: 5
|
||||||
- Minimum: 1
|
- Minimum: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
OSD connection inactivity time after which clients and other OSDs send
|
OSD connection inactivity time after which clients and other OSDs send
|
||||||
keepalive requests to check state of the connection.
|
keepalive requests to check state of the connection.
|
||||||
@@ -161,6 +181,7 @@ keepalive requests to check state of the connection.
|
|||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 5
|
- Default: 5
|
||||||
- Minimum: 1
|
- Minimum: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
|
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
|
||||||
within this time, the connection to it is dropped and a reconnection attempt
|
within this time, the connection to it is dropped and a reconnection attempt
|
||||||
@@ -171,6 +192,7 @@ is scheduled.
|
|||||||
- Type: milliseconds
|
- Type: milliseconds
|
||||||
- Default: 500
|
- Default: 500
|
||||||
- Minimum: 50
|
- Minimum: 50
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
OSDs respond to clients with a special error code when they receive I/O
|
OSDs respond to clients with a special error code when they receive I/O
|
||||||
requests for a PG that's not synchronized and started. This parameter sets
|
requests for a PG that's not synchronized and started. This parameter sets
|
||||||
@@ -180,6 +202,7 @@ the time for the clients to wait before re-attempting such I/O requests.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 5
|
- Default: 5
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Maximum number of attempts for etcd requests which can't be retried
|
Maximum number of attempts for etcd requests which can't be retried
|
||||||
indefinitely.
|
indefinitely.
|
||||||
@@ -188,6 +211,7 @@ indefinitely.
|
|||||||
|
|
||||||
- Type: milliseconds
|
- Type: milliseconds
|
||||||
- Default: 1000
|
- Default: 1000
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Timeout for etcd requests which should complete quickly, like lease refresh.
|
Timeout for etcd requests which should complete quickly, like lease refresh.
|
||||||
|
|
||||||
@@ -195,6 +219,7 @@ Timeout for etcd requests which should complete quickly, like lease refresh.
|
|||||||
|
|
||||||
- Type: milliseconds
|
- Type: milliseconds
|
||||||
- Default: 5000
|
- Default: 5000
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Timeout for etcd requests which are allowed to wait for some time.
|
Timeout for etcd requests which are allowed to wait for some time.
|
||||||
|
|
||||||
@@ -202,6 +227,7 @@ Timeout for etcd requests which are allowed to wait for some time.
|
|||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: max(30, etcd_report_interval*2)
|
- Default: max(30, etcd_report_interval*2)
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
|
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
|
||||||
etcd_report_interval to guarantee that keepalive actually works.
|
etcd_report_interval to guarantee that keepalive actually works.
|
||||||
@@ -210,6 +236,7 @@ etcd_report_interval to guarantee that keepalive actually works.
|
|||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 30
|
- Default: 30
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
etcd websocket ping interval required to keep the connection alive and
|
etcd websocket ping interval required to keep the connection alive and
|
||||||
detect disconnections quickly.
|
detect disconnections quickly.
|
||||||
@@ -218,6 +245,7 @@ detect disconnections quickly.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 33554432
|
- Default: 33554432
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Without immediate_commit=all this parameter sets the limit of "dirty"
|
Without immediate_commit=all this parameter sets the limit of "dirty"
|
||||||
(not committed by fsync) data allowed by the client before forcing an
|
(not committed by fsync) data allowed by the client before forcing an
|
||||||
|
@@ -19,6 +19,7 @@
|
|||||||
- [rdma_max_sge](#rdma_max_sge)
|
- [rdma_max_sge](#rdma_max_sge)
|
||||||
- [rdma_max_msg](#rdma_max_msg)
|
- [rdma_max_msg](#rdma_max_msg)
|
||||||
- [rdma_max_recv](#rdma_max_recv)
|
- [rdma_max_recv](#rdma_max_recv)
|
||||||
|
- [rdma_max_send](#rdma_max_send)
|
||||||
- [peer_connect_interval](#peer_connect_interval)
|
- [peer_connect_interval](#peer_connect_interval)
|
||||||
- [peer_connect_timeout](#peer_connect_timeout)
|
- [peer_connect_timeout](#peer_connect_timeout)
|
||||||
- [osd_idle_timeout](#osd_idle_timeout)
|
- [osd_idle_timeout](#osd_idle_timeout)
|
||||||
@@ -78,6 +79,13 @@ Implicit On-Demand Paging (Implicit ODP) и Scatter/Gather (SG). Наприме
|
|||||||
суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
|
суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
|
||||||
параметры и возможности.
|
параметры и возможности.
|
||||||
|
|
||||||
|
Обратите внимание, что если вы используете RoCE/RoCEv2, вам также необходимо
|
||||||
|
правильно настроить для него коммутаторы, иначе вы можете столкнуться с
|
||||||
|
нестабильной производительностью. Подробную информацию о настройке
|
||||||
|
коммутатора для RoCEv2 ищите в документации производителя. Обычно это
|
||||||
|
подразумевает настройку сети без потерь на основе PFC (Priority Flow
|
||||||
|
Control) и ECN (Explicit Congestion Notification).
|
||||||
|
|
||||||
## rdma_port_num
|
## rdma_port_num
|
||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
@@ -121,28 +129,39 @@ OSD в любом случае согласовывают реальное зн
|
|||||||
## rdma_max_msg
|
## rdma_max_msg
|
||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 1048576
|
- Значение по умолчанию: 132096
|
||||||
|
|
||||||
Максимальный размер одной RDMA-операции отправки или приёма.
|
Максимальный размер одной RDMA-операции отправки или приёма.
|
||||||
|
|
||||||
## rdma_max_recv
|
## rdma_max_recv
|
||||||
|
|
||||||
|
- Тип: целое число
|
||||||
|
- Значение по умолчанию: 16
|
||||||
|
|
||||||
|
Максимальное число буферов для RDMA-приёма данных на одно соединение
|
||||||
|
(RDMA требует заранее выделенных буферов для приёма данных). Каждый буфер
|
||||||
|
имеет размер `rdma_max_msg` байт. Таким образом, настройка прямо влияет на
|
||||||
|
потребление памяти - один Vitastor-клиент с RDMA использует
|
||||||
|
`rdma_max_recv * rdma_max_msg * ЧИСЛО_OSD` байт памяти, по умолчанию -
|
||||||
|
примерно 2 МБ * число OSD.
|
||||||
|
|
||||||
|
## rdma_max_send
|
||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 8
|
- Значение по умолчанию: 8
|
||||||
|
|
||||||
Максимальное число параллельных RDMA-операций получения данных. Следует
|
Максимальное число RDMA-операций отправки, отправляемых в очередь одного
|
||||||
иметь в виду, что данное число буферов размером `rdma_max_msg` выделяется
|
соединения. Желательно, чтобы оно было меньше `rdma_max_recv`, чтобы
|
||||||
для каждого подключённого клиентского соединения, так что данная настройка
|
у принимающей стороны в процессе работы не заканчивались буферы на приём.
|
||||||
влияет на потребление памяти. Это так потому, что RDMA-приём данных в
|
Не влияет на потребление памяти - дополнительная память на операции отправки
|
||||||
Vitastor, увы, всё равно не является zero-copy, т.е. всё равно 1 раз
|
не выделяется.
|
||||||
копирует данные в памяти. Данная особенность, возможно, будет исправлена в
|
|
||||||
более новых версиях Vitastor.
|
|
||||||
|
|
||||||
## peer_connect_interval
|
## peer_connect_interval
|
||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 5
|
- Значение по умолчанию: 5
|
||||||
- Минимальное значение: 1
|
- Минимальное значение: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Время ожидания перед повторной попыткой соединиться с недоступным OSD.
|
Время ожидания перед повторной попыткой соединиться с недоступным OSD.
|
||||||
|
|
||||||
@@ -151,6 +170,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 5
|
- Значение по умолчанию: 5
|
||||||
- Минимальное значение: 1
|
- Минимальное значение: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное время ожидания попытки соединения с OSD.
|
Максимальное время ожидания попытки соединения с OSD.
|
||||||
|
|
||||||
@@ -159,6 +179,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 5
|
- Значение по умолчанию: 5
|
||||||
- Минимальное значение: 1
|
- Минимальное значение: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Время неактивности соединения с OSD, после которого клиенты или другие OSD
|
Время неактивности соединения с OSD, после которого клиенты или другие OSD
|
||||||
посылают запрос проверки состояния соединения.
|
посылают запрос проверки состояния соединения.
|
||||||
@@ -168,6 +189,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 5
|
- Значение по умолчанию: 5
|
||||||
- Минимальное значение: 1
|
- Минимальное значение: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное время ожидания ответа на запрос проверки состояния соединения.
|
Максимальное время ожидания ответа на запрос проверки состояния соединения.
|
||||||
Если OSD не отвечает за это время, соединение отключается и производится
|
Если OSD не отвечает за это время, соединение отключается и производится
|
||||||
@@ -178,6 +200,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
- Тип: миллисекунды
|
- Тип: миллисекунды
|
||||||
- Значение по умолчанию: 500
|
- Значение по умолчанию: 500
|
||||||
- Минимальное значение: 50
|
- Минимальное значение: 50
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
|
Когда OSD получают от клиентов запросы ввода-вывода, относящиеся к не
|
||||||
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
|
поднятым на данный момент на них PG, либо к PG в процессе синхронизации,
|
||||||
@@ -189,6 +212,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 5
|
- Значение по умолчанию: 5
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное число попыток выполнения запросов к etcd для тех запросов,
|
Максимальное число попыток выполнения запросов к etcd для тех запросов,
|
||||||
которые нельзя повторять бесконечно.
|
которые нельзя повторять бесконечно.
|
||||||
@@ -197,6 +221,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
|
|
||||||
- Тип: миллисекунды
|
- Тип: миллисекунды
|
||||||
- Значение по умолчанию: 1000
|
- Значение по умолчанию: 1000
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное время выполнения запросов к etcd, которые должны завершаться
|
Максимальное время выполнения запросов к etcd, которые должны завершаться
|
||||||
быстро, таких, как обновление резервации (lease).
|
быстро, таких, как обновление резервации (lease).
|
||||||
@@ -205,6 +230,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
|
|
||||||
- Тип: миллисекунды
|
- Тип: миллисекунды
|
||||||
- Значение по умолчанию: 5000
|
- Значение по умолчанию: 5000
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное время выполнения запросов к etcd, для которых не обязательно
|
Максимальное время выполнения запросов к etcd, для которых не обязательно
|
||||||
гарантировать быстрое выполнение.
|
гарантировать быстрое выполнение.
|
||||||
@@ -213,6 +239,7 @@ Vitastor, увы, всё равно не является zero-copy, т.е. вс
|
|||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: max(30, etcd_report_interval*2)
|
- Значение по умолчанию: max(30, etcd_report_interval*2)
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
|
Таймаут для HTTP Keep-Alive в соединениях к etcd. Должен быть больше, чем
|
||||||
etcd_report_interval, чтобы keepalive гарантированно работал.
|
etcd_report_interval, чтобы keepalive гарантированно работал.
|
||||||
@@ -221,6 +248,7 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
|
|||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 30
|
- Значение по умолчанию: 30
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Интервал проверки живости вебсокет-подключений к etcd.
|
Интервал проверки живости вебсокет-подключений к etcd.
|
||||||
|
|
||||||
@@ -228,6 +256,7 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 33554432
|
- Значение по умолчанию: 33554432
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
При работе без immediate_commit=all - это лимит объёма "грязных" (не
|
При работе без immediate_commit=all - это лимит объёма "грязных" (не
|
||||||
зафиксированных fsync-ом) данных, при достижении которого клиент будет
|
зафиксированных fsync-ом) данных, при достижении которого клиент будет
|
||||||
|
@@ -7,7 +7,8 @@
|
|||||||
# Runtime OSD Parameters
|
# Runtime OSD Parameters
|
||||||
|
|
||||||
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
|
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
|
||||||
initialization and can be changed with an OSD restart.
|
initialization and can be changed - either with an OSD restart or, for some of
|
||||||
|
them, even without restarting by updating configuration in etcd.
|
||||||
|
|
||||||
- [etcd_report_interval](#etcd_report_interval)
|
- [etcd_report_interval](#etcd_report_interval)
|
||||||
- [run_primary](#run_primary)
|
- [run_primary](#run_primary)
|
||||||
@@ -38,6 +39,14 @@ initialization and can be changed with an OSD restart.
|
|||||||
- [throttle_target_parallelism](#throttle_target_parallelism)
|
- [throttle_target_parallelism](#throttle_target_parallelism)
|
||||||
- [throttle_threshold_us](#throttle_threshold_us)
|
- [throttle_threshold_us](#throttle_threshold_us)
|
||||||
- [osd_memlock](#osd_memlock)
|
- [osd_memlock](#osd_memlock)
|
||||||
|
- [auto_scrub](#auto_scrub)
|
||||||
|
- [no_scrub](#no_scrub)
|
||||||
|
- [scrub_interval](#scrub_interval)
|
||||||
|
- [scrub_queue_depth](#scrub_queue_depth)
|
||||||
|
- [scrub_sleep](#scrub_sleep)
|
||||||
|
- [scrub_list_limit](#scrub_list_limit)
|
||||||
|
- [scrub_find_best](#scrub_find_best)
|
||||||
|
- [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
|
||||||
|
|
||||||
## etcd_report_interval
|
## etcd_report_interval
|
||||||
|
|
||||||
@@ -91,6 +100,7 @@ OSD by hand.
|
|||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 5
|
- Default: 5
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Time interval at which automatic fsyncs/flushes are issued by each OSD when
|
Time interval at which automatic fsyncs/flushes are issued by each OSD when
|
||||||
the immediate_commit mode is disabled. fsyncs are required because without
|
the immediate_commit mode is disabled. fsyncs are required because without
|
||||||
@@ -103,6 +113,7 @@ issue fsyncs at all.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 128
|
- Default: 128
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Same as autosync_interval, but sets the maximum number of uncommitted write
|
Same as autosync_interval, but sets the maximum number of uncommitted write
|
||||||
operations before issuing an fsync operation internally.
|
operations before issuing an fsync operation internally.
|
||||||
@@ -111,6 +122,7 @@ operations before issuing an fsync operation internally.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 4
|
- Default: 4
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Maximum recovery operations per one primary OSD at any given moment of time.
|
Maximum recovery operations per one primary OSD at any given moment of time.
|
||||||
Currently it's the only parameter available to tune the speed of recovery
|
Currently it's the only parameter available to tune the speed of recovery
|
||||||
@@ -120,6 +132,7 @@ and rebalancing, but it's planned to implement more.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 128
|
- Default: 128
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Number of recovery operations before switching to recovery of the next PG.
|
Number of recovery operations before switching to recovery of the next PG.
|
||||||
The idea is to mix all PGs during recovery for more even space and load
|
The idea is to mix all PGs during recovery for more even space and load
|
||||||
@@ -130,6 +143,7 @@ Degraded PGs are anyway scanned first.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 16
|
- Default: 16
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Maximum number of recovery operations before issuing an additional fsync.
|
Maximum number of recovery operations before issuing an additional fsync.
|
||||||
|
|
||||||
@@ -145,6 +159,7 @@ the underlying device. This may be useful for recovery purposes.
|
|||||||
|
|
||||||
- Type: boolean
|
- Type: boolean
|
||||||
- Default: false
|
- Default: false
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Disable automatic background recovery of objects. Note that it doesn't
|
Disable automatic background recovery of objects. Note that it doesn't
|
||||||
affect implicit recovery of objects happening during writes - a write is
|
affect implicit recovery of objects happening during writes - a write is
|
||||||
@@ -154,6 +169,7 @@ always made to a full set of at least pg_minsize OSDs.
|
|||||||
|
|
||||||
- Type: boolean
|
- Type: boolean
|
||||||
- Default: false
|
- Default: false
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Disable background movement of data between different OSDs. Disabling it
|
Disable background movement of data between different OSDs. Disabling it
|
||||||
means that PGs in the `has_misplaced` state will be left in it indefinitely.
|
means that PGs in the `has_misplaced` state will be left in it indefinitely.
|
||||||
@@ -162,6 +178,7 @@ means that PGs in the `has_misplaced` state will be left in it indefinitely.
|
|||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 3
|
- Default: 3
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Time interval at which OSDs print simple human-readable operation
|
Time interval at which OSDs print simple human-readable operation
|
||||||
statistics on stdout.
|
statistics on stdout.
|
||||||
@@ -170,6 +187,7 @@ statistics on stdout.
|
|||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 10
|
- Default: 10
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Time interval at which OSDs dump slow or stuck operations on stdout, if
|
Time interval at which OSDs dump slow or stuck operations on stdout, if
|
||||||
there are any. Also it's the time after which an operation is considered
|
there are any. Also it's the time after which an operation is considered
|
||||||
@@ -179,6 +197,7 @@ there are any. Also it's the time after which an operation is considered
|
|||||||
|
|
||||||
- Type: seconds
|
- Type: seconds
|
||||||
- Default: 60
|
- Default: 60
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Number of seconds after which a deleted inode is removed from OSD statistics.
|
Number of seconds after which a deleted inode is removed from OSD statistics.
|
||||||
|
|
||||||
@@ -186,6 +205,7 @@ Number of seconds after which a deleted inode is removed from OSD statistics.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 128
|
- Default: 128
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Parallel client write operation limit per one OSD. Operations that exceed
|
Parallel client write operation limit per one OSD. Operations that exceed
|
||||||
this limit are pushed to a temporary queue instead of being executed
|
this limit are pushed to a temporary queue instead of being executed
|
||||||
@@ -195,6 +215,7 @@ immediately.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 1
|
- Default: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Flusher is a micro-thread that moves data from the journal to the data
|
Flusher is a micro-thread that moves data from the journal to the data
|
||||||
area of the device. Their number is auto-tuned between minimum and maximum.
|
area of the device. Their number is auto-tuned between minimum and maximum.
|
||||||
@@ -204,6 +225,7 @@ Minimum number is set by this parameter.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 256
|
- Default: 256
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Maximum number of journal flushers (see above min_flusher_count).
|
Maximum number of journal flushers (see above min_flusher_count).
|
||||||
|
|
||||||
@@ -260,6 +282,7 @@ Most (99%) other SSDs don't need this option.
|
|||||||
|
|
||||||
- Type: boolean
|
- Type: boolean
|
||||||
- Default: false
|
- Default: false
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
|
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
|
||||||
with fast journal/metadata devices and slow data devices. The idea is that
|
with fast journal/metadata devices and slow data devices. The idea is that
|
||||||
@@ -277,6 +300,7 @@ fills up.
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 100
|
- Default: 100
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Target maximum number of throttled operations per second under the condition
|
Target maximum number of throttled operations per second under the condition
|
||||||
of full journal. Set it to approximate random write iops of your data devices
|
of full journal. Set it to approximate random write iops of your data devices
|
||||||
@@ -286,6 +310,7 @@ of full journal. Set it to approximate random write iops of your data devices
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 100
|
- Default: 100
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Target maximum bandwidth in MB/s of throttled operations per second under
|
Target maximum bandwidth in MB/s of throttled operations per second under
|
||||||
the condition of full journal. Set it to approximate linear write
|
the condition of full journal. Set it to approximate linear write
|
||||||
@@ -295,6 +320,7 @@ performance of your data devices (HDDs).
|
|||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
- Default: 1
|
- Default: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Target maximum parallelism of throttled operations under the condition of
|
Target maximum parallelism of throttled operations under the condition of
|
||||||
full journal. Set it to approximate internal parallelism of your data
|
full journal. Set it to approximate internal parallelism of your data
|
||||||
@@ -304,6 +330,7 @@ devices (1 for HDDs, 4-8 for SSDs).
|
|||||||
|
|
||||||
- Type: microseconds
|
- Type: microseconds
|
||||||
- Default: 50
|
- Default: 50
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
Minimal computed delay to be applied to throttled operations. Usually
|
Minimal computed delay to be applied to throttled operations. Usually
|
||||||
doesn't need to be changed.
|
doesn't need to be changed.
|
||||||
@@ -313,4 +340,103 @@ doesn't need to be changed.
|
|||||||
- Type: boolean
|
- Type: boolean
|
||||||
- Default: false
|
- Default: false
|
||||||
|
|
||||||
Lock all OSD memory to prevent it from being unloaded into swap with mlockall(). Requires sufficient ulimit -l (max locked memory).
|
Lock all OSD memory to prevent it from being unloaded into swap with
|
||||||
|
mlockall(). Requires sufficient ulimit -l (max locked memory).
|
||||||
|
|
||||||
|
## auto_scrub
|
||||||
|
|
||||||
|
- Type: boolean
|
||||||
|
- Default: false
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Data scrubbing is the process of background verification of copies to find
|
||||||
|
and repair corrupted blocks. It's not run automatically by default since
|
||||||
|
it's a new feature. Set this parameter to true to enable automatic scrubs.
|
||||||
|
|
||||||
|
This parameter makes OSDs automatically schedule data scrubbing of clean PGs
|
||||||
|
every `scrub_interval` (see below). You can also start/schedule scrubbing
|
||||||
|
manually by setting `next_scrub` JSON key to the desired UNIX time of the
|
||||||
|
next scrub in `/pg/history/...` values in etcd.
|
||||||
|
|
||||||
|
## no_scrub
|
||||||
|
|
||||||
|
- Type: boolean
|
||||||
|
- Default: false
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Temporarily disable scrubbing and stop running scrubs.
|
||||||
|
|
||||||
|
## scrub_interval
|
||||||
|
|
||||||
|
- Type: string
|
||||||
|
- Default: 30d
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Default automatic scrubbing interval for all pools. Numbers without suffix
|
||||||
|
are treated as seconds, possible unit suffixes include 's' (seconds),
|
||||||
|
'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
|
||||||
|
|
||||||
|
## scrub_queue_depth
|
||||||
|
|
||||||
|
- Type: integer
|
||||||
|
- Default: 1
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Number of parallel scrubbing operations per one OSD.
|
||||||
|
|
||||||
|
## scrub_sleep
|
||||||
|
|
||||||
|
- Type: milliseconds
|
||||||
|
- Default: 0
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Additional interval between two consecutive scrubbing operations on one OSD.
|
||||||
|
Can be used to slow down scrubbing if it affects user load too much.
|
||||||
|
|
||||||
|
## scrub_list_limit
|
||||||
|
|
||||||
|
- Type: integer
|
||||||
|
- Default: 1000
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Number of objects to list in one listing operation during scrub.
|
||||||
|
|
||||||
|
## scrub_find_best
|
||||||
|
|
||||||
|
- Type: boolean
|
||||||
|
- Default: true
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Find and automatically restore best versions of objects with unmatched
|
||||||
|
copies. In replicated setups, the best version is the version with most
|
||||||
|
matching replicas. In EC setups, the best version is the subset of data
|
||||||
|
and parity chunks without mismatches.
|
||||||
|
|
||||||
|
The hypothetical situation where you might want to disable it is when
|
||||||
|
you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
|
||||||
|
corrupt an object in the same way (for example, zero it out) and only
|
||||||
|
1 HDD will remain good. In this case disabling scrub_find_best may help
|
||||||
|
you to recover the data! See also scrub_ec_max_bruteforce below.
|
||||||
|
|
||||||
|
## scrub_ec_max_bruteforce
|
||||||
|
|
||||||
|
- Type: integer
|
||||||
|
- Default: 100
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Vitastor can locate corrupted chunks in EC setups with more than 1 parity
|
||||||
|
chunk by brute-forcing all possible error locations. This configuration
|
||||||
|
value limits the maximum number of checked combinations. You can try to
|
||||||
|
increase it if you have EC N+K setup with N and K large enough for
|
||||||
|
combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
|
||||||
|
than the default 100.
|
||||||
|
|
||||||
|
If there are too many possible combinations or if multiple combinations give
|
||||||
|
correct results then objects are marked inconsistent and aren't recovered
|
||||||
|
automatically.
|
||||||
|
|
||||||
|
In replicated setups bruteforcing isn't needed, Vitastor just assumes that
|
||||||
|
the variant with most available equal copies is correct. For example, if
|
||||||
|
you have 3 replicas and 1 of them differs, this one is considered to be
|
||||||
|
corrupted. But if there is no "best" version with more copies than all
|
||||||
|
others have then the object is also marked as inconsistent.
|
||||||
|
@@ -8,7 +8,8 @@
|
|||||||
|
|
||||||
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
|
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
|
||||||
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
|
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
|
||||||
момент с перезапуском OSD.
|
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
|
||||||
|
изменения конфигурации в etcd.
|
||||||
|
|
||||||
- [etcd_report_interval](#etcd_report_interval)
|
- [etcd_report_interval](#etcd_report_interval)
|
||||||
- [run_primary](#run_primary)
|
- [run_primary](#run_primary)
|
||||||
@@ -39,6 +40,14 @@
|
|||||||
- [throttle_target_parallelism](#throttle_target_parallelism)
|
- [throttle_target_parallelism](#throttle_target_parallelism)
|
||||||
- [throttle_threshold_us](#throttle_threshold_us)
|
- [throttle_threshold_us](#throttle_threshold_us)
|
||||||
- [osd_memlock](#osd_memlock)
|
- [osd_memlock](#osd_memlock)
|
||||||
|
- [auto_scrub](#auto_scrub)
|
||||||
|
- [no_scrub](#no_scrub)
|
||||||
|
- [scrub_interval](#scrub_interval)
|
||||||
|
- [scrub_queue_depth](#scrub_queue_depth)
|
||||||
|
- [scrub_sleep](#scrub_sleep)
|
||||||
|
- [scrub_list_limit](#scrub_list_limit)
|
||||||
|
- [scrub_find_best](#scrub_find_best)
|
||||||
|
- [scrub_ec_max_bruteforce](#scrub_ec_max_bruteforce)
|
||||||
|
|
||||||
## etcd_report_interval
|
## etcd_report_interval
|
||||||
|
|
||||||
@@ -93,6 +102,7 @@ RUNNING), подходящий под заданную маску. Также н
|
|||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 5
|
- Значение по умолчанию: 5
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Временной интервал отправки автоматических fsync-ов (операций очистки кэша)
|
Временной интервал отправки автоматических fsync-ов (операций очистки кэша)
|
||||||
каждым OSD для случая, когда режим immediate_commit отключён. fsync-и нужны
|
каждым OSD для случая, когда режим immediate_commit отключён. fsync-и нужны
|
||||||
@@ -105,6 +115,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 128
|
- Значение по умолчанию: 128
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Аналогично autosync_interval, но задаёт не временной интервал, а
|
Аналогично autosync_interval, но задаёт не временной интервал, а
|
||||||
максимальное количество незафиксированных операций записи перед
|
максимальное количество незафиксированных операций записи перед
|
||||||
@@ -114,6 +125,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 4
|
- Значение по умолчанию: 4
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное число операций восстановления на одном первичном OSD в любой
|
Максимальное число операций восстановления на одном первичном OSD в любой
|
||||||
момент времени. На данный момент единственный параметр, который можно менять
|
момент времени. На данный момент единственный параметр, который можно менять
|
||||||
@@ -124,6 +136,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 128
|
- Значение по умолчанию: 128
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Число операций восстановления перед переключением на восстановление другой PG.
|
Число операций восстановления перед переключением на восстановление другой PG.
|
||||||
Идея заключается в том, чтобы восстанавливать все PG одновременно для более
|
Идея заключается в том, чтобы восстанавливать все PG одновременно для более
|
||||||
@@ -135,6 +148,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 16
|
- Значение по умолчанию: 16
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное число операций восстановления перед дополнительным fsync.
|
Максимальное число операций восстановления перед дополнительным fsync.
|
||||||
|
|
||||||
@@ -150,6 +164,7 @@ OSD, чтобы успевать очищать журнал - без них OSD
|
|||||||
|
|
||||||
- Тип: булево (да/нет)
|
- Тип: булево (да/нет)
|
||||||
- Значение по умолчанию: false
|
- Значение по умолчанию: false
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Отключить автоматическое фоновое восстановление объектов. Обратите внимание,
|
Отключить автоматическое фоновое восстановление объектов. Обратите внимание,
|
||||||
что эта опция не отключает восстановление объектов, происходящее при
|
что эта опция не отключает восстановление объектов, происходящее при
|
||||||
@@ -160,6 +175,7 @@ OSD.
|
|||||||
|
|
||||||
- Тип: булево (да/нет)
|
- Тип: булево (да/нет)
|
||||||
- Значение по умолчанию: false
|
- Значение по умолчанию: false
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Отключить фоновое перемещение объектов между разными OSD. Отключение
|
Отключить фоновое перемещение объектов между разными OSD. Отключение
|
||||||
означает, что PG, находящиеся в состоянии `has_misplaced`, будут оставлены
|
означает, что PG, находящиеся в состоянии `has_misplaced`, будут оставлены
|
||||||
@@ -169,6 +185,7 @@ OSD.
|
|||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 3
|
- Значение по умолчанию: 3
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Временной интервал, с которым OSD печатают простую человекочитаемую
|
Временной интервал, с которым OSD печатают простую человекочитаемую
|
||||||
статистику выполнения операций в стандартный вывод.
|
статистику выполнения операций в стандартный вывод.
|
||||||
@@ -177,6 +194,7 @@ OSD.
|
|||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 10
|
- Значение по умолчанию: 10
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Временной интервал, с которым OSD выводят в стандартный вывод список
|
Временной интервал, с которым OSD выводят в стандартный вывод список
|
||||||
медленных или зависших операций, если таковые имеются. Также время, при
|
медленных или зависших операций, если таковые имеются. Также время, при
|
||||||
@@ -186,6 +204,7 @@ OSD.
|
|||||||
|
|
||||||
- Тип: секунды
|
- Тип: секунды
|
||||||
- Значение по умолчанию: 60
|
- Значение по умолчанию: 60
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Число секунд, через которое удалённый инод удаляется и из статистики OSD.
|
Число секунд, через которое удалённый инод удаляется и из статистики OSD.
|
||||||
|
|
||||||
@@ -193,6 +212,7 @@ OSD.
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 128
|
- Значение по умолчанию: 128
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное число одновременных клиентских операций записи на один OSD.
|
Максимальное число одновременных клиентских операций записи на один OSD.
|
||||||
Операции, превышающие этот лимит, не исполняются сразу, а сохраняются во
|
Операции, превышающие этот лимит, не исполняются сразу, а сохраняются во
|
||||||
@@ -202,6 +222,7 @@ OSD.
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 1
|
- Значение по умолчанию: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Flusher - это микро-поток (корутина), которая копирует данные из журнала в
|
Flusher - это микро-поток (корутина), которая копирует данные из журнала в
|
||||||
основную область устройства данных. Их число настраивается динамически между
|
основную область устройства данных. Их число настраивается динамически между
|
||||||
@@ -211,6 +232,7 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 256
|
- Значение по умолчанию: 256
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Максимальное число микро-потоков очистки журнала (см. выше min_flusher_count).
|
Максимальное число микро-потоков очистки журнала (см. выше min_flusher_count).
|
||||||
|
|
||||||
@@ -270,6 +292,7 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
|
|
||||||
- Тип: булево (да/нет)
|
- Тип: булево (да/нет)
|
||||||
- Значение по умолчанию: false
|
- Значение по умолчанию: false
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Разрешить мягкое ограничение скорости журналируемой записи. Полезно для
|
Разрешить мягкое ограничение скорости журналируемой записи. Полезно для
|
||||||
гибридных OSD с быстрыми устройствами метаданных и медленными устройствами
|
гибридных OSD с быстрыми устройствами метаданных и медленными устройствами
|
||||||
@@ -288,6 +311,7 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 100
|
- Значение по умолчанию: 100
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Расчётное максимальное число ограничиваемых операций в секунду при условии
|
Расчётное максимальное число ограничиваемых операций в секунду при условии
|
||||||
отсутствия свободного места в журнале. Устанавливайте приблизительно равным
|
отсутствия свободного места в журнале. Устанавливайте приблизительно равным
|
||||||
@@ -298,6 +322,7 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 100
|
- Значение по умолчанию: 100
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Расчётный максимальный размер в МБ/с ограничиваемых операций в секунду при
|
Расчётный максимальный размер в МБ/с ограничиваемых операций в секунду при
|
||||||
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
|
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
|
||||||
@@ -308,6 +333,7 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
- Значение по умолчанию: 1
|
- Значение по умолчанию: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Расчётный максимальный параллелизм ограничиваемых операций в секунду при
|
Расчётный максимальный параллелизм ограничиваемых операций в секунду при
|
||||||
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
|
условии отсутствия свободного места в журнале. Устанавливайте приблизительно
|
||||||
@@ -318,6 +344,7 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
|
|
||||||
- Тип: микросекунды
|
- Тип: микросекунды
|
||||||
- Значение по умолчанию: 50
|
- Значение по умолчанию: 50
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
Минимальная применимая к ограничиваемым операциям задержка. Обычно не
|
Минимальная применимая к ограничиваемым операциям задержка. Обычно не
|
||||||
требует изменений.
|
требует изменений.
|
||||||
@@ -327,4 +354,113 @@ Flusher - это микро-поток (корутина), которая коп
|
|||||||
- Тип: булево (да/нет)
|
- Тип: булево (да/нет)
|
||||||
- Значение по умолчанию: false
|
- Значение по умолчанию: false
|
||||||
|
|
||||||
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку в пространство подкачки. Требует достаточного значения ulimit -l (лимита заблокированной памяти).
|
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
|
||||||
|
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
|
||||||
|
заблокированной памяти).
|
||||||
|
|
||||||
|
## auto_scrub
|
||||||
|
|
||||||
|
- Тип: булево (да/нет)
|
||||||
|
- Значение по умолчанию: false
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
|
||||||
|
находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
|
||||||
|
запускаются автоматически, так как являются новой функцией. Чтобы включить
|
||||||
|
автоматическое планирование скрабов, установите данный параметр в true.
|
||||||
|
|
||||||
|
Включённый параметр заставляет OSD автоматически планировать фоновую
|
||||||
|
проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
|
||||||
|
запустить или запланировать проверку вручную, установив значение ключа JSON
|
||||||
|
`next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
|
||||||
|
желаемой проверки.
|
||||||
|
|
||||||
|
## no_scrub
|
||||||
|
|
||||||
|
- Тип: булево (да/нет)
|
||||||
|
- Значение по умолчанию: false
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Временно отключить и остановить запущенные скрабы.
|
||||||
|
|
||||||
|
## scrub_interval
|
||||||
|
|
||||||
|
- Тип: строка
|
||||||
|
- Значение по умолчанию: 30d
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Интервал автоматической фоновой проверки по умолчанию для всех пулов.
|
||||||
|
Значения без указанной единицы измерения считаются в секундах, допустимые
|
||||||
|
символы единиц измерения в конце: 's' (секунды),
|
||||||
|
'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяцы) или 'y' (годы).
|
||||||
|
|
||||||
|
## scrub_queue_depth
|
||||||
|
|
||||||
|
- Тип: целое число
|
||||||
|
- Значение по умолчанию: 1
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Число параллельных операций фоновой проверки на один OSD.
|
||||||
|
|
||||||
|
## scrub_sleep
|
||||||
|
|
||||||
|
- Тип: миллисекунды
|
||||||
|
- Значение по умолчанию: 0
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Дополнительный интервал ожидания после фоновой проверки каждого объекта на
|
||||||
|
одном OSD. Может использоваться для замедления скраба, если он слишком
|
||||||
|
сильно влияет на пользовательскую нагрузку.
|
||||||
|
|
||||||
|
## scrub_list_limit
|
||||||
|
|
||||||
|
- Тип: целое число
|
||||||
|
- Значение по умолчанию: 1000
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Размер загружаемых за одну операцию списков объектов в процессе фоновой
|
||||||
|
проверки.
|
||||||
|
|
||||||
|
## scrub_find_best
|
||||||
|
|
||||||
|
- Тип: булево (да/нет)
|
||||||
|
- Значение по умолчанию: true
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Находить и автоматически восстанавливать "лучшие версии" объектов с
|
||||||
|
несовпадающими копиями/частями. При использовании репликации "лучшая"
|
||||||
|
версия - версия, доступная в большем числе экземпляров, чем другие. При
|
||||||
|
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
|
||||||
|
частей данных и чётности, полностью соответствующих друг другу.
|
||||||
|
|
||||||
|
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
|
||||||
|
поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
|
||||||
|
незаметно и одинаково повредить данные одного и того же объекта, например,
|
||||||
|
занулив его, и только 1 диск останется неповреждённым. В этой ситуации
|
||||||
|
отключение этого параметра поможет вам восстановить данные! Смотрите также
|
||||||
|
описание следующего параметра - scrub_ec_max_bruteforce.
|
||||||
|
|
||||||
|
## scrub_ec_max_bruteforce
|
||||||
|
|
||||||
|
- Тип: целое число
|
||||||
|
- Значение по умолчанию: 100
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Vitastor старается определить повреждённые части объектов при использовании
|
||||||
|
EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
|
||||||
|
всех возможных комбинаций ошибочных частей. Данное значение конфигурации
|
||||||
|
ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
|
||||||
|
его, если используете схему кодирования EC N+K с N и K, достаточно большими
|
||||||
|
для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
|
||||||
|
было больше, чем стандартное значение 100.
|
||||||
|
|
||||||
|
Если возможных комбинаций слишком много или если корректная комбинация не
|
||||||
|
определяется однозначно, объекты помечаются неконсистентными (inconsistent)
|
||||||
|
и не восстанавливаются автоматически.
|
||||||
|
|
||||||
|
При использовании репликации перебор не нужен, Vitastor просто предполагает,
|
||||||
|
что вариант объекта с наибольшим количеством одинаковых копий корректен.
|
||||||
|
Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
|
||||||
|
считается некорректной. Однако, если "лучшую" версию с числом доступных
|
||||||
|
копий большим, чем у всех других версий, найти невозможно, то объект тоже
|
||||||
|
маркируется неконсистентным.
|
||||||
|
@@ -40,6 +40,7 @@ Parameters:
|
|||||||
- [root_node](#root_node)
|
- [root_node](#root_node)
|
||||||
- [osd_tags](#osd_tags)
|
- [osd_tags](#osd_tags)
|
||||||
- [primary_affinity_tags](#primary_affinity_tags)
|
- [primary_affinity_tags](#primary_affinity_tags)
|
||||||
|
- [scrub_interval](#scrub_interval)
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
@@ -272,6 +273,13 @@ Specifies OSD tags to prefer putting primary OSDs in this pool to.
|
|||||||
Note that for EC/XOR pools Vitastor always prefers to put primary OSD on one
|
Note that for EC/XOR pools Vitastor always prefers to put primary OSD on one
|
||||||
of the OSDs containing a data chunk for a PG.
|
of the OSDs containing a data chunk for a PG.
|
||||||
|
|
||||||
|
## scrub_interval
|
||||||
|
|
||||||
|
- Type: time interval (number + unit s/m/h/d/M/y)
|
||||||
|
|
||||||
|
Automatic scrubbing interval for this pool. Overrides
|
||||||
|
[global scrub_interval setting](osd.en.md#scrub_interval).
|
||||||
|
|
||||||
# Examples
|
# Examples
|
||||||
|
|
||||||
## Replicated pool
|
## Replicated pool
|
||||||
|
@@ -39,6 +39,7 @@
|
|||||||
- [root_node](#root_node)
|
- [root_node](#root_node)
|
||||||
- [osd_tags](#osd_tags)
|
- [osd_tags](#osd_tags)
|
||||||
- [primary_affinity_tags](#primary_affinity_tags)
|
- [primary_affinity_tags](#primary_affinity_tags)
|
||||||
|
- [scrub_interval](#scrub_interval)
|
||||||
|
|
||||||
Примеры:
|
Примеры:
|
||||||
|
|
||||||
@@ -276,6 +277,13 @@ PG в Vitastor эфемерны, то есть вы можете менят
|
|||||||
для PG этого пула. Имейте в виду, что для EC-пулов Vitastor также всегда
|
для PG этого пула. Имейте в виду, что для EC-пулов Vitastor также всегда
|
||||||
предпочитает помещать первичный OSD на один из OSD с данными, а не с чётностью.
|
предпочитает помещать первичный OSD на один из OSD с данными, а не с чётностью.
|
||||||
|
|
||||||
|
## scrub_interval
|
||||||
|
|
||||||
|
- Тип: временной интервал (число + единица измерения s/m/h/d/M/y)
|
||||||
|
|
||||||
|
Интервал скраба, то есть, автоматической фоновой проверки данных для данного пула.
|
||||||
|
Переопределяет [глобальную настройку scrub_interval](osd.ru.md#scrub_interval).
|
||||||
|
|
||||||
# Примеры
|
# Примеры
|
||||||
|
|
||||||
## Реплицированный пул
|
## Реплицированный пул
|
||||||
|
@@ -11,13 +11,21 @@
|
|||||||
- name: etcd_address
|
- name: etcd_address
|
||||||
type: string or array of strings
|
type: string or array of strings
|
||||||
type_ru: строка или массив строк
|
type_ru: строка или массив строк
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
|
etcd connection endpoint(s). Multiple endpoints may be delimited by "," or
|
||||||
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
specified in a JSON array `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
||||||
Note that https is not supported for etcd connections yet.
|
Note that https is not supported for etcd connections yet.
|
||||||
|
|
||||||
|
etcd connection endpoints can be changed online by updating global
|
||||||
|
configuration in etcd itself - this allows switching the cluster to new
|
||||||
|
etcd addresses without downtime.
|
||||||
info_ru: |
|
info_ru: |
|
||||||
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
|
Адрес(а) подключения к etcd. Несколько адресов могут разделяться запятой
|
||||||
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
или указываться в виде JSON-массива `["10.0.115.10:2379/v3","10.0.115.11:2379/v3"]`.
|
||||||
|
|
||||||
|
Адреса подключения к etcd можно поменять на лету, обновив конфигурацию в
|
||||||
|
самом etcd - это позволяет переключить кластер на новые etcd без остановки.
|
||||||
- name: etcd_prefix
|
- name: etcd_prefix
|
||||||
type: string
|
type: string
|
||||||
default: "/vitastor"
|
default: "/vitastor"
|
||||||
@@ -31,5 +39,6 @@
|
|||||||
- name: log_level
|
- name: log_level
|
||||||
type: int
|
type: int
|
||||||
default: 0
|
default: 0
|
||||||
|
online: true
|
||||||
info: Log level. Raise if you want more verbose output.
|
info: Log level. Raise if you want more verbose output.
|
||||||
info_ru: Уровень логирования. Повысьте, если хотите более подробный вывод.
|
info_ru: Уровень логирования. Повысьте, если хотите более подробный вывод.
|
||||||
|
@@ -14,6 +14,7 @@ const L = {
|
|||||||
toc_config: '[Configuration](../config.en.md)',
|
toc_config: '[Configuration](../config.en.md)',
|
||||||
toc_usage: 'Usage',
|
toc_usage: 'Usage',
|
||||||
toc_performance: 'Performance',
|
toc_performance: 'Performance',
|
||||||
|
online: 'Can be changed online: yes',
|
||||||
},
|
},
|
||||||
ru: {
|
ru: {
|
||||||
Documentation: 'Документация',
|
Documentation: 'Документация',
|
||||||
@@ -28,6 +29,7 @@ const L = {
|
|||||||
toc_config: '[Конфигурация](../config.ru.md)',
|
toc_config: '[Конфигурация](../config.ru.md)',
|
||||||
toc_usage: 'Использование',
|
toc_usage: 'Использование',
|
||||||
toc_performance: 'Производительность',
|
toc_performance: 'Производительность',
|
||||||
|
online: 'Можно менять на лету: да',
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
const types = {
|
const types = {
|
||||||
@@ -70,6 +72,8 @@ for (const file of params_files)
|
|||||||
out += `- ${L[lang]['Default'] || 'Default'}: ${c.default}\n`;
|
out += `- ${L[lang]['Default'] || 'Default'}: ${c.default}\n`;
|
||||||
if (c.min !== undefined)
|
if (c.min !== undefined)
|
||||||
out += `- ${L[lang]['Minimum'] || 'Minimum'}: ${c.min}\n`;
|
out += `- ${L[lang]['Minimum'] || 'Minimum'}: ${c.min}\n`;
|
||||||
|
if (c.online)
|
||||||
|
out += `- ${L[lang]['online'] || 'Can be changed online: yes'}\n`;
|
||||||
out += `\n`+(c["info_"+lang] || c["info"]).replace(/\s+$/, '');
|
out += `\n`+(c["info_"+lang] || c["info"]).replace(/\s+$/, '');
|
||||||
}
|
}
|
||||||
const head = fs.readFileSync(__dirname+'/'+file+'.'+lang+'.md', { encoding: 'utf-8' });
|
const head = fs.readFileSync(__dirname+'/'+file+'.'+lang+'.md', { encoding: 'utf-8' });
|
||||||
|
@@ -53,6 +53,12 @@
|
|||||||
to work. For example, Mellanox ConnectX-3 and older adapters don't have
|
to work. For example, Mellanox ConnectX-3 and older adapters don't have
|
||||||
Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
|
Implicit ODP, so they're unsupported by Vitastor. Run `ibv_devinfo -v` as
|
||||||
root to list available RDMA devices and their features.
|
root to list available RDMA devices and their features.
|
||||||
|
|
||||||
|
Remember that you also have to configure your network switches if you use
|
||||||
|
RoCE/RoCEv2, otherwise you may experience unstable performance. Refer to
|
||||||
|
the manual of your network vendor for details about setting up the switch
|
||||||
|
for RoCEv2 correctly. Usually it means setting up Lossless Ethernet with
|
||||||
|
PFC (Priority Flow Control) and ECN (Explicit Congestion Notification).
|
||||||
info_ru: |
|
info_ru: |
|
||||||
Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
|
Название RDMA-устройства для связи с Vitastor OSD (например, "rocep5s0f0").
|
||||||
Имейте в виду, что поддержка RDMA в Vitastor требует функций устройства
|
Имейте в виду, что поддержка RDMA в Vitastor требует функций устройства
|
||||||
@@ -61,6 +67,13 @@
|
|||||||
потому не поддерживаются в Vitastor. Запустите `ibv_devinfo -v` от имени
|
потому не поддерживаются в Vitastor. Запустите `ibv_devinfo -v` от имени
|
||||||
суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
|
суперпользователя, чтобы посмотреть список доступных RDMA-устройств, их
|
||||||
параметры и возможности.
|
параметры и возможности.
|
||||||
|
|
||||||
|
Обратите внимание, что если вы используете RoCE/RoCEv2, вам также необходимо
|
||||||
|
правильно настроить для него коммутаторы, иначе вы можете столкнуться с
|
||||||
|
нестабильной производительностью. Подробную информацию о настройке
|
||||||
|
коммутатора для RoCEv2 ищите в документации производителя. Обычно это
|
||||||
|
подразумевает настройку сети без потерь на основе PFC (Priority Flow
|
||||||
|
Control) и ECN (Explicit Congestion Notification).
|
||||||
- name: rdma_port_num
|
- name: rdma_port_num
|
||||||
type: int
|
type: int
|
||||||
default: 1
|
default: 1
|
||||||
@@ -114,42 +127,58 @@
|
|||||||
так что менять этот параметр обычно не нужно.
|
так что менять этот параметр обычно не нужно.
|
||||||
- name: rdma_max_msg
|
- name: rdma_max_msg
|
||||||
type: int
|
type: int
|
||||||
default: 1048576
|
default: 132096
|
||||||
info: Maximum size of a single RDMA send or receive operation in bytes.
|
info: Maximum size of a single RDMA send or receive operation in bytes.
|
||||||
info_ru: Максимальный размер одной RDMA-операции отправки или приёма.
|
info_ru: Максимальный размер одной RDMA-операции отправки или приёма.
|
||||||
- name: rdma_max_recv
|
- name: rdma_max_recv
|
||||||
|
type: int
|
||||||
|
default: 16
|
||||||
|
info: |
|
||||||
|
Maximum number of RDMA receive buffers per connection (RDMA requires
|
||||||
|
preallocated buffers to receive data). Each buffer is `rdma_max_msg` bytes
|
||||||
|
in size. So this setting directly affects memory usage: a single Vitastor
|
||||||
|
RDMA client uses `rdma_max_recv * rdma_max_msg * OSD_COUNT` bytes of memory.
|
||||||
|
Default is roughly 2 MB * number of OSDs.
|
||||||
|
info_ru: |
|
||||||
|
Максимальное число буферов для RDMA-приёма данных на одно соединение
|
||||||
|
(RDMA требует заранее выделенных буферов для приёма данных). Каждый буфер
|
||||||
|
имеет размер `rdma_max_msg` байт. Таким образом, настройка прямо влияет на
|
||||||
|
потребление памяти - один Vitastor-клиент с RDMA использует
|
||||||
|
`rdma_max_recv * rdma_max_msg * ЧИСЛО_OSD` байт памяти, по умолчанию -
|
||||||
|
примерно 2 МБ * число OSD.
|
||||||
|
- name: rdma_max_send
|
||||||
type: int
|
type: int
|
||||||
default: 8
|
default: 8
|
||||||
info: |
|
info: |
|
||||||
Maximum number of parallel RDMA receive operations. Note that this number
|
Maximum number of outstanding RDMA send operations per connection. Should be
|
||||||
of receive buffers `rdma_max_msg` in size are allocated for each client,
|
less than `rdma_max_recv` so the receiving side doesn't run out of buffers.
|
||||||
so this setting actually affects memory usage. This is because RDMA receive
|
Doesn't affect memory usage - additional memory isn't allocated for send
|
||||||
operations are (sadly) still not zero-copy in Vitastor. It may be fixed in
|
operations.
|
||||||
later versions.
|
|
||||||
info_ru: |
|
info_ru: |
|
||||||
Максимальное число параллельных RDMA-операций получения данных. Следует
|
Максимальное число RDMA-операций отправки, отправляемых в очередь одного
|
||||||
иметь в виду, что данное число буферов размером `rdma_max_msg` выделяется
|
соединения. Желательно, чтобы оно было меньше `rdma_max_recv`, чтобы
|
||||||
для каждого подключённого клиентского соединения, так что данная настройка
|
у принимающей стороны в процессе работы не заканчивались буферы на приём.
|
||||||
влияет на потребление памяти. Это так потому, что RDMA-приём данных в
|
Не влияет на потребление памяти - дополнительная память на операции отправки
|
||||||
Vitastor, увы, всё равно не является zero-copy, т.е. всё равно 1 раз
|
не выделяется.
|
||||||
копирует данные в памяти. Данная особенность, возможно, будет исправлена в
|
|
||||||
более новых версиях Vitastor.
|
|
||||||
- name: peer_connect_interval
|
- name: peer_connect_interval
|
||||||
type: sec
|
type: sec
|
||||||
min: 1
|
min: 1
|
||||||
default: 5
|
default: 5
|
||||||
|
online: true
|
||||||
info: Interval before attempting to reconnect to an unavailable OSD.
|
info: Interval before attempting to reconnect to an unavailable OSD.
|
||||||
info_ru: Время ожидания перед повторной попыткой соединиться с недоступным OSD.
|
info_ru: Время ожидания перед повторной попыткой соединиться с недоступным OSD.
|
||||||
- name: peer_connect_timeout
|
- name: peer_connect_timeout
|
||||||
type: sec
|
type: sec
|
||||||
min: 1
|
min: 1
|
||||||
default: 5
|
default: 5
|
||||||
|
online: true
|
||||||
info: Timeout for OSD connection attempts.
|
info: Timeout for OSD connection attempts.
|
||||||
info_ru: Максимальное время ожидания попытки соединения с OSD.
|
info_ru: Максимальное время ожидания попытки соединения с OSD.
|
||||||
- name: osd_idle_timeout
|
- name: osd_idle_timeout
|
||||||
type: sec
|
type: sec
|
||||||
min: 1
|
min: 1
|
||||||
default: 5
|
default: 5
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
OSD connection inactivity time after which clients and other OSDs send
|
OSD connection inactivity time after which clients and other OSDs send
|
||||||
keepalive requests to check state of the connection.
|
keepalive requests to check state of the connection.
|
||||||
@@ -160,6 +189,7 @@
|
|||||||
type: sec
|
type: sec
|
||||||
min: 1
|
min: 1
|
||||||
default: 5
|
default: 5
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
|
Maximum time to wait for OSD keepalive responses. If an OSD doesn't respond
|
||||||
within this time, the connection to it is dropped and a reconnection attempt
|
within this time, the connection to it is dropped and a reconnection attempt
|
||||||
@@ -172,6 +202,7 @@
|
|||||||
type: ms
|
type: ms
|
||||||
min: 50
|
min: 50
|
||||||
default: 500
|
default: 500
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
OSDs respond to clients with a special error code when they receive I/O
|
OSDs respond to clients with a special error code when they receive I/O
|
||||||
requests for a PG that's not synchronized and started. This parameter sets
|
requests for a PG that's not synchronized and started. This parameter sets
|
||||||
@@ -185,6 +216,7 @@
|
|||||||
- name: max_etcd_attempts
|
- name: max_etcd_attempts
|
||||||
type: int
|
type: int
|
||||||
default: 5
|
default: 5
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Maximum number of attempts for etcd requests which can't be retried
|
Maximum number of attempts for etcd requests which can't be retried
|
||||||
indefinitely.
|
indefinitely.
|
||||||
@@ -194,6 +226,7 @@
|
|||||||
- name: etcd_quick_timeout
|
- name: etcd_quick_timeout
|
||||||
type: ms
|
type: ms
|
||||||
default: 1000
|
default: 1000
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Timeout for etcd requests which should complete quickly, like lease refresh.
|
Timeout for etcd requests which should complete quickly, like lease refresh.
|
||||||
info_ru: |
|
info_ru: |
|
||||||
@@ -202,6 +235,7 @@
|
|||||||
- name: etcd_slow_timeout
|
- name: etcd_slow_timeout
|
||||||
type: ms
|
type: ms
|
||||||
default: 5000
|
default: 5000
|
||||||
|
online: true
|
||||||
info: Timeout for etcd requests which are allowed to wait for some time.
|
info: Timeout for etcd requests which are allowed to wait for some time.
|
||||||
info_ru: |
|
info_ru: |
|
||||||
Максимальное время выполнения запросов к etcd, для которых не обязательно
|
Максимальное время выполнения запросов к etcd, для которых не обязательно
|
||||||
@@ -209,6 +243,7 @@
|
|||||||
- name: etcd_keepalive_timeout
|
- name: etcd_keepalive_timeout
|
||||||
type: sec
|
type: sec
|
||||||
default: max(30, etcd_report_interval*2)
|
default: max(30, etcd_report_interval*2)
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
|
Timeout for etcd connection HTTP Keep-Alive. Should be higher than
|
||||||
etcd_report_interval to guarantee that keepalive actually works.
|
etcd_report_interval to guarantee that keepalive actually works.
|
||||||
@@ -218,6 +253,7 @@
|
|||||||
- name: etcd_ws_keepalive_timeout
|
- name: etcd_ws_keepalive_timeout
|
||||||
type: sec
|
type: sec
|
||||||
default: 30
|
default: 30
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
etcd websocket ping interval required to keep the connection alive and
|
etcd websocket ping interval required to keep the connection alive and
|
||||||
detect disconnections quickly.
|
detect disconnections quickly.
|
||||||
@@ -226,6 +262,7 @@
|
|||||||
- name: client_dirty_limit
|
- name: client_dirty_limit
|
||||||
type: int
|
type: int
|
||||||
default: 33554432
|
default: 33554432
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Without immediate_commit=all this parameter sets the limit of "dirty"
|
Without immediate_commit=all this parameter sets the limit of "dirty"
|
||||||
(not committed by fsync) data allowed by the client before forcing an
|
(not committed by fsync) data allowed by the client before forcing an
|
||||||
|
@@ -1,4 +1,5 @@
|
|||||||
# Runtime OSD Parameters
|
# Runtime OSD Parameters
|
||||||
|
|
||||||
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
|
These parameters only apply to OSDs, are not fixed at the moment of OSD drive
|
||||||
initialization and can be changed with an OSD restart.
|
initialization and can be changed - either with an OSD restart or, for some of
|
||||||
|
them, even without restarting by updating configuration in etcd.
|
||||||
|
@@ -2,4 +2,5 @@
|
|||||||
|
|
||||||
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
|
Данные параметры используются только OSD, но, в отличие от дисковых параметров,
|
||||||
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
|
не фиксируются в момент инициализации дисков OSD и могут быть изменены в любой
|
||||||
момент с перезапуском OSD.
|
момент с помощью перезапуска OSD, а некоторые и без перезапуска, с помощью
|
||||||
|
изменения конфигурации в etcd.
|
||||||
|
@@ -66,6 +66,7 @@
|
|||||||
- name: autosync_interval
|
- name: autosync_interval
|
||||||
type: sec
|
type: sec
|
||||||
default: 5
|
default: 5
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Time interval at which automatic fsyncs/flushes are issued by each OSD when
|
Time interval at which automatic fsyncs/flushes are issued by each OSD when
|
||||||
the immediate_commit mode is disabled. fsyncs are required because without
|
the immediate_commit mode is disabled. fsyncs are required because without
|
||||||
@@ -83,6 +84,7 @@
|
|||||||
- name: autosync_writes
|
- name: autosync_writes
|
||||||
type: int
|
type: int
|
||||||
default: 128
|
default: 128
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Same as autosync_interval, but sets the maximum number of uncommitted write
|
Same as autosync_interval, but sets the maximum number of uncommitted write
|
||||||
operations before issuing an fsync operation internally.
|
operations before issuing an fsync operation internally.
|
||||||
@@ -93,6 +95,7 @@
|
|||||||
- name: recovery_queue_depth
|
- name: recovery_queue_depth
|
||||||
type: int
|
type: int
|
||||||
default: 4
|
default: 4
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Maximum recovery operations per one primary OSD at any given moment of time.
|
Maximum recovery operations per one primary OSD at any given moment of time.
|
||||||
Currently it's the only parameter available to tune the speed of recovery
|
Currently it's the only parameter available to tune the speed of recovery
|
||||||
@@ -105,6 +108,7 @@
|
|||||||
- name: recovery_pg_switch
|
- name: recovery_pg_switch
|
||||||
type: int
|
type: int
|
||||||
default: 128
|
default: 128
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Number of recovery operations before switching to recovery of the next PG.
|
Number of recovery operations before switching to recovery of the next PG.
|
||||||
The idea is to mix all PGs during recovery for more even space and load
|
The idea is to mix all PGs during recovery for more even space and load
|
||||||
@@ -119,6 +123,7 @@
|
|||||||
- name: recovery_sync_batch
|
- name: recovery_sync_batch
|
||||||
type: int
|
type: int
|
||||||
default: 16
|
default: 16
|
||||||
|
online: true
|
||||||
info: Maximum number of recovery operations before issuing an additional fsync.
|
info: Maximum number of recovery operations before issuing an additional fsync.
|
||||||
info_ru: Максимальное число операций восстановления перед дополнительным fsync.
|
info_ru: Максимальное число операций восстановления перед дополнительным fsync.
|
||||||
- name: readonly
|
- name: readonly
|
||||||
@@ -133,6 +138,7 @@
|
|||||||
- name: no_recovery
|
- name: no_recovery
|
||||||
type: bool
|
type: bool
|
||||||
default: false
|
default: false
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Disable automatic background recovery of objects. Note that it doesn't
|
Disable automatic background recovery of objects. Note that it doesn't
|
||||||
affect implicit recovery of objects happening during writes - a write is
|
affect implicit recovery of objects happening during writes - a write is
|
||||||
@@ -145,6 +151,7 @@
|
|||||||
- name: no_rebalance
|
- name: no_rebalance
|
||||||
type: bool
|
type: bool
|
||||||
default: false
|
default: false
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Disable background movement of data between different OSDs. Disabling it
|
Disable background movement of data between different OSDs. Disabling it
|
||||||
means that PGs in the `has_misplaced` state will be left in it indefinitely.
|
means that PGs in the `has_misplaced` state will be left in it indefinitely.
|
||||||
@@ -155,6 +162,7 @@
|
|||||||
- name: print_stats_interval
|
- name: print_stats_interval
|
||||||
type: sec
|
type: sec
|
||||||
default: 3
|
default: 3
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Time interval at which OSDs print simple human-readable operation
|
Time interval at which OSDs print simple human-readable operation
|
||||||
statistics on stdout.
|
statistics on stdout.
|
||||||
@@ -164,6 +172,7 @@
|
|||||||
- name: slow_log_interval
|
- name: slow_log_interval
|
||||||
type: sec
|
type: sec
|
||||||
default: 10
|
default: 10
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Time interval at which OSDs dump slow or stuck operations on stdout, if
|
Time interval at which OSDs dump slow or stuck operations on stdout, if
|
||||||
there are any. Also it's the time after which an operation is considered
|
there are any. Also it's the time after which an operation is considered
|
||||||
@@ -175,6 +184,7 @@
|
|||||||
- name: inode_vanish_time
|
- name: inode_vanish_time
|
||||||
type: sec
|
type: sec
|
||||||
default: 60
|
default: 60
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Number of seconds after which a deleted inode is removed from OSD statistics.
|
Number of seconds after which a deleted inode is removed from OSD statistics.
|
||||||
info_ru: |
|
info_ru: |
|
||||||
@@ -182,6 +192,7 @@
|
|||||||
- name: max_write_iodepth
|
- name: max_write_iodepth
|
||||||
type: int
|
type: int
|
||||||
default: 128
|
default: 128
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Parallel client write operation limit per one OSD. Operations that exceed
|
Parallel client write operation limit per one OSD. Operations that exceed
|
||||||
this limit are pushed to a temporary queue instead of being executed
|
this limit are pushed to a temporary queue instead of being executed
|
||||||
@@ -193,6 +204,7 @@
|
|||||||
- name: min_flusher_count
|
- name: min_flusher_count
|
||||||
type: int
|
type: int
|
||||||
default: 1
|
default: 1
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Flusher is a micro-thread that moves data from the journal to the data
|
Flusher is a micro-thread that moves data from the journal to the data
|
||||||
area of the device. Their number is auto-tuned between minimum and maximum.
|
area of the device. Their number is auto-tuned between minimum and maximum.
|
||||||
@@ -204,6 +216,7 @@
|
|||||||
- name: max_flusher_count
|
- name: max_flusher_count
|
||||||
type: int
|
type: int
|
||||||
default: 256
|
default: 256
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Maximum number of journal flushers (see above min_flusher_count).
|
Maximum number of journal flushers (see above min_flusher_count).
|
||||||
info_ru: |
|
info_ru: |
|
||||||
@@ -284,6 +297,7 @@
|
|||||||
- name: throttle_small_writes
|
- name: throttle_small_writes
|
||||||
type: bool
|
type: bool
|
||||||
default: false
|
default: false
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
|
Enable soft throttling of small journaled writes. Useful for hybrid OSDs
|
||||||
with fast journal/metadata devices and slow data devices. The idea is that
|
with fast journal/metadata devices and slow data devices. The idea is that
|
||||||
@@ -312,6 +326,7 @@
|
|||||||
- name: throttle_target_iops
|
- name: throttle_target_iops
|
||||||
type: int
|
type: int
|
||||||
default: 100
|
default: 100
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Target maximum number of throttled operations per second under the condition
|
Target maximum number of throttled operations per second under the condition
|
||||||
of full journal. Set it to approximate random write iops of your data devices
|
of full journal. Set it to approximate random write iops of your data devices
|
||||||
@@ -324,6 +339,7 @@
|
|||||||
- name: throttle_target_mbs
|
- name: throttle_target_mbs
|
||||||
type: int
|
type: int
|
||||||
default: 100
|
default: 100
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Target maximum bandwidth in MB/s of throttled operations per second under
|
Target maximum bandwidth in MB/s of throttled operations per second under
|
||||||
the condition of full journal. Set it to approximate linear write
|
the condition of full journal. Set it to approximate linear write
|
||||||
@@ -336,6 +352,7 @@
|
|||||||
- name: throttle_target_parallelism
|
- name: throttle_target_parallelism
|
||||||
type: int
|
type: int
|
||||||
default: 1
|
default: 1
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Target maximum parallelism of throttled operations under the condition of
|
Target maximum parallelism of throttled operations under the condition of
|
||||||
full journal. Set it to approximate internal parallelism of your data
|
full journal. Set it to approximate internal parallelism of your data
|
||||||
@@ -348,6 +365,7 @@
|
|||||||
- name: throttle_threshold_us
|
- name: throttle_threshold_us
|
||||||
type: us
|
type: us
|
||||||
default: 50
|
default: 50
|
||||||
|
online: true
|
||||||
info: |
|
info: |
|
||||||
Minimal computed delay to be applied to throttled operations. Usually
|
Minimal computed delay to be applied to throttled operations. Usually
|
||||||
doesn't need to be changed.
|
doesn't need to be changed.
|
||||||
@@ -357,10 +375,151 @@
|
|||||||
- name: osd_memlock
|
- name: osd_memlock
|
||||||
type: bool
|
type: bool
|
||||||
default: false
|
default: false
|
||||||
info: >
|
info: |
|
||||||
Lock all OSD memory to prevent it from being unloaded into swap with
|
Lock all OSD memory to prevent it from being unloaded into swap with
|
||||||
mlockall(). Requires sufficient ulimit -l (max locked memory).
|
mlockall(). Requires sufficient ulimit -l (max locked memory).
|
||||||
info_ru: >
|
info_ru: |
|
||||||
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
|
Блокировать всю память OSD с помощью mlockall, чтобы запретить её выгрузку
|
||||||
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
|
в пространство подкачки. Требует достаточного значения ulimit -l (лимита
|
||||||
заблокированной памяти).
|
заблокированной памяти).
|
||||||
|
- name: auto_scrub
|
||||||
|
type: bool
|
||||||
|
default: false
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Data scrubbing is the process of background verification of copies to find
|
||||||
|
and repair corrupted blocks. It's not run automatically by default since
|
||||||
|
it's a new feature. Set this parameter to true to enable automatic scrubs.
|
||||||
|
|
||||||
|
This parameter makes OSDs automatically schedule data scrubbing of clean PGs
|
||||||
|
every `scrub_interval` (see below). You can also start/schedule scrubbing
|
||||||
|
manually by setting `next_scrub` JSON key to the desired UNIX time of the
|
||||||
|
next scrub in `/pg/history/...` values in etcd.
|
||||||
|
info_ru: |
|
||||||
|
Скраб - процесс фоновой проверки копий данных, предназначенный, чтобы
|
||||||
|
находить и исправлять повреждённые блоки. По умолчанию эти проверки ещё не
|
||||||
|
запускаются автоматически, так как являются новой функцией. Чтобы включить
|
||||||
|
автоматическое планирование скрабов, установите данный параметр в true.
|
||||||
|
|
||||||
|
Включённый параметр заставляет OSD автоматически планировать фоновую
|
||||||
|
проверку чистых PG раз в `scrub_interval` (см. ниже). Вы также можете
|
||||||
|
запустить или запланировать проверку вручную, установив значение ключа JSON
|
||||||
|
`next_scrub` внутри ключей etcd `/pg/history/...` в UNIX-время следующей
|
||||||
|
желаемой проверки.
|
||||||
|
- name: no_scrub
|
||||||
|
type: bool
|
||||||
|
default: false
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Temporarily disable scrubbing and stop running scrubs.
|
||||||
|
info_ru: |
|
||||||
|
Временно отключить и остановить запущенные скрабы.
|
||||||
|
- name: scrub_interval
|
||||||
|
type: string
|
||||||
|
default: 30d
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Default automatic scrubbing interval for all pools. Numbers without suffix
|
||||||
|
are treated as seconds, possible unit suffixes include 's' (seconds),
|
||||||
|
'm' (minutes), 'h' (hours), 'd' (days), 'M' (months) and 'y' (years).
|
||||||
|
info_ru: |
|
||||||
|
Интервал автоматической фоновой проверки по умолчанию для всех пулов.
|
||||||
|
Значения без указанной единицы измерения считаются в секундах, допустимые
|
||||||
|
символы единиц измерения в конце: 's' (секунды),
|
||||||
|
'm' (минуты), 'h' (часы), 'd' (дни), 'M' (месяцы) или 'y' (годы).
|
||||||
|
- name: scrub_queue_depth
|
||||||
|
type: int
|
||||||
|
default: 1
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Number of parallel scrubbing operations per one OSD.
|
||||||
|
info_ru: |
|
||||||
|
Число параллельных операций фоновой проверки на один OSD.
|
||||||
|
- name: scrub_sleep
|
||||||
|
type: ms
|
||||||
|
default: 0
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Additional interval between two consecutive scrubbing operations on one OSD.
|
||||||
|
Can be used to slow down scrubbing if it affects user load too much.
|
||||||
|
info_ru: |
|
||||||
|
Дополнительный интервал ожидания после фоновой проверки каждого объекта на
|
||||||
|
одном OSD. Может использоваться для замедления скраба, если он слишком
|
||||||
|
сильно влияет на пользовательскую нагрузку.
|
||||||
|
- name: scrub_list_limit
|
||||||
|
type: int
|
||||||
|
default: 1000
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Number of objects to list in one listing operation during scrub.
|
||||||
|
info_ru: |
|
||||||
|
Размер загружаемых за одну операцию списков объектов в процессе фоновой
|
||||||
|
проверки.
|
||||||
|
- name: scrub_find_best
|
||||||
|
type: bool
|
||||||
|
default: true
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Find and automatically restore best versions of objects with unmatched
|
||||||
|
copies. In replicated setups, the best version is the version with most
|
||||||
|
matching replicas. In EC setups, the best version is the subset of data
|
||||||
|
and parity chunks without mismatches.
|
||||||
|
|
||||||
|
The hypothetical situation where you might want to disable it is when
|
||||||
|
you have 3 replicas and you are paranoid that 2 HDDs out of 3 may silently
|
||||||
|
corrupt an object in the same way (for example, zero it out) and only
|
||||||
|
1 HDD will remain good. In this case disabling scrub_find_best may help
|
||||||
|
you to recover the data! See also scrub_ec_max_bruteforce below.
|
||||||
|
info_ru: |
|
||||||
|
Находить и автоматически восстанавливать "лучшие версии" объектов с
|
||||||
|
несовпадающими копиями/частями. При использовании репликации "лучшая"
|
||||||
|
версия - версия, доступная в большем числе экземпляров, чем другие. При
|
||||||
|
использовании кодов коррекции ошибок "лучшая" версия - это подмножество
|
||||||
|
частей данных и чётности, полностью соответствующих друг другу.
|
||||||
|
|
||||||
|
Гипотетическая ситуация, в которой вы можете захотеть отключить этот
|
||||||
|
поиск - это если у вас 3 реплики и вы боитесь, что 2 диска из 3 могут
|
||||||
|
незаметно и одинаково повредить данные одного и того же объекта, например,
|
||||||
|
занулив его, и только 1 диск останется неповреждённым. В этой ситуации
|
||||||
|
отключение этого параметра поможет вам восстановить данные! Смотрите также
|
||||||
|
описание следующего параметра - scrub_ec_max_bruteforce.
|
||||||
|
- name: scrub_ec_max_bruteforce
|
||||||
|
type: int
|
||||||
|
default: 100
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Vitastor can locate corrupted chunks in EC setups with more than 1 parity
|
||||||
|
chunk by brute-forcing all possible error locations. This configuration
|
||||||
|
value limits the maximum number of checked combinations. You can try to
|
||||||
|
increase it if you have EC N+K setup with N and K large enough for
|
||||||
|
combination count `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!` to be greater
|
||||||
|
than the default 100.
|
||||||
|
|
||||||
|
If there are too many possible combinations or if multiple combinations give
|
||||||
|
correct results then objects are marked inconsistent and aren't recovered
|
||||||
|
automatically.
|
||||||
|
|
||||||
|
In replicated setups bruteforcing isn't needed, Vitastor just assumes that
|
||||||
|
the variant with most available equal copies is correct. For example, if
|
||||||
|
you have 3 replicas and 1 of them differs, this one is considered to be
|
||||||
|
corrupted. But if there is no "best" version with more copies than all
|
||||||
|
others have then the object is also marked as inconsistent.
|
||||||
|
info_ru: |
|
||||||
|
Vitastor старается определить повреждённые части объектов при использовании
|
||||||
|
EC (кодов коррекции ошибок) с более, чем 1 диском чётности, путём перебора
|
||||||
|
всех возможных комбинаций ошибочных частей. Данное значение конфигурации
|
||||||
|
ограничивает число перебираемых комбинаций. Вы можете попробовать поднять
|
||||||
|
его, если используете схему кодирования EC N+K с N и K, достаточно большими
|
||||||
|
для того, чтобы число сочетаний `C(N+K-1, K-1) = (N+K-1)! / (K-1)! / N!`
|
||||||
|
было больше, чем стандартное значение 100.
|
||||||
|
|
||||||
|
Если возможных комбинаций слишком много или если корректная комбинация не
|
||||||
|
определяется однозначно, объекты помечаются неконсистентными (inconsistent)
|
||||||
|
и не восстанавливаются автоматически.
|
||||||
|
|
||||||
|
При использовании репликации перебор не нужен, Vitastor просто предполагает,
|
||||||
|
что вариант объекта с наибольшим количеством одинаковых копий корректен.
|
||||||
|
Например, если вы используете 3 реплики и 1 из них отличается, эта 1 копия
|
||||||
|
считается некорректной. Однако, если "лучшую" версию с числом доступных
|
||||||
|
копий большим, чем у всех других версий, найти невозможно, то объект тоже
|
||||||
|
маркируется неконсистентным.
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
## Debian
|
## Debian
|
||||||
|
|
||||||
- Trust Vitastor package signing key:
|
- Trust Vitastor package signing key:
|
||||||
`wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -`
|
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||||
- Add Vitastor package repository to your /etc/apt/sources.list:
|
- Add Vitastor package repository to your /etc/apt/sources.list:
|
||||||
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
||||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||||
@@ -20,15 +20,18 @@
|
|||||||
## CentOS
|
## CentOS
|
||||||
|
|
||||||
- Add Vitastor package repository:
|
- Add Vitastor package repository:
|
||||||
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm`
|
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
|
||||||
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm`
|
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
|
||||||
|
- AlmaLinux 9 and other RHEL 9 clones (Rocky, Oracle...): `dnf install https://vitastor.io/rpms/centos/9/vitastor-release.rpm`
|
||||||
- Enable EPEL: `yum/dnf install epel-release`
|
- Enable EPEL: `yum/dnf install epel-release`
|
||||||
- Enable additional CentOS repositories:
|
- Enable additional CentOS repositories:
|
||||||
- CentOS 7: `yum install centos-release-scl`
|
- CentOS 7: `yum install centos-release-scl`
|
||||||
- CentOS 8: `dnf install centos-release-advanced-virtualization`
|
- CentOS 8: `dnf install centos-release-advanced-virtualization`
|
||||||
|
- RHEL 9 clones: not required
|
||||||
- Enable elrepo-kernel:
|
- Enable elrepo-kernel:
|
||||||
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
|
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
|
||||||
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
|
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
|
||||||
|
- RHEL 9 clones: `dnf install https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm`
|
||||||
- Install packages: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
|
- Install packages: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
|
||||||
|
|
||||||
## Installation requirements
|
## Installation requirements
|
||||||
@@ -42,3 +45,10 @@
|
|||||||
- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs,
|
- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs,
|
||||||
for example [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
for example [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
||||||
- node.js 10 or newer
|
- node.js 10 or newer
|
||||||
|
|
||||||
|
## Version archive
|
||||||
|
|
||||||
|
All previous Vitastor and other components (QEMU, etcd...) package builds
|
||||||
|
can be found here:
|
||||||
|
|
||||||
|
https://vitastor.io/archive/
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
## Debian
|
## Debian
|
||||||
|
|
||||||
- Добавьте ключ репозитория Vitastor:
|
- Добавьте ключ репозитория Vitastor:
|
||||||
`wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -`
|
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||||
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
||||||
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
||||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||||
@@ -20,15 +20,18 @@
|
|||||||
## CentOS
|
## CentOS
|
||||||
|
|
||||||
- Добавьте в систему репозиторий Vitastor:
|
- Добавьте в систему репозиторий Vitastor:
|
||||||
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm`
|
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
|
||||||
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm`
|
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
|
||||||
|
- AlmaLinux 9 и другие клоны RHEL 9 (Rocky, Oracle...): `dnf install https://vitastor.io/rpms/centos/9/vitastor-release.rpm`
|
||||||
- Включите EPEL: `yum/dnf install epel-release`
|
- Включите EPEL: `yum/dnf install epel-release`
|
||||||
- Включите дополнительные репозитории CentOS:
|
- Включите дополнительные репозитории CentOS:
|
||||||
- CentOS 7: `yum install centos-release-scl`
|
- CentOS 7: `yum install centos-release-scl`
|
||||||
- CentOS 8: `dnf install centos-release-advanced-virtualization`
|
- CentOS 8: `dnf install centos-release-advanced-virtualization`
|
||||||
|
- Клоны RHEL 9: не нужно
|
||||||
- Включите elrepo-kernel:
|
- Включите elrepo-kernel:
|
||||||
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
|
- CentOS 7: `yum install https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm`
|
||||||
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
|
- CentOS 8: `dnf install https://www.elrepo.org/elrepo-release-8.el8.elrepo.noarch.rpm`
|
||||||
|
- Клоны RHEL 9: `dnf install https://www.elrepo.org/elrepo-release-9.el9.elrepo.noarch.rpm`
|
||||||
- Установите пакеты: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
|
- Установите пакеты: `yum/dnf install vitastor lpsolve etcd kernel-ml qemu-kvm`
|
||||||
|
|
||||||
## Установочные требования
|
## Установочные требования
|
||||||
@@ -41,3 +44,10 @@
|
|||||||
- etcd 3.4.15 или новее. Более старые версии не будут работать из-за разных багов,
|
- etcd 3.4.15 или новее. Более старые версии не будут работать из-за разных багов,
|
||||||
например, [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
например, [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
||||||
- node.js 10 или новее
|
- node.js 10 или новее
|
||||||
|
|
||||||
|
## Архив предыдущих версий
|
||||||
|
|
||||||
|
Все предыдущие сборки пакетов Vitastor и других компонентов, таких, как QEMU
|
||||||
|
и etcd, можно скачать по следующей ссылке:
|
||||||
|
|
||||||
|
https://vitastor.io/archive/
|
||||||
|
@@ -6,10 +6,10 @@
|
|||||||
|
|
||||||
# Proxmox VE
|
# Proxmox VE
|
||||||
|
|
||||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-7.3 are supported):
|
To enable Vitastor support in Proxmox Virtual Environment (6.4-7.4 are supported):
|
||||||
|
|
||||||
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
||||||
buster for 6.4, bullseye for 7.3, pve7.1 for 7.1, pve7.2 for 7.2
|
buster for 6.4, bullseye for 7.4, pve7.1 for 7.1, pve7.2 for 7.2, pve7.3 for 7.3
|
||||||
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
||||||
- Define storage in `/etc/pve/storage.cfg` (see below)
|
- Define storage in `/etc/pve/storage.cfg` (see below)
|
||||||
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
||||||
|
@@ -6,10 +6,10 @@
|
|||||||
|
|
||||||
# Proxmox
|
# Proxmox
|
||||||
|
|
||||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.3):
|
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.4):
|
||||||
|
|
||||||
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
||||||
buster для 6.4, bullseye для 7.3, pve7.1 для 7.1, pve7.2 для 7.2
|
buster для 6.4, bullseye для 7.4, pve7.1 для 7.1, pve7.2 для 7.2, pve7.3 для 7.3
|
||||||
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
||||||
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
||||||
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
||||||
|
@@ -29,6 +29,7 @@
|
|||||||
- Snapshots and copy-on-write image clones
|
- Snapshots and copy-on-write image clones
|
||||||
- [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
|
- [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
|
||||||
- [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
|
- [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
|
||||||
|
- [Scrubbing without checksums](../config/osd.en.md#auto_scrub) (verification of copies)
|
||||||
|
|
||||||
## Plugins and tools
|
## Plugins and tools
|
||||||
|
|
||||||
@@ -54,7 +55,6 @@ The following features are planned for the future:
|
|||||||
- iSCSI proxy
|
- iSCSI proxy
|
||||||
- Multi-threaded client
|
- Multi-threaded client
|
||||||
- Faster failover
|
- Faster failover
|
||||||
- Scrubbing without checksums (verification of replicas)
|
|
||||||
- Checksums
|
- Checksums
|
||||||
- Tiered storage (SSD caching)
|
- Tiered storage (SSD caching)
|
||||||
- NVDIMM support
|
- NVDIMM support
|
||||||
|
@@ -31,6 +31,7 @@
|
|||||||
- Снапшоты и copy-on-write клоны
|
- Снапшоты и copy-on-write клоны
|
||||||
- [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
|
- [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
|
||||||
- [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
|
- [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
|
||||||
|
- [Фоновая проверка целостности без контрольных сумм](../config/osd.ru.md#auto_scrub) (сверка копий)
|
||||||
|
|
||||||
## Драйверы и инструменты
|
## Драйверы и инструменты
|
||||||
|
|
||||||
@@ -54,7 +55,6 @@
|
|||||||
- iSCSI-прокси
|
- iSCSI-прокси
|
||||||
- Многопоточный клиент
|
- Многопоточный клиент
|
||||||
- Более быстрое переключение при отказах
|
- Более быстрое переключение при отказах
|
||||||
- Фоновая проверка целостности без контрольных сумм (сверка реплик)
|
|
||||||
- Контрольные суммы
|
- Контрольные суммы
|
||||||
- Поддержка SSD-кэширования (tiered storage)
|
- Поддержка SSD-кэширования (tiered storage)
|
||||||
- Поддержка NVDIMM
|
- Поддержка NVDIMM
|
||||||
|
@@ -45,7 +45,9 @@ On the monitor hosts:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
- Initialize OSDs:
|
- Initialize OSDs:
|
||||||
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`
|
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
|
||||||
|
`--disable_data_fsync off` to leave disk cache enabled if you use desktop
|
||||||
|
SSDs without capacitors.
|
||||||
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||||
Pass all your devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own.
|
Pass all your devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own.
|
||||||
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
|
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
|
||||||
@@ -53,7 +55,9 @@ On the monitor hosts:
|
|||||||
but some free unpartitioned space must be available because the script creates new partitions for journals.
|
but some free unpartitioned space must be available because the script creates new partitions for journals.
|
||||||
- You can change OSD configuration in units or in `vitastor.conf`.
|
- You can change OSD configuration in units or in `vitastor.conf`.
|
||||||
Check [Configuration Reference](../config.en.md) for parameter descriptions.
|
Check [Configuration Reference](../config.en.md) for parameter descriptions.
|
||||||
- If all your drives have capacitors, create global configuration in etcd: \
|
- If all your drives have capacitors, or even if they don't but you ran `vitastor-disk`
|
||||||
|
without `--disable_data_fsync off` at the first step, then put the following
|
||||||
|
setting into etcd: \
|
||||||
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
|
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
|
||||||
- Start all OSDs: `systemctl start vitastor.target`
|
- Start all OSDs: `systemctl start vitastor.target`
|
||||||
|
|
||||||
@@ -70,11 +74,15 @@ For EC pools the configuration should look like the following:
|
|||||||
|
|
||||||
```
|
```
|
||||||
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
||||||
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}'
|
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
After you do this, one of the monitors will configure PGs and OSDs will start them.
|
||||||
|
|
||||||
|
If you use HDDs you should also add `"block_size": 1048576` to pool configuration.
|
||||||
|
The other option is to add it into /vitastor/config/global, in this case it will
|
||||||
|
apply to all pools by default.
|
||||||
|
|
||||||
## Check cluster status
|
## Check cluster status
|
||||||
|
|
||||||
`vitastor-cli status`
|
`vitastor-cli status`
|
||||||
|
@@ -45,7 +45,9 @@
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
- Инициализируйте OSD:
|
- Инициализируйте OSD:
|
||||||
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`
|
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
|
||||||
|
десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
|
||||||
|
опцию `--disable_data_fsync off`.
|
||||||
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||||
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
|
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
|
||||||
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
|
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
|
||||||
@@ -54,8 +56,11 @@
|
|||||||
для журналов, на SSD должно быть доступно свободное нераспределённое место.
|
для журналов, на SSD должно быть доступно свободное нераспределённое место.
|
||||||
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
|
- Вы можете менять параметры OSD в юнитах systemd или в `vitastor.conf`. Описания параметров
|
||||||
смотрите в [справке по конфигурации](../config.ru.md).
|
смотрите в [справке по конфигурации](../config.ru.md).
|
||||||
- Если все ваши диски - серверные с конденсаторами, пропишите это в глобальную конфигурацию в etcd: \
|
- Если все ваши диски - серверные с конденсаторами, и даже если нет, но при этом
|
||||||
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`
|
вы не добавляли опцию `--disable_data_fsync off` на первом шаге, а `vitastor-disk`
|
||||||
|
не ругался на невозможность отключения кэша дисков, пропишите следующую настройку
|
||||||
|
в глобальную конфигурацию в etcd: \
|
||||||
|
`etcdctl --endpoints=... put /vitastor/config/global '{"immediate_commit":"all"}'`.
|
||||||
- Запустите все OSD: `systemctl start vitastor.target`
|
- Запустите все OSD: `systemctl start vitastor.target`
|
||||||
|
|
||||||
## Создайте пул
|
## Создайте пул
|
||||||
@@ -71,11 +76,15 @@ etcdctl --endpoints=... put /vitastor/config/pools '{"1":{"name":"testpool",
|
|||||||
|
|
||||||
```
|
```
|
||||||
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
etcdctl --endpoints=... put /vitastor/config/pools '{"2":{"name":"ecpool",
|
||||||
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}'
|
"scheme":"ec","pg_size":4,"parity_chunks":2,"pg_minsize":2,"pg_count":256,"failure_domain":"host"}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
После этого один из мониторов должен сконфигурировать PG, а OSD должны запустить их.
|
||||||
|
|
||||||
|
Если вы используете HDD-диски, то добавьте в конфигурацию пулов опцию `"block_size": 1048576`.
|
||||||
|
Также эту опцию можно добавить в /vitastor/config/global, в этом случае она будет
|
||||||
|
применяться ко всем пулам по умолчанию.
|
||||||
|
|
||||||
## Проверьте состояние кластера
|
## Проверьте состояние кластера
|
||||||
|
|
||||||
`vitastor-cli status`
|
`vitastor-cli status`
|
||||||
|
@@ -35,15 +35,24 @@ Write amplification for 4 KB blocks is usually 3-5 in Vitastor:
|
|||||||
If you manage to get an SSD which handles 512 byte blocks well (Optane?) you may
|
If you manage to get an SSD which handles 512 byte blocks well (Optane?) you may
|
||||||
lower 1, 3 and 4 to 512 bytes (1/8 of data size) and get WA as low as 2.375.
|
lower 1, 3 and 4 to 512 bytes (1/8 of data size) and get WA as low as 2.375.
|
||||||
|
|
||||||
|
Implementing NVDIMM support can basically eliminate WA altogether - all extra writes will
|
||||||
|
go to DRAM memory. But this requires a test cluster with NVDIMM - please contact me
|
||||||
|
if you want to provide me with such a cluster for tests.
|
||||||
|
|
||||||
Lazy fsync also reduces WA for parallel workloads because journal blocks are only
|
Lazy fsync also reduces WA for parallel workloads because journal blocks are only
|
||||||
written when they fill up or fsync is requested.
|
written when they fill up or fsync is requested.
|
||||||
|
|
||||||
## In Practice
|
## In Practice
|
||||||
|
|
||||||
In practice, using tests from [Understanding Performance](understanding.en.md)
|
In practice, using tests from [Understanding Performance](understanding.en.md), decent TCP network,
|
||||||
and good server-grade SSD/NVMe drives, you should head for:
|
good server-grade SSD/NVMe drives and disabled CPU power saving, you should head for:
|
||||||
- At least 5000 T1Q1 replicated read and write iops (maximum 0.2ms latency)
|
- At least 5000 T1Q1 replicated read and write iops (maximum 0.2ms latency)
|
||||||
|
- At least 5000 T1Q1 EC read IOPS and at least 2200 EC write IOPS (maximum 0.45ms latency)
|
||||||
- At least ~80k parallel read iops or ~30k write iops per 1 core (1 OSD)
|
- At least ~80k parallel read iops or ~30k write iops per 1 core (1 OSD)
|
||||||
- Disk-speed or wire-speed linear reads and writes, whichever is the bottleneck in your case
|
- Disk-speed or wire-speed linear reads and writes, whichever is the bottleneck in your case
|
||||||
|
|
||||||
Lower results may mean that you have bad drives, bad network or some kind of misconfiguration.
|
Lower results may mean that you have bad drives, bad network or some kind of misconfiguration.
|
||||||
|
|
||||||
|
Current latency records:
|
||||||
|
- 9668 T1Q1 replicated write iops (0.103 ms latency) with TCP and NVMe
|
||||||
|
- 9143 T1Q1 replicated read iops (0.109 ms latency) with TCP and NVMe
|
||||||
|
@@ -36,6 +36,25 @@ WA (мультипликатор записи) для 4 КБ блоков в Vit
|
|||||||
Если вы найдёте SSD, хорошо работающий с 512-байтными блоками данных (Optane?),
|
Если вы найдёте SSD, хорошо работающий с 512-байтными блоками данных (Optane?),
|
||||||
то 1, 3 и 4 можно снизить до 512 байт (1/8 от размера данных) и получить WA всего 2.375.
|
то 1, 3 и 4 можно снизить до 512 байт (1/8 от размера данных) и получить WA всего 2.375.
|
||||||
|
|
||||||
|
Если реализовать поддержку NVDIMM, то WA можно, условно говоря, ликвидировать вообще - все
|
||||||
|
дополнительные операции записи смогут обслуживаться DRAM памятью. Но для этого необходим
|
||||||
|
тестовый кластер с NVDIMM - пишите, если готовы предоставить такой для тестов.
|
||||||
|
|
||||||
Кроме того, WA снижается при использовании отложенного/ленивого сброса при параллельной
|
Кроме того, WA снижается при использовании отложенного/ленивого сброса при параллельной
|
||||||
нагрузке, т.к. блоки журнала записываются на диск только когда они заполняются или явным
|
нагрузке, т.к. блоки журнала записываются на диск только когда они заполняются или явным
|
||||||
образом запрашивается fsync.
|
образом запрашивается fsync.
|
||||||
|
|
||||||
|
## На практике
|
||||||
|
|
||||||
|
На практике, используя тесты fio со страницы [Понимание сути производительности систем хранения](understanding.ru.md),
|
||||||
|
нормальную TCP-сеть, хорошие серверные SSD/NVMe, при отключённом энергосбережении процессоров вы можете рассчитывать на:
|
||||||
|
- От 5000 IOPS в 1 поток (T1Q1) и на чтение, и на запись при использовании репликации (задержка до 0.2мс)
|
||||||
|
- От 5000 IOPS в 1 поток (T1Q1) на чтение и 2200 IOPS в 1 поток на запись при использовании EC (задержка до 0.45мс)
|
||||||
|
- От 80000 IOPS на чтение в параллельном режиме на 1 ядро, от 30000 IOPS на запись на 1 ядро (на 1 OSD)
|
||||||
|
- Скорость параллельного линейного чтения и записи, равная меньшему значению из скорости дисков или сети
|
||||||
|
|
||||||
|
Худшие результаты означают, что у вас либо медленные диски, либо медленная сеть, либо что-то неправильно настроено.
|
||||||
|
|
||||||
|
Зафиксированный на данный момент рекорд задержки:
|
||||||
|
- 9668 IOPS (0.103 мс задержка) в 1 поток (T1Q1) на запись с TCP и NVMe при использовании репликации
|
||||||
|
- 9143 IOPS (0.109 мс задержка) в 1 поток (T1Q1) на чтение с TCP и NVMe при использовании репликации
|
||||||
|
@@ -14,11 +14,14 @@ It supports the following commands:
|
|||||||
- [df](#df)
|
- [df](#df)
|
||||||
- [ls](#ls)
|
- [ls](#ls)
|
||||||
- [create](#create)
|
- [create](#create)
|
||||||
|
- [snap-create](#create)
|
||||||
- [modify](#modify)
|
- [modify](#modify)
|
||||||
- [rm](#rm)
|
- [rm](#rm)
|
||||||
- [flatten](#flatten)
|
- [flatten](#flatten)
|
||||||
- [rm-data](#rm-data)
|
- [rm-data](#rm-data)
|
||||||
- [merge-data](#merge-data)
|
- [merge-data](#merge-data)
|
||||||
|
- [describe](#describe)
|
||||||
|
- [fix](#fix)
|
||||||
- [alloc-osd](#alloc-osd)
|
- [alloc-osd](#alloc-osd)
|
||||||
- [rm-osd](#rm-osd)
|
- [rm-osd](#rm-osd)
|
||||||
|
|
||||||
@@ -123,6 +126,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
|||||||
|
|
||||||
Create a snapshot of image `<name>` (either form can be used). May be used live if only a single writer is active.
|
Create a snapshot of image `<name>` (either form can be used). May be used live if only a single writer is active.
|
||||||
|
|
||||||
|
See also [how to export snapshots](qemu.en.md#exporting-snapshots).
|
||||||
|
|
||||||
## modify
|
## modify
|
||||||
|
|
||||||
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
||||||
@@ -171,6 +176,51 @@ Merge layer data without changing metadata. Merge `<from>`..`<to>` to `<target>`
|
|||||||
`<to>` must be a child of `<from>` and `<target>` may be one of the layers between
|
`<to>` must be a child of `<from>` and `<target>` may be one of the layers between
|
||||||
`<from>` and `<to>`, including `<from>` and `<to>`.
|
`<from>` and `<to>`, including `<from>` and `<to>`.
|
||||||
|
|
||||||
|
## describe
|
||||||
|
|
||||||
|
`vitastor-cli describe [--osds <osds>] [--object-state <states>] [--pool <pool>]
|
||||||
|
[--inode <ino>] [--min-inode <ino>] [--max-inode <ino>]
|
||||||
|
[--min-offset <offset>] [--max-offset <offset>]`
|
||||||
|
|
||||||
|
Describe unclean object locations in the cluster.
|
||||||
|
|
||||||
|
```
|
||||||
|
--osds <osds>
|
||||||
|
Only list objects from primary OSD(s) <osds>.
|
||||||
|
--object-state <states>
|
||||||
|
Only list objects in given state(s). State(s) may include:
|
||||||
|
degraded, misplaced, incomplete, corrupted, inconsistent.
|
||||||
|
--pool <pool name or number>
|
||||||
|
Only list objects in the given pool.
|
||||||
|
--inode, --min-inode, --max-inode
|
||||||
|
Restrict listing to specific inode numbers.
|
||||||
|
--min-offset, --max-offset
|
||||||
|
Restrict listing to specific offsets inside inodes.
|
||||||
|
```
|
||||||
|
|
||||||
|
## fix
|
||||||
|
|
||||||
|
`vitastor-cli fix [--objects <objects>] [--bad-osds <osds>] [--part <part>] [--check no]`
|
||||||
|
|
||||||
|
Fix inconsistent objects in the cluster by deleting some copies.
|
||||||
|
|
||||||
|
```
|
||||||
|
--objects <objects>
|
||||||
|
Objects to fix, either in plain text or JSON format. If not specified,
|
||||||
|
object list will be read from STDIN in one of the same formats.
|
||||||
|
Plain text format: 0x<inode>:0x<stripe> <any delimiter> 0x<inode>:0x<stripe> ...
|
||||||
|
JSON format: [{"inode":"0x...","stripe":"0x..."},...]
|
||||||
|
--bad-osds <osds>
|
||||||
|
Remove inconsistent copies/parts of objects from these OSDs, effectively
|
||||||
|
marking them bad and allowing Vitastor to recover objects from other copies.
|
||||||
|
--part <number>
|
||||||
|
Only remove EC part <number> (from 0 to pg_size-1), required for extreme
|
||||||
|
edge cases where one OSD has multiple parts of an EC object.
|
||||||
|
--check no
|
||||||
|
Do not recheck that requested objects are actually inconsistent,
|
||||||
|
delete requested copies/parts anyway.
|
||||||
|
```
|
||||||
|
|
||||||
## alloc-osd
|
## alloc-osd
|
||||||
|
|
||||||
`vitastor-cli alloc-osd`
|
`vitastor-cli alloc-osd`
|
||||||
|
@@ -15,6 +15,7 @@ vitastor-cli - интерфейс командной строки для адм
|
|||||||
- [df](#df)
|
- [df](#df)
|
||||||
- [ls](#ls)
|
- [ls](#ls)
|
||||||
- [create](#create)
|
- [create](#create)
|
||||||
|
- [snap-create](#create)
|
||||||
- [modify](#modify)
|
- [modify](#modify)
|
||||||
- [rm](#rm)
|
- [rm](#rm)
|
||||||
- [flatten](#flatten)
|
- [flatten](#flatten)
|
||||||
@@ -126,6 +127,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
|||||||
Создать снимок образа `<name>` (можно использовать любую форму команды). Снимок можно создавать без остановки
|
Создать снимок образа `<name>` (можно использовать любую форму команды). Снимок можно создавать без остановки
|
||||||
клиентов, если пишущий клиент максимум 1.
|
клиентов, если пишущий клиент максимум 1.
|
||||||
|
|
||||||
|
Смотрите также информацию о том, [как экспортировать снимки](qemu.ru.md#экспорт-снимков).
|
||||||
|
|
||||||
## modify
|
## modify
|
||||||
|
|
||||||
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
||||||
@@ -181,6 +184,59 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
|||||||
в целевой образ `<target>`. `<to>` должен быть дочерним образом `<from>`, а `<target>`
|
в целевой образ `<target>`. `<to>` должен быть дочерним образом `<from>`, а `<target>`
|
||||||
должен быть одним из слоёв между `<from>` и `<to>`, включая сами `<from>` и `<to>`.
|
должен быть одним из слоёв между `<from>` и `<to>`, включая сами `<from>` и `<to>`.
|
||||||
|
|
||||||
|
## describe
|
||||||
|
|
||||||
|
`vitastor-cli describe [--osds <osds>] [--object-state <состояния>] [--pool <пул>]
|
||||||
|
[--inode <номер>] [--min-inode <номер>] [--max-inode <номер>]
|
||||||
|
[--min-offset <смещение>] [--max-offset <смещение>]`
|
||||||
|
|
||||||
|
Описать состояние "грязных" объектов в кластере, то есть таких объектов, копии
|
||||||
|
или части которых хранятся на наборе OSD, не равном целевому.
|
||||||
|
|
||||||
|
```
|
||||||
|
--osds <osds>
|
||||||
|
Перечислять только объекты с первичных OSD из списка <osds>.
|
||||||
|
--object-state <состояния>
|
||||||
|
Перечислять только объекты в указанных состояниях. Возможные состояния
|
||||||
|
объектов:
|
||||||
|
- degraded - деградированная избыточность
|
||||||
|
- misplaced - перемещённый
|
||||||
|
- incomplete - нечитаемый из-за потери большего числа частей, чем допустимо
|
||||||
|
- corrupted - с одной или более повреждённой частью
|
||||||
|
- inconsistent - неконсистентный, с неоднозначным расхождением копий/частей
|
||||||
|
--pool <имя или ID пула>
|
||||||
|
Перечислять только объекты из заданного пула.
|
||||||
|
--inode, --min-inode, --max-inode
|
||||||
|
Перечислять только объекты из указанных номеров инодов (образов).
|
||||||
|
--min-offset, --max-offset
|
||||||
|
Перечислять только объекты с заданных смещений внутри образов.
|
||||||
|
```
|
||||||
|
|
||||||
|
## fix
|
||||||
|
|
||||||
|
`vitastor-cli fix [--objects <объекты>] [--bad-osds <osds>] [--part <номер>] [--check no]`
|
||||||
|
|
||||||
|
Исправить неконсистентные (неоднозначные) объекты путём удаления части копий.
|
||||||
|
|
||||||
|
```
|
||||||
|
--objects <объекты>
|
||||||
|
Объекты для исправления - в простом текстовом или JSON формате. Если опция
|
||||||
|
не указана, список объектов читается со стандартного ввода в тех же форматах.
|
||||||
|
Простой формат: 0x<инод>:0x<смещение> <любой разделитель> 0x<инод>:0x<смещение> ...
|
||||||
|
Формат JSON: [{"inode":"0x<инод>","stripe":"0x<смещение>"},...]
|
||||||
|
--bad-osds <osds>
|
||||||
|
Удалить неконсистентные копии/части объектов с данных OSD, таким образом
|
||||||
|
признавая потерю этих копий и позволяя Vitastor-у восстановить объекты из
|
||||||
|
других копий.
|
||||||
|
--part <номер>
|
||||||
|
Удалить только части EC с заданным номером (от 0 до pg_size-1). Нужно только
|
||||||
|
в редких граничных случаях, когда один и тот же OSD содержит несколько частей
|
||||||
|
одного EC-объекта.
|
||||||
|
--check no
|
||||||
|
Не перепроверять, что заданные объекты действительно в неконсистентном
|
||||||
|
состоянии и просто удалять заданные части.
|
||||||
|
```
|
||||||
|
|
||||||
## alloc-osd
|
## alloc-osd
|
||||||
|
|
||||||
`vitastor-cli alloc-osd`
|
`vitastor-cli alloc-osd`
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
[Documentation](../../README.md#documentation) → Usage → Disk Tool
|
[Documentation](../../README.md#documentation) → Usage → Disk management tool
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
[Документация](../../README-ru.md#документация) → Использование → Управление дисками
|
[Документация](../../README-ru.md#документация) → Использование → Инструмент управления дисками
|
||||||
|
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
@@ -25,6 +25,23 @@ It will output a block device name like /dev/nbd0 which you can then use as a no
|
|||||||
|
|
||||||
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
|
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
|
||||||
|
|
||||||
|
Additional options for map command:
|
||||||
|
|
||||||
|
* `--nbd_timeout 30` \
|
||||||
|
Timeout for I/O operations in seconds after exceeding which the kernel stops
|
||||||
|
the device. You can set it to 0 to disable the timeout, but beware that you
|
||||||
|
won't be able to stop the device at all if vitastor-nbd process dies.
|
||||||
|
* `--nbd_max_devices 64 --nbd_max_part 3` \
|
||||||
|
Options for the `nbd` kernel module when modprobing it (`nbds_max` and `max_part`).
|
||||||
|
Note that the maximum allowed (nbds_max)*(1+max_part) is 256.
|
||||||
|
* `--logfile /path/to/log/file.txt` \
|
||||||
|
Write log messages to the specified file instead of dropping them (in background mode)
|
||||||
|
or printing them to the standard output (in foreground mode).
|
||||||
|
* `--dev_num N` \
|
||||||
|
Use the specified device /dev/nbdN instead of automatic selection.
|
||||||
|
* `--foreground 1` \
|
||||||
|
Stay in foreground, do not daemonize.
|
||||||
|
|
||||||
## Unmap image
|
## Unmap image
|
||||||
|
|
||||||
To unmap the device run:
|
To unmap the device run:
|
||||||
@@ -32,3 +49,27 @@ To unmap the device run:
|
|||||||
```
|
```
|
||||||
vitastor-nbd unmap /dev/nbd0
|
vitastor-nbd unmap /dev/nbd0
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## List mapped images
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd ls [--json]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example output (normal format):
|
||||||
|
|
||||||
|
```
|
||||||
|
/dev/nbd0
|
||||||
|
image: bench
|
||||||
|
pid: 584536
|
||||||
|
|
||||||
|
/dev/nbd1
|
||||||
|
image: bench1
|
||||||
|
pid: 584546
|
||||||
|
```
|
||||||
|
|
||||||
|
Example output (JSON format):
|
||||||
|
|
||||||
|
```
|
||||||
|
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
|
||||||
|
```
|
||||||
|
@@ -30,6 +30,27 @@ vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg
|
|||||||
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
|
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
|
||||||
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
|
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
|
||||||
|
|
||||||
|
Дополнительные опции для команды подключения NBD-устройства:
|
||||||
|
|
||||||
|
* `--nbd_timeout 30` \
|
||||||
|
Максимальное время выполнения любой операции чтения/записи в секундах, при
|
||||||
|
превышении которого ядро остановит NBD-устройство. Вы можете установить опцию
|
||||||
|
в 0, чтобы отключить ограничение времени, но имейте в виду, что в этом случае
|
||||||
|
вы вообще не сможете отключить NBD-устройство при нештатном завершении процесса
|
||||||
|
vitastor-nbd.
|
||||||
|
* `--nbd_max_devices 64 --nbd_max_part 3` \
|
||||||
|
Опции, передаваемые модулю ядра nbd, если его загружает vitastor-nbd
|
||||||
|
(`nbds_max` и `max_part`). Имейте в виду, что (nbds_max)*(1+max_part)
|
||||||
|
обычно не должно превышать 256.
|
||||||
|
* `--logfile /path/to/log/file.txt` \
|
||||||
|
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
|
||||||
|
при фоновом режиме запуска или печати на стандартный вывод при запуске
|
||||||
|
в консоли с `--foreground 1`.
|
||||||
|
* `--dev_num N` \
|
||||||
|
Использовать заданное устройство `/dev/nbdN` вместо автоматического подбора.
|
||||||
|
* `--foreground 1` \
|
||||||
|
Не уводить процесс в фоновый режим.
|
||||||
|
|
||||||
## Отключить устройство
|
## Отключить устройство
|
||||||
|
|
||||||
Для отключения устройства выполните:
|
Для отключения устройства выполните:
|
||||||
@@ -37,3 +58,27 @@ vitastor-nbd map --etcd_address 10.115.0.10:2379/v3 --image testimg
|
|||||||
```
|
```
|
||||||
vitastor-nbd unmap /dev/nbd0
|
vitastor-nbd unmap /dev/nbd0
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Вывести подключённые устройства
|
||||||
|
|
||||||
|
```
|
||||||
|
vitastor-nbd ls [--json]
|
||||||
|
```
|
||||||
|
|
||||||
|
Пример вывода в обычном формате:
|
||||||
|
|
||||||
|
```
|
||||||
|
/dev/nbd0
|
||||||
|
image: bench
|
||||||
|
pid: 584536
|
||||||
|
|
||||||
|
/dev/nbd1
|
||||||
|
image: bench1
|
||||||
|
pid: 584546
|
||||||
|
```
|
||||||
|
|
||||||
|
Пример вывода в JSON-формате:
|
||||||
|
|
||||||
|
```
|
||||||
|
{"/dev/nbd0": {"image": "bench", "pid": 584536}, "/dev/nbd1": {"image": "bench1", "pid": 584546}}
|
||||||
|
```
|
||||||
|
@@ -46,3 +46,40 @@ qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=192.168.7
|
|||||||
|
|
||||||
You can also specify `:pool=<POOL>:inode=<INODE>:size=<SIZE>` instead of `:image=<IMAGE>`
|
You can also specify `:pool=<POOL>:inode=<INODE>:size=<SIZE>` instead of `:image=<IMAGE>`
|
||||||
if you don't want to use inode metadata.
|
if you don't want to use inode metadata.
|
||||||
|
|
||||||
|
### Exporting snapshots
|
||||||
|
|
||||||
|
Starting with 0.8.4, you can also export individual layers (snapshot diffs) using `qemu-img`.
|
||||||
|
|
||||||
|
Suppose you have an image `testimg` and a snapshot `testimg@0` created with `vitastor-cli snap-create testimg@0`.
|
||||||
|
|
||||||
|
Then you can export the `testimg@0` snapshot and the data written to `testimg` after creating
|
||||||
|
the snapshot separately using the following commands (key points are using `skip-parents=1` and
|
||||||
|
`-B backing_file` option):
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg@0' \
|
||||||
|
-O qcow2 testimg_0.qcow2
|
||||||
|
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg:skip-parents=1' \
|
||||||
|
-O qcow2 -o 'cluster_size=4k' -B testimg_0.qcow2 testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
In fact, with `cluster_size=4k` any QCOW2 file can be used instead of `-B testimg_0.qcow2`, even an empty one.
|
||||||
|
|
||||||
|
QCOW2 `cluster_size=4k` option is required if you want `testimg.qcow2` to contain only the data
|
||||||
|
overwritten **exactly** in the child layer. With the default 64 KB QCOW2 cluster size you'll
|
||||||
|
get a bit of extra data from parent layers, i.e. a 4 KB overwrite will result in `testimg.qcow2`
|
||||||
|
containing 64 KB of data. And this extra data will be taken by `qemu-img` from the file passed
|
||||||
|
in `-B` option, so you really need 4 KB cluster if you use an empty image in `-B`.
|
||||||
|
|
||||||
|
After this procedure you'll get two chained QCOW2 images. To detach `testimg.qcow2` from
|
||||||
|
its parent, run:
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img rebase -u -b '' testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
This can be used for backups. Just note that exporting an image that is currently being written to
|
||||||
|
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
|
||||||
|
on a live VM.
|
||||||
|
@@ -50,3 +50,40 @@ qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=10.115.0.
|
|||||||
|
|
||||||
Если вы не хотите обращаться к образу по имени, вместо `:image=<IMAGE>` можно указать номер пула, номер инода и размер:
|
Если вы не хотите обращаться к образу по имени, вместо `:image=<IMAGE>` можно указать номер пула, номер инода и размер:
|
||||||
`:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
|
`:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
|
||||||
|
|
||||||
|
### Экспорт снимков
|
||||||
|
|
||||||
|
Начиная с 0.8.4 вы можете экспортировать отдельные слои (изменения в снимках) с помощью `qemu-img`.
|
||||||
|
|
||||||
|
Допустим, что у вас есть образ `testimg` и его снимок `testimg@0`, созданный с помощью `vitastor-cli snap-create testimg@0`.
|
||||||
|
|
||||||
|
Тогда вы можете выгрузить снимок `testimg@0` и данные, изменённые в `testimg` после создания снимка, отдельно,
|
||||||
|
с помощью следующих команд (ключевые моменты - использование `skip-parents=1` и опции `-B backing_file.qcow2`):
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg@0' \
|
||||||
|
-O qcow2 testimg_0.qcow2
|
||||||
|
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg:skip-parents=1' \
|
||||||
|
-O qcow2 -o 'cluster_size=4k' -B testimg_0.qcow2 testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
На самом деле, с `cluster_size=4k` вместо `-B testimg_0.qcow2` можно использовать любой qcow2-файл,
|
||||||
|
даже пустой.
|
||||||
|
|
||||||
|
Опция QCOW2 `cluster_size=4k` нужна, если вы хотите, чтобы `testimg.qcow2` содержал **в точности**
|
||||||
|
данные, перезаписанные в дочернем слое. С размером кластера QCOW2 по умолчанию, составляющим 64 КБ,
|
||||||
|
вы получите немного "лишних" данных из родительских слоёв - перезапись 4 КБ будет приводить к тому,
|
||||||
|
что в `testimg.qcow2` будет появляться 64 КБ данных. Причём "лишние" данные qemu-img будет брать
|
||||||
|
как раз из файла, указанного в опции `-B`, так что если там указан пустой образ, кластер обязан быть 4 КБ.
|
||||||
|
|
||||||
|
После данной процедуры вы получите два QCOW2-образа, связанных в цепочку. Чтобы "отцепить" образ
|
||||||
|
`testimg.qcow2` от базового, выполните:
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img rebase -u -b '' testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
|
||||||
|
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
|
||||||
|
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
|
||||||
|
2
json11
2
json11
Submodule json11 updated: 52a3af664f...fd37016cf8
@@ -43,16 +43,16 @@ function finish_pg_history(merged_history)
|
|||||||
merged_history.all_peers = Object.values(merged_history.all_peers);
|
merged_history.all_peers = Object.values(merged_history.all_peers);
|
||||||
}
|
}
|
||||||
|
|
||||||
function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
|
function scale_pg_count(prev_pgs, real_prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
|
||||||
{
|
{
|
||||||
const old_pg_count = prev_pgs.length;
|
const old_pg_count = real_prev_pgs.length;
|
||||||
// Add all possibly intersecting PGs to the history of new PGs
|
// Add all possibly intersecting PGs to the history of new PGs
|
||||||
if (!(new_pg_count % old_pg_count))
|
if (!(new_pg_count % old_pg_count))
|
||||||
{
|
{
|
||||||
// New PG count is a multiple of old PG count
|
// New PG count is a multiple of old PG count
|
||||||
for (let i = 0; i < new_pg_count; i++)
|
for (let i = 0; i < new_pg_count; i++)
|
||||||
{
|
{
|
||||||
add_pg_history(new_pg_history, i, prev_pgs, prev_pg_history, i % old_pg_count);
|
add_pg_history(new_pg_history, i, real_prev_pgs, prev_pg_history, i % old_pg_count);
|
||||||
finish_pg_history(new_pg_history[i]);
|
finish_pg_history(new_pg_history[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -64,7 +64,7 @@ function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
|
|||||||
{
|
{
|
||||||
for (let j = 0; j < mul; j++)
|
for (let j = 0; j < mul; j++)
|
||||||
{
|
{
|
||||||
add_pg_history(new_pg_history, i, prev_pgs, prev_pg_history, i+j*new_pg_count);
|
add_pg_history(new_pg_history, i, real_prev_pgs, prev_pg_history, i+j*new_pg_count);
|
||||||
}
|
}
|
||||||
finish_pg_history(new_pg_history[i]);
|
finish_pg_history(new_pg_history[i]);
|
||||||
}
|
}
|
||||||
@@ -76,7 +76,7 @@ function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
|
|||||||
let merged_history = {};
|
let merged_history = {};
|
||||||
for (let i = 0; i < old_pg_count; i++)
|
for (let i = 0; i < old_pg_count; i++)
|
||||||
{
|
{
|
||||||
add_pg_history(merged_history, 1, prev_pgs, prev_pg_history, i);
|
add_pg_history(merged_history, 1, real_prev_pgs, prev_pg_history, i);
|
||||||
}
|
}
|
||||||
finish_pg_history(merged_history[1]);
|
finish_pg_history(merged_history[1]);
|
||||||
for (let i = 0; i < new_pg_count; i++)
|
for (let i = 0; i < new_pg_count; i++)
|
||||||
@@ -90,15 +90,15 @@ function scale_pg_count(prev_pgs, prev_pg_history, new_pg_history, new_pg_count)
|
|||||||
new_pg_history[i] = null;
|
new_pg_history[i] = null;
|
||||||
}
|
}
|
||||||
// Just for the lp_solve optimizer - pick a "previous" PG for each "new" one
|
// Just for the lp_solve optimizer - pick a "previous" PG for each "new" one
|
||||||
if (old_pg_count < new_pg_count)
|
if (prev_pgs.length < new_pg_count)
|
||||||
{
|
{
|
||||||
for (let i = old_pg_count; i < new_pg_count; i++)
|
for (let i = prev_pgs.length; i < new_pg_count; i++)
|
||||||
{
|
{
|
||||||
prev_pgs[i] = prev_pgs[i % old_pg_count];
|
prev_pgs[i] = prev_pgs[i % prev_pgs.length];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (old_pg_count > new_pg_count)
|
else if (prev_pgs.length > new_pg_count)
|
||||||
{
|
{
|
||||||
prev_pgs.splice(new_pg_count, old_pg_count-new_pg_count);
|
prev_pgs.splice(new_pg_count, prev_pgs.length-new_pg_count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -550,8 +550,8 @@ function random_combinations(osd_tree, pg_size, count, ordered)
|
|||||||
seed ^= seed << 5;
|
seed ^= seed << 5;
|
||||||
return seed + 2147483648;
|
return seed + 2147483648;
|
||||||
};
|
};
|
||||||
const hosts = Object.keys(osd_tree).sort();
|
|
||||||
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
|
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
|
||||||
|
const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
|
||||||
const r = {};
|
const r = {};
|
||||||
// Generate random combinations including each OSD at least once
|
// Generate random combinations including each OSD at least once
|
||||||
for (let h = 0; h < hosts.length; h++)
|
for (let h = 0; h < hosts.length; h++)
|
||||||
|
@@ -79,7 +79,7 @@ StartLimitInterval=0
|
|||||||
RestartSec=10
|
RestartSec=10
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=local.target
|
WantedBy=multi-user.target
|
||||||
`);
|
`);
|
||||||
await system(`useradd etcd`);
|
await system(`useradd etcd`);
|
||||||
await system(`systemctl daemon-reload`);
|
await system(`systemctl daemon-reload`);
|
||||||
|
81
mon/mon.js
81
mon/mon.js
@@ -51,8 +51,9 @@ const etcd_tree = {
|
|||||||
// THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
|
// THIS IS JUST A POOR MAN'S CONFIG DOCUMENTATION
|
||||||
// etcd connection
|
// etcd connection
|
||||||
config_path: "/etc/vitastor/vitastor.conf",
|
config_path: "/etc/vitastor/vitastor.conf",
|
||||||
etcd_address: "10.0.115.10:2379/v3",
|
|
||||||
etcd_prefix: "/vitastor",
|
etcd_prefix: "/vitastor",
|
||||||
|
// etcd connection - configurable online
|
||||||
|
etcd_address: "10.0.115.10:2379/v3",
|
||||||
// mon
|
// mon
|
||||||
etcd_mon_ttl: 30, // min: 10
|
etcd_mon_ttl: 30, // min: 10
|
||||||
etcd_mon_timeout: 1000, // ms. min: 0
|
etcd_mon_timeout: 1000, // ms. min: 0
|
||||||
@@ -70,14 +71,15 @@ const etcd_tree = {
|
|||||||
rdma_gid_index: 0,
|
rdma_gid_index: 0,
|
||||||
rdma_mtu: 4096,
|
rdma_mtu: 4096,
|
||||||
rdma_max_sge: 128,
|
rdma_max_sge: 128,
|
||||||
rdma_max_send: 32,
|
rdma_max_send: 8,
|
||||||
rdma_max_recv: 8,
|
rdma_max_recv: 16,
|
||||||
rdma_max_msg: 1048576,
|
rdma_max_msg: 132096,
|
||||||
log_level: 0,
|
|
||||||
block_size: 131072,
|
block_size: 131072,
|
||||||
disk_alignment: 4096,
|
disk_alignment: 4096,
|
||||||
bitmap_granularity: 4096,
|
bitmap_granularity: 4096,
|
||||||
immediate_commit: false, // 'all' or 'small'
|
immediate_commit: false, // 'all' or 'small'
|
||||||
|
// client and osd - configurable online
|
||||||
|
log_level: 0,
|
||||||
client_dirty_limit: 33554432,
|
client_dirty_limit: 33554432,
|
||||||
peer_connect_interval: 5, // seconds. min: 1
|
peer_connect_interval: 5, // seconds. min: 1
|
||||||
peer_connect_timeout: 5, // seconds. min: 1
|
peer_connect_timeout: 5, // seconds. min: 1
|
||||||
@@ -95,18 +97,28 @@ const etcd_tree = {
|
|||||||
osd_network: null, // "192.168.7.0/24" or an array of masks
|
osd_network: null, // "192.168.7.0/24" or an array of masks
|
||||||
bind_address: "0.0.0.0",
|
bind_address: "0.0.0.0",
|
||||||
bind_port: 0,
|
bind_port: 0,
|
||||||
|
readonly: false,
|
||||||
|
osd_memlock: false,
|
||||||
|
// osd - configurable online
|
||||||
autosync_interval: 5,
|
autosync_interval: 5,
|
||||||
autosync_writes: 128,
|
autosync_writes: 128,
|
||||||
client_queue_depth: 128, // unused
|
client_queue_depth: 128, // unused
|
||||||
recovery_queue_depth: 4,
|
recovery_queue_depth: 4,
|
||||||
|
recovery_pg_switch: 128,
|
||||||
recovery_sync_batch: 16,
|
recovery_sync_batch: 16,
|
||||||
readonly: false,
|
|
||||||
no_recovery: false,
|
no_recovery: false,
|
||||||
no_rebalance: false,
|
no_rebalance: false,
|
||||||
print_stats_interval: 3,
|
print_stats_interval: 3,
|
||||||
slow_log_interval: 10,
|
slow_log_interval: 10,
|
||||||
inode_vanish_time: 60,
|
inode_vanish_time: 60,
|
||||||
osd_memlock: false,
|
auto_scrub: false,
|
||||||
|
no_scrub: false,
|
||||||
|
scrub_interval: '30d', // 1s/1m/1h/1d
|
||||||
|
scrub_queue_depth: 1,
|
||||||
|
scrub_sleep: 0, // milliseconds
|
||||||
|
scrub_list_limit: 1000, // objects to list on one scrub iteration
|
||||||
|
scrub_find_best: true,
|
||||||
|
scrub_ec_max_bruteforce: 100, // maximum EC error locator brute-force iterations
|
||||||
// blockstore - fixed in superblock
|
// blockstore - fixed in superblock
|
||||||
block_size,
|
block_size,
|
||||||
disk_alignment,
|
disk_alignment,
|
||||||
@@ -125,14 +137,15 @@ const etcd_tree = {
|
|||||||
meta_offset,
|
meta_offset,
|
||||||
disable_meta_fsync,
|
disable_meta_fsync,
|
||||||
disable_device_lock,
|
disable_device_lock,
|
||||||
// blockstore - configurable
|
// blockstore - configurable offline
|
||||||
max_write_iodepth,
|
|
||||||
min_flusher_count: 1,
|
|
||||||
max_flusher_count: 256,
|
|
||||||
inmemory_metadata,
|
inmemory_metadata,
|
||||||
inmemory_journal,
|
inmemory_journal,
|
||||||
journal_sector_buffer_count,
|
journal_sector_buffer_count,
|
||||||
journal_no_same_sector_overwrites,
|
journal_no_same_sector_overwrites,
|
||||||
|
// blockstore - configurable online
|
||||||
|
max_write_iodepth,
|
||||||
|
min_flusher_count: 1,
|
||||||
|
max_flusher_count: 256,
|
||||||
throttle_small_writes: false,
|
throttle_small_writes: false,
|
||||||
throttle_target_iops: 100,
|
throttle_target_iops: 100,
|
||||||
throttle_target_mbs: 100,
|
throttle_target_mbs: 100,
|
||||||
@@ -168,6 +181,8 @@ const etcd_tree = {
|
|||||||
osd_tags?: 'nvme' | [ 'nvme', ... ],
|
osd_tags?: 'nvme' | [ 'nvme', ... ],
|
||||||
// prefer to put primary on OSD with these tags
|
// prefer to put primary on OSD with these tags
|
||||||
primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
|
primary_affinity_tags?: 'nvme' | [ 'nvme', ... ],
|
||||||
|
// scrub interval
|
||||||
|
scrub_interval?: '30d',
|
||||||
},
|
},
|
||||||
...
|
...
|
||||||
}, */
|
}, */
|
||||||
@@ -261,9 +276,9 @@ const etcd_tree = {
|
|||||||
/* <pool_id>: {
|
/* <pool_id>: {
|
||||||
<pg_id>: {
|
<pg_id>: {
|
||||||
primary: osd_num_t,
|
primary: osd_num_t,
|
||||||
state: ("starting"|"peering"|"peered"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
|
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
|
||||||
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
|
||||||
"has_invalid"|"left_on_dead")[],
|
"has_invalid"|"has_inconsistent"|"has_corrupted"|"left_on_dead"|"scrubbing")[],
|
||||||
}
|
}
|
||||||
}, */
|
}, */
|
||||||
},
|
},
|
||||||
@@ -285,6 +300,7 @@ const etcd_tree = {
|
|||||||
osd_sets: osd_num_t[][],
|
osd_sets: osd_num_t[][],
|
||||||
all_peers: osd_num_t[],
|
all_peers: osd_num_t[],
|
||||||
epoch: uint64_t,
|
epoch: uint64_t,
|
||||||
|
next_scrub: uint64_t,
|
||||||
},
|
},
|
||||||
}, */
|
}, */
|
||||||
},
|
},
|
||||||
@@ -844,7 +860,7 @@ class Mon
|
|||||||
}
|
}
|
||||||
for (const node_id in tree)
|
for (const node_id in tree)
|
||||||
{
|
{
|
||||||
if (node_id === '')
|
if (node_id === '' || tree[node_id].level === 'osd' && (!tree[node_id].size || tree[node_id].size <= 0))
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -952,9 +968,9 @@ class Mon
|
|||||||
return alive_set[this.rng() % alive_set.length];
|
return alive_set[this.rng() % alive_set.length];
|
||||||
}
|
}
|
||||||
|
|
||||||
save_new_pgs_txn(request, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
save_new_pgs_txn(save_to, request, pool_id, up_osds, osd_tree, prev_pgs, new_pgs, pg_history)
|
||||||
{
|
{
|
||||||
const aff_osds = this.get_affinity_osds(this.state.config.pools[pool_id], up_osds, osd_tree);
|
const aff_osds = this.get_affinity_osds(this.state.config.pools[pool_id] || {}, up_osds, osd_tree);
|
||||||
const pg_items = {};
|
const pg_items = {};
|
||||||
this.reset_rng();
|
this.reset_rng();
|
||||||
new_pgs.map((osd_set, i) =>
|
new_pgs.map((osd_set, i) =>
|
||||||
@@ -1005,14 +1021,14 @@ class Mon
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.state.config.pgs.items = this.state.config.pgs.items || {};
|
save_to.items = save_to.items || {};
|
||||||
if (!new_pgs.length)
|
if (!new_pgs.length)
|
||||||
{
|
{
|
||||||
delete this.state.config.pgs.items[pool_id];
|
delete save_to.items[pool_id];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
this.state.config.pgs.items[pool_id] = pg_items;
|
save_to.items[pool_id] = pg_items;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1156,6 +1172,7 @@ class Mon
|
|||||||
if (this.state.config.pgs.hash != tree_hash)
|
if (this.state.config.pgs.hash != tree_hash)
|
||||||
{
|
{
|
||||||
// Something has changed
|
// Something has changed
|
||||||
|
const new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||||
const etcd_request = { compare: [], success: [] };
|
const etcd_request = { compare: [], success: [] };
|
||||||
for (const pool_id in (this.state.config.pgs||{}).items||{})
|
for (const pool_id in (this.state.config.pgs||{}).items||{})
|
||||||
{
|
{
|
||||||
@@ -1176,7 +1193,7 @@ class Mon
|
|||||||
etcd_request.success.push({ requestDeleteRange: {
|
etcd_request.success.push({ requestDeleteRange: {
|
||||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||||
} });
|
} });
|
||||||
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
this.save_new_pgs_txn(new_config_pgs, etcd_request, pool_id, up_osds, osd_tree, prev_pgs, [], []);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const pool_id in this.state.config.pools)
|
for (const pool_id in this.state.config.pools)
|
||||||
@@ -1230,7 +1247,7 @@ class Mon
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const new_pg_history = [];
|
const new_pg_history = [];
|
||||||
PGUtil.scale_pg_count(prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
|
PGUtil.scale_pg_count(prev_pgs, real_prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
|
||||||
pg_history = new_pg_history;
|
pg_history = new_pg_history;
|
||||||
}
|
}
|
||||||
for (const pg of prev_pgs)
|
for (const pg of prev_pgs)
|
||||||
@@ -1283,14 +1300,15 @@ class Mon
|
|||||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||||
value: b64(JSON.stringify(this.state.pool.stats[pool_id])),
|
value: b64(JSON.stringify(this.state.pool.stats[pool_id])),
|
||||||
} });
|
} });
|
||||||
this.save_new_pgs_txn(etcd_request, pool_id, up_osds, osd_tree, real_prev_pgs, optimize_result.int_pgs, pg_history);
|
this.save_new_pgs_txn(new_config_pgs, etcd_request, pool_id, up_osds, osd_tree, real_prev_pgs, optimize_result.int_pgs, pg_history);
|
||||||
}
|
}
|
||||||
this.state.config.pgs.hash = tree_hash;
|
new_config_pgs.hash = tree_hash;
|
||||||
await this.save_pg_config(etcd_request);
|
await this.save_pg_config(new_config_pgs, etcd_request);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Nothing changed, but we still want to recheck the distribution of primaries
|
// Nothing changed, but we still want to recheck the distribution of primaries
|
||||||
|
let new_config_pgs;
|
||||||
let changed = false;
|
let changed = false;
|
||||||
for (const pool_id in this.state.config.pools)
|
for (const pool_id in this.state.config.pools)
|
||||||
{
|
{
|
||||||
@@ -1310,31 +1328,35 @@ class Mon
|
|||||||
const new_primary = this.pick_primary(pool_id, pg_cfg.osd_set, up_osds, aff_osds);
|
const new_primary = this.pick_primary(pool_id, pg_cfg.osd_set, up_osds, aff_osds);
|
||||||
if (pg_cfg.primary != new_primary)
|
if (pg_cfg.primary != new_primary)
|
||||||
{
|
{
|
||||||
|
if (!new_config_pgs)
|
||||||
|
{
|
||||||
|
new_config_pgs = JSON.parse(JSON.stringify(this.state.config.pgs));
|
||||||
|
}
|
||||||
console.log(
|
console.log(
|
||||||
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
`Moving pool ${pool_id} (${pool_cfg.name || 'unnamed'}) PG ${pg_num}`+
|
||||||
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
` primary OSD from ${pg_cfg.primary} to ${new_primary}`
|
||||||
);
|
);
|
||||||
changed = true;
|
changed = true;
|
||||||
pg_cfg.primary = new_primary;
|
new_config_pgs.items[pool_id][pg_num].primary = new_primary;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (changed)
|
if (changed)
|
||||||
{
|
{
|
||||||
await this.save_pg_config();
|
await this.save_pg_config(new_config_pgs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async save_pg_config(etcd_request = { compare: [], success: [] })
|
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
|
||||||
{
|
{
|
||||||
etcd_request.compare.push(
|
etcd_request.compare.push(
|
||||||
{ key: b64(this.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
{ key: b64(this.etcd_prefix+'/mon/master'), target: 'LEASE', lease: ''+this.etcd_lease_id },
|
||||||
{ key: b64(this.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
{ key: b64(this.etcd_prefix+'/config/pgs'), target: 'MOD', mod_revision: ''+this.etcd_watch_revision, result: 'LESS' },
|
||||||
);
|
);
|
||||||
etcd_request.success.push(
|
etcd_request.success.push(
|
||||||
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(this.state.config.pgs)) } },
|
{ requestPut: { key: b64(this.etcd_prefix+'/config/pgs'), value: b64(JSON.stringify(new_config_pgs)) } },
|
||||||
);
|
);
|
||||||
const res = await this.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
const res = await this.etcd_call('/kv/txn', etcd_request, this.config.etcd_mon_timeout, 0);
|
||||||
if (!res.succeeded)
|
if (!res.succeeded)
|
||||||
@@ -1813,6 +1835,7 @@ function POST(url, body, timeout)
|
|||||||
clearTimeout(timer_id);
|
clearTimeout(timer_id);
|
||||||
let res_body = '';
|
let res_body = '';
|
||||||
res.setEncoding('utf8');
|
res.setEncoding('utf8');
|
||||||
|
res.on('error', (error) => ok({ error }));
|
||||||
res.on('data', chunk => { res_body += chunk; });
|
res.on('data', chunk => { res_body += chunk; });
|
||||||
res.on('end', () =>
|
res.on('end', () =>
|
||||||
{
|
{
|
||||||
@@ -1832,6 +1855,8 @@ function POST(url, body, timeout)
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
req.on('error', (error) => ok({ error }));
|
||||||
|
req.on('close', () => ok({ error: new Error('Connection closed prematurely') }));
|
||||||
req.write(body_text);
|
req.write(body_text);
|
||||||
req.end();
|
req.end();
|
||||||
});
|
});
|
||||||
|
@@ -15,4 +15,4 @@ StartLimitInterval=0
|
|||||||
RestartSec=10
|
RestartSec=10
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=vitastor.target
|
WantedBy=multi-user.target
|
||||||
|
@@ -16,6 +16,11 @@ use PVE::Tools qw(run_command);
|
|||||||
|
|
||||||
use base qw(PVE::Storage::Plugin);
|
use base qw(PVE::Storage::Plugin);
|
||||||
|
|
||||||
|
if (@PVE::Storage::Plugin::SHARED_STORAGE)
|
||||||
|
{
|
||||||
|
push @PVE::Storage::Plugin::SHARED_STORAGE, 'vitastor';
|
||||||
|
}
|
||||||
|
|
||||||
sub api
|
sub api
|
||||||
{
|
{
|
||||||
# Trick it :)
|
# Trick it :)
|
||||||
@@ -133,9 +138,11 @@ sub properties
|
|||||||
sub options
|
sub options
|
||||||
{
|
{
|
||||||
return {
|
return {
|
||||||
|
shared => { optional => 1 },
|
||||||
|
content => { optional => 1 },
|
||||||
nodes => { optional => 1 },
|
nodes => { optional => 1 },
|
||||||
disable => { optional => 1 },
|
disable => { optional => 1 },
|
||||||
vitastor_etcd_address => { optional => 1},
|
vitastor_etcd_address => { optional => 1 },
|
||||||
vitastor_etcd_prefix => { optional => 1 },
|
vitastor_etcd_prefix => { optional => 1 },
|
||||||
vitastor_config_path => { optional => 1 },
|
vitastor_config_path => { optional => 1 },
|
||||||
vitastor_prefix => { optional => 1 },
|
vitastor_prefix => { optional => 1 },
|
||||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
|||||||
from cinder.volume import driver
|
from cinder.volume import driver
|
||||||
from cinder.volume import volume_utils
|
from cinder.volume import volume_utils
|
||||||
|
|
||||||
VERSION = '0.8.3'
|
VERSION = '0.9.0'
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
169
patches/pve-qemu-7.2-vitastor.patch
Normal file
169
patches/pve-qemu-7.2-vitastor.patch
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
Index: pve-qemu-kvm-7.2.0/block/meson.build
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-7.2.0.orig/block/meson.build
|
||||||
|
+++ pve-qemu-kvm-7.2.0/block/meson.build
|
||||||
|
@@ -113,6 +113,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
Index: pve-qemu-kvm-7.2.0/meson.build
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-7.2.0.orig/meson.build
|
||||||
|
+++ pve-qemu-kvm-7.2.0/meson.build
|
||||||
|
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -1865,6 +1885,7 @@ config_host_data.set('CONFIG_NUMA', numa
|
||||||
|
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||||
|
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||||
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||||
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||||
|
@@ -3957,6 +3978,7 @@ if spice_protocol.found()
|
||||||
|
summary_info += {' spice server support': spice}
|
||||||
|
endif
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
summary_info += {'libusb': libusb}
|
||||||
|
Index: pve-qemu-kvm-7.2.0/meson_options.txt
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-7.2.0.orig/meson_options.txt
|
||||||
|
+++ pve-qemu-kvm-7.2.0/meson_options.txt
|
||||||
|
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value :
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('opengl', type : 'feature', value : 'auto',
|
||||||
|
description: 'OpenGL support')
|
||||||
|
option('rdma', type : 'feature', value : 'auto',
|
||||||
|
Index: pve-qemu-kvm-7.2.0/qapi/block-core.json
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-7.2.0.orig/qapi/block-core.json
|
||||||
|
+++ pve-qemu-kvm-7.2.0/qapi/block-core.json
|
||||||
|
@@ -3213,7 +3213,7 @@
|
||||||
|
'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
'pbs',
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||||
|
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -4223,6 +4223,28 @@
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
+##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
# An enumeration of replication modes.
|
||||||
|
@@ -4671,6 +4693,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'virtio-blk-vfio-pci':
|
||||||
|
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||||
|
'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -5072,6 +5095,17 @@
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
+##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
# Subformat options for VMDK images
|
||||||
|
@@ -5269,6 +5303,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
Index: pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-7.2.0.orig/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
+++ pve-qemu-kvm-7.2.0/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
@@ -31,7 +31,7 @@
|
||||||
|
--with-git=meson \
|
||||||
|
--with-git-submodules=update \
|
||||||
|
--target-list="x86_64-softmmu" \
|
||||||
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
--audio-drv-list="" \
|
||||||
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||||
|
--with-coroutine=ucontext \
|
||||||
|
@@ -179,6 +179,7 @@
|
||||||
|
--enable-opengl \
|
||||||
|
--enable-pie \
|
||||||
|
--enable-rbd \
|
||||||
|
+--enable-vitastor \
|
||||||
|
--enable-rdma \
|
||||||
|
--enable-seccomp \
|
||||||
|
--enable-snappy \
|
169
patches/qemu-6.2-vitastor.patch
Normal file
169
patches/qemu-6.2-vitastor.patch
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
diff --git a/block/meson.build b/block/meson.build
|
||||||
|
index deb73ca389..e269f599a1 100644
|
||||||
|
--- a/block/meson.build
|
||||||
|
+++ b/block/meson.build
|
||||||
|
@@ -78,6 +78,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
diff --git a/meson.build b/meson.build
|
||||||
|
index 96de1a6ef9..2e3994777d 100644
|
||||||
|
--- a/meson.build
|
||||||
|
+++ b/meson.build
|
||||||
|
@@ -838,6 +838,26 @@ if not get_option('rbd').auto() or have_block
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -1455,6 +1475,7 @@ config_host_data.set('CONFIG_LINUX_AIO', libaio.found())
|
||||||
|
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
|
||||||
|
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||||
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||||
|
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
|
||||||
|
@@ -3412,6 +3433,7 @@ if spice_protocol.found()
|
||||||
|
summary_info += {' spice server support': spice}
|
||||||
|
endif
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
diff --git a/meson_options.txt b/meson_options.txt
|
||||||
|
index e392323732..5b56007475 100644
|
||||||
|
--- a/meson_options.txt
|
||||||
|
+++ b/meson_options.txt
|
||||||
|
@@ -121,6 +121,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('gtk', type : 'feature', value : 'auto',
|
||||||
|
description: 'GTK+ user interface')
|
||||||
|
option('sdl', type : 'feature', value : 'auto',
|
||||||
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||||
|
index 1d3dd9cb48..88453405e5 100644
|
||||||
|
--- a/qapi/block-core.json
|
||||||
|
+++ b/qapi/block-core.json
|
||||||
|
@@ -2930,7 +2930,7 @@
|
||||||
|
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
|
||||||
|
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
|
||||||
|
|
||||||
|
##
|
||||||
|
# @BlockdevOptionsFile:
|
||||||
|
@@ -3864,6 +3864,28 @@
|
||||||
|
'*key-secret': 'str',
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
@@ -4259,6 +4281,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevOptionsGenericCOWFormat',
|
||||||
|
'vpc': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vvfat': 'BlockdevOptionsVVFAT'
|
||||||
|
@@ -4647,6 +4670,17 @@
|
||||||
|
'*cluster-size' : 'size',
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
@@ -4846,6 +4880,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||||
|
index 7a17ff4218..cdddbf32aa 100644
|
||||||
|
--- a/scripts/meson-buildoptions.sh
|
||||||
|
+++ b/scripts/meson-buildoptions.sh
|
||||||
|
@@ -69,6 +69,7 @@ meson_options_help() {
|
||||||
|
printf "%s\n" ' oss OSS sound support'
|
||||||
|
printf "%s\n" ' pa PulseAudio sound support'
|
||||||
|
printf "%s\n" ' rbd Ceph block device driver'
|
||||||
|
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||||
|
printf "%s\n" ' sdl SDL user interface'
|
||||||
|
printf "%s\n" ' sdl-image SDL Image support for icons'
|
||||||
|
printf "%s\n" ' seccomp seccomp support'
|
||||||
|
@@ -210,6 +211,8 @@ _meson_option_parse() {
|
||||||
|
--disable-pa) printf "%s" -Dpa=disabled ;;
|
||||||
|
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||||
|
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||||
|
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||||
|
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||||
|
--enable-sdl) printf "%s" -Dsdl=enabled ;;
|
||||||
|
--disable-sdl) printf "%s" -Dsdl=disabled ;;
|
||||||
|
--enable-sdl-image) printf "%s" -Dsdl_image=enabled ;;
|
190
patches/qemu-7.0-vitastor.patch
Normal file
190
patches/qemu-7.0-vitastor.patch
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
diff --git a/block/meson.build b/block/meson.build
|
||||||
|
index 0b2a60c99b..d923713804 100644
|
||||||
|
--- a/block/meson.build
|
||||||
|
+++ b/block/meson.build
|
||||||
|
@@ -98,6 +98,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
diff --git a/meson.build b/meson.build
|
||||||
|
index 861de93c4f..272f72af11 100644
|
||||||
|
--- a/meson.build
|
||||||
|
+++ b/meson.build
|
||||||
|
@@ -884,6 +884,26 @@ if not get_option('rbd').auto() or have_block
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -1546,6 +1566,7 @@ config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
|
||||||
|
config_host_data.set('CONFIG_NUMA', numa.found())
|
||||||
|
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||||
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||||
|
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
|
||||||
|
@@ -3709,6 +3730,7 @@ if spice_protocol.found()
|
||||||
|
summary_info += {' spice server support': spice}
|
||||||
|
endif
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
summary_info += {'libusb': libusb}
|
||||||
|
diff --git a/meson_options.txt b/meson_options.txt
|
||||||
|
index 52b11cead4..d8d0868174 100644
|
||||||
|
--- a/meson_options.txt
|
||||||
|
+++ b/meson_options.txt
|
||||||
|
@@ -149,6 +149,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('gtk', type : 'feature', value : 'auto',
|
||||||
|
description: 'GTK+ user interface')
|
||||||
|
option('sdl', type : 'feature', value : 'auto',
|
||||||
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||||
|
index beeb91952a..1c98dc0e12 100644
|
||||||
|
--- a/qapi/block-core.json
|
||||||
|
+++ b/qapi/block-core.json
|
||||||
|
@@ -2929,7 +2929,7 @@
|
||||||
|
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
|
||||||
|
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
|
||||||
|
|
||||||
|
##
|
||||||
|
# @BlockdevOptionsFile:
|
||||||
|
@@ -3863,6 +3863,28 @@
|
||||||
|
'*key-secret': 'str',
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
@@ -4277,6 +4299,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevOptionsGenericCOWFormat',
|
||||||
|
'vpc': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vvfat': 'BlockdevOptionsVVFAT'
|
||||||
|
@@ -4665,6 +4688,17 @@
|
||||||
|
'*cluster-size' : 'size',
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
@@ -4864,6 +4898,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
index 9850dd4444..72b1287520 100755
|
||||||
|
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
@@ -31,7 +31,7 @@
|
||||||
|
--with-git=meson \
|
||||||
|
--with-git-submodules=update \
|
||||||
|
--target-list="x86_64-softmmu" \
|
||||||
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
--audio-drv-list="" \
|
||||||
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||||
|
--with-coroutine=ucontext \
|
||||||
|
@@ -181,6 +181,7 @@
|
||||||
|
--enable-opengl \
|
||||||
|
--enable-pie \
|
||||||
|
--enable-rbd \
|
||||||
|
+--enable-vitastor \
|
||||||
|
--enable-rdma \
|
||||||
|
--enable-seccomp \
|
||||||
|
--enable-snappy \
|
||||||
|
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||||
|
index 1e26f4571e..370898d48c 100644
|
||||||
|
--- a/scripts/meson-buildoptions.sh
|
||||||
|
+++ b/scripts/meson-buildoptions.sh
|
||||||
|
@@ -98,6 +98,7 @@ meson_options_help() {
|
||||||
|
printf "%s\n" ' qed qed image format support'
|
||||||
|
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||||
|
printf "%s\n" ' rbd Ceph block device driver'
|
||||||
|
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||||
|
printf "%s\n" ' replication replication support'
|
||||||
|
printf "%s\n" ' sdl SDL user interface'
|
||||||
|
printf "%s\n" ' sdl-image SDL Image support for icons'
|
||||||
|
@@ -289,6 +290,8 @@ _meson_option_parse() {
|
||||||
|
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||||
|
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||||
|
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||||
|
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||||
|
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||||
|
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
||||||
|
--disable-replication) printf "%s" -Dreplication=disabled ;;
|
||||||
|
--enable-rng-none) printf "%s" -Drng_none=true ;;
|
190
patches/qemu-7.1-vitastor.patch
Normal file
190
patches/qemu-7.1-vitastor.patch
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
diff --git a/block/meson.build b/block/meson.build
|
||||||
|
index 60bc305597..89a042216f 100644
|
||||||
|
--- a/block/meson.build
|
||||||
|
+++ b/block/meson.build
|
||||||
|
@@ -98,6 +98,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
diff --git a/meson.build b/meson.build
|
||||||
|
index 20fddbd707..600db4e2fb 100644
|
||||||
|
--- a/meson.build
|
||||||
|
+++ b/meson.build
|
||||||
|
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_block
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -1799,6 +1819,7 @@ config_host_data.set('CONFIG_NUMA', numa.found())
|
||||||
|
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||||
|
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||||
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||||
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||||
|
@@ -3954,6 +3975,7 @@ if spice_protocol.found()
|
||||||
|
summary_info += {' spice server support': spice}
|
||||||
|
endif
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
summary_info += {'libusb': libusb}
|
||||||
|
diff --git a/meson_options.txt b/meson_options.txt
|
||||||
|
index e58e158396..9747b38fd0 100644
|
||||||
|
--- a/meson_options.txt
|
||||||
|
+++ b/meson_options.txt
|
||||||
|
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('opengl', type : 'feature', value : 'auto',
|
||||||
|
description: 'OpenGL support')
|
||||||
|
option('rdma', type : 'feature', value : 'auto',
|
||||||
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||||
|
index 2173e7734a..5a4900b322 100644
|
||||||
|
--- a/qapi/block-core.json
|
||||||
|
+++ b/qapi/block-core.json
|
||||||
|
@@ -2955,7 +2955,7 @@
|
||||||
|
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
|
||||||
|
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
|
||||||
|
|
||||||
|
##
|
||||||
|
# @BlockdevOptionsFile:
|
||||||
|
@@ -3883,6 +3883,28 @@
|
||||||
|
'*key-secret': 'str',
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
@@ -4327,6 +4349,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevOptionsGenericCOWFormat',
|
||||||
|
'vpc': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vvfat': 'BlockdevOptionsVVFAT'
|
||||||
|
@@ -4717,6 +4740,17 @@
|
||||||
|
'*cluster-size' : 'size',
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
@@ -4915,6 +4949,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
index a7f92aff90..53dc55be2e 100755
|
||||||
|
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
@@ -31,7 +31,7 @@
|
||||||
|
--with-git=meson \
|
||||||
|
--with-git-submodules=update \
|
||||||
|
--target-list="x86_64-softmmu" \
|
||||||
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
--audio-drv-list="" \
|
||||||
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||||
|
--with-coroutine=ucontext \
|
||||||
|
@@ -179,6 +179,7 @@
|
||||||
|
--enable-opengl \
|
||||||
|
--enable-pie \
|
||||||
|
--enable-rbd \
|
||||||
|
+--enable-vitastor \
|
||||||
|
--enable-rdma \
|
||||||
|
--enable-seccomp \
|
||||||
|
--enable-snappy \
|
||||||
|
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||||
|
index 359b04e0e6..f5b85ba78c 100644
|
||||||
|
--- a/scripts/meson-buildoptions.sh
|
||||||
|
+++ b/scripts/meson-buildoptions.sh
|
||||||
|
@@ -135,6 +135,7 @@ meson_options_help() {
|
||||||
|
printf "%s\n" ' qed qed image format support'
|
||||||
|
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||||
|
printf "%s\n" ' rbd Ceph block device driver'
|
||||||
|
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||||
|
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||||
|
printf "%s\n" ' replication replication support'
|
||||||
|
printf "%s\n" ' sdl SDL user interface'
|
||||||
|
@@ -370,6 +371,8 @@ _meson_option_parse() {
|
||||||
|
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||||
|
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||||
|
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||||
|
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||||
|
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||||
|
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||||
|
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||||
|
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
190
patches/qemu-7.2-vitastor.patch
Normal file
190
patches/qemu-7.2-vitastor.patch
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
diff --git a/block/meson.build b/block/meson.build
|
||||||
|
index b7c68b83a3..95d8a6f15d 100644
|
||||||
|
--- a/block/meson.build
|
||||||
|
+++ b/block/meson.build
|
||||||
|
@@ -100,6 +100,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
diff --git a/meson.build b/meson.build
|
||||||
|
index 5c6b5a1c75..f31f73612e 100644
|
||||||
|
--- a/meson.build
|
||||||
|
+++ b/meson.build
|
||||||
|
@@ -1026,6 +1026,26 @@ if not get_option('rbd').auto() or have_block
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -1861,6 +1881,7 @@ config_host_data.set('CONFIG_NUMA', numa.found())
|
||||||
|
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||||
|
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||||
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||||
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||||
|
@@ -3945,6 +3966,7 @@ if spice_protocol.found()
|
||||||
|
summary_info += {' spice server support': spice}
|
||||||
|
endif
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
summary_info += {'libusb': libusb}
|
||||||
|
diff --git a/meson_options.txt b/meson_options.txt
|
||||||
|
index 4b749ca549..6b37bd6b77 100644
|
||||||
|
--- a/meson_options.txt
|
||||||
|
+++ b/meson_options.txt
|
||||||
|
@@ -169,6 +169,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('opengl', type : 'feature', value : 'auto',
|
||||||
|
description: 'OpenGL support')
|
||||||
|
option('rdma', type : 'feature', value : 'auto',
|
||||||
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||||
|
index 95ac4fa634..7a240827e4 100644
|
||||||
|
--- a/qapi/block-core.json
|
||||||
|
+++ b/qapi/block-core.json
|
||||||
|
@@ -2959,7 +2959,7 @@
|
||||||
|
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||||
|
'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||||
|
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -3957,6 +3957,28 @@
|
||||||
|
'*key-secret': 'str',
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
@@ -4405,6 +4427,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'virtio-blk-vfio-pci':
|
||||||
|
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||||
|
'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -4804,6 +4827,17 @@
|
||||||
|
'*cluster-size' : 'size',
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
@@ -5002,6 +5036,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
index a7f92aff90..53dc55be2e 100755
|
||||||
|
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
@@ -31,7 +31,7 @@
|
||||||
|
--with-git=meson \
|
||||||
|
--with-git-submodules=update \
|
||||||
|
--target-list="x86_64-softmmu" \
|
||||||
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
--audio-drv-list="" \
|
||||||
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||||
|
--with-coroutine=ucontext \
|
||||||
|
@@ -179,6 +179,7 @@
|
||||||
|
--enable-opengl \
|
||||||
|
--enable-pie \
|
||||||
|
--enable-rbd \
|
||||||
|
+--enable-vitastor \
|
||||||
|
--enable-rdma \
|
||||||
|
--enable-seccomp \
|
||||||
|
--enable-snappy \
|
||||||
|
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||||
|
index aa6e30ea91..c45d21c40f 100644
|
||||||
|
--- a/scripts/meson-buildoptions.sh
|
||||||
|
+++ b/scripts/meson-buildoptions.sh
|
||||||
|
@@ -135,6 +135,7 @@ meson_options_help() {
|
||||||
|
printf "%s\n" ' qed qed image format support'
|
||||||
|
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||||
|
printf "%s\n" ' rbd Ceph block device driver'
|
||||||
|
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||||
|
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||||
|
printf "%s\n" ' replication replication support'
|
||||||
|
printf "%s\n" ' sdl SDL user interface'
|
||||||
|
@@ -376,6 +377,8 @@ _meson_option_parse() {
|
||||||
|
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||||
|
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||||
|
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||||
|
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||||
|
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||||
|
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||||
|
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||||
|
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
190
patches/qemu-8.0-vitastor.patch
Normal file
190
patches/qemu-8.0-vitastor.patch
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
diff --git a/block/meson.build b/block/meson.build
|
||||||
|
index 382bec0e7d..af6207dbce 100644
|
||||||
|
--- a/block/meson.build
|
||||||
|
+++ b/block/meson.build
|
||||||
|
@@ -101,6 +101,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
diff --git a/meson.build b/meson.build
|
||||||
|
index c44d05a13f..ebedb42843 100644
|
||||||
|
--- a/meson.build
|
||||||
|
+++ b/meson.build
|
||||||
|
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -1878,6 +1898,7 @@ endif
|
||||||
|
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||||
|
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||||
|
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||||
|
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||||
|
@@ -4002,6 +4023,7 @@ if spice_protocol.found()
|
||||||
|
summary_info += {' spice server support': spice}
|
||||||
|
endif
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
summary_info += {'libusb': libusb}
|
||||||
|
diff --git a/meson_options.txt b/meson_options.txt
|
||||||
|
index fc9447d267..c4ac55c283 100644
|
||||||
|
--- a/meson_options.txt
|
||||||
|
+++ b/meson_options.txt
|
||||||
|
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('opengl', type : 'feature', value : 'auto',
|
||||||
|
description: 'OpenGL support')
|
||||||
|
option('rdma', type : 'feature', value : 'auto',
|
||||||
|
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||||
|
index c05ad0c07e..f5eb701604 100644
|
||||||
|
--- a/qapi/block-core.json
|
||||||
|
+++ b/qapi/block-core.json
|
||||||
|
@@ -3054,7 +3054,7 @@
|
||||||
|
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||||
|
'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||||
|
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -4073,6 +4073,28 @@
|
||||||
|
'*key-secret': 'str',
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
@@ -4521,6 +4543,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'virtio-blk-vfio-pci':
|
||||||
|
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||||
|
'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -4920,6 +4943,17 @@
|
||||||
|
'*cluster-size' : 'size',
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
+##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
@@ -5118,6 +5152,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
index 6e8983f39c..1b0b9fcf3e 100755
|
||||||
|
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
@@ -32,7 +32,7 @@
|
||||||
|
--with-git=meson \
|
||||||
|
--with-git-submodules=update \
|
||||||
|
--target-list="x86_64-softmmu" \
|
||||||
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
--audio-drv-list="" \
|
||||||
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||||
|
--with-coroutine=ucontext \
|
||||||
|
@@ -179,6 +179,7 @@
|
||||||
|
--enable-opengl \
|
||||||
|
--enable-pie \
|
||||||
|
--enable-rbd \
|
||||||
|
+--enable-vitastor \
|
||||||
|
--enable-rdma \
|
||||||
|
--enable-seccomp \
|
||||||
|
--enable-snappy \
|
||||||
|
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||||
|
index 009fab1515..95914e6ebc 100644
|
||||||
|
--- a/scripts/meson-buildoptions.sh
|
||||||
|
+++ b/scripts/meson-buildoptions.sh
|
||||||
|
@@ -142,6 +142,7 @@ meson_options_help() {
|
||||||
|
printf "%s\n" ' qed qed image format support'
|
||||||
|
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||||
|
printf "%s\n" ' rbd Ceph block device driver'
|
||||||
|
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||||
|
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||||
|
printf "%s\n" ' replication replication support'
|
||||||
|
printf "%s\n" ' sdl SDL user interface'
|
||||||
|
@@ -388,6 +389,8 @@ _meson_option_parse() {
|
||||||
|
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||||
|
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||||
|
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||||
|
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||||
|
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||||
|
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||||
|
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||||
|
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
@@ -7,13 +7,12 @@ set -e
|
|||||||
VITASTOR=$(dirname $0)
|
VITASTOR=$(dirname $0)
|
||||||
VITASTOR=$(realpath "$VITASTOR/..")
|
VITASTOR=$(realpath "$VITASTOR/..")
|
||||||
|
|
||||||
if [ -d /opt/rh/gcc-toolset-9 ]; then
|
EL=$(rpm --eval '%dist')
|
||||||
|
if [ "$EL" = ".el8" ]; then
|
||||||
# CentOS 8
|
# CentOS 8
|
||||||
EL=8
|
|
||||||
. /opt/rh/gcc-toolset-9/enable
|
. /opt/rh/gcc-toolset-9/enable
|
||||||
else
|
elif [ "$EL" = ".el7" ]; then
|
||||||
# CentOS 7
|
# CentOS 7
|
||||||
EL=7
|
|
||||||
. /opt/rh/devtoolset-9/enable
|
. /opt/rh/devtoolset-9/enable
|
||||||
fi
|
fi
|
||||||
cd ~/rpmbuild/SPECS
|
cd ~/rpmbuild/SPECS
|
||||||
@@ -25,4 +24,4 @@ rm fio
|
|||||||
mv fio-copy fio
|
mv fio-copy fio
|
||||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||||
tar --transform 's#^#vitastor-0.8.3/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.3$(rpm --eval '%dist').tar.gz *
|
tar --transform 's#^#vitastor-0.9.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.0$(rpm --eval '%dist').tar.gz *
|
||||||
|
93
rpm/qemu-kvm-7.0-el9.spec.patch
Normal file
93
rpm/qemu-kvm-7.0-el9.spec.patch
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
--- qemu-kvm.spec.orig 2023-02-28 08:04:06.000000000 +0000
|
||||||
|
+++ qemu-kvm.spec 2023-04-27 22:29:18.094878829 +0000
|
||||||
|
@@ -100,8 +100,6 @@
|
||||||
|
%endif
|
||||||
|
|
||||||
|
%global target_list %{kvm_target}-softmmu
|
||||||
|
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
|
||||||
|
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
|
||||||
|
%define qemudocdir %{_docdir}/%{name}
|
||||||
|
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
|
||||||
|
|
||||||
|
@@ -129,6 +127,7 @@ Requires: %{name}-device-usb-host = %{ep
|
||||||
|
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
|
||||||
|
%endif \
|
||||||
|
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
||||||
|
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
||||||
|
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
|
||||||
|
|
||||||
|
# Since SPICE is removed from RHEL-9, the following Obsoletes:
|
||||||
|
@@ -151,7 +150,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
|
||||||
|
Summary: QEMU is a machine emulator and virtualizer
|
||||||
|
Name: qemu-kvm
|
||||||
|
Version: 7.0.0
|
||||||
|
-Release: 13%{?rcrel}%{?dist}%{?cc_suffix}.2
|
||||||
|
+Release: 13.vitastor%{?rcrel}%{?dist}%{?cc_suffix}
|
||||||
|
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||||
|
# Epoch 15 used for RHEL 8
|
||||||
|
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
|
||||||
|
@@ -174,6 +173,7 @@ Source28: 95-kvm-memlock.conf
|
||||||
|
Source30: kvm-s390x.conf
|
||||||
|
Source31: kvm-x86.conf
|
||||||
|
Source36: README.tests
|
||||||
|
+Source37: qemu-vitastor.c
|
||||||
|
|
||||||
|
|
||||||
|
Patch0004: 0004-Initial-redhat-build.patch
|
||||||
|
@@ -498,6 +498,7 @@ Patch171: kvm-i386-do-kvm_put_msr_featur
|
||||||
|
Patch172: kvm-target-i386-kvm-fix-kvmclock_current_nsec-Assertion-.patch
|
||||||
|
# For bz#2168221 - while live-migrating many instances concurrently, libvirt sometimes return internal error: migration was active, but no RAM info was set [rhel-9.1.0.z]
|
||||||
|
Patch173: kvm-migration-Read-state-once.patch
|
||||||
|
+Patch174: qemu-7.0-vitastor.patch
|
||||||
|
|
||||||
|
# Source-git patches
|
||||||
|
|
||||||
|
@@ -531,6 +532,7 @@ BuildRequires: libcurl-devel
|
||||||
|
%if %{have_block_rbd}
|
||||||
|
BuildRequires: librbd-devel
|
||||||
|
%endif
|
||||||
|
+BuildRequires: vitastor-client-devel
|
||||||
|
# We need both because the 'stap' binary is probed for by configure
|
||||||
|
BuildRequires: systemtap
|
||||||
|
BuildRequires: systemtap-sdt-devel
|
||||||
|
@@ -718,6 +720,14 @@ using the rbd protocol.
|
||||||
|
%endif
|
||||||
|
|
||||||
|
|
||||||
|
+%package block-vitastor
|
||||||
|
+Summary: QEMU Vitastor block driver
|
||||||
|
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||||
|
+
|
||||||
|
+%description block-vitastor
|
||||||
|
+This package provides the additional Vitastor block driver for QEMU.
|
||||||
|
+
|
||||||
|
+
|
||||||
|
%package audio-pa
|
||||||
|
Summary: QEMU PulseAudio audio driver
|
||||||
|
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||||
|
@@ -811,6 +821,7 @@ This package provides usbredir support.
|
||||||
|
%prep
|
||||||
|
%setup -q -n qemu-%{version}%{?rcstr}
|
||||||
|
%autopatch -p1
|
||||||
|
+cp %{SOURCE37} ./block/vitastor.c
|
||||||
|
|
||||||
|
%global qemu_kvm_build qemu_kvm_build
|
||||||
|
mkdir -p %{qemu_kvm_build}
|
||||||
|
@@ -1032,6 +1043,7 @@ run_configure \
|
||||||
|
%if %{have_block_rbd}
|
||||||
|
--enable-rbd \
|
||||||
|
%endif
|
||||||
|
+ --enable-vitastor \
|
||||||
|
%if %{have_librdma}
|
||||||
|
--enable-rdma \
|
||||||
|
%endif
|
||||||
|
@@ -1511,6 +1523,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
|
||||||
|
%files block-rbd
|
||||||
|
%{_libdir}/%{name}/block-rbd.so
|
||||||
|
%endif
|
||||||
|
+%files block-vitastor
|
||||||
|
+%{_libdir}/%{name}/block-vitastor.so
|
||||||
|
+
|
||||||
|
%files audio-pa
|
||||||
|
%{_libdir}/%{name}/audio-pa.so
|
||||||
|
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.8.3.el7.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.9.0.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.8.3
|
Version: 0.9.0
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.8.3.el7.tar.gz
|
Source0: vitastor-0.9.0.el7.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
@@ -35,6 +35,7 @@ Summary: Vitastor - OSD
|
|||||||
Requires: libJerasure2
|
Requires: libJerasure2
|
||||||
Requires: libisa-l
|
Requires: libisa-l
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
Requires: vitastor-client = %{version}-%{release}
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
Requires: util-linux
|
Requires: util-linux
|
||||||
Requires: parted
|
Requires: parted
|
||||||
@@ -59,6 +60,7 @@ scheduling cluster-level operations.
|
|||||||
%package -n vitastor-client
|
%package -n vitastor-client
|
||||||
Summary: Vitastor - client
|
Summary: Vitastor - client
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
|
|
||||||
|
|
||||||
%description -n vitastor-client
|
%description -n vitastor-client
|
||||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.8.3.el8.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.9.0.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.8.3
|
Version: 0.9.0
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.8.3.el8.tar.gz
|
Source0: vitastor-0.9.0.el8.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
@@ -34,6 +34,7 @@ Summary: Vitastor - OSD
|
|||||||
Requires: libJerasure2
|
Requires: libJerasure2
|
||||||
Requires: libisa-l
|
Requires: libisa-l
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
Requires: vitastor-client = %{version}-%{release}
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
Requires: util-linux
|
Requires: util-linux
|
||||||
Requires: parted
|
Requires: parted
|
||||||
@@ -57,6 +58,7 @@ scheduling cluster-level operations.
|
|||||||
%package -n vitastor-client
|
%package -n vitastor-client
|
||||||
Summary: Vitastor - client
|
Summary: Vitastor - client
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
|
|
||||||
|
|
||||||
%description -n vitastor-client
|
%description -n vitastor-client
|
||||||
|
28
rpm/vitastor-el9.Dockerfile
Normal file
28
rpm/vitastor-el9.Dockerfile
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Build packages for AlmaLinux 9 inside a container
|
||||||
|
# cd ..; podman build -t vitastor-el9 -v `pwd`/packages:/root/packages -f rpm/vitastor-el9.Dockerfile .
|
||||||
|
|
||||||
|
FROM almalinux:9
|
||||||
|
|
||||||
|
WORKDIR /root
|
||||||
|
|
||||||
|
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
|
||||||
|
RUN dnf -y install epel-release dnf-plugins-core
|
||||||
|
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
|
||||||
|
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake
|
||||||
|
RUN dnf download --source fio
|
||||||
|
RUN rpm --nomd5 -i fio*.src.rpm
|
||||||
|
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec
|
||||||
|
|
||||||
|
ADD . /root/vitastor
|
||||||
|
|
||||||
|
RUN set -e; \
|
||||||
|
cd /root/vitastor/rpm; \
|
||||||
|
sh build-tarball.sh; \
|
||||||
|
cp /root/vitastor-0.9.0.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
|
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
|
cd ~/rpmbuild/SPECS/; \
|
||||||
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
mkdir -p /root/packages/vitastor-el9; \
|
||||||
|
rm -rf /root/packages/vitastor-el9/*; \
|
||||||
|
cp ~/rpmbuild/RPMS/*/vitastor* /root/packages/vitastor-el9/; \
|
||||||
|
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el9/
|
158
rpm/vitastor-el9.spec
Normal file
158
rpm/vitastor-el9.spec
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
Name: vitastor
|
||||||
|
Version: 0.9.0
|
||||||
|
Release: 1%{?dist}
|
||||||
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
|
License: Vitastor Network Public License 1.1
|
||||||
|
URL: https://vitastor.io/
|
||||||
|
Source0: vitastor-0.9.0.el9.tar.gz
|
||||||
|
|
||||||
|
BuildRequires: liburing-devel >= 0.6
|
||||||
|
BuildRequires: gperftools-devel
|
||||||
|
BuildRequires: gcc-c++
|
||||||
|
BuildRequires: nodejs >= 10
|
||||||
|
BuildRequires: jerasure-devel
|
||||||
|
BuildRequires: libisa-l-devel
|
||||||
|
BuildRequires: gf-complete-devel
|
||||||
|
BuildRequires: rdma-core-devel
|
||||||
|
BuildRequires: cmake
|
||||||
|
Requires: vitastor-osd = %{version}-%{release}
|
||||||
|
Requires: vitastor-mon = %{version}-%{release}
|
||||||
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
|
Requires: vitastor-client-devel = %{version}-%{release}
|
||||||
|
Requires: vitastor-fio = %{version}-%{release}
|
||||||
|
|
||||||
|
%description
|
||||||
|
Vitastor is a small, simple and fast clustered block storage (storage for VM drives),
|
||||||
|
architecturally similar to Ceph which means strong consistency, primary-replication,
|
||||||
|
symmetric clustering and automatic data distribution over any number of drives of any
|
||||||
|
size with configurable redundancy (replication or erasure codes/XOR).
|
||||||
|
|
||||||
|
|
||||||
|
%package -n vitastor-osd
|
||||||
|
Summary: Vitastor - OSD
|
||||||
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
|
Requires: util-linux
|
||||||
|
Requires: parted
|
||||||
|
|
||||||
|
|
||||||
|
%description -n vitastor-osd
|
||||||
|
Vitastor object storage daemon, i.e. server program that stores data.
|
||||||
|
|
||||||
|
|
||||||
|
%package -n vitastor-mon
|
||||||
|
Summary: Vitastor - monitor
|
||||||
|
Requires: nodejs >= 10
|
||||||
|
Requires: lpsolve
|
||||||
|
|
||||||
|
|
||||||
|
%description -n vitastor-mon
|
||||||
|
Vitastor monitor, i.e. server program responsible for watching cluster state and
|
||||||
|
scheduling cluster-level operations.
|
||||||
|
|
||||||
|
|
||||||
|
%package -n vitastor-client
|
||||||
|
Summary: Vitastor - client
|
||||||
|
|
||||||
|
|
||||||
|
%description -n vitastor-client
|
||||||
|
Vitastor client library and command-line interface.
|
||||||
|
|
||||||
|
|
||||||
|
%package -n vitastor-client-devel
|
||||||
|
Summary: Vitastor - development files
|
||||||
|
Group: Development/Libraries
|
||||||
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
|
|
||||||
|
|
||||||
|
%description -n vitastor-client-devel
|
||||||
|
Vitastor library headers for development.
|
||||||
|
|
||||||
|
|
||||||
|
%package -n vitastor-fio
|
||||||
|
Summary: Vitastor - fio drivers
|
||||||
|
Group: Development/Libraries
|
||||||
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
|
Requires: fio = 3.27-8.el9
|
||||||
|
|
||||||
|
|
||||||
|
%description -n vitastor-fio
|
||||||
|
Vitastor fio drivers for benchmarking.
|
||||||
|
|
||||||
|
|
||||||
|
%prep
|
||||||
|
%setup -q
|
||||||
|
|
||||||
|
|
||||||
|
%build
|
||||||
|
%cmake
|
||||||
|
%cmake_build
|
||||||
|
|
||||||
|
|
||||||
|
%install
|
||||||
|
rm -rf $RPM_BUILD_ROOT
|
||||||
|
%cmake_install
|
||||||
|
cd mon
|
||||||
|
npm install
|
||||||
|
cd ..
|
||||||
|
mkdir -p %buildroot/usr/lib/vitastor
|
||||||
|
cp -r mon %buildroot/usr/lib/vitastor
|
||||||
|
mkdir -p %buildroot/lib/systemd/system
|
||||||
|
cp mon/vitastor.target mon/vitastor-mon.service mon/vitastor-osd@.service %buildroot/lib/systemd/system
|
||||||
|
mkdir -p %buildroot/lib/udev/rules.d
|
||||||
|
cp mon/90-vitastor.rules %buildroot/lib/udev/rules.d
|
||||||
|
|
||||||
|
|
||||||
|
%files
|
||||||
|
%doc GPL-2.0.txt VNPL-1.1.txt README.md README-ru.md
|
||||||
|
|
||||||
|
|
||||||
|
%files -n vitastor-osd
|
||||||
|
%_bindir/vitastor-osd
|
||||||
|
%_bindir/vitastor-disk
|
||||||
|
%_bindir/vitastor-dump-journal
|
||||||
|
/lib/systemd/system/vitastor-osd@.service
|
||||||
|
/lib/systemd/system/vitastor.target
|
||||||
|
/lib/udev/rules.d/90-vitastor.rules
|
||||||
|
|
||||||
|
|
||||||
|
%pre -n vitastor-osd
|
||||||
|
groupadd -r -f vitastor 2>/dev/null ||:
|
||||||
|
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||||
|
install -o vitastor -g vitastor -d /var/log/vitastor
|
||||||
|
mkdir -p /etc/vitastor
|
||||||
|
|
||||||
|
|
||||||
|
%files -n vitastor-mon
|
||||||
|
/usr/lib/vitastor/mon
|
||||||
|
/lib/systemd/system/vitastor-mon.service
|
||||||
|
|
||||||
|
|
||||||
|
%pre -n vitastor-mon
|
||||||
|
groupadd -r -f vitastor 2>/dev/null ||:
|
||||||
|
useradd -r -g vitastor -s /sbin/nologin -c "Vitastor daemons" -M -d /nonexistent vitastor 2>/dev/null ||:
|
||||||
|
mkdir -p /etc/vitastor
|
||||||
|
|
||||||
|
|
||||||
|
%files -n vitastor-client
|
||||||
|
%_bindir/vitastor-nbd
|
||||||
|
%_bindir/vitastor-nfs
|
||||||
|
%_bindir/vitastor-cli
|
||||||
|
%_bindir/vitastor-rm
|
||||||
|
%_bindir/vita
|
||||||
|
%_libdir/libvitastor_blk.so*
|
||||||
|
%_libdir/libvitastor_client.so*
|
||||||
|
|
||||||
|
|
||||||
|
%files -n vitastor-client-devel
|
||||||
|
%_includedir/vitastor_c.h
|
||||||
|
%_libdir/pkgconfig
|
||||||
|
|
||||||
|
|
||||||
|
%files -n vitastor-fio
|
||||||
|
%_libdir/libfio_vitastor.so
|
||||||
|
%_libdir/libfio_vitastor_blk.so
|
||||||
|
%_libdir/libfio_vitastor_sec.so
|
||||||
|
|
||||||
|
|
||||||
|
%changelog
|
@@ -1,8 +1,9 @@
|
|||||||
cmake_minimum_required(VERSION 2.8)
|
cmake_minimum_required(VERSION 2.8.12)
|
||||||
|
|
||||||
project(vitastor)
|
project(vitastor)
|
||||||
|
|
||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
|
include(CTest)
|
||||||
|
|
||||||
set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
|
set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
|
||||||
set(WITH_FIO true CACHE BOOL "Build FIO driver")
|
set(WITH_FIO true CACHE BOOL "Build FIO driver")
|
||||||
@@ -15,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
|||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_definitions(-DVERSION="0.8.3")
|
add_definitions(-DVERSION="0.9.0")
|
||||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||||
if (${WITH_ASAN})
|
if (${WITH_ASAN})
|
||||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||||
@@ -55,6 +56,14 @@ if (ISAL_LIBRARIES)
|
|||||||
add_definitions(-DWITH_ISAL)
|
add_definitions(-DWITH_ISAL)
|
||||||
endif (ISAL_LIBRARIES)
|
endif (ISAL_LIBRARIES)
|
||||||
|
|
||||||
|
add_custom_target(build_tests)
|
||||||
|
add_custom_target(test
|
||||||
|
COMMAND
|
||||||
|
echo leak:tcmalloc > ${CMAKE_CURRENT_BINARY_DIR}/lsan-suppress.txt &&
|
||||||
|
env LSAN_OPTIONS=suppressions=${CMAKE_CURRENT_BINARY_DIR}/lsan-suppress.txt ${CMAKE_CTEST_COMMAND}
|
||||||
|
)
|
||||||
|
add_dependencies(test build_tests)
|
||||||
|
|
||||||
include_directories(
|
include_directories(
|
||||||
../
|
../
|
||||||
/usr/include/jerasure
|
/usr/include/jerasure
|
||||||
@@ -102,7 +111,7 @@ target_compile_options(vitastor_common PUBLIC -fPIC)
|
|||||||
add_executable(vitastor-osd
|
add_executable(vitastor-osd
|
||||||
osd_main.cpp osd.cpp osd_secondary.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
|
osd_main.cpp osd.cpp osd_secondary.cpp osd_peering.cpp osd_flush.cpp osd_peering_pg.cpp
|
||||||
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
|
osd_primary.cpp osd_primary_chain.cpp osd_primary_sync.cpp osd_primary_write.cpp osd_primary_subops.cpp
|
||||||
osd_cluster.cpp osd_rmw.cpp
|
osd_cluster.cpp osd_rmw.cpp osd_scrub.cpp osd_primary_describe.cpp
|
||||||
)
|
)
|
||||||
target_link_libraries(vitastor-osd
|
target_link_libraries(vitastor-osd
|
||||||
vitastor_common
|
vitastor_common
|
||||||
@@ -132,6 +141,8 @@ add_library(vitastor_client SHARED
|
|||||||
cli_common.cpp
|
cli_common.cpp
|
||||||
cli_alloc_osd.cpp
|
cli_alloc_osd.cpp
|
||||||
cli_status.cpp
|
cli_status.cpp
|
||||||
|
cli_describe.cpp
|
||||||
|
cli_fix.cpp
|
||||||
cli_df.cpp
|
cli_df.cpp
|
||||||
cli_ls.cpp
|
cli_ls.cpp
|
||||||
cli_create.cpp
|
cli_create.cpp
|
||||||
@@ -145,7 +156,6 @@ add_library(vitastor_client SHARED
|
|||||||
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
|
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
|
||||||
target_link_libraries(vitastor_client
|
target_link_libraries(vitastor_client
|
||||||
vitastor_common
|
vitastor_common
|
||||||
tcmalloc_minimal
|
|
||||||
${LIBURING_LIBRARIES}
|
${LIBURING_LIBRARIES}
|
||||||
${IBVERBS_LIBRARIES}
|
${IBVERBS_LIBRARIES}
|
||||||
)
|
)
|
||||||
@@ -235,14 +245,17 @@ add_executable(osd_test osd_test.cpp rw_blocking.cpp addr_util.cpp)
|
|||||||
target_link_libraries(osd_test tcmalloc_minimal)
|
target_link_libraries(osd_test tcmalloc_minimal)
|
||||||
|
|
||||||
# osd_rmw_test
|
# osd_rmw_test
|
||||||
# FIXME: Move to tests
|
add_executable(osd_rmw_test EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
|
||||||
add_executable(osd_rmw_test osd_rmw_test.cpp allocator.cpp)
|
|
||||||
target_link_libraries(osd_rmw_test Jerasure ${ISAL_LIBRARIES} tcmalloc_minimal)
|
target_link_libraries(osd_rmw_test Jerasure ${ISAL_LIBRARIES} tcmalloc_minimal)
|
||||||
|
add_dependencies(build_tests osd_rmw_test)
|
||||||
|
add_test(NAME osd_rmw_test COMMAND osd_rmw_test)
|
||||||
|
|
||||||
if (ISAL_LIBRARIES)
|
if (ISAL_LIBRARIES)
|
||||||
add_executable(osd_rmw_test_je osd_rmw_test.cpp allocator.cpp)
|
add_executable(osd_rmw_test_je EXCLUDE_FROM_ALL osd_rmw_test.cpp allocator.cpp)
|
||||||
target_compile_definitions(osd_rmw_test_je PUBLIC -DNO_ISAL)
|
target_compile_definitions(osd_rmw_test_je PUBLIC -DNO_ISAL)
|
||||||
target_link_libraries(osd_rmw_test_je Jerasure tcmalloc_minimal)
|
target_link_libraries(osd_rmw_test_je Jerasure tcmalloc_minimal)
|
||||||
|
add_dependencies(build_tests osd_rmw_test_je)
|
||||||
|
add_test(NAME osd_rmw_test_jerasure COMMAND osd_rmw_test_je)
|
||||||
endif (ISAL_LIBRARIES)
|
endif (ISAL_LIBRARIES)
|
||||||
|
|
||||||
# stub_uring_osd
|
# stub_uring_osd
|
||||||
@@ -257,11 +270,15 @@ target_link_libraries(stub_uring_osd
|
|||||||
)
|
)
|
||||||
|
|
||||||
# osd_peering_pg_test
|
# osd_peering_pg_test
|
||||||
add_executable(osd_peering_pg_test osd_peering_pg_test.cpp osd_peering_pg.cpp)
|
add_executable(osd_peering_pg_test EXCLUDE_FROM_ALL osd_peering_pg_test.cpp osd_peering_pg.cpp)
|
||||||
target_link_libraries(osd_peering_pg_test tcmalloc_minimal)
|
target_link_libraries(osd_peering_pg_test tcmalloc_minimal)
|
||||||
|
add_dependencies(build_tests osd_peering_pg_test)
|
||||||
|
add_test(NAME osd_peering_pg_test COMMAND osd_peering_pg_test)
|
||||||
|
|
||||||
# test_allocator
|
# test_allocator
|
||||||
add_executable(test_allocator test_allocator.cpp allocator.cpp)
|
add_executable(test_allocator EXCLUDE_FROM_ALL test_allocator.cpp allocator.cpp)
|
||||||
|
add_dependencies(build_tests test_allocator)
|
||||||
|
add_test(NAME test_allocator COMMAND test_allocator)
|
||||||
|
|
||||||
# test_cas
|
# test_cas
|
||||||
add_executable(test_cas
|
add_executable(test_cas
|
||||||
@@ -281,12 +298,15 @@ target_link_libraries(test_crc32
|
|||||||
|
|
||||||
# test_cluster_client
|
# test_cluster_client
|
||||||
add_executable(test_cluster_client
|
add_executable(test_cluster_client
|
||||||
|
EXCLUDE_FROM_ALL
|
||||||
test_cluster_client.cpp
|
test_cluster_client.cpp
|
||||||
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
|
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
|
||||||
etcd_state_client.cpp timerfd_manager.cpp ../json11/json11.cpp
|
etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
|
||||||
)
|
)
|
||||||
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
|
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
|
||||||
target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mock)
|
target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mock)
|
||||||
|
add_dependencies(build_tests test_cluster_client)
|
||||||
|
add_test(NAME test_cluster_client COMMAND test_cluster_client)
|
||||||
|
|
||||||
## test_blockstore, test_shit
|
## test_blockstore, test_shit
|
||||||
#add_executable(test_blockstore test_blockstore.cpp)
|
#add_executable(test_blockstore test_blockstore.cpp)
|
||||||
|
@@ -13,6 +13,11 @@ blockstore_t::~blockstore_t()
|
|||||||
delete impl;
|
delete impl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void blockstore_t::parse_config(blockstore_config_t & config)
|
||||||
|
{
|
||||||
|
impl->parse_config(config, false);
|
||||||
|
}
|
||||||
|
|
||||||
void blockstore_t::loop()
|
void blockstore_t::loop()
|
||||||
{
|
{
|
||||||
impl->loop();
|
impl->loop();
|
||||||
|
@@ -73,7 +73,10 @@ Input:
|
|||||||
write request is copied into the metadata area bitwise and stored there.
|
write request is copied into the metadata area bitwise and stored there.
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
|
- retval = number of bytes actually read/written or negative error number
|
||||||
|
-EINVAL = invalid input parameters
|
||||||
|
-ENOENT = requested object/version does not exist for reads
|
||||||
|
-ENOSPC = no space left in the store for writes
|
||||||
- version = the version actually read or written
|
- version = the version actually read or written
|
||||||
|
|
||||||
## BS_OP_DELETE
|
## BS_OP_DELETE
|
||||||
@@ -107,7 +110,7 @@ Input:
|
|||||||
- buf = pre-allocated obj_ver_id array <len> units long
|
- buf = pre-allocated obj_ver_id array <len> units long
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- retval = 0 or negative error number (-EINVAL, -ENOENT if no such version or -EBUSY if not synced)
|
- retval = 0 or negative error number (-ENOENT if no such version for stabilize)
|
||||||
|
|
||||||
## BS_OP_SYNC_STAB_ALL
|
## BS_OP_SYNC_STAB_ALL
|
||||||
|
|
||||||
@@ -122,11 +125,14 @@ Output:
|
|||||||
Get a list of all objects in this Blockstore.
|
Get a list of all objects in this Blockstore.
|
||||||
|
|
||||||
Input:
|
Input:
|
||||||
- oid.stripe = PG alignment
|
- pg_alignment = PG alignment
|
||||||
- len = PG count or 0 to list all objects
|
- pg_count = PG count or 0 to list all objects
|
||||||
- offset = PG number
|
- pg_number = PG number
|
||||||
- oid.inode = min inode number or 0 to list all inodes
|
- list_stable_limit = max number of clean objects in the reply
|
||||||
- version = max inode number or 0 to list all inodes
|
it's guaranteed that dirty objects are returned from the same interval,
|
||||||
|
i.e. from (min_oid .. min(max_oid, max(returned stable OIDs)))
|
||||||
|
- min_oid = min inode/stripe or 0 to list all objects
|
||||||
|
- max_oid = max inode/stripe or 0 to list all objects
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- retval = total obj_ver_id count
|
- retval = total obj_ver_id count
|
||||||
@@ -143,10 +149,27 @@ struct blockstore_op_t
|
|||||||
uint64_t opcode;
|
uint64_t opcode;
|
||||||
// finish callback
|
// finish callback
|
||||||
std::function<void (blockstore_op_t*)> callback;
|
std::function<void (blockstore_op_t*)> callback;
|
||||||
object_id oid;
|
union __attribute__((__packed__))
|
||||||
uint64_t version;
|
{
|
||||||
uint32_t offset;
|
// R/W
|
||||||
uint32_t len;
|
struct __attribute__((__packed__))
|
||||||
|
{
|
||||||
|
object_id oid;
|
||||||
|
uint64_t version;
|
||||||
|
uint32_t offset;
|
||||||
|
uint32_t len;
|
||||||
|
};
|
||||||
|
// List
|
||||||
|
struct __attribute__((__packed__))
|
||||||
|
{
|
||||||
|
object_id min_oid;
|
||||||
|
object_id max_oid;
|
||||||
|
uint32_t pg_alignment;
|
||||||
|
uint32_t pg_count;
|
||||||
|
uint32_t pg_number;
|
||||||
|
uint32_t list_stable_limit;
|
||||||
|
};
|
||||||
|
};
|
||||||
void *buf;
|
void *buf;
|
||||||
void *bitmap;
|
void *bitmap;
|
||||||
int retval;
|
int retval;
|
||||||
@@ -165,6 +188,9 @@ public:
|
|||||||
blockstore_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
|
blockstore_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
|
||||||
~blockstore_t();
|
~blockstore_t();
|
||||||
|
|
||||||
|
// Update configuration
|
||||||
|
void parse_config(blockstore_config_t & config);
|
||||||
|
|
||||||
// Event loop
|
// Event loop
|
||||||
void loop();
|
void loop();
|
||||||
|
|
||||||
|
@@ -932,7 +932,7 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
|
|||||||
resume_1:
|
resume_1:
|
||||||
if (!cur_sync->state)
|
if (!cur_sync->state)
|
||||||
{
|
{
|
||||||
if (flusher->syncing_flushers >= flusher->cur_flusher_count || !flusher->flush_queue.size())
|
if (flusher->syncing_flushers >= flusher->active_flushers || !flusher->flush_queue.size())
|
||||||
{
|
{
|
||||||
// Sync batch is ready. Do it.
|
// Sync batch is ready. Do it.
|
||||||
await_sqe(0);
|
await_sqe(0);
|
||||||
|
@@ -11,7 +11,7 @@ blockstore_impl_t::blockstore_impl_t(blockstore_config_t & config, ring_loop_t *
|
|||||||
ring_consumer.loop = [this]() { loop(); };
|
ring_consumer.loop = [this]() { loop(); };
|
||||||
ringloop->register_consumer(&ring_consumer);
|
ringloop->register_consumer(&ring_consumer);
|
||||||
initialized = 0;
|
initialized = 0;
|
||||||
parse_config(config);
|
parse_config(config, true);
|
||||||
zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.data_block_size);
|
zero_object = (uint8_t*)memalign_or_die(MEM_ALIGNMENT, dsk.data_block_size);
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@@ -171,7 +171,7 @@ void blockstore_impl_t::loop()
|
|||||||
// Can't submit SYNC before previous writes
|
// Can't submit SYNC before previous writes
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
wr_st = continue_sync(op, false);
|
wr_st = continue_sync(op);
|
||||||
if (wr_st != 2)
|
if (wr_st != 2)
|
||||||
{
|
{
|
||||||
has_writes = wr_st > 0 ? 1 : 2;
|
has_writes = wr_st > 0 ? 1 : 2;
|
||||||
@@ -307,6 +307,18 @@ void blockstore_impl_t::check_wait(blockstore_op_t *op)
|
|||||||
}
|
}
|
||||||
PRIV(op)->wait_for = 0;
|
PRIV(op)->wait_for = 0;
|
||||||
}
|
}
|
||||||
|
else if (PRIV(op)->wait_for == WAIT_FREE)
|
||||||
|
{
|
||||||
|
if (!data_alloc->get_free_count() && big_to_flush > 0)
|
||||||
|
{
|
||||||
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
|
printf("Still waiting for free space on the data device\n");
|
||||||
|
#endif
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
flusher->release_trim();
|
||||||
|
PRIV(op)->wait_for = 0;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw std::runtime_error("BUG: op->wait_for value is unexpected");
|
throw std::runtime_error("BUG: op->wait_for value is unexpected");
|
||||||
@@ -325,7 +337,7 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
|
|||||||
{
|
{
|
||||||
// Basic verification not passed
|
// Basic verification not passed
|
||||||
op->retval = -EINVAL;
|
op->retval = -EINVAL;
|
||||||
std::function<void (blockstore_op_t*)>(op->callback)(op);
|
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (op->opcode == BS_OP_SYNC_STAB_ALL)
|
if (op->opcode == BS_OP_SYNC_STAB_ALL)
|
||||||
@@ -368,16 +380,21 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
|
|||||||
}
|
}
|
||||||
if ((op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE || op->opcode == BS_OP_DELETE) && !enqueue_write(op))
|
if ((op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE || op->opcode == BS_OP_DELETE) && !enqueue_write(op))
|
||||||
{
|
{
|
||||||
std::function<void (blockstore_op_t*)>(op->callback)(op);
|
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
init_op(op);
|
||||||
|
submit_queue.push_back(op);
|
||||||
|
ringloop->wakeup();
|
||||||
|
}
|
||||||
|
|
||||||
|
void blockstore_impl_t::init_op(blockstore_op_t *op)
|
||||||
|
{
|
||||||
// Call constructor without allocating memory. We'll call destructor before returning op back
|
// Call constructor without allocating memory. We'll call destructor before returning op back
|
||||||
new ((void*)op->private_data) blockstore_op_private_t;
|
new ((void*)op->private_data) blockstore_op_private_t;
|
||||||
PRIV(op)->wait_for = 0;
|
PRIV(op)->wait_for = 0;
|
||||||
PRIV(op)->op_state = 0;
|
PRIV(op)->op_state = 0;
|
||||||
PRIV(op)->pending_ops = 0;
|
PRIV(op)->pending_ops = 0;
|
||||||
submit_queue.push_back(op);
|
|
||||||
ringloop->wakeup();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool replace_stable(object_id oid, uint64_t version, int search_start, int search_end, obj_ver_id* list)
|
static bool replace_stable(object_id oid, uint64_t version, int search_start, int search_end, obj_ver_id* list)
|
||||||
@@ -445,11 +462,11 @@ void blockstore_impl_t::reshard_clean_db(pool_id_t pool, uint32_t pg_count, uint
|
|||||||
|
|
||||||
void blockstore_impl_t::process_list(blockstore_op_t *op)
|
void blockstore_impl_t::process_list(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
uint32_t list_pg = op->offset+1;
|
uint32_t list_pg = op->pg_number+1;
|
||||||
uint32_t pg_count = op->len;
|
uint32_t pg_count = op->pg_count;
|
||||||
uint64_t pg_stripe_size = op->oid.stripe;
|
uint64_t pg_stripe_size = op->pg_alignment;
|
||||||
uint64_t min_inode = op->oid.inode;
|
uint64_t min_inode = op->min_oid.inode;
|
||||||
uint64_t max_inode = op->version;
|
uint64_t max_inode = op->max_oid.inode;
|
||||||
// Check PG
|
// Check PG
|
||||||
if (pg_count != 0 && (pg_stripe_size < MIN_DATA_BLOCK_SIZE || list_pg > pg_count))
|
if (pg_count != 0 && (pg_stripe_size < MIN_DATA_BLOCK_SIZE || list_pg > pg_count))
|
||||||
{
|
{
|
||||||
@@ -496,7 +513,13 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
|
|||||||
stable_alloc += clean_db.size();
|
stable_alloc += clean_db.size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
if (op->list_stable_limit > 0)
|
||||||
|
{
|
||||||
|
stable_alloc = op->list_stable_limit;
|
||||||
|
if (stable_alloc > 1024*1024)
|
||||||
|
stable_alloc = 1024*1024;
|
||||||
|
}
|
||||||
|
if (stable_alloc < 32768)
|
||||||
{
|
{
|
||||||
stable_alloc = 32768;
|
stable_alloc = 32768;
|
||||||
}
|
}
|
||||||
@@ -507,22 +530,22 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
|
|||||||
FINISH_OP(op);
|
FINISH_OP(op);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
auto max_oid = op->max_oid;
|
||||||
|
bool limited = false;
|
||||||
|
pool_pg_id_t last_shard_id = 0;
|
||||||
for (auto shard_it = clean_db_shards.lower_bound(first_shard);
|
for (auto shard_it = clean_db_shards.lower_bound(first_shard);
|
||||||
shard_it != clean_db_shards.end() && shard_it->first <= last_shard;
|
shard_it != clean_db_shards.end() && shard_it->first <= last_shard;
|
||||||
shard_it++)
|
shard_it++)
|
||||||
{
|
{
|
||||||
auto & clean_db = shard_it->second;
|
auto & clean_db = shard_it->second;
|
||||||
auto clean_it = clean_db.begin(), clean_end = clean_db.end();
|
auto clean_it = clean_db.begin(), clean_end = clean_db.end();
|
||||||
if ((min_inode != 0 || max_inode != 0) && min_inode <= max_inode)
|
if (op->min_oid.inode != 0 || op->min_oid.stripe != 0)
|
||||||
{
|
{
|
||||||
clean_it = clean_db.lower_bound({
|
clean_it = clean_db.lower_bound(op->min_oid);
|
||||||
.inode = min_inode,
|
}
|
||||||
.stripe = 0,
|
if ((max_oid.inode != 0 || max_oid.stripe != 0) && !(max_oid < op->min_oid))
|
||||||
});
|
{
|
||||||
clean_end = clean_db.upper_bound({
|
clean_end = clean_db.upper_bound(max_oid);
|
||||||
.inode = max_inode,
|
|
||||||
.stripe = UINT64_MAX,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
for (; clean_it != clean_end; clean_it++)
|
for (; clean_it != clean_end; clean_it++)
|
||||||
{
|
{
|
||||||
@@ -541,11 +564,29 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
|
|||||||
.oid = clean_it->first,
|
.oid = clean_it->first,
|
||||||
.version = clean_it->second.version,
|
.version = clean_it->second.version,
|
||||||
};
|
};
|
||||||
|
if (op->list_stable_limit > 0 && stable_count >= op->list_stable_limit)
|
||||||
|
{
|
||||||
|
if (!limited)
|
||||||
|
{
|
||||||
|
limited = true;
|
||||||
|
max_oid = stable[stable_count-1].oid;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (op->list_stable_limit > 0)
|
||||||
|
{
|
||||||
|
// To maintain the order, we have to include objects in the same range from other shards
|
||||||
|
if (last_shard_id != 0 && last_shard_id != shard_it->first)
|
||||||
|
std::sort(stable, stable+stable_count);
|
||||||
|
if (stable_count > op->list_stable_limit)
|
||||||
|
stable_count = op->list_stable_limit;
|
||||||
|
}
|
||||||
|
last_shard_id = shard_it->first;
|
||||||
}
|
}
|
||||||
if (first_shard != last_shard)
|
if (op->list_stable_limit == 0 && first_shard != last_shard)
|
||||||
{
|
{
|
||||||
// If that's not a per-PG listing, sort clean entries
|
// If that's not a per-PG listing, sort clean entries (already sorted if list_stable_limit != 0)
|
||||||
std::sort(stable, stable+stable_count);
|
std::sort(stable, stable+stable_count);
|
||||||
}
|
}
|
||||||
int clean_stable_count = stable_count;
|
int clean_stable_count = stable_count;
|
||||||
@@ -554,20 +595,17 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
|
|||||||
obj_ver_id *unstable = NULL;
|
obj_ver_id *unstable = NULL;
|
||||||
{
|
{
|
||||||
auto dirty_it = dirty_db.begin(), dirty_end = dirty_db.end();
|
auto dirty_it = dirty_db.begin(), dirty_end = dirty_db.end();
|
||||||
if ((min_inode != 0 || max_inode != 0) && min_inode <= max_inode)
|
if (op->min_oid.inode != 0 || op->min_oid.stripe != 0)
|
||||||
{
|
{
|
||||||
dirty_it = dirty_db.lower_bound({
|
dirty_it = dirty_db.lower_bound({
|
||||||
.oid = {
|
.oid = op->min_oid,
|
||||||
.inode = min_inode,
|
|
||||||
.stripe = 0,
|
|
||||||
},
|
|
||||||
.version = 0,
|
.version = 0,
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
if ((max_oid.inode != 0 || max_oid.stripe != 0) && !(max_oid < op->min_oid))
|
||||||
|
{
|
||||||
dirty_end = dirty_db.upper_bound({
|
dirty_end = dirty_db.upper_bound({
|
||||||
.oid = {
|
.oid = max_oid,
|
||||||
.inode = max_inode,
|
|
||||||
.stripe = UINT64_MAX,
|
|
||||||
},
|
|
||||||
.version = UINT64_MAX,
|
.version = UINT64_MAX,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -611,6 +649,11 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
|
|||||||
stable[stable_count++] = dirty_it->first;
|
stable[stable_count++] = dirty_it->first;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (op->list_stable_limit > 0 && stable_count >= op->list_stable_limit)
|
||||||
|
{
|
||||||
|
// Stop here
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@@ -160,6 +160,8 @@ struct __attribute__((__packed__)) dirty_entry
|
|||||||
#define WAIT_JOURNAL 3
|
#define WAIT_JOURNAL 3
|
||||||
// Suspend operation until the next journal sector buffer is free
|
// Suspend operation until the next journal sector buffer is free
|
||||||
#define WAIT_JOURNAL_BUFFER 4
|
#define WAIT_JOURNAL_BUFFER 4
|
||||||
|
// Suspend operation until there is some free space on the data device
|
||||||
|
#define WAIT_FREE 5
|
||||||
|
|
||||||
struct fulfill_read_t
|
struct fulfill_read_t
|
||||||
{
|
{
|
||||||
@@ -216,6 +218,11 @@ struct pool_shard_settings_t
|
|||||||
uint32_t pg_stripe_size;
|
uint32_t pg_stripe_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define STAB_SPLIT_DONE 1
|
||||||
|
#define STAB_SPLIT_WAIT 2
|
||||||
|
#define STAB_SPLIT_SYNC 3
|
||||||
|
#define STAB_SPLIT_TODO 4
|
||||||
|
|
||||||
class blockstore_impl_t
|
class blockstore_impl_t
|
||||||
{
|
{
|
||||||
blockstore_disk_t dsk;
|
blockstore_disk_t dsk;
|
||||||
@@ -258,6 +265,7 @@ class blockstore_impl_t
|
|||||||
|
|
||||||
struct journal_t journal;
|
struct journal_t journal;
|
||||||
journal_flusher_t *flusher;
|
journal_flusher_t *flusher;
|
||||||
|
int big_to_flush = 0;
|
||||||
int write_iodepth = 0;
|
int write_iodepth = 0;
|
||||||
|
|
||||||
bool live = false, queue_stall = false;
|
bool live = false, queue_stall = false;
|
||||||
@@ -277,7 +285,6 @@ class blockstore_impl_t
|
|||||||
friend class journal_flusher_t;
|
friend class journal_flusher_t;
|
||||||
friend class journal_flusher_co;
|
friend class journal_flusher_co;
|
||||||
|
|
||||||
void parse_config(blockstore_config_t & config);
|
|
||||||
void calc_lengths();
|
void calc_lengths();
|
||||||
void open_data();
|
void open_data();
|
||||||
void open_meta();
|
void open_meta();
|
||||||
@@ -299,6 +306,7 @@ class blockstore_impl_t
|
|||||||
blockstore_init_journal* journal_init_reader;
|
blockstore_init_journal* journal_init_reader;
|
||||||
|
|
||||||
void check_wait(blockstore_op_t *op);
|
void check_wait(blockstore_op_t *op);
|
||||||
|
void init_op(blockstore_op_t *op);
|
||||||
|
|
||||||
// Read
|
// Read
|
||||||
int dequeue_read(blockstore_op_t *read_op);
|
int dequeue_read(blockstore_op_t *read_op);
|
||||||
@@ -318,7 +326,7 @@ class blockstore_impl_t
|
|||||||
void handle_write_event(ring_data_t *data, blockstore_op_t *op);
|
void handle_write_event(ring_data_t *data, blockstore_op_t *op);
|
||||||
|
|
||||||
// Sync
|
// Sync
|
||||||
int continue_sync(blockstore_op_t *op, bool queue_has_in_progress_sync);
|
int continue_sync(blockstore_op_t *op);
|
||||||
void ack_sync(blockstore_op_t *op);
|
void ack_sync(blockstore_op_t *op);
|
||||||
|
|
||||||
// Stabilize
|
// Stabilize
|
||||||
@@ -326,6 +334,8 @@ class blockstore_impl_t
|
|||||||
int continue_stable(blockstore_op_t *op);
|
int continue_stable(blockstore_op_t *op);
|
||||||
void mark_stable(const obj_ver_id & ov, bool forget_dirty = false);
|
void mark_stable(const obj_ver_id & ov, bool forget_dirty = false);
|
||||||
void stabilize_object(object_id oid, uint64_t max_ver);
|
void stabilize_object(object_id oid, uint64_t max_ver);
|
||||||
|
blockstore_op_t* selective_sync(blockstore_op_t *op);
|
||||||
|
int split_stab_op(blockstore_op_t *op, std::function<int(obj_ver_id v)> decider);
|
||||||
|
|
||||||
// Rollback
|
// Rollback
|
||||||
int dequeue_rollback(blockstore_op_t *op);
|
int dequeue_rollback(blockstore_op_t *op);
|
||||||
@@ -341,6 +351,8 @@ public:
|
|||||||
blockstore_impl_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
|
blockstore_impl_t(blockstore_config_t & config, ring_loop_t *ringloop, timerfd_manager_t *tfd);
|
||||||
~blockstore_impl_t();
|
~blockstore_impl_t();
|
||||||
|
|
||||||
|
void parse_config(blockstore_config_t & config, bool init);
|
||||||
|
|
||||||
// Event loop
|
// Event loop
|
||||||
void loop();
|
void loop();
|
||||||
|
|
||||||
|
@@ -4,8 +4,54 @@
|
|||||||
#include <sys/file.h>
|
#include <sys/file.h>
|
||||||
#include "blockstore_impl.h"
|
#include "blockstore_impl.h"
|
||||||
|
|
||||||
void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
|
||||||
{
|
{
|
||||||
|
// Online-configurable options:
|
||||||
|
max_flusher_count = strtoull(config["max_flusher_count"].c_str(), NULL, 10);
|
||||||
|
if (!max_flusher_count)
|
||||||
|
{
|
||||||
|
max_flusher_count = strtoull(config["flusher_count"].c_str(), NULL, 10);
|
||||||
|
}
|
||||||
|
min_flusher_count = strtoull(config["min_flusher_count"].c_str(), NULL, 10);
|
||||||
|
max_write_iodepth = strtoull(config["max_write_iodepth"].c_str(), NULL, 10);
|
||||||
|
throttle_small_writes = config["throttle_small_writes"] == "true" || config["throttle_small_writes"] == "1" || config["throttle_small_writes"] == "yes";
|
||||||
|
throttle_target_iops = strtoull(config["throttle_target_iops"].c_str(), NULL, 10);
|
||||||
|
throttle_target_mbs = strtoull(config["throttle_target_mbs"].c_str(), NULL, 10);
|
||||||
|
throttle_target_parallelism = strtoull(config["throttle_target_parallelism"].c_str(), NULL, 10);
|
||||||
|
throttle_threshold_us = strtoull(config["throttle_threshold_us"].c_str(), NULL, 10);
|
||||||
|
if (!max_flusher_count)
|
||||||
|
{
|
||||||
|
max_flusher_count = 256;
|
||||||
|
}
|
||||||
|
if (!min_flusher_count || journal.flush_journal)
|
||||||
|
{
|
||||||
|
min_flusher_count = 1;
|
||||||
|
}
|
||||||
|
if (!max_write_iodepth)
|
||||||
|
{
|
||||||
|
max_write_iodepth = 128;
|
||||||
|
}
|
||||||
|
if (!throttle_target_iops)
|
||||||
|
{
|
||||||
|
throttle_target_iops = 100;
|
||||||
|
}
|
||||||
|
if (!throttle_target_mbs)
|
||||||
|
{
|
||||||
|
throttle_target_mbs = 100;
|
||||||
|
}
|
||||||
|
if (!throttle_target_parallelism)
|
||||||
|
{
|
||||||
|
throttle_target_parallelism = 1;
|
||||||
|
}
|
||||||
|
if (!throttle_threshold_us)
|
||||||
|
{
|
||||||
|
throttle_threshold_us = 50;
|
||||||
|
}
|
||||||
|
if (!init)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Offline-configurable options:
|
||||||
// Common disk options
|
// Common disk options
|
||||||
dsk.parse_config(config);
|
dsk.parse_config(config);
|
||||||
// Parse
|
// Parse
|
||||||
@@ -44,29 +90,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
|||||||
journal.no_same_sector_overwrites = config["journal_no_same_sector_overwrites"] == "true" ||
|
journal.no_same_sector_overwrites = config["journal_no_same_sector_overwrites"] == "true" ||
|
||||||
config["journal_no_same_sector_overwrites"] == "1" || config["journal_no_same_sector_overwrites"] == "yes";
|
config["journal_no_same_sector_overwrites"] == "1" || config["journal_no_same_sector_overwrites"] == "yes";
|
||||||
journal.inmemory = config["inmemory_journal"] != "false";
|
journal.inmemory = config["inmemory_journal"] != "false";
|
||||||
max_flusher_count = strtoull(config["max_flusher_count"].c_str(), NULL, 10);
|
|
||||||
if (!max_flusher_count)
|
|
||||||
max_flusher_count = strtoull(config["flusher_count"].c_str(), NULL, 10);
|
|
||||||
min_flusher_count = strtoull(config["min_flusher_count"].c_str(), NULL, 10);
|
|
||||||
max_write_iodepth = strtoull(config["max_write_iodepth"].c_str(), NULL, 10);
|
|
||||||
throttle_small_writes = config["throttle_small_writes"] == "true" || config["throttle_small_writes"] == "1" || config["throttle_small_writes"] == "yes";
|
|
||||||
throttle_target_iops = strtoull(config["throttle_target_iops"].c_str(), NULL, 10);
|
|
||||||
throttle_target_mbs = strtoull(config["throttle_target_mbs"].c_str(), NULL, 10);
|
|
||||||
throttle_target_parallelism = strtoull(config["throttle_target_parallelism"].c_str(), NULL, 10);
|
|
||||||
throttle_threshold_us = strtoull(config["throttle_threshold_us"].c_str(), NULL, 10);
|
|
||||||
// Validate
|
// Validate
|
||||||
if (!max_flusher_count)
|
|
||||||
{
|
|
||||||
max_flusher_count = 256;
|
|
||||||
}
|
|
||||||
if (!min_flusher_count || journal.flush_journal)
|
|
||||||
{
|
|
||||||
min_flusher_count = 1;
|
|
||||||
}
|
|
||||||
if (!max_write_iodepth)
|
|
||||||
{
|
|
||||||
max_write_iodepth = 128;
|
|
||||||
}
|
|
||||||
if (journal.sector_count < 2)
|
if (journal.sector_count < 2)
|
||||||
{
|
{
|
||||||
journal.sector_count = 32;
|
journal.sector_count = 32;
|
||||||
@@ -91,22 +115,6 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
|||||||
{
|
{
|
||||||
throw std::runtime_error("immediate_commit=all requires disable_journal_fsync and disable_data_fsync");
|
throw std::runtime_error("immediate_commit=all requires disable_journal_fsync and disable_data_fsync");
|
||||||
}
|
}
|
||||||
if (!throttle_target_iops)
|
|
||||||
{
|
|
||||||
throttle_target_iops = 100;
|
|
||||||
}
|
|
||||||
if (!throttle_target_mbs)
|
|
||||||
{
|
|
||||||
throttle_target_mbs = 100;
|
|
||||||
}
|
|
||||||
if (!throttle_target_parallelism)
|
|
||||||
{
|
|
||||||
throttle_target_parallelism = 1;
|
|
||||||
}
|
|
||||||
if (!throttle_threshold_us)
|
|
||||||
{
|
|
||||||
throttle_threshold_us = 50;
|
|
||||||
}
|
|
||||||
// init some fields
|
// init some fields
|
||||||
journal.block_size = dsk.journal_block_size;
|
journal.block_size = dsk.journal_block_size;
|
||||||
journal.next_free = dsk.journal_block_size;
|
journal.next_free = dsk.journal_block_size;
|
||||||
|
@@ -124,10 +124,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
|||||||
bool dirty_found = (dirty_it != dirty_db.end() && dirty_it->first.oid == read_op->oid);
|
bool dirty_found = (dirty_it != dirty_db.end() && dirty_it->first.oid == read_op->oid);
|
||||||
if (!clean_found && !dirty_found)
|
if (!clean_found && !dirty_found)
|
||||||
{
|
{
|
||||||
// region is not allocated - return zeroes
|
|
||||||
memset(read_op->buf, 0, read_op->len);
|
|
||||||
read_op->version = 0;
|
read_op->version = 0;
|
||||||
read_op->retval = read_op->len;
|
read_op->retval = -ENOENT;
|
||||||
FINISH_OP(read_op);
|
FINISH_OP(read_op);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
@@ -142,12 +140,18 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
|||||||
bool version_ok = !IS_IN_FLIGHT(dirty.state) && read_op->version >= dirty_it->first.version;
|
bool version_ok = !IS_IN_FLIGHT(dirty.state) && read_op->version >= dirty_it->first.version;
|
||||||
if (IS_SYNCED(dirty.state))
|
if (IS_SYNCED(dirty.state))
|
||||||
{
|
{
|
||||||
if (!version_ok && read_op->version != 0)
|
|
||||||
read_op->version = dirty_it->first.version;
|
|
||||||
version_ok = true;
|
version_ok = true;
|
||||||
}
|
}
|
||||||
if (version_ok)
|
if (version_ok)
|
||||||
{
|
{
|
||||||
|
if (IS_DELETE(dirty.state))
|
||||||
|
{
|
||||||
|
assert(!result_version);
|
||||||
|
read_op->version = 0;
|
||||||
|
read_op->retval = -ENOENT;
|
||||||
|
FINISH_OP(read_op);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
if (!result_version)
|
if (!result_version)
|
||||||
{
|
{
|
||||||
result_version = dirty_it->first.version;
|
result_version = dirty_it->first.version;
|
||||||
@@ -234,12 +238,19 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (fulfilled < read_op->len)
|
if (!result_version)
|
||||||
{
|
{
|
||||||
// fill remaining parts with zeroes
|
// May happen if there are entries in dirty_db but all of them are !version_ok
|
||||||
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
|
read_op->version = 0;
|
||||||
|
read_op->retval = -ENOENT;
|
||||||
|
FINISH_OP(read_op);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
if (fulfilled < read_op->len)
|
||||||
|
{
|
||||||
|
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
|
||||||
|
assert(fulfilled == read_op->len);
|
||||||
}
|
}
|
||||||
assert(fulfilled == read_op->len);
|
|
||||||
read_op->version = result_version;
|
read_op->version = result_version;
|
||||||
if (!PRIV(read_op)->pending_ops)
|
if (!PRIV(read_op)->pending_ops)
|
||||||
{
|
{
|
||||||
|
@@ -9,48 +9,39 @@ int blockstore_impl_t::dequeue_rollback(blockstore_op_t *op)
|
|||||||
{
|
{
|
||||||
return continue_rollback(op);
|
return continue_rollback(op);
|
||||||
}
|
}
|
||||||
obj_ver_id *v, *nv;
|
int r = split_stab_op(op, [this](obj_ver_id ov)
|
||||||
int i, todo = op->len;
|
|
||||||
for (i = 0, v = (obj_ver_id*)op->buf, nv = (obj_ver_id*)op->buf; i < op->len; i++, v++, nv++)
|
|
||||||
{
|
{
|
||||||
if (nv != v)
|
|
||||||
{
|
|
||||||
*nv = *v;
|
|
||||||
}
|
|
||||||
// Check that there are some versions greater than v->version (which may be zero),
|
// Check that there are some versions greater than v->version (which may be zero),
|
||||||
// check that they're unstable, synced, and not currently written to
|
// check that they're unstable, synced, and not currently written to
|
||||||
auto dirty_it = dirty_db.lower_bound((obj_ver_id){
|
auto dirty_it = dirty_db.lower_bound((obj_ver_id){
|
||||||
.oid = v->oid,
|
.oid = ov.oid,
|
||||||
.version = UINT64_MAX,
|
.version = UINT64_MAX,
|
||||||
});
|
});
|
||||||
if (dirty_it == dirty_db.begin())
|
if (dirty_it == dirty_db.begin())
|
||||||
{
|
{
|
||||||
skip_ov:
|
|
||||||
// Already rolled back, skip this object version
|
// Already rolled back, skip this object version
|
||||||
todo--;
|
return STAB_SPLIT_DONE;
|
||||||
nv--;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
dirty_it--;
|
dirty_it--;
|
||||||
if (dirty_it->first.oid != v->oid || dirty_it->first.version < v->version)
|
if (dirty_it->first.oid != ov.oid || dirty_it->first.version < ov.version)
|
||||||
{
|
{
|
||||||
goto skip_ov;
|
// Already rolled back, skip this object version
|
||||||
|
return STAB_SPLIT_DONE;
|
||||||
}
|
}
|
||||||
while (dirty_it->first.oid == v->oid && dirty_it->first.version > v->version)
|
while (dirty_it->first.oid == ov.oid && dirty_it->first.version > ov.version)
|
||||||
{
|
{
|
||||||
if (IS_IN_FLIGHT(dirty_it->second.state))
|
if (IS_IN_FLIGHT(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
// Object write is still in progress. Wait until the write request completes
|
// Object write is still in progress. Wait until the write request completes
|
||||||
return 0;
|
return STAB_SPLIT_WAIT;
|
||||||
}
|
}
|
||||||
else if (!IS_SYNCED(dirty_it->second.state) ||
|
else if (!IS_SYNCED(dirty_it->second.state) ||
|
||||||
IS_STABLE(dirty_it->second.state))
|
IS_STABLE(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
op->retval = -EBUSY;
|
// Sync the object
|
||||||
FINISH_OP(op);
|
return STAB_SPLIT_SYNC;
|
||||||
return 2;
|
|
||||||
}
|
}
|
||||||
if (dirty_it == dirty_db.begin())
|
if (dirty_it == dirty_db.begin())
|
||||||
{
|
{
|
||||||
@@ -58,19 +49,16 @@ skip_ov:
|
|||||||
}
|
}
|
||||||
dirty_it--;
|
dirty_it--;
|
||||||
}
|
}
|
||||||
|
return STAB_SPLIT_TODO;
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
op->len = todo;
|
if (r != 1)
|
||||||
if (!todo)
|
|
||||||
{
|
{
|
||||||
// Already rolled back
|
return r;
|
||||||
op->retval = 0;
|
|
||||||
FINISH_OP(op);
|
|
||||||
return 2;
|
|
||||||
}
|
}
|
||||||
// Check journal space
|
// Check journal space
|
||||||
blockstore_journal_check_t space_check(this);
|
blockstore_journal_check_t space_check(this);
|
||||||
if (!space_check.check_available(op, todo, sizeof(journal_entry_rollback), 0))
|
if (!space_check.check_available(op, op->len, sizeof(journal_entry_rollback), 0))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -78,7 +66,8 @@ skip_ov:
|
|||||||
BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
|
BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
|
||||||
// Prepare and submit journal entries
|
// Prepare and submit journal entries
|
||||||
int s = 0;
|
int s = 0;
|
||||||
for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
|
auto v = (obj_ver_id*)op->buf;
|
||||||
|
for (int i = 0; i < op->len; i++, v++)
|
||||||
{
|
{
|
||||||
if (!journal.entry_fits(sizeof(journal_entry_rollback)) &&
|
if (!journal.entry_fits(sizeof(journal_entry_rollback)) &&
|
||||||
journal.sector_info[journal.cur_sector].dirty)
|
journal.sector_info[journal.cur_sector].dirty)
|
||||||
@@ -212,6 +201,11 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
|||||||
}
|
}
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
|
if ((IS_BIG_WRITE(dirty_it->second.state) || IS_DELETE(dirty_it->second.state)) &&
|
||||||
|
IS_STABLE(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
big_to_flush--;
|
||||||
|
}
|
||||||
if (IS_BIG_WRITE(dirty_it->second.state) && dirty_it->second.location != clean_loc &&
|
if (IS_BIG_WRITE(dirty_it->second.state) && dirty_it->second.location != clean_loc &&
|
||||||
dirty_it->second.location != UINT64_MAX)
|
dirty_it->second.location != UINT64_MAX)
|
||||||
{
|
{
|
||||||
|
@@ -41,60 +41,309 @@
|
|||||||
// 4) after a while it takes his synced object list and sends stabilize requests
|
// 4) after a while it takes his synced object list and sends stabilize requests
|
||||||
// to peers and to its own blockstore, thus freeing the old version
|
// to peers and to its own blockstore, thus freeing the old version
|
||||||
|
|
||||||
int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
|
struct ver_vector_t
|
||||||
{
|
{
|
||||||
if (PRIV(op)->op_state)
|
obj_ver_id *items = NULL;
|
||||||
|
uint64_t alloc = 0, size = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void init_versions(ver_vector_t & vec, obj_ver_id *start, obj_ver_id *end, uint64_t len)
|
||||||
|
{
|
||||||
|
if (!vec.items)
|
||||||
{
|
{
|
||||||
return continue_stable(op);
|
vec.alloc = len;
|
||||||
|
vec.items = (obj_ver_id*)malloc_or_die(sizeof(obj_ver_id) * vec.alloc);
|
||||||
|
for (auto sv = start; sv < end; sv++)
|
||||||
|
{
|
||||||
|
vec.items[vec.size++] = *sv;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append <ov> to the end of <vec>, enlarging the backing array when it is full.
// An empty vector gets room for 4 entries; afterwards the capacity doubles.
static void append_version(ver_vector_t & vec, obj_ver_id ov)
{
    const bool is_full = vec.size >= vec.alloc;
    if (is_full)
    {
        if (vec.alloc)
            vec.alloc *= 2;
        else
            vec.alloc = 4;
        vec.items = (obj_ver_id*)realloc_or_die(vec.items, sizeof(obj_ver_id) * vec.alloc);
    }
    vec.items[vec.size] = ov;
    vec.size++;
}
|
||||||
|
|
||||||
|
static bool check_unsynced(std::vector<obj_ver_id> & check, obj_ver_id ov, std::vector<obj_ver_id> & to, int *count)
|
||||||
|
{
|
||||||
|
bool found = false;
|
||||||
|
int j = 0, k = 0;
|
||||||
|
while (j < check.size())
|
||||||
|
{
|
||||||
|
if (check[j] == ov)
|
||||||
|
found = true;
|
||||||
|
if (check[j].oid == ov.oid && check[j].version <= ov.version)
|
||||||
|
{
|
||||||
|
to.push_back(check[j++]);
|
||||||
|
if (count)
|
||||||
|
(*count)--;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
check[k++] = check[j++];
|
||||||
|
}
|
||||||
|
check.resize(k);
|
||||||
|
return found;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start a SYNC that covers only the writes listed in PRIV(op)->sync_big_writes
// and PRIV(op)->sync_small_writes.
// The global unsynced write lists are temporarily exchanged with the op's lists
// so that continue_sync() picks up exactly those entries, and are exchanged
// back afterwards.
// Returns the new sync operation, or NULL when continue_sync() returned 2
// (sync completed immediately).
blockstore_op_t* blockstore_impl_t::selective_sync(blockstore_op_t *op)
{
    // Exchange the global unsynced lists with the op's selected lists,
    // keeping unsynced_big_write_count in step with unsynced_big_writes
    auto exchange_unsynced = [this, op]()
    {
        unsynced_big_write_count -= unsynced_big_writes.size();
        unsynced_big_writes.swap(PRIV(op)->sync_big_writes);
        unsynced_big_write_count += unsynced_big_writes.size();
        unsynced_small_writes.swap(PRIV(op)->sync_small_writes);
    };
    exchange_unsynced();
    // Create a sync operation, insert into the end of the queue
    // And move ourselves into the end too!
    // Rather hacky but that's what we need...
    blockstore_op_t *new_sync = new blockstore_op_t;
    new_sync->opcode = BS_OP_SYNC;
    new_sync->buf = NULL;
    // Default callback just frees the op
    new_sync->callback = [](blockstore_op_t *finished_op)
    {
        delete finished_op;
    };
    init_op(new_sync);
    const int sync_res = continue_sync(new_sync);
    const bool completed = (sync_res == 2);
    if (!completed)
    {
        // Put SYNC into the queue if it's not finished yet
        submit_queue.push_back(new_sync);
    }
    // Restore unsynced_writes
    exchange_unsynced();
    // NULL means the sync is immediately completed
    return completed ? NULL : new_sync;
}
|
||||||
|
|
||||||
|
// Returns: 2 = stop processing and dequeue, 0 = stop processing and do not dequeue, 1 = proceed with op itself
|
||||||
|
int blockstore_impl_t::split_stab_op(blockstore_op_t *op, std::function<int(obj_ver_id v)> decider)
|
||||||
|
{
|
||||||
|
bool add_sync = false;
|
||||||
|
ver_vector_t good_vers, bad_vers;
|
||||||
obj_ver_id* v;
|
obj_ver_id* v;
|
||||||
int i, todo = 0;
|
int i, todo = 0;
|
||||||
for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
|
for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
|
||||||
{
|
{
|
||||||
auto dirty_it = dirty_db.find(*v);
|
int action = decider(*v);
|
||||||
if (dirty_it == dirty_db.end())
|
if (action < 0)
|
||||||
{
|
{
|
||||||
auto & clean_db = clean_db_shard(v->oid);
|
// Rollback changes
|
||||||
auto clean_it = clean_db.find(v->oid);
|
for (auto & ov: PRIV(op)->sync_big_writes)
|
||||||
if (clean_it == clean_db.end() || clean_it->second.version < v->version)
|
|
||||||
{
|
{
|
||||||
// No such object version
|
unsynced_big_writes.push_back(ov);
|
||||||
op->retval = -ENOENT;
|
unsynced_big_write_count++;
|
||||||
FINISH_OP(op);
|
|
||||||
return 2;
|
|
||||||
}
|
}
|
||||||
else
|
for (auto & ov: PRIV(op)->sync_small_writes)
|
||||||
{
|
{
|
||||||
// Already stable
|
unsynced_small_writes.push_back(ov);
|
||||||
}
|
}
|
||||||
}
|
free(good_vers.items);
|
||||||
else if (IS_IN_FLIGHT(dirty_it->second.state))
|
good_vers.items = NULL;
|
||||||
{
|
free(bad_vers.items);
|
||||||
// Object write is still in progress. Wait until the write request completes
|
bad_vers.items = NULL;
|
||||||
return 0;
|
// Error
|
||||||
}
|
op->retval = action;
|
||||||
else if (!IS_SYNCED(dirty_it->second.state))
|
|
||||||
{
|
|
||||||
// Object not synced yet. Caller must sync it first
|
|
||||||
op->retval = -EBUSY;
|
|
||||||
FINISH_OP(op);
|
FINISH_OP(op);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
else if (!IS_STABLE(dirty_it->second.state))
|
else if (action == STAB_SPLIT_DONE)
|
||||||
{
|
{
|
||||||
|
// Already done
|
||||||
|
init_versions(good_vers, (obj_ver_id*)op->buf, v, op->len);
|
||||||
|
}
|
||||||
|
else if (action == STAB_SPLIT_WAIT)
|
||||||
|
{
|
||||||
|
// Already in progress, we just have to wait until it finishes
|
||||||
|
init_versions(good_vers, (obj_ver_id*)op->buf, v, op->len);
|
||||||
|
append_version(bad_vers, *v);
|
||||||
|
}
|
||||||
|
else if (action == STAB_SPLIT_SYNC)
|
||||||
|
{
|
||||||
|
// Needs a SYNC, we have to send a SYNC if not already in progress
|
||||||
|
//
|
||||||
|
// If the object is not present in unsynced_(big|small)_writes then
|
||||||
|
// it's currently being synced. If it's present then we can initiate
|
||||||
|
// its sync ourselves.
|
||||||
|
init_versions(good_vers, (obj_ver_id*)op->buf, v, op->len);
|
||||||
|
append_version(bad_vers, *v);
|
||||||
|
if (!add_sync)
|
||||||
|
{
|
||||||
|
PRIV(op)->sync_big_writes.clear();
|
||||||
|
PRIV(op)->sync_small_writes.clear();
|
||||||
|
add_sync = true;
|
||||||
|
}
|
||||||
|
check_unsynced(unsynced_small_writes, *v, PRIV(op)->sync_small_writes, NULL);
|
||||||
|
check_unsynced(unsynced_big_writes, *v, PRIV(op)->sync_big_writes, &unsynced_big_write_count);
|
||||||
|
}
|
||||||
|
else /* if (action == STAB_SPLIT_TODO) */
|
||||||
|
{
|
||||||
|
if (good_vers.items)
|
||||||
|
{
|
||||||
|
// If we're selecting versions then append it
|
||||||
|
// Main idea is that 99% of the time all versions passed to BS_OP_STABLE are synced
|
||||||
|
// And we don't want to select/allocate anything in that optimistic case
|
||||||
|
append_version(good_vers, *v);
|
||||||
|
}
|
||||||
todo++;
|
todo++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!todo)
|
// In a pessimistic scenario, an operation may be split into 3:
|
||||||
|
// - Stabilize synced entries
|
||||||
|
// - Sync unsynced entries
|
||||||
|
// - Continue for unsynced entries after sync
|
||||||
|
add_sync = add_sync && (PRIV(op)->sync_big_writes.size() || PRIV(op)->sync_small_writes.size());
|
||||||
|
if (!todo && !bad_vers.size)
|
||||||
{
|
{
|
||||||
// Already stable
|
// Already stable
|
||||||
op->retval = 0;
|
op->retval = 0;
|
||||||
FINISH_OP(op);
|
FINISH_OP(op);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
op->retval = 0;
|
||||||
|
if (!todo && !add_sync)
|
||||||
|
{
|
||||||
|
// Only wait for inflight writes or current in-progress syncs
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
blockstore_op_t *sync_op = NULL, *split_stab_op = NULL;
|
||||||
|
if (add_sync)
|
||||||
|
{
|
||||||
|
// Initiate a selective sync for PRIV(op)->sync_(big|small)_writes
|
||||||
|
sync_op = selective_sync(op);
|
||||||
|
}
|
||||||
|
if (bad_vers.size)
|
||||||
|
{
|
||||||
|
// Split part of the request into a separate operation
|
||||||
|
split_stab_op = new blockstore_op_t;
|
||||||
|
split_stab_op->opcode = op->opcode;
|
||||||
|
split_stab_op->buf = bad_vers.items;
|
||||||
|
split_stab_op->len = bad_vers.size;
|
||||||
|
init_op(split_stab_op);
|
||||||
|
submit_queue.push_back(split_stab_op);
|
||||||
|
}
|
||||||
|
if (sync_op || split_stab_op || good_vers.items)
|
||||||
|
{
|
||||||
|
void *orig_buf = op->buf;
|
||||||
|
if (good_vers.items)
|
||||||
|
{
|
||||||
|
op->buf = good_vers.items;
|
||||||
|
op->len = good_vers.size;
|
||||||
|
}
|
||||||
|
// Make a wrapped callback
|
||||||
|
int *split_op_counter = (int*)malloc_or_die(sizeof(int));
|
||||||
|
*split_op_counter = (sync_op ? 1 : 0) + (split_stab_op ? 1 : 0) + (todo ? 1 : 0);
|
||||||
|
auto cb = [op, good_items = good_vers.items,
|
||||||
|
bad_items = bad_vers.items, split_op_counter,
|
||||||
|
orig_buf, real_cb = op->callback](blockstore_op_t *split_op)
|
||||||
|
{
|
||||||
|
if (split_op->retval != 0)
|
||||||
|
op->retval = split_op->retval;
|
||||||
|
(*split_op_counter)--;
|
||||||
|
assert((*split_op_counter) >= 0);
|
||||||
|
if (op != split_op)
|
||||||
|
delete split_op;
|
||||||
|
if (!*split_op_counter)
|
||||||
|
{
|
||||||
|
free(good_items);
|
||||||
|
free(bad_items);
|
||||||
|
free(split_op_counter);
|
||||||
|
op->buf = orig_buf;
|
||||||
|
real_cb(op);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if (sync_op)
|
||||||
|
{
|
||||||
|
sync_op->callback = cb;
|
||||||
|
}
|
||||||
|
if (split_stab_op)
|
||||||
|
{
|
||||||
|
split_stab_op->callback = cb;
|
||||||
|
}
|
||||||
|
op->callback = cb;
|
||||||
|
}
|
||||||
|
if (!todo)
|
||||||
|
{
|
||||||
|
// All work is postponed
|
||||||
|
op->callback = NULL;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
|
||||||
|
{
|
||||||
|
if (PRIV(op)->op_state)
|
||||||
|
{
|
||||||
|
return continue_stable(op);
|
||||||
|
}
|
||||||
|
int r = split_stab_op(op, [this](obj_ver_id ov)
|
||||||
|
{
|
||||||
|
auto dirty_it = dirty_db.find(ov);
|
||||||
|
if (dirty_it == dirty_db.end())
|
||||||
|
{
|
||||||
|
auto & clean_db = clean_db_shard(ov.oid);
|
||||||
|
auto clean_it = clean_db.find(ov.oid);
|
||||||
|
if (clean_it == clean_db.end() || clean_it->second.version < ov.version)
|
||||||
|
{
|
||||||
|
// No such object version
|
||||||
|
printf("Error: %lx:%lx v%lu not found while stabilizing\n", ov.oid.inode, ov.oid.stripe, ov.version);
|
||||||
|
return -ENOENT;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Already stable
|
||||||
|
return STAB_SPLIT_DONE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (IS_IN_FLIGHT(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Object write is still in progress. Wait until the write request completes
|
||||||
|
return STAB_SPLIT_WAIT;
|
||||||
|
}
|
||||||
|
else if (!IS_SYNCED(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Object not synced yet - sync it
|
||||||
|
// In previous versions we returned EBUSY here and required
|
||||||
|
// the caller (OSD) to issue a global sync first. But a global sync
|
||||||
|
// waits for all writes in the queue including inflight writes. And
|
||||||
|
// inflight writes may themselves be blocked by unstable writes being
|
||||||
|
// still present in the journal and not flushed away from it.
|
||||||
|
// So we must sync specific objects here.
|
||||||
|
//
|
||||||
|
// Even more, we have to process "stabilize" request in parts. That is,
|
||||||
|
// we must stabilize all objects which are already synced. Otherwise
|
||||||
|
// they may block objects which are NOT synced yet.
|
||||||
|
return STAB_SPLIT_SYNC;
|
||||||
|
}
|
||||||
|
else if (IS_STABLE(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Already stable
|
||||||
|
return STAB_SPLIT_DONE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return STAB_SPLIT_TODO;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (r != 1)
|
||||||
|
{
|
||||||
|
return r;
|
||||||
|
}
|
||||||
// Check journal space
|
// Check journal space
|
||||||
blockstore_journal_check_t space_check(this);
|
blockstore_journal_check_t space_check(this);
|
||||||
if (!space_check.check_available(op, todo, sizeof(journal_entry_stable), 0))
|
if (!space_check.check_available(op, op->len, sizeof(journal_entry_stable), 0))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -102,9 +351,9 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
|
|||||||
BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
|
BS_SUBMIT_CHECK_SQES(space_check.sectors_to_write);
|
||||||
// Prepare and submit journal entries
|
// Prepare and submit journal entries
|
||||||
int s = 0;
|
int s = 0;
|
||||||
for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
|
auto v = (obj_ver_id*)op->buf;
|
||||||
|
for (int i = 0; i < op->len; i++, v++)
|
||||||
{
|
{
|
||||||
// FIXME: Only stabilize versions that aren't stable yet
|
|
||||||
if (!journal.entry_fits(sizeof(journal_entry_stable)) &&
|
if (!journal.entry_fits(sizeof(journal_entry_stable)) &&
|
||||||
journal.sector_info[journal.cur_sector].dirty)
|
journal.sector_info[journal.cur_sector].dirty)
|
||||||
{
|
{
|
||||||
@@ -197,6 +446,7 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
|
|||||||
{
|
{
|
||||||
inode_space_stats[dirty_it->first.oid.inode] += dsk.data_block_size;
|
inode_space_stats[dirty_it->first.oid.inode] += dsk.data_block_size;
|
||||||
}
|
}
|
||||||
|
big_to_flush++;
|
||||||
}
|
}
|
||||||
else if (IS_DELETE(dirty_it->second.state))
|
else if (IS_DELETE(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
@@ -205,6 +455,7 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
|
|||||||
sp -= dsk.data_block_size;
|
sp -= dsk.data_block_size;
|
||||||
else
|
else
|
||||||
inode_space_stats.erase(dirty_it->first.oid.inode);
|
inode_space_stats.erase(dirty_it->first.oid.inode);
|
||||||
|
big_to_flush++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) ||
|
if (forget_dirty && (IS_BIG_WRITE(dirty_it->second.state) ||
|
||||||
|
@@ -12,7 +12,7 @@
|
|||||||
#define SYNC_JOURNAL_SYNC_SENT 7
|
#define SYNC_JOURNAL_SYNC_SENT 7
|
||||||
#define SYNC_DONE 8
|
#define SYNC_DONE 8
|
||||||
|
|
||||||
int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_progress_sync)
|
int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
if (immediate_commit == IMMEDIATE_ALL)
|
if (immediate_commit == IMMEDIATE_ALL)
|
||||||
{
|
{
|
||||||
@@ -145,7 +145,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op, bool queue_has_in_prog
|
|||||||
PRIV(op)->op_state = SYNC_DONE;
|
PRIV(op)->op_state = SYNC_DONE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (PRIV(op)->op_state == SYNC_DONE && !queue_has_in_progress_sync)
|
if (PRIV(op)->op_state == SYNC_DONE)
|
||||||
{
|
{
|
||||||
ack_sync(op);
|
ack_sync(op);
|
||||||
return 2;
|
return 2;
|
||||||
|
@@ -6,7 +6,7 @@
|
|||||||
bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
// Check or assign version number
|
// Check or assign version number
|
||||||
bool found = false, deleted = false, is_del = (op->opcode == BS_OP_DELETE);
|
bool found = false, deleted = false, unsynced = false, is_del = (op->opcode == BS_OP_DELETE);
|
||||||
bool wait_big = false, wait_del = false;
|
bool wait_big = false, wait_del = false;
|
||||||
void *bmp = NULL;
|
void *bmp = NULL;
|
||||||
uint64_t version = 1;
|
uint64_t version = 1;
|
||||||
@@ -26,6 +26,7 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||||||
found = true;
|
found = true;
|
||||||
version = dirty_it->first.version + 1;
|
version = dirty_it->first.version + 1;
|
||||||
deleted = IS_DELETE(dirty_it->second.state);
|
deleted = IS_DELETE(dirty_it->second.state);
|
||||||
|
unsynced = !IS_SYNCED(dirty_it->second.state);
|
||||||
wait_del = ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_DEL);
|
wait_del = ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_WAIT_DEL);
|
||||||
wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
|
wait_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE
|
||||||
? !IS_SYNCED(dirty_it->second.state)
|
? !IS_SYNCED(dirty_it->second.state)
|
||||||
@@ -81,10 +82,28 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
|||||||
wait_del = true;
|
wait_del = true;
|
||||||
PRIV(op)->real_version = op->version;
|
PRIV(op)->real_version = op->version;
|
||||||
op->version = version;
|
op->version = version;
|
||||||
flusher->unshift_flush((obj_ver_id){
|
if (unsynced)
|
||||||
.oid = op->oid,
|
{
|
||||||
.version = version-1,
|
// Issue an additional sync so the delete reaches the journal
|
||||||
}, true);
|
blockstore_op_t *sync_op = new blockstore_op_t;
|
||||||
|
sync_op->opcode = BS_OP_SYNC;
|
||||||
|
sync_op->callback = [this, op](blockstore_op_t *sync_op)
|
||||||
|
{
|
||||||
|
flusher->unshift_flush((obj_ver_id){
|
||||||
|
.oid = op->oid,
|
||||||
|
.version = op->version-1,
|
||||||
|
}, true);
|
||||||
|
delete sync_op;
|
||||||
|
};
|
||||||
|
enqueue_op(sync_op);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
flusher->unshift_flush((obj_ver_id){
|
||||||
|
.oid = op->oid,
|
||||||
|
.version = version-1,
|
||||||
|
}, true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -271,6 +290,13 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
|||||||
if (loc == UINT64_MAX)
|
if (loc == UINT64_MAX)
|
||||||
{
|
{
|
||||||
// no space
|
// no space
|
||||||
|
if (big_to_flush > 0)
|
||||||
|
{
|
||||||
|
// hope that some space will be available after flush
|
||||||
|
flusher->request_trim();
|
||||||
|
PRIV(op)->wait_for = WAIT_FREE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
cancel_all_writes(op, dirty_it, -ENOSPC);
|
cancel_all_writes(op, dirty_it, -ENOSPC);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
42
src/cli.cpp
42
src/cli.cpp
@@ -73,6 +73,37 @@ static const char* help_text =
|
|||||||
" <to> must be a child of <from> and <target> may be one of the layers between\n"
|
" <to> must be a child of <from> and <target> may be one of the layers between\n"
|
||||||
" <from> and <to>, including <from> and <to>.\n"
|
" <from> and <to>, including <from> and <to>.\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"vitastor-cli describe [--osds <osds>] [--object-state <states>] [--pool <pool>] [--inode <ino>] [--min-inode <ino>] [--max-inode <ino>] [--min-offset <offset>] [--max-offset <offset>]\n"
|
||||||
|
" Describe unclean object locations in the cluster.\n"
|
||||||
|
" --osds <osds>\n"
|
||||||
|
" Only list objects from primary OSD(s) <osds>.\n"
|
||||||
|
" --object-state <states>\n"
|
||||||
|
" Only list objects in given state(s). State(s) may include:\n"
|
||||||
|
" degraded, misplaced, incomplete, corrupted, inconsistent.\n"
|
||||||
|
" --pool <pool name or number>\n"
|
||||||
|
" Only list objects in the given pool.\n"
|
||||||
|
" --inode, --min-inode, --max-inode\n"
|
||||||
|
" Restrict listing to specific inode numbers.\n"
|
||||||
|
" --min-offset, --max-offset\n"
|
||||||
|
" Restrict listing to specific offsets inside inodes.\n"
|
||||||
|
"\n"
|
||||||
|
"vitastor-cli fix [--objects <objects>] [--bad-osds <osds>] [--part <part>] [--check no]\n"
|
||||||
|
" Fix inconsistent objects in the cluster by deleting some copies.\n"
|
||||||
|
" --objects <objects>\n"
|
||||||
|
" Objects to fix, either in plain text or JSON format. If not specified,\n"
|
||||||
|
" object list will be read from STDIN in one of the same formats.\n"
|
||||||
|
" Plain text format: 0x<inode>:0x<stripe> <any delimiter> 0x<inode>:0x<stripe> ...\n"
|
||||||
|
" JSON format: [{\"inode\":\"0x...\",\"stripe\":\"0x...\"},...]\n"
|
||||||
|
" --bad-osds <osds>\n"
|
||||||
|
" Remove inconsistent copies/parts of objects from these OSDs, effectively\n"
|
||||||
|
" marking them bad and allowing Vitastor to recover objects from other copies.\n"
|
||||||
|
" --part <number>\n"
|
||||||
|
" Only remove EC part <number> (from 0 to pg_size-1), required for extreme\n"
|
||||||
|
" edge cases where one OSD has multiple parts of a EC object.\n"
|
||||||
|
" --check no\n"
|
||||||
|
" Do not recheck that requested objects are actually inconsistent,\n"
|
||||||
|
" delete requested copies/parts anyway.\n"
|
||||||
|
"\n"
|
||||||
"vitastor-cli alloc-osd\n"
|
"vitastor-cli alloc-osd\n"
|
||||||
" Allocate a new OSD number and reserve it by creating empty /osd/stats/<n> key.\n"
|
" Allocate a new OSD number and reserve it by creating empty /osd/stats/<n> key.\n"
|
||||||
"\n"
|
"\n"
|
||||||
@@ -168,6 +199,7 @@ static json11::Json::object parse_args(int narg, const char *args[])
|
|||||||
static int run(cli_tool_t *p, json11::Json::object cfg)
|
static int run(cli_tool_t *p, json11::Json::object cfg)
|
||||||
{
|
{
|
||||||
cli_result_t result = {};
|
cli_result_t result = {};
|
||||||
|
p->is_command_line = true;
|
||||||
p->parse_config(cfg);
|
p->parse_config(cfg);
|
||||||
json11::Json::array cmd = cfg["command"].array_items();
|
json11::Json::array cmd = cfg["command"].array_items();
|
||||||
cfg.erase("command");
|
cfg.erase("command");
|
||||||
@@ -276,6 +308,16 @@ static int run(cli_tool_t *p, json11::Json::object cfg)
|
|||||||
}
|
}
|
||||||
action_cb = p->start_rm(cfg);
|
action_cb = p->start_rm(cfg);
|
||||||
}
|
}
|
||||||
|
else if (cmd[0] == "describe")
|
||||||
|
{
|
||||||
|
// Describe unclean objects
|
||||||
|
action_cb = p->start_describe(cfg);
|
||||||
|
}
|
||||||
|
else if (cmd[0] == "fix")
|
||||||
|
{
|
||||||
|
// Fix inconsistent objects (by deleting some copies)
|
||||||
|
action_cb = p->start_fix(cfg);
|
||||||
|
}
|
||||||
else if (cmd[0] == "alloc-osd")
|
else if (cmd[0] == "alloc-osd")
|
||||||
{
|
{
|
||||||
// Allocate a new OSD number
|
// Allocate a new OSD number
|
||||||
|
@@ -34,12 +34,12 @@ public:
|
|||||||
bool list_first = false;
|
bool list_first = false;
|
||||||
bool json_output = false;
|
bool json_output = false;
|
||||||
int log_level = 0;
|
int log_level = 0;
|
||||||
|
bool is_command_line = false;
|
||||||
bool color = false;
|
bool color = false;
|
||||||
|
|
||||||
ring_loop_t *ringloop = NULL;
|
ring_loop_t *ringloop = NULL;
|
||||||
epoll_manager_t *epmgr = NULL;
|
epoll_manager_t *epmgr = NULL;
|
||||||
cluster_client_t *cli = NULL;
|
cluster_client_t *cli = NULL;
|
||||||
bool no_recovery = false, no_rebalance = false, readonly = false;
|
|
||||||
|
|
||||||
int waiting = 0;
|
int waiting = 0;
|
||||||
cli_result_t etcd_err;
|
cli_result_t etcd_err;
|
||||||
@@ -56,6 +56,8 @@ public:
|
|||||||
friend struct snap_remover_t;
|
friend struct snap_remover_t;
|
||||||
|
|
||||||
std::function<bool(cli_result_t &)> start_status(json11::Json);
|
std::function<bool(cli_result_t &)> start_status(json11::Json);
|
||||||
|
std::function<bool(cli_result_t &)> start_describe(json11::Json);
|
||||||
|
std::function<bool(cli_result_t &)> start_fix(json11::Json);
|
||||||
std::function<bool(cli_result_t &)> start_df(json11::Json);
|
std::function<bool(cli_result_t &)> start_df(json11::Json);
|
||||||
std::function<bool(cli_result_t &)> start_ls(json11::Json);
|
std::function<bool(cli_result_t &)> start_ls(json11::Json);
|
||||||
std::function<bool(cli_result_t &)> start_create(json11::Json);
|
std::function<bool(cli_result_t &)> start_create(json11::Json);
|
||||||
|
256
src/cli_describe.cpp
Normal file
256
src/cli_describe.cpp
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#include "cli_fix.h"
|
||||||
|
#include "cluster_client.h"
|
||||||
|
#include "pg_states.h"
|
||||||
|
#include "str_util.h"
|
||||||
|
|
||||||
|
std::vector<uint64_t> parse_uint64_list(json11::Json val)
|
||||||
|
{
|
||||||
|
std::vector<uint64_t> ret;
|
||||||
|
if (val.is_number())
|
||||||
|
ret.push_back(val.uint64_value());
|
||||||
|
else if (val.is_string())
|
||||||
|
{
|
||||||
|
const std::string & s = val.string_value();
|
||||||
|
for (int i = 0, p = -1; i <= s.size(); i++)
|
||||||
|
{
|
||||||
|
if (p < 0 && i < s.size() && (isdigit(s[i]) || s[i] == 'x'))
|
||||||
|
p = i;
|
||||||
|
else if (p >= 0 && (i >= s.size() || !isdigit(s[i]) && s[i] != 'x'))
|
||||||
|
{
|
||||||
|
ret.push_back(stoull_full(s.substr(p, i-p), 0));
|
||||||
|
p = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (val.is_array())
|
||||||
|
{
|
||||||
|
for (auto & pg_num: val.array_items())
|
||||||
|
ret.push_back(pg_num.uint64_value());
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct cli_describe_t
|
||||||
|
{
|
||||||
|
uint64_t object_state = 0;
|
||||||
|
pool_id_t only_pool = 0;
|
||||||
|
std::vector<uint64_t> only_osds;
|
||||||
|
uint64_t min_inode = 0, max_inode = 0;
|
||||||
|
uint64_t min_offset = 0, max_offset = 0;
|
||||||
|
|
||||||
|
cli_tool_t *parent = NULL;
|
||||||
|
int state = 0;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
json11::Json options;
|
||||||
|
cli_result_t result;
|
||||||
|
json11::Json::array describe_items;
|
||||||
|
|
||||||
|
bool is_done()
|
||||||
|
{
|
||||||
|
return state == 100;
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse_options(json11::Json cfg)
|
||||||
|
{
|
||||||
|
only_pool = cfg["pool"].uint64_value();
|
||||||
|
if (!only_pool && cfg["pool"].is_string())
|
||||||
|
{
|
||||||
|
for (auto & pp: parent->cli->st_cli.pool_config)
|
||||||
|
{
|
||||||
|
if (pp.second.name == cfg["pool"].string_value())
|
||||||
|
{
|
||||||
|
only_pool = pp.first;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
min_inode = cfg["inode"].uint64_value();
|
||||||
|
if (min_inode)
|
||||||
|
{
|
||||||
|
if (!INODE_POOL(min_inode))
|
||||||
|
min_inode |= (uint64_t)only_pool << (64-POOL_ID_BITS);
|
||||||
|
max_inode = min_inode;
|
||||||
|
min_offset = max_offset = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
min_inode = stoull_full(cfg["min_inode"].string_value(), 0); // to support 0x...
|
||||||
|
max_inode = stoull_full(cfg["max_inode"].string_value(), 0);
|
||||||
|
min_offset = stoull_full(cfg["min_offset"].string_value(), 0);
|
||||||
|
max_offset = stoull_full(cfg["max_offset"].string_value(), 0);
|
||||||
|
if (!min_inode && !max_inode && only_pool)
|
||||||
|
{
|
||||||
|
min_inode = (uint64_t)only_pool << (64-POOL_ID_BITS);
|
||||||
|
max_inode = ((uint64_t)only_pool << (64-POOL_ID_BITS)) |
|
||||||
|
(((uint64_t)1 << (64-POOL_ID_BITS)) - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
only_osds = parse_uint64_list(cfg["osds"]);
|
||||||
|
object_state = stoull_full(cfg["object_state"].string_value(), 0);
|
||||||
|
if (!object_state && cfg["object_state"].is_string())
|
||||||
|
{
|
||||||
|
if (cfg["object_state"].string_value().find("inconsistent") != std::string::npos)
|
||||||
|
object_state |= OBJ_INCONSISTENT;
|
||||||
|
if (cfg["object_state"].string_value().find("corrupted") != std::string::npos)
|
||||||
|
object_state |= OBJ_CORRUPTED;
|
||||||
|
if (cfg["object_state"].string_value().find("incomplete") != std::string::npos)
|
||||||
|
object_state |= OBJ_INCOMPLETE;
|
||||||
|
if (cfg["object_state"].string_value().find("degraded") != std::string::npos)
|
||||||
|
object_state |= OBJ_DEGRADED;
|
||||||
|
if (cfg["object_state"].string_value().find("misplaced") != std::string::npos)
|
||||||
|
object_state |= OBJ_MISPLACED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void loop()
|
||||||
|
{
|
||||||
|
if (state == 1)
|
||||||
|
goto resume_1;
|
||||||
|
if (state == 100)
|
||||||
|
return;
|
||||||
|
parse_options(options);
|
||||||
|
if (min_inode && !INODE_POOL(min_inode))
|
||||||
|
{
|
||||||
|
result = (cli_result_t){ .err = EINVAL, .text = "Pool is not specified" };
|
||||||
|
state = 100;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!only_osds.size())
|
||||||
|
{
|
||||||
|
uint64_t min_pool = min_inode >> (64-POOL_ID_BITS);
|
||||||
|
uint64_t max_pool = max_inode >> (64-POOL_ID_BITS);
|
||||||
|
for (auto & pp: parent->cli->st_cli.pool_config)
|
||||||
|
{
|
||||||
|
if (pp.first >= min_pool && (!max_pool || pp.first <= max_pool))
|
||||||
|
{
|
||||||
|
for (auto & pgp: pp.second.pg_config)
|
||||||
|
only_osds.push_back(pgp.second.cur_primary);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
remove_duplicates(only_osds);
|
||||||
|
parent->cli->init_msgr();
|
||||||
|
if (parent->json_output && parent->is_command_line)
|
||||||
|
{
|
||||||
|
printf("[\n");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < only_osds.size(); i++)
|
||||||
|
{
|
||||||
|
osd_op_t *op = new osd_op_t;
|
||||||
|
op->req = (osd_any_op_t){
|
||||||
|
.describe = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = parent->cli->next_op_id(),
|
||||||
|
.opcode = OSD_OP_DESCRIBE,
|
||||||
|
},
|
||||||
|
.object_state = object_state,
|
||||||
|
.min_inode = min_inode,
|
||||||
|
.min_offset = min_offset,
|
||||||
|
.max_inode = max_inode,
|
||||||
|
.max_offset = max_offset,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
op->callback = [this, osd_num = only_osds[i]](osd_op_t *op)
|
||||||
|
{
|
||||||
|
if (op->reply.hdr.retval < 0)
|
||||||
|
{
|
||||||
|
fprintf(
|
||||||
|
stderr, "Failed to describe objects on OSD %lu (retval=%ld)\n",
|
||||||
|
osd_num, op->reply.hdr.retval
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else if (op->reply.describe.result_bytes != op->reply.hdr.retval * sizeof(osd_reply_describe_item_t))
|
||||||
|
{
|
||||||
|
fprintf(
|
||||||
|
stderr, "Invalid response size from OSD %lu (expected %lu bytes, got %lu bytes)\n",
|
||||||
|
osd_num, op->reply.hdr.retval * sizeof(osd_reply_describe_item_t), op->reply.describe.result_bytes
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
osd_reply_describe_item_t *items = (osd_reply_describe_item_t *)op->buf;
|
||||||
|
for (int i = 0; i < op->reply.hdr.retval; i++)
|
||||||
|
{
|
||||||
|
if (!parent->json_output || parent->is_command_line)
|
||||||
|
{
|
||||||
|
#define FMT "{\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"part\":%u,\"osd_num\":%lu%s%s%s}"
|
||||||
|
printf(
|
||||||
|
(parent->json_output
|
||||||
|
? (count > 0 ? ",\n " FMT : " " FMT)
|
||||||
|
: "%lx:%lx part %u on OSD %lu%s%s%s\n"),
|
||||||
|
#undef FMT
|
||||||
|
items[i].inode, items[i].stripe,
|
||||||
|
items[i].role, items[i].osd_num,
|
||||||
|
(items[i].loc_bad & LOC_CORRUPTED ? (parent->json_output ? ",\"corrupted\":true" : " corrupted") : ""),
|
||||||
|
(items[i].loc_bad & LOC_INCONSISTENT ? (parent->json_output ? ",\"inconsistent\":true" : " inconsistent") : ""),
|
||||||
|
(items[i].loc_bad & LOC_OUTDATED ? (parent->json_output ? ",\"outdated\":true" : " outdated") : "")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto json_item = json11::Json::object {
|
||||||
|
{ "inode", (uint64_t)items[i].inode },
|
||||||
|
{ "stripe", (uint64_t)items[i].stripe },
|
||||||
|
{ "part", (uint64_t)items[i].role },
|
||||||
|
{ "osd_num", (uint64_t)items[i].osd_num },
|
||||||
|
};
|
||||||
|
if (items[i].loc_bad & LOC_CORRUPTED)
|
||||||
|
json_item["corrupted"] = true;
|
||||||
|
if (items[i].loc_bad & LOC_INCONSISTENT)
|
||||||
|
json_item["inconsistent"] = true;
|
||||||
|
if (items[i].loc_bad & LOC_OUTDATED)
|
||||||
|
json_item["outdated"] = true;
|
||||||
|
describe_items.push_back(json_item);
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete op;
|
||||||
|
parent->waiting--;
|
||||||
|
if (!parent->waiting)
|
||||||
|
loop();
|
||||||
|
};
|
||||||
|
parent->waiting++;
|
||||||
|
parent->cli->execute_raw(only_osds[i], op);
|
||||||
|
}
|
||||||
|
resume_1:
|
||||||
|
state = 1;
|
||||||
|
if (parent->waiting > 0)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (parent->json_output && parent->is_command_line)
|
||||||
|
{
|
||||||
|
printf(count > 0 ? "\n]\n" : "]\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result.data = describe_items;
|
||||||
|
}
|
||||||
|
state = 100;
|
||||||
|
describe_items.clear();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create a "describe" operation configured with <cfg> and return its step function.
// Each call of the returned function advances the operation; it returns false while
// the operation is still running. Once it is finished, the outcome is copied into
// the argument, the operation object is destroyed and true is returned.
std::function<bool(cli_result_t &)> cli_tool_t::start_describe(json11::Json cfg)
{
    cli_describe_t *describe_op = new cli_describe_t();
    describe_op->options = cfg;
    describe_op->parent = this;
    return [describe_op](cli_result_t & result)
    {
        describe_op->loop();
        if (!describe_op->is_done())
            return false;
        result = describe_op->result;
        delete describe_op;
        return true;
    };
}
|
@@ -121,8 +121,7 @@ resume_1:
|
|||||||
}
|
}
|
||||||
if (pool_cfg.scheme != POOL_SCHEME_REPLICATED)
|
if (pool_cfg.scheme != POOL_SCHEME_REPLICATED)
|
||||||
{
|
{
|
||||||
uint64_t pg_real_size = pool_stats[pool_cfg.id]["pg_real_size"].uint64_value();
|
pool_avail *= (pool_cfg.pg_size - pool_cfg.parity_chunks);
|
||||||
pool_avail = pg_real_size > 0 ? pool_avail * (pool_cfg.pg_size - pool_cfg.parity_chunks) / pg_real_size : 0;
|
|
||||||
}
|
}
|
||||||
pool_stats[pool_cfg.id] = json11::Json::object {
|
pool_stats[pool_cfg.id] = json11::Json::object {
|
||||||
{ "name", pool_cfg.name },
|
{ "name", pool_cfg.name },
|
||||||
|
313
src/cli_fix.cpp
Normal file
313
src/cli_fix.cpp
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#include "cli_fix.h"
|
||||||
|
#include "cluster_client.h"
|
||||||
|
#include "pg_states.h"
|
||||||
|
#include "str_util.h"
|
||||||
|
|
||||||
|
struct cli_fix_t
|
||||||
|
{
|
||||||
|
std::vector<object_id> objects;
|
||||||
|
int part = -1;
|
||||||
|
int processed_count = 0;
|
||||||
|
std::set<osd_num_t> bad_osds;
|
||||||
|
bool no_check = false;
|
||||||
|
|
||||||
|
cli_tool_t *parent = NULL;
|
||||||
|
int state = 0;
|
||||||
|
|
||||||
|
json11::Json options;
|
||||||
|
cli_result_t result;
|
||||||
|
json11::Json::array fix_result;
|
||||||
|
|
||||||
|
bool is_done()
|
||||||
|
{
|
||||||
|
return state == 100;
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse_objects_str(std::string str)
|
||||||
|
{
|
||||||
|
str = trim(str);
|
||||||
|
if (str[0] == '[')
|
||||||
|
{
|
||||||
|
std::string json_err;
|
||||||
|
json11::Json list = json11::Json::parse(str, json_err);
|
||||||
|
if (json_err != "")
|
||||||
|
fprintf(stderr, "Invalid JSON object list input: %s\n", json_err.c_str());
|
||||||
|
else
|
||||||
|
parse_object_list(list);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const char *s = str.c_str();
|
||||||
|
char *e = NULL;
|
||||||
|
int len = str.size();
|
||||||
|
object_id oid;
|
||||||
|
for (int p = 0; p < len; p++)
|
||||||
|
{
|
||||||
|
if (isdigit(s[p]))
|
||||||
|
{
|
||||||
|
int p0 = p;
|
||||||
|
oid.inode = strtoull(s+p, &e, 0);
|
||||||
|
p = e-s;
|
||||||
|
while (p < len && !isdigit(s[p]) && s[p] != ':')
|
||||||
|
p++;
|
||||||
|
if (s[p] != ':')
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Invalid object ID in input: %s\n", std::string(s+p0, p-p0).c_str());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
while (p < len && !isdigit(s[p]))
|
||||||
|
p++;
|
||||||
|
oid.stripe = strtoull(s+p, &e, 0) & ~STRIPE_MASK;
|
||||||
|
p = e-s;
|
||||||
|
if (oid.inode)
|
||||||
|
objects.push_back(oid);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "Invalid object ID in input: %s\n", std::string(s+p0, p-p0).c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse_object_list(json11::Json list)
|
||||||
|
{
|
||||||
|
for (auto & obj: list.array_items())
|
||||||
|
{
|
||||||
|
object_id oid = (object_id){
|
||||||
|
.inode = stoull_full(obj["inode"].string_value(), 0),
|
||||||
|
.stripe = stoull_full(obj["stripe"].string_value(), 0) & ~STRIPE_MASK,
|
||||||
|
};
|
||||||
|
if (oid.inode)
|
||||||
|
objects.push_back(oid);
|
||||||
|
else
|
||||||
|
fprintf(stderr, "Invalid JSON object ID in input: %s, bad or missing \"inode\" field\n", obj.dump().c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void parse_options(json11::Json cfg)
|
||||||
|
{
|
||||||
|
json11::Json object_list;
|
||||||
|
if (cfg["objects"].is_null())
|
||||||
|
parse_objects_str(read_all_fd(0));
|
||||||
|
else if (cfg["objects"].is_string())
|
||||||
|
parse_objects_str(cfg["objects"].string_value());
|
||||||
|
else
|
||||||
|
parse_object_list(cfg["objects"].array_items());
|
||||||
|
for (auto osd_num: parse_uint64_list(cfg["bad_osds"]))
|
||||||
|
bad_osds.insert(osd_num);
|
||||||
|
no_check = json_is_false(cfg["check"]);
|
||||||
|
if (cfg["part"].is_number() || cfg["part"].is_string())
|
||||||
|
part = cfg["part"].uint64_value();
|
||||||
|
}
|
||||||
|
|
||||||
|
void loop()
|
||||||
|
{
|
||||||
|
if (state == 1)
|
||||||
|
goto resume_1;
|
||||||
|
if (state == 100)
|
||||||
|
return;
|
||||||
|
parse_options(options);
|
||||||
|
if (!objects.size())
|
||||||
|
{
|
||||||
|
result = (cli_result_t){ .err = EINVAL, .text = "Object list is not specified" };
|
||||||
|
state = 100;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!bad_osds.size())
|
||||||
|
{
|
||||||
|
result = (cli_result_t){ .err = EINVAL, .text = "OSDs are not specified" };
|
||||||
|
state = 100;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
remove_duplicates(objects);
|
||||||
|
parent->cli->init_msgr();
|
||||||
|
resume_1:
|
||||||
|
state = 1;
|
||||||
|
while (processed_count < objects.size())
|
||||||
|
{
|
||||||
|
if (parent->waiting >= parent->iodepth*parent->parallel_osds)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto & obj = objects[processed_count++];
|
||||||
|
auto pool_cfg_it = parent->cli->st_cli.pool_config.find(INODE_POOL(obj.inode));
|
||||||
|
if (pool_cfg_it == parent->cli->st_cli.pool_config.end())
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Object %lx:%lx is from unknown pool\n", obj.inode, obj.stripe);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto & pool_cfg = pool_cfg_it->second;
|
||||||
|
pg_num_t pg_num = (obj.stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
|
||||||
|
auto pg_it = pool_cfg.pg_config.find(pg_num);
|
||||||
|
if (pg_it == pool_cfg.pg_config.end() ||
|
||||||
|
!pg_it->second.cur_primary || !(pg_it->second.cur_state & PG_ACTIVE))
|
||||||
|
{
|
||||||
|
fprintf(
|
||||||
|
stderr, "Object %lx:%lx is from PG %u/%u which is not currently active\n",
|
||||||
|
obj.inode, obj.stripe, pool_cfg_it->first, pg_num
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
osd_num_t primary_osd = pg_it->second.cur_primary;
|
||||||
|
// Describe -> Remove some copies -> Scrub again
|
||||||
|
osd_op_t *op = new osd_op_t;
|
||||||
|
op->req = (osd_any_op_t){
|
||||||
|
.describe = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = parent->cli->next_op_id(),
|
||||||
|
.opcode = OSD_OP_DESCRIBE,
|
||||||
|
},
|
||||||
|
.min_inode = obj.inode,
|
||||||
|
.min_offset = obj.stripe,
|
||||||
|
.max_inode = obj.inode,
|
||||||
|
.max_offset = obj.stripe,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
op->callback = [this, primary_osd, &obj](osd_op_t *op)
|
||||||
|
{
|
||||||
|
if (op->reply.hdr.retval < 0 || op->reply.describe.result_bytes != op->reply.hdr.retval * sizeof(osd_reply_describe_item_t))
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Failed to describe objects on OSD %lu (retval=%ld)\n", primary_osd, op->reply.hdr.retval);
|
||||||
|
parent->waiting--;
|
||||||
|
loop();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
osd_reply_describe_item_t *items = (osd_reply_describe_item_t *)op->buf;
|
||||||
|
int *rm_count = (int*)malloc_or_die(sizeof(int));
|
||||||
|
*rm_count = 1; // just in case if anything gets called instantly
|
||||||
|
for (int i = 0; i < op->reply.hdr.retval; i++)
|
||||||
|
{
|
||||||
|
if (((items[i].loc_bad & LOC_INCONSISTENT) || no_check) &&
|
||||||
|
bad_osds.find(items[i].osd_num) != bad_osds.end() &&
|
||||||
|
(part == -1 || items[i].role == part))
|
||||||
|
{
|
||||||
|
// Remove
|
||||||
|
uint64_t rm_osd_num = items[i].osd_num;
|
||||||
|
osd_op_t *rm_op = new osd_op_t;
|
||||||
|
rm_op->req = (osd_any_op_t){
|
||||||
|
.sec_del = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = parent->cli->next_op_id(),
|
||||||
|
.opcode = OSD_OP_SEC_DELETE,
|
||||||
|
},
|
||||||
|
.oid = {
|
||||||
|
.inode = op->req.describe.min_inode,
|
||||||
|
.stripe = op->req.describe.min_offset | items[i].role,
|
||||||
|
},
|
||||||
|
.version = 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
rm_op->callback = [this, primary_osd, rm_osd_num, rm_count, &obj](osd_op_t *rm_op)
|
||||||
|
{
|
||||||
|
(*rm_count)--;
|
||||||
|
if (rm_op->reply.hdr.retval < 0)
|
||||||
|
{
|
||||||
|
fprintf(
|
||||||
|
stderr, "Failed to remove object %lx:%lx from OSD %lu (retval=%ld)\n",
|
||||||
|
rm_op->req.sec_del.oid.inode, rm_op->req.sec_del.oid.stripe,
|
||||||
|
rm_osd_num, rm_op->reply.hdr.retval
|
||||||
|
);
|
||||||
|
}
|
||||||
|
else if (parent->json_output)
|
||||||
|
{
|
||||||
|
fix_result.push_back(json11::Json::object {
|
||||||
|
{ "inode", (uint64_t)rm_op->req.sec_del.oid.inode },
|
||||||
|
{ "stripe", (uint64_t)rm_op->req.sec_del.oid.stripe & ~STRIPE_MASK },
|
||||||
|
{ "part", (uint64_t)rm_op->req.sec_del.oid.stripe & STRIPE_MASK },
|
||||||
|
{ "osd_num", (uint64_t)rm_osd_num },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf(
|
||||||
|
"Removed %lx:%lx (part %lu) from OSD %lu\n",
|
||||||
|
rm_op->req.sec_del.oid.inode, rm_op->req.sec_del.oid.stripe & ~STRIPE_MASK,
|
||||||
|
rm_op->req.sec_del.oid.stripe & STRIPE_MASK, rm_osd_num
|
||||||
|
);
|
||||||
|
}
|
||||||
|
delete rm_op;
|
||||||
|
if (!(*rm_count))
|
||||||
|
{
|
||||||
|
// Scrub
|
||||||
|
free(rm_count);
|
||||||
|
osd_op_t *scrub_op = new osd_op_t;
|
||||||
|
scrub_op->req = (osd_any_op_t){
|
||||||
|
.rw = {
|
||||||
|
.header = {
|
||||||
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
|
.id = parent->cli->next_op_id(),
|
||||||
|
.opcode = OSD_OP_SCRUB,
|
||||||
|
},
|
||||||
|
.inode = obj.inode,
|
||||||
|
.offset = obj.stripe,
|
||||||
|
.len = 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
scrub_op->callback = [this, primary_osd, &obj](osd_op_t *scrub_op)
|
||||||
|
{
|
||||||
|
if (scrub_op->reply.hdr.retval < 0 && scrub_op->reply.hdr.retval != -ENOENT)
|
||||||
|
{
|
||||||
|
fprintf(
|
||||||
|
stderr, "Failed to scrub %lx:%lx on OSD %lu (retval=%ld)\n",
|
||||||
|
obj.inode, obj.stripe, primary_osd, scrub_op->reply.hdr.retval
|
||||||
|
);
|
||||||
|
}
|
||||||
|
delete scrub_op;
|
||||||
|
parent->waiting--;
|
||||||
|
loop();
|
||||||
|
};
|
||||||
|
parent->cli->execute_raw(primary_osd, scrub_op);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
(*rm_count)++;
|
||||||
|
parent->cli->execute_raw(rm_osd_num, rm_op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(*rm_count)--;
|
||||||
|
if (!*rm_count)
|
||||||
|
{
|
||||||
|
free(rm_count);
|
||||||
|
parent->waiting--;
|
||||||
|
loop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete op;
|
||||||
|
};
|
||||||
|
parent->waiting++;
|
||||||
|
parent->cli->execute_raw(primary_osd, op);
|
||||||
|
}
|
||||||
|
if (parent->waiting > 0)
|
||||||
|
{
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (parent->json_output)
|
||||||
|
{
|
||||||
|
result.data = fix_result;
|
||||||
|
}
|
||||||
|
state = 100;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Create a "fix" operation configured with <cfg> and return its step function.
// Each call of the returned function advances the operation; it returns false while
// the operation is still running. Once it is finished, the outcome is copied into
// the argument, the operation object is destroyed and true is returned.
std::function<bool(cli_result_t &)> cli_tool_t::start_fix(json11::Json cfg)
{
    cli_fix_t *fix_op = new cli_fix_t();
    fix_op->options = cfg;
    fix_op->parent = this;
    return [fix_op](cli_result_t & result)
    {
        fix_op->loop();
        if (!fix_op->is_done())
            return false;
        result = fix_op->result;
        delete fix_op;
        return true;
    };
}
|
26
src/cli_fix.h
Normal file
26
src/cli_fix.h
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
// Copyright (c) Vitaliy Filippov, 2019+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "cli.h"
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
std::vector<uint64_t> parse_uint64_list(json11::Json val);
|
||||||
|
|
||||||
|
// Sort <ret> in ascending order and remove repeated values in place.
// T must provide operator< (used for sorting) and operator!= (used to compare
// neighbouring elements). An empty vector is left untouched.
template<class T> void remove_duplicates(std::vector<T> & ret)
{
    std::sort(ret.begin(), ret.end());
    // After sorting equal values are adjacent. Elements are compared through
    // operator!= so that the requirements on T stay the same as before
    auto unique_end = std::unique(ret.begin(), ret.end(), [](const T & a, const T & b)
    {
        return !(a != b);
    });
    ret.erase(unique_end, ret.end());
}
|
||||||
|
|
||||||
|
// from http_client.cpp...
|
||||||
|
bool json_is_false(const json11::Json & val);
|
@@ -403,7 +403,7 @@ struct snap_merger_t
|
|||||||
op->opcode = OSD_OP_READ_BITMAP;
|
op->opcode = OSD_OP_READ_BITMAP;
|
||||||
op->inode = target;
|
op->inode = target;
|
||||||
op->offset = offset;
|
op->offset = offset;
|
||||||
op->len = 0;
|
op->len = target_block_size;
|
||||||
op->callback = [this](cluster_op_t *op)
|
op->callback = [this](cluster_op_t *op)
|
||||||
{
|
{
|
||||||
if (op->retval < 0)
|
if (op->retval < 0)
|
||||||
|
@@ -92,6 +92,7 @@ struct rm_inode_t
|
|||||||
|
|
||||||
void send_ops(rm_pg_t *cur_list)
|
void send_ops(rm_pg_t *cur_list)
|
||||||
{
|
{
|
||||||
|
parent->cli->init_msgr();
|
||||||
if (parent->cli->msgr.osd_peer_fds.find(cur_list->rm_osd_num) ==
|
if (parent->cli->msgr.osd_peer_fds.find(cur_list->rm_osd_num) ==
|
||||||
parent->cli->msgr.osd_peer_fds.end())
|
parent->cli->msgr.osd_peer_fds.end())
|
||||||
{
|
{
|
||||||
|
@@ -278,7 +278,7 @@ struct rm_osd_t
|
|||||||
if (rsp["response_delete_range"]["deleted"].uint64_value() > 0)
|
if (rsp["response_delete_range"]["deleted"].uint64_value() > 0)
|
||||||
{
|
{
|
||||||
// Wait for mon_change_timeout before updating PG history, or the monitor's change will likely interfere with ours
|
// Wait for mon_change_timeout before updating PG history, or the monitor's change will likely interfere with ours
|
||||||
retry_wait = parent->cli->merged_config["mon_change_timeout"].uint64_value();
|
retry_wait = parent->cli->config["mon_change_timeout"].uint64_value();
|
||||||
if (!retry_wait)
|
if (!retry_wait)
|
||||||
retry_wait = 1000;
|
retry_wait = 1000;
|
||||||
retry_wait += etcd_tx_retry_ms;
|
retry_wait += etcd_tx_retry_ms;
|
||||||
@@ -410,14 +410,17 @@ struct rm_osd_t
|
|||||||
parent->cli->st_cli.etcd_prefix+"/pg/history/"+
|
parent->cli->st_cli.etcd_prefix+"/pg/history/"+
|
||||||
std::to_string(pool_cfg.id)+"/"+std::to_string(pg_num)
|
std::to_string(pool_cfg.id)+"/"+std::to_string(pg_num)
|
||||||
);
|
);
|
||||||
|
auto hist = json11::Json::object {
|
||||||
|
{ "epoch", pg_cfg.epoch },
|
||||||
|
{ "all_peers", pg_cfg.all_peers },
|
||||||
|
{ "osd_sets", pg_cfg.target_history },
|
||||||
|
};
|
||||||
|
if (pg_cfg.next_scrub)
|
||||||
|
hist["next_scrub"] = pg_cfg.next_scrub;
|
||||||
history_updates.push_back(json11::Json::object {
|
history_updates.push_back(json11::Json::object {
|
||||||
{ "request_put", json11::Json::object {
|
{ "request_put", json11::Json::object {
|
||||||
{ "key", history_key },
|
{ "key", history_key },
|
||||||
{ "value", base64_encode(json11::Json(json11::Json::object {
|
{ "value", base64_encode(json11::Json(hist).dump()) },
|
||||||
{ "epoch", pg_cfg.epoch },
|
|
||||||
{ "all_peers", pg_cfg.all_peers },
|
|
||||||
{ "osd_sets", pg_cfg.target_history },
|
|
||||||
}).dump()) },
|
|
||||||
} },
|
} },
|
||||||
});
|
});
|
||||||
history_checks.push_back(json11::Json::object {
|
history_checks.push_back(json11::Json::object {
|
||||||
|
@@ -198,9 +198,10 @@ resume_2:
|
|||||||
}
|
}
|
||||||
pgs_by_state_str += std::to_string(kv.second)+" "+kv.first;
|
pgs_by_state_str += std::to_string(kv.second)+" "+kv.first;
|
||||||
}
|
}
|
||||||
bool readonly = json_is_true(parent->cli->merged_config["readonly"]);
|
bool readonly = json_is_true(parent->cli->config["readonly"]);
|
||||||
bool no_recovery = json_is_true(parent->cli->merged_config["no_recovery"]);
|
bool no_recovery = json_is_true(parent->cli->config["no_recovery"]);
|
||||||
bool no_rebalance = json_is_true(parent->cli->merged_config["no_rebalance"]);
|
bool no_rebalance = json_is_true(parent->cli->config["no_rebalance"]);
|
||||||
|
bool no_scrub = json_is_true(parent->cli->config["no_scrub"]);
|
||||||
if (parent->json_output)
|
if (parent->json_output)
|
||||||
{
|
{
|
||||||
// JSON output
|
// JSON output
|
||||||
@@ -219,6 +220,7 @@ resume_2:
|
|||||||
{ "readonly", readonly },
|
{ "readonly", readonly },
|
||||||
{ "no_recovery", no_recovery },
|
{ "no_recovery", no_recovery },
|
||||||
{ "no_rebalance", no_rebalance },
|
{ "no_rebalance", no_rebalance },
|
||||||
|
{ "no_scrub", no_scrub },
|
||||||
{ "pool_count", pool_count },
|
{ "pool_count", pool_count },
|
||||||
{ "active_pool_count", pools_active },
|
{ "active_pool_count", pools_active },
|
||||||
{ "pg_states", pgs_by_state },
|
{ "pg_states", pgs_by_state },
|
||||||
|
@@ -18,11 +18,12 @@
|
|||||||
|
|
||||||
cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config)
|
cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config)
|
||||||
{
|
{
|
||||||
config = osd_messenger_t::read_config(config);
|
cli_config = config.object_items();
|
||||||
|
file_config = osd_messenger_t::read_config(config);
|
||||||
|
config = osd_messenger_t::merge_configs(cli_config, file_config, etcd_global_config, {});
|
||||||
|
|
||||||
this->ringloop = ringloop;
|
this->ringloop = ringloop;
|
||||||
this->tfd = tfd;
|
this->tfd = tfd;
|
||||||
this->config = config;
|
|
||||||
|
|
||||||
msgr.osd_num = 0;
|
msgr.osd_num = 0;
|
||||||
msgr.tfd = tfd;
|
msgr.tfd = tfd;
|
||||||
@@ -34,6 +35,7 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
|||||||
// peer_osd just connected
|
// peer_osd just connected
|
||||||
continue_ops();
|
continue_ops();
|
||||||
continue_lists();
|
continue_lists();
|
||||||
|
continue_raw_ops(peer_osd);
|
||||||
}
|
}
|
||||||
else if (dirty_buffers.size())
|
else if (dirty_buffers.size())
|
||||||
{
|
{
|
||||||
@@ -58,8 +60,7 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
|||||||
msgr.stop_client(op->peer_fd);
|
msgr.stop_client(op->peer_fd);
|
||||||
delete op;
|
delete op;
|
||||||
};
|
};
|
||||||
msgr.parse_config(this->config);
|
msgr.parse_config(config);
|
||||||
msgr.init();
|
|
||||||
|
|
||||||
st_cli.tfd = tfd;
|
st_cli.tfd = tfd;
|
||||||
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
st_cli.on_load_config_hook = [this](json11::Json::object & cfg) { on_load_config_hook(cfg); };
|
||||||
@@ -73,17 +74,6 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
|||||||
|
|
||||||
scrap_buffer_size = SCRAP_BUFFER_SIZE;
|
scrap_buffer_size = SCRAP_BUFFER_SIZE;
|
||||||
scrap_buffer = malloc_or_die(scrap_buffer_size);
|
scrap_buffer = malloc_or_die(scrap_buffer_size);
|
||||||
|
|
||||||
if (ringloop)
|
|
||||||
{
|
|
||||||
consumer.loop = [this]()
|
|
||||||
{
|
|
||||||
msgr.read_requests();
|
|
||||||
msgr.send_replies();
|
|
||||||
this->ringloop->submit();
|
|
||||||
};
|
|
||||||
ringloop->register_consumer(&consumer);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cluster_client_t::~cluster_client_t()
|
cluster_client_t::~cluster_client_t()
|
||||||
@@ -115,6 +105,37 @@ cluster_op_t::~cluster_op_t()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Submit the raw operations queued for <peer_osd>: starting from the entry found
// for this OSD, every consecutive entry with the same key is marked as an outgoing
// operation, bound to the OSD's peer connection, pushed into the messenger outbox
// and removed from the queue.
void cluster_client_t::continue_raw_ops(osd_num_t peer_osd)
{
    for (auto queued = raw_ops.find(peer_osd); queued != raw_ops.end() && queued->first == peer_osd; )
    {
        auto raw_op = queued->second;
        raw_op->op_type = OSD_OP_OUT;
        // at() is only evaluated when there is something to send
        raw_op->peer_fd = msgr.osd_peer_fds.at(peer_osd);
        msgr.outbox_push(raw_op);
        queued = raw_ops.erase(queued);
    }
}
|
||||||
|
|
||||||
|
void cluster_client_t::init_msgr()
|
||||||
|
{
|
||||||
|
if (msgr_initialized)
|
||||||
|
return;
|
||||||
|
msgr.init();
|
||||||
|
msgr_initialized = true;
|
||||||
|
if (ringloop)
|
||||||
|
{
|
||||||
|
consumer.loop = [this]()
|
||||||
|
{
|
||||||
|
msgr.read_requests();
|
||||||
|
msgr.send_replies();
|
||||||
|
this->ringloop->submit();
|
||||||
|
};
|
||||||
|
ringloop->register_consumer(&consumer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void cluster_client_t::calc_wait(cluster_op_t *op)
|
void cluster_client_t::calc_wait(cluster_op_t *op)
|
||||||
{
|
{
|
||||||
op->prev_wait = 0;
|
op->prev_wait = 0;
|
||||||
@@ -143,7 +164,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
|
|||||||
if (!op->prev_wait)
|
if (!op->prev_wait)
|
||||||
continue_sync(op);
|
continue_sync(op);
|
||||||
}
|
}
|
||||||
else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) */
|
else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP) */
|
||||||
{
|
{
|
||||||
for (auto prev = op_queue_head; prev && prev != op; prev = prev->next)
|
for (auto prev = op_queue_head; prev && prev != op; prev = prev->next)
|
||||||
{
|
{
|
||||||
@@ -151,7 +172,8 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
|
|||||||
{
|
{
|
||||||
op->prev_wait++;
|
op->prev_wait++;
|
||||||
}
|
}
|
||||||
else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ || prev->opcode == OSD_OP_READ_BITMAP)
|
else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ ||
|
||||||
|
prev->opcode == OSD_OP_READ_BITMAP || prev->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
{
|
{
|
||||||
// Flushes are always in the beginning (we're scanning from the beginning of the queue)
|
// Flushes are always in the beginning (we're scanning from the beginning of the queue)
|
||||||
break;
|
break;
|
||||||
@@ -171,7 +193,8 @@ void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *n
|
|||||||
auto n2 = next->next;
|
auto n2 = next->next;
|
||||||
if (next->opcode == OSD_OP_SYNC && !(flags & OP_IMMEDIATE_COMMIT) ||
|
if (next->opcode == OSD_OP_SYNC && !(flags & OP_IMMEDIATE_COMMIT) ||
|
||||||
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
|
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
|
||||||
(next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP) && (flags & OP_FLUSH_BUFFER))
|
(next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP ||
|
||||||
|
next->opcode == OSD_OP_READ_CHAIN_BITMAP) && (flags & OP_FLUSH_BUFFER))
|
||||||
{
|
{
|
||||||
next->prev_wait += inc;
|
next->prev_wait += inc;
|
||||||
assert(next->prev_wait >= 0);
|
assert(next->prev_wait >= 0);
|
||||||
@@ -221,11 +244,14 @@ void cluster_client_t::erase_op(cluster_op_t *op)
|
|||||||
if (op_queue_tail == op)
|
if (op_queue_tail == op)
|
||||||
op_queue_tail = op->prev;
|
op_queue_tail = op->prev;
|
||||||
op->next = op->prev = NULL;
|
op->next = op->prev = NULL;
|
||||||
|
if (flags & OP_FLUSH_BUFFER)
|
||||||
|
std::function<void(cluster_op_t*)>(op->callback)(op);
|
||||||
if (!(flags & OP_IMMEDIATE_COMMIT))
|
if (!(flags & OP_IMMEDIATE_COMMIT))
|
||||||
inc_wait(opcode, flags, next, -1);
|
inc_wait(opcode, flags, next, -1);
|
||||||
// Call callback at the end to avoid inconsistencies in prev_wait
|
// Call callback at the end to avoid inconsistencies in prev_wait
|
||||||
// if the callback adds more operations itself
|
// if the callback adds more operations itself
|
||||||
std::function<void(cluster_op_t*)>(op->callback)(op);
|
if (!(flags & OP_FLUSH_BUFFER))
|
||||||
|
std::function<void(cluster_op_t*)>(op->callback)(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cluster_client_t::continue_ops(bool up_retry)
|
void cluster_client_t::continue_ops(bool up_retry)
|
||||||
@@ -265,13 +291,10 @@ restart:
|
|||||||
continuing_ops = 0;
|
continuing_ops = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void cluster_client_t::on_load_config_hook(json11::Json::object & config)
|
void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_config)
|
||||||
{
|
{
|
||||||
this->merged_config = config;
|
this->etcd_global_config = etcd_global_config;
|
||||||
for (auto & kv: this->config.object_items())
|
config = osd_messenger_t::merge_configs(cli_config, file_config, etcd_global_config, {});
|
||||||
{
|
|
||||||
this->merged_config[kv.first] = kv.second;
|
|
||||||
}
|
|
||||||
if (config.find("client_max_dirty_bytes") != config.end())
|
if (config.find("client_max_dirty_bytes") != config.end())
|
||||||
{
|
{
|
||||||
client_max_dirty_bytes = config["client_max_dirty_bytes"].uint64_value();
|
client_max_dirty_bytes = config["client_max_dirty_bytes"].uint64_value();
|
||||||
@@ -281,14 +304,13 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & config)
|
|||||||
// Old name
|
// Old name
|
||||||
client_max_dirty_bytes = config["client_dirty_limit"].uint64_value();
|
client_max_dirty_bytes = config["client_dirty_limit"].uint64_value();
|
||||||
}
|
}
|
||||||
if (config.find("client_max_dirty_ops") != config.end())
|
else
|
||||||
{
|
client_max_dirty_bytes = 0;
|
||||||
client_max_dirty_ops = config["client_max_dirty_ops"].uint64_value();
|
|
||||||
}
|
|
||||||
if (!client_max_dirty_bytes)
|
if (!client_max_dirty_bytes)
|
||||||
{
|
{
|
||||||
client_max_dirty_bytes = DEFAULT_CLIENT_MAX_DIRTY_BYTES;
|
client_max_dirty_bytes = DEFAULT_CLIENT_MAX_DIRTY_BYTES;
|
||||||
}
|
}
|
||||||
|
client_max_dirty_ops = config["client_max_dirty_ops"].uint64_value();
|
||||||
if (!client_max_dirty_ops)
|
if (!client_max_dirty_ops)
|
||||||
{
|
{
|
||||||
client_max_dirty_ops = DEFAULT_CLIENT_MAX_DIRTY_OPS;
|
client_max_dirty_ops = DEFAULT_CLIENT_MAX_DIRTY_OPS;
|
||||||
@@ -303,7 +325,7 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & config)
|
|||||||
up_wait_retry_interval = 50;
|
up_wait_retry_interval = 50;
|
||||||
}
|
}
|
||||||
msgr.parse_config(config);
|
msgr.parse_config(config);
|
||||||
msgr.parse_config(this->config);
|
st_cli.parse_config(config);
|
||||||
st_cli.load_pgs();
|
st_cli.load_pgs();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -337,7 +359,8 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes
|
|||||||
// And now they have to be resliced!
|
// And now they have to be resliced!
|
||||||
for (auto op = op_queue_head; op; op = op->next)
|
for (auto op = op_queue_head; op; op = op->next)
|
||||||
{
|
{
|
||||||
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) &&
|
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ ||
|
||||||
|
op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP) &&
|
||||||
INODE_POOL(op->cur_inode) == pool_item.first)
|
INODE_POOL(op->cur_inode) == pool_item.first)
|
||||||
{
|
{
|
||||||
op->needs_reslice = true;
|
op->needs_reslice = true;
|
||||||
@@ -409,7 +432,7 @@ void cluster_client_t::on_ready(std::function<void(void)> fn)
|
|||||||
void cluster_client_t::execute(cluster_op_t *op)
|
void cluster_client_t::execute(cluster_op_t *op)
|
||||||
{
|
{
|
||||||
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
|
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
|
||||||
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_WRITE)
|
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_WRITE)
|
||||||
{
|
{
|
||||||
op->retval = -EINVAL;
|
op->retval = -EINVAL;
|
||||||
std::function<void(cluster_op_t*)>(op->callback)(op);
|
std::function<void(cluster_op_t*)>(op->callback)(op);
|
||||||
@@ -441,7 +464,7 @@ void cluster_client_t::execute(cluster_op_t *op)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Check alignment
|
// Check alignment
|
||||||
if ((op->opcode == OSD_OP_READ || op->opcode == OSD_OP_WRITE) && !op->len ||
|
if (!op->len && (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP || op->opcode == OSD_OP_WRITE) ||
|
||||||
op->offset % pool_it->second.bitmap_granularity || op->len % pool_it->second.bitmap_granularity)
|
op->offset % pool_it->second.bitmap_granularity || op->len % pool_it->second.bitmap_granularity)
|
||||||
{
|
{
|
||||||
op->retval = -EINVAL;
|
op->retval = -EINVAL;
|
||||||
@@ -503,6 +526,23 @@ void cluster_client_t::execute(cluster_op_t *op)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void cluster_client_t::execute_raw(osd_num_t osd_num, osd_op_t *op)
|
||||||
|
{
|
||||||
|
auto fd_it = msgr.osd_peer_fds.find(osd_num);
|
||||||
|
if (fd_it != msgr.osd_peer_fds.end())
|
||||||
|
{
|
||||||
|
op->op_type = OSD_OP_OUT;
|
||||||
|
op->peer_fd = fd_it->second;
|
||||||
|
msgr.outbox_push(op);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (msgr.wanted_peers.find(osd_num) == msgr.wanted_peers.end())
|
||||||
|
msgr.connect_peer(osd_num, st_cli.peer_states[osd_num]);
|
||||||
|
raw_ops.emplace(osd_num, op);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void cluster_client_t::copy_write(cluster_op_t *op, std::map<object_id, cluster_buffer_t> & dirty_buffers)
|
void cluster_client_t::copy_write(cluster_op_t *op, std::map<object_id, cluster_buffer_t> & dirty_buffers)
|
||||||
{
|
{
|
||||||
// Save operation for replay when one of PGs goes out of sync
|
// Save operation for replay when one of PGs goes out of sync
|
||||||
@@ -702,8 +742,7 @@ resume_3:
|
|||||||
// Finished successfully
|
// Finished successfully
|
||||||
// Even if the PG count has changed in meanwhile we treat it as success
|
// Even if the PG count has changed in meanwhile we treat it as success
|
||||||
// because if some operations were invalid for the new PG count we'd get errors
|
// because if some operations were invalid for the new PG count we'd get errors
|
||||||
bool is_read = op->opcode == OSD_OP_READ;
|
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
if (is_read)
|
|
||||||
{
|
{
|
||||||
// Check parent inode
|
// Check parent inode
|
||||||
auto ino_it = st_cli.inode_config.find(op->cur_inode);
|
auto ino_it = st_cli.inode_config.find(op->cur_inode);
|
||||||
@@ -727,18 +766,24 @@ resume_3:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
op->retval = op->len;
|
op->retval = op->len;
|
||||||
|
if (op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
|
{
|
||||||
|
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(op->inode));
|
||||||
|
op->retval = op->len / pool_cfg.bitmap_granularity;
|
||||||
|
}
|
||||||
erase_op(op);
|
erase_op(op);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else if (op->retval != 0 && op->retval != -EPIPE)
|
else if (op->retval != 0 && op->retval != -EPIPE && op->retval != -EIO && op->retval != -ENOSPC)
|
||||||
{
|
{
|
||||||
// Fatal error (not -EPIPE)
|
// Fatal error (neither -EPIPE, -EIO nor -ENOSPC)
|
||||||
|
// FIXME: Add a parameter to allow to not wait for EIOs (incomplete or corrupted objects) to heal
|
||||||
erase_op(op);
|
erase_op(op);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// -EPIPE - clear the error and retry
|
// Non-fatal error - clear the error and retry
|
||||||
op->retval = 0;
|
op->retval = 0;
|
||||||
if (op->needs_reslice)
|
if (op->needs_reslice)
|
||||||
{
|
{
|
||||||
@@ -750,7 +795,10 @@ resume_3:
|
|||||||
{
|
{
|
||||||
for (int i = 0; i < op->parts.size(); i++)
|
for (int i = 0; i < op->parts.size(); i++)
|
||||||
{
|
{
|
||||||
op->parts[i].flags = PART_RETRY;
|
if (!(op->parts[i].flags & PART_DONE))
|
||||||
|
{
|
||||||
|
op->parts[i].flags = PART_RETRY;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
goto resume_2;
|
goto resume_2;
|
||||||
}
|
}
|
||||||
@@ -809,23 +857,19 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
|||||||
uint64_t last_stripe = op->len > 0 ? ((op->offset + op->len - 1) / pg_block_size) * pg_block_size : first_stripe;
|
uint64_t last_stripe = op->len > 0 ? ((op->offset + op->len - 1) / pg_block_size) * pg_block_size : first_stripe;
|
||||||
op->retval = 0;
|
op->retval = 0;
|
||||||
op->parts.resize((last_stripe - first_stripe) / pg_block_size + 1);
|
op->parts.resize((last_stripe - first_stripe) / pg_block_size + 1);
|
||||||
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
|
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
{
|
{
|
||||||
// Allocate memory for the bitmap
|
// Allocate memory for the bitmap
|
||||||
unsigned object_bitmap_size = (((op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : op->len) / pool_cfg.bitmap_granularity + 7) / 8);
|
unsigned object_bitmap_size = ((op->len / pool_cfg.bitmap_granularity + 7) / 8);
|
||||||
object_bitmap_size = (object_bitmap_size < 8 ? 8 : object_bitmap_size);
|
object_bitmap_size = (object_bitmap_size < 8 ? 8 : object_bitmap_size);
|
||||||
unsigned bitmap_mem = object_bitmap_size + (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8 * pg_data_size) * op->parts.size();
|
unsigned bitmap_mem = object_bitmap_size + (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8 * pg_data_size) * op->parts.size();
|
||||||
if (op->bitmap_buf_size < bitmap_mem)
|
if (!op->bitmap_buf || op->bitmap_buf_size < bitmap_mem)
|
||||||
{
|
{
|
||||||
op->bitmap_buf = realloc_or_die(op->bitmap_buf, bitmap_mem);
|
op->bitmap_buf = realloc_or_die(op->bitmap_buf, bitmap_mem);
|
||||||
if (!op->bitmap_buf_size)
|
|
||||||
{
|
|
||||||
// First allocation
|
|
||||||
memset(op->bitmap_buf, 0, object_bitmap_size);
|
|
||||||
}
|
|
||||||
op->part_bitmaps = (uint8_t*)op->bitmap_buf + object_bitmap_size;
|
op->part_bitmaps = (uint8_t*)op->bitmap_buf + object_bitmap_size;
|
||||||
op->bitmap_buf_size = bitmap_mem;
|
op->bitmap_buf_size = bitmap_mem;
|
||||||
}
|
}
|
||||||
|
memset(op->bitmap_buf, 0, bitmap_mem);
|
||||||
}
|
}
|
||||||
int iov_idx = 0;
|
int iov_idx = 0;
|
||||||
size_t iov_pos = 0;
|
size_t iov_pos = 0;
|
||||||
@@ -876,13 +920,14 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
|||||||
if (end == begin)
|
if (end == begin)
|
||||||
op->done_count++;
|
op->done_count++;
|
||||||
}
|
}
|
||||||
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
|
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
|
||||||
{
|
{
|
||||||
add_iov(end-begin, false, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
|
add_iov(end-begin, false, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
|
||||||
}
|
}
|
||||||
op->parts[i].parent = op;
|
op->parts[i].parent = op;
|
||||||
op->parts[i].offset = begin;
|
op->parts[i].offset = begin;
|
||||||
op->parts[i].len = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
|
op->parts[i].len = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP ||
|
||||||
|
op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
|
||||||
op->parts[i].pg_num = pg_num;
|
op->parts[i].pg_num = pg_num;
|
||||||
op->parts[i].osd_num = 0;
|
op->parts[i].osd_num = 0;
|
||||||
op->parts[i].flags = 0;
|
op->parts[i].flags = 0;
|
||||||
@@ -911,6 +956,10 @@ bool cluster_client_t::affects_osd(uint64_t inode, uint64_t offset, uint64_t len
|
|||||||
|
|
||||||
bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
||||||
{
|
{
|
||||||
|
if (!msgr_initialized)
|
||||||
|
{
|
||||||
|
init_msgr();
|
||||||
|
}
|
||||||
auto part = &op->parts[i];
|
auto part = &op->parts[i];
|
||||||
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(op->cur_inode));
|
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(op->cur_inode));
|
||||||
auto pg_it = pool_cfg.pg_config.find(part->pg_num);
|
auto pg_it = pool_cfg.pg_config.find(part->pg_num);
|
||||||
@@ -929,7 +978,7 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
|||||||
pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
|
pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
|
||||||
);
|
);
|
||||||
uint64_t meta_rev = 0;
|
uint64_t meta_rev = 0;
|
||||||
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
|
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
|
||||||
{
|
{
|
||||||
auto ino_it = st_cli.inode_config.find(op->inode);
|
auto ino_it = st_cli.inode_config.find(op->inode);
|
||||||
if (ino_it != st_cli.inode_config.end())
|
if (ino_it != st_cli.inode_config.end())
|
||||||
@@ -942,7 +991,7 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
|||||||
.header = {
|
.header = {
|
||||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
.id = next_op_id(),
|
.id = next_op_id(),
|
||||||
.opcode = op->opcode == OSD_OP_READ_BITMAP ? OSD_OP_READ : op->opcode,
|
.opcode = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP ? OSD_OP_READ : op->opcode,
|
||||||
},
|
},
|
||||||
.inode = op->cur_inode,
|
.inode = op->cur_inode,
|
||||||
.offset = part->offset,
|
.offset = part->offset,
|
||||||
@@ -950,8 +999,10 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
|||||||
.meta_revision = meta_rev,
|
.meta_revision = meta_rev,
|
||||||
.version = op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE ? op->version : 0,
|
.version = op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE ? op->version : 0,
|
||||||
} },
|
} },
|
||||||
.bitmap = (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? (uint8_t*)op->part_bitmaps + pg_bitmap_size*i : NULL),
|
.bitmap = (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP
|
||||||
.bitmap_len = (unsigned)(op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? pg_bitmap_size : 0),
|
? (uint8_t*)op->part_bitmaps + pg_bitmap_size*i : NULL),
|
||||||
|
.bitmap_len = (unsigned)(op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP
|
||||||
|
? pg_bitmap_size : 0),
|
||||||
.callback = [this, part](osd_op_t *op_part)
|
.callback = [this, part](osd_op_t *op_part)
|
||||||
{
|
{
|
||||||
handle_op_part(part);
|
handle_op_part(part);
|
||||||
@@ -1029,7 +1080,7 @@ resume_1:
|
|||||||
uw_it->second.state = CACHE_DIRTY;
|
uw_it->second.state = CACHE_DIRTY;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (op->retval == -EPIPE)
|
if (op->retval == -EPIPE || op->retval == -EIO || op->retval == -ENOSPC)
|
||||||
{
|
{
|
||||||
// Retry later
|
// Retry later
|
||||||
op->parts.clear();
|
op->parts.clear();
|
||||||
@@ -1099,53 +1150,72 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
|||||||
if (part->op.reply.hdr.retval != expected)
|
if (part->op.reply.hdr.retval != expected)
|
||||||
{
|
{
|
||||||
// Operation failed, retry
|
// Operation failed, retry
|
||||||
if (part->op.reply.hdr.retval == -EPIPE)
|
part->flags |= PART_ERROR;
|
||||||
|
if (!op->retval || op->retval == -EPIPE || part->op.reply.hdr.retval == -EIO)
|
||||||
{
|
{
|
||||||
// Mark op->up_wait = true before stopping the client
|
// Error priority: EIO > ENOSPC > EPIPE
|
||||||
op->up_wait = true;
|
|
||||||
if (!retry_timeout_id)
|
|
||||||
{
|
|
||||||
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
|
|
||||||
{
|
|
||||||
retry_timeout_id = 0;
|
|
||||||
continue_ops(true);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!op->retval || op->retval == -EPIPE)
|
|
||||||
{
|
|
||||||
// Don't overwrite other errors with -EPIPE
|
|
||||||
op->retval = part->op.reply.hdr.retval;
|
op->retval = part->op.reply.hdr.retval;
|
||||||
}
|
}
|
||||||
if (op->retval != -EINTR && op->retval != -EIO)
|
int stop_fd = -1;
|
||||||
|
if (op->retval != -EINTR && op->retval != -EIO && op->retval != -ENOSPC)
|
||||||
{
|
{
|
||||||
|
stop_fd = part->op.peer_fd;
|
||||||
fprintf(
|
fprintf(
|
||||||
stderr, "%s operation failed on OSD %lu: retval=%ld (expected %d), dropping connection\n",
|
stderr, "%s operation failed on OSD %lu: retval=%ld (expected %d), dropping connection\n",
|
||||||
osd_op_names[part->op.req.hdr.opcode], part->osd_num, part->op.reply.hdr.retval, expected
|
osd_op_names[part->op.req.hdr.opcode], part->osd_num, part->op.reply.hdr.retval, expected
|
||||||
);
|
);
|
||||||
msgr.stop_client(part->op.peer_fd);
|
|
||||||
}
|
}
|
||||||
part->flags |= PART_ERROR;
|
else
|
||||||
|
{
|
||||||
|
fprintf(
|
||||||
|
stderr, "%s operation failed on OSD %lu: retval=%ld (expected %d)\n",
|
||||||
|
osd_op_names[part->op.req.hdr.opcode], part->osd_num, part->op.reply.hdr.retval, expected
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// All next things like timer, continue_sync/rw and stop_client may affect the operation again
|
||||||
|
// So do all these things after modifying operation state, otherwise we may hit reenterability bugs
|
||||||
|
// FIXME postpone such things to set_immediate here to avoid bugs
|
||||||
|
// Mark op->up_wait = true to retry operation after a short pause (not immediately)
|
||||||
|
op->up_wait = true;
|
||||||
|
if (!retry_timeout_id)
|
||||||
|
{
|
||||||
|
retry_timeout_id = tfd->set_timer(up_wait_retry_interval, false, [this](int)
|
||||||
|
{
|
||||||
|
retry_timeout_id = 0;
|
||||||
|
continue_ops(true);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if (op->inflight_count == 0)
|
||||||
|
{
|
||||||
|
if (op->opcode == OSD_OP_SYNC)
|
||||||
|
continue_sync(op);
|
||||||
|
else
|
||||||
|
continue_rw(op);
|
||||||
|
}
|
||||||
|
if (stop_fd >= 0)
|
||||||
|
{
|
||||||
|
msgr.stop_client(stop_fd);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// OK
|
// OK
|
||||||
if (!(op->flags & OP_IMMEDIATE_COMMIT))
|
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && !(op->flags & OP_IMMEDIATE_COMMIT))
|
||||||
dirty_osds.insert(part->osd_num);
|
dirty_osds.insert(part->osd_num);
|
||||||
part->flags |= PART_DONE;
|
part->flags |= PART_DONE;
|
||||||
op->done_count++;
|
op->done_count++;
|
||||||
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
|
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
{
|
{
|
||||||
copy_part_bitmap(op, part);
|
copy_part_bitmap(op, part);
|
||||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||||
}
|
}
|
||||||
}
|
if (op->inflight_count == 0)
|
||||||
if (op->inflight_count == 0)
|
{
|
||||||
{
|
if (op->opcode == OSD_OP_SYNC)
|
||||||
if (op->opcode == OSD_OP_SYNC)
|
continue_sync(op);
|
||||||
continue_sync(op);
|
else
|
||||||
else
|
continue_rw(op);
|
||||||
continue_rw(op);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1158,7 +1228,12 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
|
|||||||
);
|
);
|
||||||
uint32_t object_offset = (part->op.req.rw.offset - op->offset) / pool_cfg.bitmap_granularity;
|
uint32_t object_offset = (part->op.req.rw.offset - op->offset) / pool_cfg.bitmap_granularity;
|
||||||
uint32_t part_offset = (part->op.req.rw.offset % pg_block_size) / pool_cfg.bitmap_granularity;
|
uint32_t part_offset = (part->op.req.rw.offset % pg_block_size) / pool_cfg.bitmap_granularity;
|
||||||
uint32_t part_len = (op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : part->op.req.rw.len) / pool_cfg.bitmap_granularity;
|
uint32_t op_len = op->len / pool_cfg.bitmap_granularity;
|
||||||
|
uint32_t part_len = pg_block_size/pool_cfg.bitmap_granularity - part_offset;
|
||||||
|
if (part_len > op_len-object_offset)
|
||||||
|
{
|
||||||
|
part_len = op_len-object_offset;
|
||||||
|
}
|
||||||
if (!(object_offset & 0x7) && !(part_offset & 0x7) && (part_len >= 8))
|
if (!(object_offset & 0x7) && !(part_offset & 0x7) && (part_len >= 8))
|
||||||
{
|
{
|
||||||
// Copy bytes
|
// Copy bytes
|
||||||
|
@@ -11,6 +11,7 @@
|
|||||||
#define INODE_LIST_DONE 1
|
#define INODE_LIST_DONE 1
|
||||||
#define INODE_LIST_HAS_UNSTABLE 2
|
#define INODE_LIST_HAS_UNSTABLE 2
|
||||||
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
|
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
|
||||||
|
#define OSD_OP_READ_CHAIN_BITMAP 0x102
|
||||||
|
|
||||||
#define OSD_OP_IGNORE_READONLY 0x08
|
#define OSD_OP_IGNORE_READONLY 0x08
|
||||||
|
|
||||||
@@ -30,7 +31,7 @@ struct cluster_op_part_t
|
|||||||
|
|
||||||
struct cluster_op_t
|
struct cluster_op_t
|
||||||
{
|
{
|
||||||
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP
|
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP, OSD_OP_READ_CHAIN_BITMAP
|
||||||
uint64_t inode;
|
uint64_t inode;
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
uint64_t len;
|
uint64_t len;
|
||||||
@@ -39,9 +40,13 @@ struct cluster_op_t
|
|||||||
uint64_t version = 0;
|
uint64_t version = 0;
|
||||||
// now only OSD_OP_IGNORE_READONLY is supported
|
// now only OSD_OP_IGNORE_READONLY is supported
|
||||||
uint64_t flags = 0;
|
uint64_t flags = 0;
|
||||||
|
// negative retval is an error number
|
||||||
|
// write and read return len on success
|
||||||
|
// sync and delete return 0 on success
|
||||||
|
// read_bitmap and read_chain_bitmap return the length of bitmap in bits(!)
|
||||||
int retval;
|
int retval;
|
||||||
osd_op_buf_list_t iov;
|
osd_op_buf_list_t iov;
|
||||||
// READ and READ_BITMAP return the bitmap here
|
// READ, READ_BITMAP, READ_CHAIN_BITMAP return the bitmap here
|
||||||
void *bitmap_buf = NULL;
|
void *bitmap_buf = NULL;
|
||||||
std::function<void(cluster_op_t*)> callback;
|
std::function<void(cluster_op_t*)> callback;
|
||||||
~cluster_op_t();
|
~cluster_op_t();
|
||||||
@@ -98,17 +103,23 @@ class cluster_client_t
|
|||||||
ring_consumer_t consumer;
|
ring_consumer_t consumer;
|
||||||
std::vector<std::function<void(void)>> on_ready_hooks;
|
std::vector<std::function<void(void)>> on_ready_hooks;
|
||||||
std::vector<inode_list_t*> lists;
|
std::vector<inode_list_t*> lists;
|
||||||
|
std::multimap<osd_num_t, osd_op_t*> raw_ops;
|
||||||
int continuing_ops = 0;
|
int continuing_ops = 0;
|
||||||
|
bool msgr_initialized = false;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
etcd_state_client_t st_cli;
|
etcd_state_client_t st_cli;
|
||||||
|
|
||||||
osd_messenger_t msgr;
|
osd_messenger_t msgr;
|
||||||
json11::Json config;
|
void init_msgr();
|
||||||
json11::Json::object merged_config;
|
|
||||||
|
json11::Json::object cli_config, file_config, etcd_global_config;
|
||||||
|
json11::Json::object config;
|
||||||
|
|
||||||
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config);
|
cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config);
|
||||||
~cluster_client_t();
|
~cluster_client_t();
|
||||||
void execute(cluster_op_t *op);
|
void execute(cluster_op_t *op);
|
||||||
|
void execute_raw(osd_num_t osd_num, osd_op_t *op);
|
||||||
bool is_ready();
|
bool is_ready();
|
||||||
void on_ready(std::function<void(void)> fn);
|
void on_ready(std::function<void(void)> fn);
|
||||||
|
|
||||||
@@ -144,4 +155,5 @@ protected:
|
|||||||
void continue_lists();
|
void continue_lists();
|
||||||
void continue_listing(inode_list_t *lst);
|
void continue_listing(inode_list_t *lst);
|
||||||
void send_list(inode_list_osd_t *cur_list);
|
void send_list(inode_list_osd_t *cur_list);
|
||||||
|
void continue_raw_ops(osd_num_t peer_osd);
|
||||||
};
|
};
|
||||||
|
@@ -43,6 +43,7 @@ struct inode_list_t
|
|||||||
inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
|
||||||
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback)
|
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback)
|
||||||
{
|
{
|
||||||
|
init_msgr();
|
||||||
int skipped_pgs = 0;
|
int skipped_pgs = 0;
|
||||||
pool_id_t pool_id = INODE_POOL(inode);
|
pool_id_t pool_id = INODE_POOL(inode);
|
||||||
if (!pool_id || st_cli.pool_config.find(pool_id) == st_cli.pool_config.end())
|
if (!pool_id || st_cli.pool_config.find(pool_id) == st_cli.pool_config.end())
|
||||||
|
@@ -281,7 +281,7 @@ void disk_tool_t::dump_journal_entry(int num, journal_entry *je, bool json)
|
|||||||
if (je->big_write.size > sizeof(journal_entry_big_write))
|
if (je->big_write.size > sizeof(journal_entry_big_write))
|
||||||
{
|
{
|
||||||
printf(json ? ",\"bitmap\":\"" : " (bitmap: ");
|
printf(json ? ",\"bitmap\":\"" : " (bitmap: ");
|
||||||
for (int i = sizeof(journal_entry_big_write); i < je->small_write.size; i++)
|
for (int i = sizeof(journal_entry_big_write); i < je->big_write.size; i++)
|
||||||
{
|
{
|
||||||
printf("%02x", ((uint8_t*)je)[i]);
|
printf("%02x", ((uint8_t*)je)[i]);
|
||||||
}
|
}
|
||||||
|
@@ -26,7 +26,7 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)>
|
|||||||
buf_size = dsk.meta_len;
|
buf_size = dsk.meta_len;
|
||||||
void *data = memalign_or_die(MEM_ALIGNMENT, buf_size);
|
void *data = memalign_or_die(MEM_ALIGNMENT, buf_size);
|
||||||
lseek64(dsk.meta_fd, dsk.meta_offset, 0);
|
lseek64(dsk.meta_fd, dsk.meta_offset, 0);
|
||||||
read_blocking(dsk.meta_fd, data, buf_size);
|
read_blocking(dsk.meta_fd, data, dsk.meta_block_size);
|
||||||
// Check superblock
|
// Check superblock
|
||||||
blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)data;
|
blockstore_meta_header_v1_t *hdr = (blockstore_meta_header_v1_t *)data;
|
||||||
if (hdr->zero == 0 &&
|
if (hdr->zero == 0 &&
|
||||||
@@ -41,8 +41,11 @@ int disk_tool_t::process_meta(std::function<void(blockstore_meta_header_v1_t *)>
|
|||||||
if (buf_size % dsk.meta_block_size)
|
if (buf_size % dsk.meta_block_size)
|
||||||
{
|
{
|
||||||
buf_size = 8*dsk.meta_block_size;
|
buf_size = 8*dsk.meta_block_size;
|
||||||
|
void *new_data = memalign_or_die(MEM_ALIGNMENT, buf_size);
|
||||||
|
memcpy(new_data, data, dsk.meta_block_size);
|
||||||
free(data);
|
free(data);
|
||||||
data = memalign_or_die(MEM_ALIGNMENT, buf_size);
|
data = new_data;
|
||||||
|
hdr = (blockstore_meta_header_v1_t *)data;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dsk.bitmap_granularity = hdr->bitmap_granularity;
|
dsk.bitmap_granularity = hdr->bitmap_granularity;
|
||||||
|
@@ -621,6 +621,11 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
|
|||||||
}
|
}
|
||||||
// Treat all disks as SSDs if not in the hybrid mode
|
// Treat all disks as SSDs if not in the hybrid mode
|
||||||
prepare_one(options, hybrid && dev.is_hdd ? 1 : 0);
|
prepare_one(options, hybrid && dev.is_hdd ? 1 : 0);
|
||||||
|
if (hybrid)
|
||||||
|
{
|
||||||
|
options.erase("journal_device");
|
||||||
|
options.erase("meta_device");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user