Compare commits
179 Commits
Author | SHA1 | Date | |
---|---|---|---|
5524dbdab7 | |||
cd3dec06ac | |||
371d79e059 | |||
0e888e6c60 | |||
408c21d8f0 | |||
43cb9ae212 | |||
e15b6e7805 | |||
31017d8412 | |||
4819854064 | |||
1f509cca77 | |||
aa8e8e8271 | |||
4d79e531c5 | |||
30dff8893f | |||
becf14a705 | |||
64388788c1 | |||
37653abe4b | |||
7c054c6f10 | |||
bb7709e824 | |||
ebeace5a2d | |||
a378789f10 | |||
1fe678e57b | |||
2e592a2f22 | |||
b92f644e3a | |||
890ea3dbc0 | |||
06630369bf | |||
b4740acf62 | |||
eae81bbda6 | |||
8222e3c77d | |||
29cbe70e74 | |||
a883e79507 | |||
be7e76f849 | |||
6fd2cf5df6 | |||
294a754c9e | |||
8bfea6e7de | |||
bac9e34836 | |||
8aa4d492c1 | |||
9336ee5476 | |||
ad30b11519 | |||
a061246997 | |||
5066e35a49 | |||
93dc31f3fc | |||
f245b56176 | |||
befca06f18 | |||
fbf0263625 | |||
3bcf276d4d | |||
38db53f5ee | |||
cd543a90bc | |||
f600cc07b0 | |||
6a8e530e6b | |||
5cadb170b9 | |||
e72d4ed1d4 | |||
ff479a102d | |||
27d0d5b06a | |||
33950c1ec8 | |||
eea7ef1f19 | |||
cc0fdc6253 | |||
79ecd59b10 | |||
51081c9b45 | |||
b7d398be5b | |||
85e9f67d9d | |||
79c6d6f323 | |||
ae760dbc1d | |||
65487da4b1 | |||
7862282938 | |||
30ce2bd951 | |||
b1a0afd10a | |||
85b6134910 | |||
b1b07a393d | |||
7333022adf | |||
ab8627c9fa | |||
6acf562e01 | |||
6f797f429e | |||
b8a1734465 | |||
c752b68167 | |||
564df2eb5d | |||
9a427dd70a | |||
1a4ceb420d | |||
21b5124a4b | |||
4181add1f4 | |||
a8464c19af | |||
819cb70cdd | |||
3c8e4c6b72 | |||
8ef4cf89dc | |||
7bfb1639ea | |||
628e481c32 | |||
af6f2046fc | |||
9357e5293e | |||
12851dc07d | |||
a5753e35a3 | |||
d6ee1ca17c | |||
71674d00cf | |||
ddb078d5a7 | |||
d22d56f90a | |||
eb1331a079 | |||
c5274f655b | |||
45e07d6294 | |||
a8ee391e05 | |||
de48fa3fd2 | |||
874a766b62 | |||
384bd8e28f | |||
430994f48a | |||
3d7f838c59 | |||
b909d81f41 | |||
e42975ffd1 | |||
93778324e5 | |||
eeb6727170 | |||
7fe82c692e | |||
92c6e16eba | |||
213a9ccb4d | |||
a166147110 | |||
7d532880c3 | |||
0b0405d115 | |||
e651c93a90 | |||
988e90be69 | |||
272a45ad63 | |||
25a15d24cf | |||
700e0e9bff | |||
ab0ca7c00f | |||
f153bc950b | |||
425ff8818d | |||
9e287a7778 | |||
f52f58b9e9 | |||
1fe6b0c0e2 | |||
e4237e9ed8 | |||
10a5fd6abb | |||
1c316ef350 | |||
0b2d12eef1 | |||
1c10430ae1 | |||
dfce91d168 | |||
332a13ba30 | |||
d0e257ee81 | |||
004912aac0 | |||
c18e92273e | |||
9815d70ffc | |||
4a4627dcab | |||
b963f2fd93 | |||
ba7427020e | |||
a0aac7eb2a | |||
ac7b834af3 | |||
ee0c78fd74 | |||
e6646a5b2f | |||
ae69662b17 | |||
57ad4c3636 | |||
b7e4d0c9bf | |||
161a23c966 | |||
2f999d8607 | |||
d007a374f2 | |||
45c0694853 | |||
57bcba2406 | |||
30ac899074 | |||
2348d39cf4 | |||
3de7929fe5 | |||
07b2196bc2 | |||
b8e30608d6 | |||
a612cdca47 | |||
c8d61568b5 | |||
84ed3c6395 | |||
a7b57386c0 | |||
9d4ea5f764 | |||
000e4944ec | |||
8426616d89 | |||
1a841344ec | |||
8603b5cb1d | |||
f12b8e45a9 | |||
878ccbb6ea | |||
b14220b4d0 | |||
181d6ba407 | |||
63c2b9832c | |||
10e2e6a7c8 | |||
a598428992 | |||
08a677b684 | |||
7c8fbdad16 | |||
2f9353df60 | |||
57c744f288 | |||
a11ca56fb1 | |||
b84927b340 | |||
83cacba226 | |||
2c8f0bc6d5 | |||
7ae5b0e368 |
@@ -10,6 +10,9 @@ RUN set -e -x; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
ln -s /root/qemu-build/qemu-*/ ./qemu; \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
cd mon; \
|
||||
npm install; \
|
||||
cd ..; \
|
||||
mkdir build; \
|
||||
cd build; \
|
||||
cmake .. -DWITH_ASAN=yes -DWITH_QEMU=yes; \
|
||||
|
@@ -190,24 +190,6 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_failure_domain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_failure_domain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_interrupted_rebalance:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -280,6 +262,60 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_failure_domain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_failure_domain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_minsize_1:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -316,6 +352,114 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_rm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_rm.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_chain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_chain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_chain_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_down:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_down.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_down_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_splitbrain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_splitbrain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_rebalance_verify:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -388,78 +532,6 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_rm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_rm.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_splitbrain:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_splitbrain.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_write:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -550,6 +622,114 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_32k_dmj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_32k_dj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=csum_32k_dj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_32k:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=csum_32k OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_4k_dmj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_4k_dj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_4k:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_scrub:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -7,7 +7,8 @@ for my $line (<>)
|
||||
if ($line =~ /\.\/(test_[^\.]+)/s)
|
||||
{
|
||||
chomp $line;
|
||||
my $test_name = $1;
|
||||
my $base_name = $1;
|
||||
my $test_name = $base_name;
|
||||
my $timeout = 3;
|
||||
if ($test_name eq 'test_etcd_fail' || $test_name eq 'test_heal' || $test_name eq 'test_add_osd' ||
|
||||
$test_name eq 'test_interrupted_rebalance' || $test_name eq 'test_rebalance_verify')
|
||||
@@ -16,7 +17,12 @@ for my $line (<>)
|
||||
}
|
||||
while ($line =~ /([^\s=]+)=(\S+)/gs)
|
||||
{
|
||||
if ($1 eq 'SCHEME' && $2 eq 'ec')
|
||||
if ($1 eq 'TEST_NAME')
|
||||
{
|
||||
$test_name = $base_name.'_'.$2;
|
||||
last;
|
||||
}
|
||||
elsif ($1 eq 'SCHEME' && $2 eq 'ec')
|
||||
{
|
||||
$test_name .= '_ec';
|
||||
}
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "0.9.2")
|
||||
set(VERSION "1.2.0")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -15,7 +15,7 @@ Vitastor архитектурно похож на Ceph, что означает
|
||||
и автоматическое распределение данных по любому числу дисков любого размера с настраиваемыми схемами
|
||||
избыточности - репликацией или с произвольными кодами коррекции ошибок.
|
||||
|
||||
Vitastor нацелен на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
|
||||
Vitastor нацелен в первую очередь на SSD и SSD+HDD кластеры с как минимум 10 Гбит/с сетью, поддерживает
|
||||
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
|
||||
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
|
||||
|
||||
@@ -50,6 +50,7 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
|
||||
- Параметры
|
||||
- [Общие](docs/config/common.ru.md)
|
||||
- [Сетевые](docs/config/network.ru.md)
|
||||
- [Клиентский код](docs/config/client.en.md)
|
||||
- [Глобальные дисковые параметры](docs/config/layout-cluster.ru.md)
|
||||
- [Дисковые параметры OSD](docs/config/layout-osd.ru.md)
|
||||
- [Прочие параметры OSD](docs/config/osd.ru.md)
|
||||
|
@@ -14,8 +14,8 @@ Vitastor is architecturally similar to Ceph which means strong consistency,
|
||||
primary-replication, symmetric clustering and automatic data distribution over any
|
||||
number of drives of any size with configurable redundancy (replication or erasure codes/XOR).
|
||||
|
||||
Vitastor targets SSD and SSD+HDD clusters with at least 10 Gbit/s network, supports
|
||||
TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
|
||||
Vitastor targets primarily SSD and SSD+HDD clusters with at least 10 Gbit/s network,
|
||||
supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1 ms
|
||||
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
|
||||
or internal systems of public clouds.
|
||||
|
||||
@@ -50,6 +50,7 @@ Read more details below in the documentation.
|
||||
- Parameter Reference
|
||||
- [Common](docs/config/common.en.md)
|
||||
- [Network](docs/config/network.en.md)
|
||||
- [Client](docs/config/client.en.md)
|
||||
- [Global Disk Layout](docs/config/layout-cluster.en.md)
|
||||
- [OSD Disk Layout](docs/config/layout-osd.en.md)
|
||||
- [OSD Runtime Parameters](docs/config/osd.en.md)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VERSION ?= v0.9.2
|
||||
VERSION ?= v1.2.0
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v0.9.2
|
||||
image: vitalif/vitastor-csi:v1.2.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -35,10 +35,13 @@ rules:
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshots"]
|
||||
verbs: ["get", "list"]
|
||||
verbs: ["get", "list", "patch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshots/status"]
|
||||
verbs: ["get", "list", "patch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshotcontents"]
|
||||
verbs: ["create", "get", "list", "watch", "update", "delete"]
|
||||
verbs: ["create", "get", "list", "watch", "update", "delete", "patch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshotclasses"]
|
||||
verbs: ["get", "list", "watch"]
|
||||
@@ -53,7 +56,7 @@ rules:
|
||||
verbs: ["get", "list", "watch"]
|
||||
- apiGroups: ["snapshot.storage.k8s.io"]
|
||||
resources: ["volumesnapshotcontents/status"]
|
||||
verbs: ["update"]
|
||||
verbs: ["update", "patch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["configmaps"]
|
||||
verbs: ["get"]
|
||||
|
@@ -23,6 +23,11 @@ metadata:
|
||||
name: csi-vitastor-provisioner
|
||||
spec:
|
||||
replicas: 3
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
rollingUpdate:
|
||||
maxUnavailable: 1
|
||||
maxSurge: 0
|
||||
selector:
|
||||
matchLabels:
|
||||
app: csi-vitastor-provisioner
|
||||
@@ -46,7 +51,7 @@ spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
containers:
|
||||
- name: csi-provisioner
|
||||
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.0
|
||||
image: k8s.gcr.io/sig-storage/csi-provisioner:v3.0.0
|
||||
args:
|
||||
- "--csi-address=$(ADDRESS)"
|
||||
- "--v=5"
|
||||
@@ -116,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v0.9.2
|
||||
image: vitalif/vitastor-csi:v1.2.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -17,3 +17,4 @@ parameters:
|
||||
# multiple etcdUrls may be specified, delimited by comma
|
||||
#etcdUrl: "http://192.168.7.2:2379"
|
||||
#etcdPrefix: "/vitastor"
|
||||
allowVolumeExpansion: true
|
||||
|
7
csi/deploy/example-snapshot-class.yaml
Normal file
7
csi/deploy/example-snapshot-class.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
apiVersion: snapshot.storage.k8s.io/v1
|
||||
kind: VolumeSnapshotClass
|
||||
metadata:
|
||||
name: vitastor-snapclass
|
||||
driver: csi.vitastor.io
|
||||
deletionPolicy: Delete
|
||||
parameters:
|
16
csi/deploy/example-snapshot-clone.yaml
Normal file
16
csi/deploy/example-snapshot-clone.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: test-vitastor-clone
|
||||
spec:
|
||||
storageClassName: vitastor
|
||||
dataSource:
|
||||
name: snap1
|
||||
kind: VolumeSnapshot
|
||||
apiGroup: snapshot.storage.k8s.io
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
8
csi/deploy/example-snapshot.yaml
Normal file
8
csi/deploy/example-snapshot.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
apiVersion: snapshot.storage.k8s.io/v1
|
||||
kind: VolumeSnapshot
|
||||
metadata:
|
||||
name: snap1
|
||||
spec:
|
||||
volumeSnapshotClassName: vitastor-snapclass
|
||||
source:
|
||||
persistentVolumeClaimName: test-vitastor-pvc
|
@@ -9,6 +9,7 @@ require (
|
||||
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
google.golang.org/grpc v1.33.1
|
||||
google.golang.org/protobuf v1.24.0
|
||||
k8s.io/klog v1.0.0
|
||||
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10
|
||||
)
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "0.9.2"
|
||||
vitastorCSIDriverVersion = "1.2.0"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
@@ -20,6 +20,7 @@ import (
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
"google.golang.org/protobuf/types/known/timestamppb"
|
||||
|
||||
"github.com/container-storage-interface/spec/lib/go/csi"
|
||||
)
|
||||
@@ -45,6 +46,7 @@ type InodeConfig struct
|
||||
ParentPool uint64 `json:"parent_pool,omitempty"`
|
||||
ParentId uint64 `json:"parent_id,omitempty"`
|
||||
Readonly bool `json:"readonly,omitempty"`
|
||||
CreateTs uint64 `json:"create_ts,omitempty"`
|
||||
}
|
||||
|
||||
type ControllerServer struct
|
||||
@@ -178,27 +180,43 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
|
||||
return nil, status.Error(codes.InvalidArgument, "no etcdUrl in storage class configuration and no etcd_address in vitastor.conf")
|
||||
}
|
||||
|
||||
args := []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", fmt.Sprintf("%v", poolId) }
|
||||
|
||||
// Support creation from snapshot
|
||||
var src *csi.VolumeContentSource
|
||||
if (req.VolumeContentSource.GetSnapshot() != nil)
|
||||
{
|
||||
snapId := req.VolumeContentSource.GetSnapshot().GetSnapshotId()
|
||||
if (snapId != "")
|
||||
{
|
||||
snapVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(snapId), &snapVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "volume ID not in JSON format")
|
||||
}
|
||||
args = append(args, "--parent", snapVars["name"]+"@"+snapVars["snapshot"])
|
||||
src = &csi.VolumeContentSource{
|
||||
Type: &csi.VolumeContentSource_Snapshot{
|
||||
Snapshot: &csi.VolumeContentSource_SnapshotSource{
|
||||
SnapshotId: snapId,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create image using vitastor-cli
|
||||
_, err := invokeCLI(ctxVars, []string{ "create", volName, "-s", fmt.Sprintf("%v", volSize), "--pool", fmt.Sprintf("%v", poolId) })
|
||||
_, err := invokeCLI(ctxVars, args)
|
||||
if (err != nil)
|
||||
{
|
||||
if (strings.Index(err.Error(), "already exists") > 0)
|
||||
{
|
||||
stat, err := invokeCLI(ctxVars, []string{ "ls", "--json", volName })
|
||||
inodeCfg, err := invokeList(ctxVars, volName, true)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
var inodeCfg []InodeConfig
|
||||
err = json.Unmarshal(stat, &inodeCfg)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "Invalid JSON in vitastor-cli ls: "+err.Error())
|
||||
}
|
||||
if (len(inodeCfg) == 0)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "vitastor-cli create said that image already exists, but ls can't find it")
|
||||
}
|
||||
if (inodeCfg[0].Size < uint64(volSize))
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "image "+volName+" is already created, but size is less than expected")
|
||||
@@ -217,6 +235,7 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol
|
||||
// Ugly, but VolumeContext isn't passed to DeleteVolume :-(
|
||||
VolumeId: string(volumeIdJson),
|
||||
CapacityBytes: volSize,
|
||||
ContentSource: src,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
@@ -230,15 +249,15 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
|
||||
return nil, status.Error(codes.InvalidArgument, "request cannot be empty")
|
||||
}
|
||||
|
||||
ctxVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
|
||||
volVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.VolumeId), &volVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "volume ID not in JSON format")
|
||||
}
|
||||
volName := ctxVars["name"]
|
||||
volName := volVars["name"]
|
||||
|
||||
ctxVars, _, _ = GetConnectionParams(ctxVars)
|
||||
ctxVars, _, _ := GetConnectionParams(volVars)
|
||||
|
||||
_, err = invokeCLI(ctxVars, []string{ "rm", volName })
|
||||
if (err != nil)
|
||||
@@ -344,6 +363,8 @@ func (cs *ControllerServer) ControllerGetCapabilities(ctx context.Context, req *
|
||||
csi.ControllerServiceCapability_RPC_LIST_VOLUMES,
|
||||
csi.ControllerServiceCapability_RPC_EXPAND_VOLUME,
|
||||
csi.ControllerServiceCapability_RPC_CREATE_DELETE_SNAPSHOT,
|
||||
csi.ControllerServiceCapability_RPC_LIST_SNAPSHOTS,
|
||||
// TODO: csi.ControllerServiceCapability_RPC_CLONE_VOLUME,
|
||||
} {
|
||||
controllerServerCapabilities = append(controllerServerCapabilities, functionControllerServerCapabilities(capability))
|
||||
}
|
||||
@@ -353,28 +374,214 @@ func (cs *ControllerServer) ControllerGetCapabilities(ctx context.Context, req *
|
||||
}, nil
|
||||
}
|
||||
|
||||
func invokeList(ctxVars map[string]string, pattern string, expectExist bool) ([]InodeConfig, error)
|
||||
{
|
||||
stat, err := invokeCLI(ctxVars, []string{ "ls", "--json", pattern })
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
var inodeCfg []InodeConfig
|
||||
err = json.Unmarshal(stat, &inodeCfg)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "Invalid JSON in vitastor-cli ls: "+err.Error())
|
||||
}
|
||||
if (expectExist && len(inodeCfg) == 0)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "Can't find expected image "+pattern+" via vitastor-cli ls")
|
||||
}
|
||||
return inodeCfg, nil
|
||||
}
|
||||
|
||||
// CreateSnapshot create snapshot of an existing PV
|
||||
func (cs *ControllerServer) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequest) (*csi.CreateSnapshotResponse, error)
|
||||
{
|
||||
return nil, status.Error(codes.Unimplemented, "")
|
||||
klog.Infof("received controller create snapshot request %+v", protosanitizer.StripSecrets(req))
|
||||
if (req == nil)
|
||||
{
|
||||
return nil, status.Errorf(codes.InvalidArgument, "request cannot be empty")
|
||||
}
|
||||
if (req.SourceVolumeId == "" || req.Name == "")
|
||||
{
|
||||
return nil, status.Error(codes.InvalidArgument, "source volume ID and snapshot name are required fields")
|
||||
}
|
||||
|
||||
// snapshot name
|
||||
snapName := req.Name
|
||||
|
||||
// req.VolumeId is an ugly json string in our case :)
|
||||
ctxVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.SourceVolumeId), &ctxVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "volume ID not in JSON format")
|
||||
}
|
||||
volName := ctxVars["name"]
|
||||
|
||||
// Create image using vitastor-cli
|
||||
_, err = invokeCLI(ctxVars, []string{ "create", "--snapshot", snapName, volName })
|
||||
if (err != nil && strings.Index(err.Error(), "already exists") <= 0)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check created snapshot
|
||||
inodeCfg, err := invokeList(ctxVars, volName+"@"+snapName, true)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Use ugly JSON snapshot ID again, DeleteSnapshot doesn't have context :-(
|
||||
ctxVars["snapshot"] = snapName
|
||||
snapIdJson, _ := json.Marshal(ctxVars)
|
||||
return &csi.CreateSnapshotResponse{
|
||||
Snapshot: &csi.Snapshot{
|
||||
SizeBytes: int64(inodeCfg[0].Size),
|
||||
SnapshotId: string(snapIdJson),
|
||||
SourceVolumeId: req.SourceVolumeId,
|
||||
CreationTime: &timestamppb.Timestamp{ Seconds: int64(inodeCfg[0].CreateTs) },
|
||||
ReadyToUse: true,
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// DeleteSnapshot delete provided snapshot of a PV
|
||||
func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteSnapshotRequest) (*csi.DeleteSnapshotResponse, error)
|
||||
{
|
||||
return nil, status.Error(codes.Unimplemented, "")
|
||||
klog.Infof("received controller delete snapshot request %+v", protosanitizer.StripSecrets(req))
|
||||
if (req == nil)
|
||||
{
|
||||
return nil, status.Errorf(codes.InvalidArgument, "request cannot be empty")
|
||||
}
|
||||
if (req.SnapshotId == "")
|
||||
{
|
||||
return nil, status.Error(codes.InvalidArgument, "snapshot ID is a required field")
|
||||
}
|
||||
|
||||
volVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.SnapshotId), &volVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "snapshot ID not in JSON format")
|
||||
}
|
||||
volName := volVars["name"]
|
||||
snapName := volVars["snapshot"]
|
||||
|
||||
ctxVars, _, _ := GetConnectionParams(volVars)
|
||||
|
||||
_, err = invokeCLI(ctxVars, []string{ "rm", volName+"@"+snapName })
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &csi.DeleteSnapshotResponse{}, nil
|
||||
}
|
||||
|
||||
// ListSnapshots list the snapshots of a PV
|
||||
func (cs *ControllerServer) ListSnapshots(ctx context.Context, req *csi.ListSnapshotsRequest) (*csi.ListSnapshotsResponse, error)
|
||||
{
|
||||
return nil, status.Error(codes.Unimplemented, "")
|
||||
klog.Infof("received controller list snapshots request %+v", protosanitizer.StripSecrets(req))
|
||||
if (req == nil)
|
||||
{
|
||||
return nil, status.Error(codes.InvalidArgument, "request cannot be empty")
|
||||
}
|
||||
|
||||
volVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.SourceVolumeId), &volVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "volume ID not in JSON format")
|
||||
}
|
||||
volName := volVars["name"]
|
||||
ctxVars, _, _ := GetConnectionParams(volVars)
|
||||
|
||||
inodeCfg, err := invokeList(ctxVars, volName+"@*", false)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := &csi.ListSnapshotsResponse{}
|
||||
for _, ino := range inodeCfg
|
||||
{
|
||||
snapName := ino.Name[len(volName)+1:]
|
||||
if (len(req.StartingToken) > 0 && snapName < req.StartingToken)
|
||||
{
|
||||
}
|
||||
else if (req.MaxEntries == 0 || len(resp.Entries) < int(req.MaxEntries))
|
||||
{
|
||||
volVars["snapshot"] = snapName
|
||||
snapIdJson, _ := json.Marshal(volVars)
|
||||
resp.Entries = append(resp.Entries, &csi.ListSnapshotsResponse_Entry{
|
||||
Snapshot: &csi.Snapshot{
|
||||
SizeBytes: int64(ino.Size),
|
||||
SnapshotId: string(snapIdJson),
|
||||
SourceVolumeId: req.SourceVolumeId,
|
||||
CreationTime: ×tamppb.Timestamp{ Seconds: int64(ino.CreateTs) },
|
||||
ReadyToUse: true,
|
||||
},
|
||||
})
|
||||
}
|
||||
else
|
||||
{
|
||||
resp.NextToken = snapName
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// ControllerExpandVolume resizes a volume
|
||||
// ControllerExpandVolume increases the size of a volume
|
||||
func (cs *ControllerServer) ControllerExpandVolume(ctx context.Context, req *csi.ControllerExpandVolumeRequest) (*csi.ControllerExpandVolumeResponse, error)
|
||||
{
|
||||
return nil, status.Error(codes.Unimplemented, "")
|
||||
klog.Infof("received controller expand volume request %+v", protosanitizer.StripSecrets(req))
|
||||
if (req == nil)
|
||||
{
|
||||
return nil, status.Error(codes.InvalidArgument, "request cannot be empty")
|
||||
}
|
||||
if (req.VolumeId == "" || req.CapacityRange == nil || req.CapacityRange.RequiredBytes == 0)
|
||||
{
|
||||
return nil, status.Error(codes.InvalidArgument, "VolumeId, CapacityRange and RequiredBytes are required fields")
|
||||
}
|
||||
|
||||
volVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.VolumeId), &volVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "volume ID not in JSON format")
|
||||
}
|
||||
volName := volVars["name"]
|
||||
ctxVars, _, _ := GetConnectionParams(volVars)
|
||||
|
||||
inodeCfg, err := invokeList(ctxVars, volName, true)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if (req.CapacityRange.RequiredBytes > 0 && inodeCfg[0].Size < uint64(req.CapacityRange.RequiredBytes))
|
||||
{
|
||||
sz := ((req.CapacityRange.RequiredBytes+4095)/4096)*4096
|
||||
_, err := invokeCLI(ctxVars, []string{ "modify", "--inc_size", "1", "--resize", fmt.Sprintf("%d", sz), volName })
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
inodeCfg, err = invokeList(ctxVars, volName, true)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return &csi.ControllerExpandVolumeResponse{
|
||||
CapacityBytes: int64(inodeCfg[0].Size),
|
||||
NodeExpansionRequired: false,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ControllerGetVolume get volume info
|
||||
|
@@ -49,6 +49,13 @@ func (is *IdentityServer) GetPluginCapabilities(ctx context.Context, req *csi.Ge
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Type: &csi.PluginCapability_VolumeExpansion_{
|
||||
VolumeExpansion: &csi.PluginCapability_VolumeExpansion{
|
||||
Type: csi.PluginCapability_VolumeExpansion_OFFLINE,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
@@ -70,10 +70,10 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
|
||||
isBlock := req.GetVolumeCapability().GetBlock() != nil
|
||||
|
||||
// Check that it's not already mounted
|
||||
_, error := mount.IsNotMountPoint(ns.mounter, targetPath)
|
||||
if (error != nil)
|
||||
_, err := mount.IsNotMountPoint(ns.mounter, targetPath)
|
||||
if (err != nil)
|
||||
{
|
||||
if (os.IsNotExist(error))
|
||||
if (os.IsNotExist(err))
|
||||
{
|
||||
if (isBlock)
|
||||
{
|
||||
@@ -102,12 +102,12 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
|
||||
}
|
||||
else
|
||||
{
|
||||
return nil, status.Error(codes.Internal, error.Error())
|
||||
return nil, status.Error(codes.Internal, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
ctxVars := make(map[string]string)
|
||||
err := json.Unmarshal([]byte(req.VolumeId), &ctxVars)
|
||||
err = json.Unmarshal([]byte(req.VolumeId), &ctxVars)
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, status.Error(codes.Internal, "volume ID not in JSON format")
|
||||
@@ -147,70 +147,74 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
|
||||
}
|
||||
devicePath := strings.TrimSpace(stdoutStr)
|
||||
|
||||
// Check existing format
|
||||
diskMounter := &mount.SafeFormatAndMount{Interface: ns.mounter, Exec: utilexec.New()}
|
||||
existingFormat, err := diskMounter.GetDiskFormat(devicePath)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to get disk format for path %s, error: %v", err)
|
||||
// unmap NBD device
|
||||
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
|
||||
if (unmapErr != nil)
|
||||
{
|
||||
klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Format the device (ext4 or xfs)
|
||||
fsType := req.GetVolumeCapability().GetMount().GetFsType()
|
||||
opt := req.GetVolumeCapability().GetMount().GetMountFlags()
|
||||
opt = append(opt, "_netdev")
|
||||
if ((req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||
|
||||
req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY) &&
|
||||
!Contains(opt, "ro"))
|
||||
{
|
||||
opt = append(opt, "ro")
|
||||
}
|
||||
if (fsType == "xfs")
|
||||
{
|
||||
opt = append(opt, "nouuid")
|
||||
}
|
||||
readOnly := Contains(opt, "ro")
|
||||
if (existingFormat == "" && !readOnly)
|
||||
{
|
||||
args := []string{}
|
||||
switch fsType
|
||||
{
|
||||
case "ext4":
|
||||
args = []string{"-m0", "-Enodiscard,lazy_itable_init=1,lazy_journal_init=1", devicePath}
|
||||
case "xfs":
|
||||
args = []string{"-K", devicePath}
|
||||
}
|
||||
if (len(args) > 0)
|
||||
{
|
||||
cmdOut, cmdErr := diskMounter.Exec.Command("mkfs."+fsType, args...).CombinedOutput()
|
||||
if (cmdErr != nil)
|
||||
{
|
||||
klog.Errorf("failed to run mkfs error: %v, output: %v", cmdErr, string(cmdOut))
|
||||
// unmap NBD device
|
||||
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
|
||||
if (unmapErr != nil)
|
||||
{
|
||||
klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
|
||||
}
|
||||
return nil, status.Error(codes.Internal, cmdErr.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
if (isBlock)
|
||||
{
|
||||
opt = append(opt, "bind")
|
||||
err = diskMounter.Mount(devicePath, targetPath, fsType, opt)
|
||||
err = diskMounter.Mount(devicePath, targetPath, "", []string{"bind"})
|
||||
}
|
||||
else
|
||||
{
|
||||
// Check existing format
|
||||
existingFormat, err := diskMounter.GetDiskFormat(devicePath)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to get disk format for path %s, error: %v", err)
|
||||
goto unmap
|
||||
}
|
||||
|
||||
// Format the device (ext4 or xfs)
|
||||
fsType := req.GetVolumeCapability().GetMount().GetFsType()
|
||||
opt := req.GetVolumeCapability().GetMount().GetMountFlags()
|
||||
opt = append(opt, "_netdev")
|
||||
if ((req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_MULTI_NODE_READER_ONLY ||
|
||||
req.VolumeCapability.AccessMode.Mode == csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY) &&
|
||||
!Contains(opt, "ro"))
|
||||
{
|
||||
opt = append(opt, "ro")
|
||||
}
|
||||
if (fsType == "xfs")
|
||||
{
|
||||
opt = append(opt, "nouuid")
|
||||
}
|
||||
readOnly := Contains(opt, "ro")
|
||||
if (existingFormat == "" && !readOnly)
|
||||
{
|
||||
var cmdOut []byte
|
||||
switch fsType
|
||||
{
|
||||
case "ext4":
|
||||
args := []string{"-m0", "-Enodiscard,lazy_itable_init=1,lazy_journal_init=1", devicePath}
|
||||
cmdOut, err = diskMounter.Exec.Command("mkfs.ext4", args...).CombinedOutput()
|
||||
case "xfs":
|
||||
cmdOut, err = diskMounter.Exec.Command("mkfs.xfs", "-K", devicePath).CombinedOutput()
|
||||
}
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to run mkfs error: %v, output: %v", err, string(cmdOut))
|
||||
goto unmap
|
||||
}
|
||||
}
|
||||
|
||||
err = diskMounter.FormatAndMount(devicePath, targetPath, fsType, opt)
|
||||
|
||||
// Try to run online resize on mount.
|
||||
// FIXME: Implement online resize. It requires online resize support in vitastor-nbd.
|
||||
if (err == nil && existingFormat != "" && !readOnly)
|
||||
{
|
||||
var cmdOut []byte
|
||||
switch (fsType)
|
||||
{
|
||||
case "ext4":
|
||||
cmdOut, err = diskMounter.Exec.Command("resize2fs", devicePath).CombinedOutput()
|
||||
case "xfs":
|
||||
cmdOut, err = diskMounter.Exec.Command("xfs_growfs", devicePath).CombinedOutput()
|
||||
}
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to run resizefs error: %v, output: %v", err, string(cmdOut))
|
||||
goto unmap
|
||||
}
|
||||
}
|
||||
}
|
||||
if (err != nil)
|
||||
{
|
||||
@@ -218,15 +222,18 @@ func (ns *NodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis
|
||||
"failed to mount device path (%s) to path (%s) for volume (%s) error: %s",
|
||||
devicePath, targetPath, volName, err,
|
||||
)
|
||||
// unmap NBD device
|
||||
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
|
||||
if (unmapErr != nil)
|
||||
{
|
||||
klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
|
||||
}
|
||||
return nil, status.Error(codes.Internal, err.Error())
|
||||
goto unmap
|
||||
}
|
||||
return &csi.NodePublishVolumeResponse{}, nil
|
||||
|
||||
unmap:
|
||||
// unmap NBD device
|
||||
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-nbd", "unmap", devicePath).CombinedOutput()
|
||||
if (unmapErr != nil)
|
||||
{
|
||||
klog.Errorf("failed to unmap NBD device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
|
||||
}
|
||||
return nil, status.Error(codes.Internal, err.Error())
|
||||
}
|
||||
|
||||
// NodeUnpublishVolume unmounts the volume from the target path
|
||||
|
58
debian/build-pve-qemu.sh
vendored
Normal file
58
debian/build-pve-qemu.sh
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
# Notes for preparing pve-qemu build containers, one section per Debian
# release. The leading `exit` stops the file from doing anything when it is
# executed as a script; the commands are meant to be copied by hand.
# Within each section, the lines after `docker run ... bash` are presumably
# typed inside the started container.
exit

git clone https://git.yourcmc.ru/vitalif/pve-qemu .

# bookworm

# This section edits debian.sources and adds bookworm repositories, so the
# container must be based on debian:bookworm.
docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bookworm debian:bookworm bash

perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bookworm pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bookworm main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
wget https://enterprise.proxmox.com/debian/proxmox-release-bookworm.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bookworm.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control

# bullseye

docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-bullseye debian:bullseye bash

grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve bullseye pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian bullseye main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
# ca-certificates is installed explicitly (as in the other sections) because
# recommended packages are disabled above and the downloads below use HTTPS.
apt-get install wget ca-certificates
wget https://enterprise.proxmox.com/debian/proxmox-release-bullseye.gpg -O /etc/apt/trusted.gpg.d/proxmox-release-bullseye.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control

# buster

docker run -it -v `pwd`/pve-qemu:/root/pve-qemu --name pve-qemu-buster debian:buster bash

grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb /deb-src /' >> /etc/apt/sources.list
echo 'deb [arch=amd64] http://download.proxmox.com/debian/pve buster pve-no-subscription' >> /etc/apt/sources.list
echo 'deb https://vitastor.io/debian buster main' >> /etc/apt/sources.list
echo 'deb http://deb.debian.org/debian buster-backports main' >> /etc/apt/sources.list
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf
echo 'ru_RU UTF-8' >> /etc/locale.gen
echo 'en_US UTF-8' >> /etc/locale.gen
apt-get update
apt-get install wget ca-certificates
wget http://download.proxmox.com/debian/proxmox-ve-release-6.x.gpg -O /etc/apt/trusted.gpg.d/proxmox-ve-release-6.x.gpg
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg
apt-get update
apt-get install git devscripts equivs wget mc libjemalloc-dev vitastor-client-dev lintian locales
mk-build-deps --install ./control
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@@ -1,10 +1,10 @@
|
||||
vitastor (0.9.2-1) unstable; urgency=medium
|
||||
vitastor (1.2.0-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||
|
||||
vitastor (0.9.2-1) unstable; urgency=medium
|
||||
vitastor (1.2.0-1) unstable; urgency=medium
|
||||
|
||||
* Implement NFS proxy
|
||||
* Add documentation
|
||||
|
2
debian/control
vendored
2
debian/control
vendored
@@ -2,7 +2,7 @@ Source: vitastor
|
||||
Section: admin
|
||||
Priority: optional
|
||||
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev
|
||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8), linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev, libibverbs-dev, libisal-dev, cmake, pkg-config
|
||||
Standards-Version: 4.5.0
|
||||
Homepage: https://vitastor.io/
|
||||
Rules-Requires-Root: no
|
||||
|
26
debian/patched-qemu.Dockerfile
vendored
26
debian/patched-qemu.Dockerfile
vendored
@@ -20,7 +20,7 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" ]; then \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install qemu fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN apt-get -y build-dep qemu
|
||||
# To build a custom version
|
||||
#RUN cp /root/packages/qemu-orig/* /root
|
||||
@@ -28,26 +28,34 @@ RUN apt-get --download-only source qemu
|
||||
|
||||
ADD patches /root/vitastor/patches
|
||||
ADD src/qemu_driver.c /root/vitastor/src/qemu_driver.c
|
||||
|
||||
#RUN set -e; \
|
||||
# apt-get install -y wget; \
|
||||
# wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
|
||||
# (echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
|
||||
# (echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||
# apt-get update; \
|
||||
# apt-get install -y vitastor-client vitastor-client-dev quilt
|
||||
|
||||
RUN set -e; \
|
||||
apt-get install -y wget; \
|
||||
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg; \
|
||||
(echo deb http://vitastor.io/debian $REL main > /etc/apt/sources.list.d/vitastor.list); \
|
||||
(echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||
dpkg -i /root/packages/vitastor-$REL/vitastor-client_*.deb /root/packages/vitastor-$REL/vitastor-client-dev_*.deb; \
|
||||
apt-get update; \
|
||||
apt-get install -y vitastor-client vitastor-client-dev quilt; \
|
||||
apt-get install -y quilt; \
|
||||
mkdir -p /root/packages/qemu-$REL; \
|
||||
rm -rf /root/packages/qemu-$REL/*; \
|
||||
cd /root/packages/qemu-$REL; \
|
||||
dpkg-source -x /root/qemu*.dsc; \
|
||||
QEMU_VER=$(ls -d qemu*/ | perl -pe 's!^.*(\d+\.\d+).*!$1!'); \
|
||||
cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch qemu-*/debian/patches; \
|
||||
echo qemu-$QEMU_VER-vitastor.patch >> qemu-*/debian/patches/series; \
|
||||
D=$(ls -d qemu*/); \
|
||||
cp /root/vitastor/patches/qemu-$QEMU_VER-vitastor.patch ./qemu-*/debian/patches; \
|
||||
echo qemu-$QEMU_VER-vitastor.patch >> $D/debian/patches/series; \
|
||||
cd /root/packages/qemu-$REL/qemu-*/; \
|
||||
quilt push -a; \
|
||||
quilt add block/vitastor.c; \
|
||||
cp /root/vitastor/src/qemu_driver.c block/vitastor.c; \
|
||||
quilt refresh; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor1; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/5\.2\+dfsg-9/5.2+dfsg-11/; s/^.*\((.*?)(~bpo[\d\+]*)?\).*$/$1/')+vitastor4; \
|
||||
if [ "$REL" = bullseye ]; then V=${V}bullseye; fi; \
|
||||
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v $V 'Plug Vitastor block driver'; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
rm -rf /root/packages/qemu-$REL/qemu-*/
|
||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -35,8 +35,8 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-0.9.2; \
|
||||
cd vitastor-0.9.2; \
|
||||
cp -r /root/vitastor vitastor-1.2.0; \
|
||||
cd vitastor-1.2.0; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
@@ -49,8 +49,8 @@ RUN set -e -x; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.9.2.orig.tar.xz vitastor-0.9.2; \
|
||||
cd vitastor-0.9.2; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.2.0.orig.tar.xz vitastor-1.2.0; \
|
||||
cd vitastor-1.2.0; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
|
@@ -21,7 +21,7 @@ Configuration parameters can be set in 3 places:
|
||||
mon, fio and QEMU options, OpenStack/Proxmox/etc configuration. The latter
|
||||
doesn't allow setting all variables directly, but it allows you to override the
|
||||
configuration file and set everything you need inside it.
|
||||
- OSD superblocks created by [vitastor-disk](../usage/disk.en.md) contain
|
||||
- OSD superblocks created by [vitastor-disk](usage/disk.en.md) contain
|
||||
primarily disk layout parameters of specific OSDs. In fact, these parameters
|
||||
are automatically passed into the command line of vitastor-osd process, so
|
||||
they have the same "status" as command-line parameters.
|
||||
@@ -33,6 +33,7 @@ In the future, additional configuration methods may be added:
|
||||
|
||||
- [Common](config/common.en.md)
|
||||
- [Network](config/network.en.md)
|
||||
- [Client](config/client.en.md)
|
||||
- [Global Disk Layout](config/layout-cluster.en.md)
|
||||
- [OSD Disk Layout](config/layout-osd.en.md)
|
||||
- [OSD Runtime Parameters](config/osd.en.md)
|
||||
|
@@ -23,7 +23,7 @@
|
||||
монитора, опциях fio и QEMU, настроек OpenStack, Proxmox и т.п. Последние,
|
||||
как правило, не включают полный набор параметров напрямую, но позволяют
|
||||
определить путь к файлу конфигурации и задать любые параметры в нём.
|
||||
- В суперблоке OSD, записываемом [vitastor-disk](../usage/disk.ru.md) - параметры,
|
||||
- В суперблоке OSD, записываемом [vitastor-disk](usage/disk.ru.md) - параметры,
|
||||
связанные с дисковым форматом и с этим конкретным OSD. На самом деле,
|
||||
при запуске OSD эти параметры автоматически передаются в командную строку
|
||||
процесса vitastor-osd, то есть по "статусу" они эквивалентны параметрам
|
||||
@@ -36,6 +36,7 @@
|
||||
|
||||
- [Общие](config/common.ru.md)
|
||||
- [Сеть](config/network.ru.md)
|
||||
- [Клиентский код](config/client.ru.md)
|
||||
- [Глобальные дисковые параметры](config/layout-cluster.ru.md)
|
||||
- [Дисковые параметры OSD](config/layout-osd.ru.md)
|
||||
- [Прочие параметры OSD](config/osd.ru.md)
|
||||
|
103
docs/config/client.en.md
Normal file
103
docs/config/client.en.md
Normal file
@@ -0,0 +1,103 @@
|
||||
[Documentation](../../README.md#documentation) → [Configuration](../config.en.md) → Client Parameters
|
||||
|
||||
-----
|
||||
|
||||
[Читать на русском](client.ru.md)
|
||||
|
||||
# Client Parameters
|
||||
|
||||
These parameters apply only to clients and affect their interaction with
|
||||
the cluster.
|
||||
|
||||
- [client_max_dirty_bytes](#client_max_dirty_bytes)
|
||||
- [client_max_dirty_ops](#client_max_dirty_ops)
|
||||
- [client_enable_writeback](#client_enable_writeback)
|
||||
- [client_max_buffered_bytes](#client_max_buffered_bytes)
|
||||
- [client_max_buffered_ops](#client_max_buffered_ops)
|
||||
- [client_max_writeback_iodepth](#client_max_writeback_iodepth)
|
||||
|
||||
## client_max_dirty_bytes
|
||||
|
||||
- Type: integer
|
||||
- Default: 33554432
|
||||
- Can be changed online: yes
|
||||
|
||||
Without [immediate_commit](layout-cluster.en.md#immediate_commit)=all this parameter sets the limit of "dirty"
|
||||
(not committed by fsync) data allowed by the client before forcing an
|
||||
additional fsync and committing the data. Also note that the client always
|
||||
holds a copy of uncommitted data in memory so this setting also affects
|
||||
RAM usage of clients.
|
||||
|
||||
## client_max_dirty_ops
|
||||
|
||||
- Type: integer
|
||||
- Default: 1024
|
||||
- Can be changed online: yes
|
||||
|
||||
Same as client_max_dirty_bytes, but instead of total size, limits the number
|
||||
of uncommitted write operations.
|
||||
|
||||
## client_enable_writeback
|
||||
|
||||
- Type: boolean
|
||||
- Default: false
|
||||
- Can be changed online: yes
|
||||
|
||||
This parameter enables client-side write buffering. This means that write
|
||||
requests are accumulated in memory for a short time before being sent to
|
||||
a Vitastor cluster, which allows them to be sent in parallel and increases
|
||||
performance of some applications. Writes are buffered until the client forces
|
||||
a flush with fsync() or until the amount of buffered writes exceeds the
|
||||
limit.
|
||||
|
||||
Write buffering significantly increases performance of some applications,
|
||||
for example, CrystalDiskMark under Windows (LOL :-D), but also any other
|
||||
applications if they do writes in one of two non-optimal ways: either if
|
||||
they do a lot of small (4 kb or so) sequential writes, or if they do a lot
|
||||
of small random writes, but without any parallelism or asynchrony, and also
|
||||
without calling fsync().
|
||||
|
||||
With write buffering enabled, you can expect around 22000 T1Q1 random write
|
||||
iops in QEMU more or less regardless of the quality of your SSDs, and this
|
||||
number is in fact bound by QEMU itself rather than Vitastor (check it
|
||||
yourself by adding a "driver=null-co" disk in QEMU). Without write
|
||||
buffering, the current record is 9900 iops, but the number is usually
|
||||
even lower with non-ideal hardware, for example, it may be 5000 iops.
|
||||
|
||||
Even when this parameter is enabled, write buffering isn't enabled until
|
||||
the client explicitly allows it, because enabling it without the client
|
||||
being aware of the fact that its writes may be buffered may lead to data
|
||||
loss. Because of this, older versions of clients don't support write
|
||||
buffering at all, newer versions of the QEMU driver allow write buffering
|
||||
only if it's enabled in disk settings with `-blockdev cache.direct=false`,
|
||||
and newer versions of FIO only allow write buffering if you don't specify
|
||||
`-direct=1`. NBD and NFS drivers allow write buffering by default.
|
||||
|
||||
You can overcome this restriction too with the `client_writeback_allowed`
|
||||
parameter, but you shouldn't do that unless you **really** know what you
|
||||
are doing.
|
||||
|
||||
## client_max_buffered_bytes
|
||||
|
||||
- Type: integer
|
||||
- Default: 33554432
|
||||
- Can be changed online: yes
|
||||
|
||||
Maximum total size of buffered writes which triggers write-back when reached.
|
||||
|
||||
## client_max_buffered_ops
|
||||
|
||||
- Type: integer
|
||||
- Default: 1024
|
||||
- Can be changed online: yes
|
||||
|
||||
Maximum number of buffered writes which triggers write-back when reached.
|
||||
Multiple consecutive modified data regions are counted as 1 write here.
|
||||
|
||||
## client_max_writeback_iodepth
|
||||
|
||||
- Type: integer
|
||||
- Default: 256
|
||||
- Can be changed online: yes
|
||||
|
||||
Maximum number of parallel writes when flushing buffered data to the server.
|
103
docs/config/client.ru.md
Normal file
103
docs/config/client.ru.md
Normal file
@@ -0,0 +1,103 @@
|
||||
[Документация](../../README-ru.md#документация) → [Конфигурация](../config.ru.md) → Параметры клиентского кода
|
||||
|
||||
-----
|
||||
|
||||
[Read in English](client.en.md)
|
||||
|
||||
# Параметры клиентского кода
|
||||
|
||||
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD) и
|
||||
затрагивают логику их работы с кластером.
|
||||
|
||||
- [client_max_dirty_bytes](#client_max_dirty_bytes)
|
||||
- [client_max_dirty_ops](#client_max_dirty_ops)
|
||||
- [client_enable_writeback](#client_enable_writeback)
|
||||
- [client_max_buffered_bytes](#client_max_buffered_bytes)
|
||||
- [client_max_buffered_ops](#client_max_buffered_ops)
|
||||
- [client_max_writeback_iodepth](#client_max_writeback_iodepth)
|
||||
|
||||
## client_max_dirty_bytes
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 33554432
|
||||
- Можно менять на лету: да
|
||||
|
||||
При работе без [immediate_commit](layout-cluster.ru.md#immediate_commit)=all - это лимит объёма "грязных" (не
|
||||
зафиксированных fsync-ом) данных, при достижении которого клиент будет
|
||||
принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
|
||||
что в этом случае до момента fsync клиент хранит копию незафиксированных
|
||||
данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
|
||||
|
||||
## client_max_dirty_ops
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 1024
|
||||
- Можно менять на лету: да
|
||||
|
||||
Аналогично client_max_dirty_bytes, но ограничивает количество
|
||||
незафиксированных операций записи вместо их общего объёма.
|
||||
|
||||
## client_enable_writeback
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: false
|
||||
- Можно менять на лету: да
|
||||
|
||||
Данный параметр разрешает включать буферизацию записи в памяти. Буферизация
|
||||
означает, что операции записи отправляются на кластер Vitastor не сразу, а
|
||||
могут небольшое время накапливаться в памяти и сбрасываться сразу пакетами,
|
||||
до тех пор, пока либо не будет превышен лимит неотправленных записей, либо
|
||||
пока клиент не вызовет fsync.
|
||||
|
||||
Буферизация значительно повышает производительность некоторых приложений,
|
||||
например, CrystalDiskMark в Windows (ха-ха :-D), но также и любых других,
|
||||
которые пишут на диск неоптимально: либо последовательно, но мелкими блоками
|
||||
(например, по 4 кб), либо случайно, но без параллелизма и без fsync - то
|
||||
есть, например, отправляя 128 операций записи в разные места диска, но не
|
||||
все сразу с помощью асинхронного I/O, а по одной.
|
||||
|
||||
В QEMU с буферизацией записи можно ожидать показателя примерно 22000
|
||||
операций случайной записи в секунду в 1 поток и с глубиной очереди 1 (T1Q1)
|
||||
без fsync, почти вне зависимости от того, насколько хороши ваши диски - эта
|
||||
цифра упирается в сам QEMU. Без буферизации рекорд пока что - 9900 операций
|
||||
в секунду, но на железе похуже может быть и поменьше, например, 5000 операций
|
||||
в секунду.
|
||||
|
||||
При этом, даже если данный параметр включён, буферизация не включается, если
|
||||
явно не разрешена клиентом, т.к. если клиент не знает, что запросы записи
|
||||
буферизуются, это может приводить к потере данных. Поэтому в старых версиях
|
||||
клиентских драйверов буферизация записи не включается вообще, в новых
|
||||
версиях QEMU-драйвера включается, только если разрешена опцией диска
|
||||
`-blockdev cache.direct=false`, а в fio - только если нет опции `-direct=1`.
|
||||
В NBD и NFS драйверах буферизация записи разрешена по умолчанию.
|
||||
|
||||
Можно обойти и это ограничение с помощью параметра `client_writeback_allowed`,
|
||||
но делать так не надо, если только вы не уверены в том, что делаете, на все
|
||||
100%. :-)
|
||||
|
||||
## client_max_buffered_bytes
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 33554432
|
||||
- Можно менять на лету: да
|
||||
|
||||
Максимальный общий размер буферизованных записей, при достижении которого
|
||||
начинается процесс сброса данных на сервер.
|
||||
|
||||
## client_max_buffered_ops
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 1024
|
||||
- Можно менять на лету: да
|
||||
|
||||
Максимальное количество буферизованных записей, при достижении которого
|
||||
начинается процесс сброса данных на сервер. При этом несколько
|
||||
последовательных изменённых областей здесь считаются 1 записью.
|
||||
|
||||
## client_max_writeback_iodepth
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 256
|
||||
- Можно менять на лету: да
|
||||
|
||||
Максимальное число параллельных операций записи при сбросе буферов на сервер.
|
@@ -33,12 +33,13 @@ Size of objects (data blocks) into which all physical and virtual drives
|
||||
in Vitastor, affects memory usage, write amplification and I/O load
|
||||
distribution effectiveness.
|
||||
|
||||
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
|
||||
it's possible to use 4 MB for SSD too - it will lower memory usage, but
|
||||
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
|
||||
it's possible to use 1 MB for SSD too - it will lower memory usage, but
|
||||
may increase average WA and reduce linear performance.
|
||||
|
||||
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
||||
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
||||
With 1 MB it's 8 times lower.
|
||||
|
||||
## bitmap_granularity
|
||||
|
||||
@@ -95,8 +96,9 @@ SSD cache or "media-cache" - for example, a lot of Seagate EXOS drives have
|
||||
it (they have internal SSD cache even though it's not stated in datasheets).
|
||||
|
||||
Setting this parameter to "all" or "small" in OSD parameters requires enabling
|
||||
disable_journal_fsync and disable_meta_fsync, setting it to "all" also requires
|
||||
enabling disable_data_fsync.
|
||||
[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
|
||||
[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
|
||||
"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
|
||||
|
||||
TLDR: For optimal performance, set immediate_commit to "all" if you only use
|
||||
SSDs with supercapacitor-based power loss protection (nonvolatile
|
||||
|
@@ -33,14 +33,14 @@ OSD) могут сосуществовать в одном кластере Vita
|
||||
настроек, влияет на потребление памяти, объём избыточной записи (write
|
||||
amplification) и эффективность распределения нагрузки по OSD.
|
||||
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
|
||||
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
|
||||
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
|
||||
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
||||
среднем увеличит WA.
|
||||
|
||||
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
||||
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
||||
стандартном 128 КБ блоке.
|
||||
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
|
||||
|
||||
## bitmap_granularity
|
||||
|
||||
@@ -103,8 +103,9 @@ HDD-дисках с внутренним SSD или "медиа" кэшем - н
|
||||
указано в спецификациях).
|
||||
|
||||
Указание "all" или "small" в настройках / командной строке OSD требует
|
||||
включения disable_journal_fsync и disable_meta_fsync, значение "all" также
|
||||
требует включения disable_data_fsync.
|
||||
включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
|
||||
[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
|
||||
также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
|
||||
|
||||
Итого, вкратце: для оптимальной производительности установите
|
||||
immediate_commit в значение "all", если вы используете в кластере только SSD
|
||||
|
@@ -24,6 +24,8 @@ initialization and can't be changed after it without losing data.
|
||||
- [disable_journal_fsync](#disable_journal_fsync)
|
||||
- [disable_device_lock](#disable_device_lock)
|
||||
- [disk_alignment](#disk_alignment)
|
||||
- [data_csum_type](#data_csum_type)
|
||||
- [csum_block_size](#csum_block_size)
|
||||
|
||||
## data_device
|
||||
|
||||
@@ -174,3 +176,43 @@ Intel Optane (probably, not tested yet).
|
||||
|
||||
Clients don't need to be aware of disk_alignment, so it's not required to
|
||||
put a modified value into etcd key /vitastor/config/global.
|
||||
|
||||
## data_csum_type
|
||||
|
||||
- Type: string
|
||||
- Default: none
|
||||
|
||||
Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
|
||||
enable data checksums.
|
||||
|
||||
## csum_block_size
|
||||
|
||||
- Type: integer
|
||||
- Default: 4096
|
||||
|
||||
Checksum calculation block size.
|
||||
|
||||
Must be equal to or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
|
||||
(which is usually 4 KB).
|
||||
|
||||
Checksums increase metadata size by 4 bytes per each csum_block_size of data.
|
||||
|
||||
Checksums are always a tradeoff:
|
||||
1. You either sacrifice +1 GB RAM per 1 TB of data
|
||||
2. Or you raise csum_block_size, for example, to 32k and sacrifice
|
||||
50% random write iops due to checksum read-modify-write
|
||||
3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
|
||||
sacrifice 50% random read iops due to checksum reads
|
||||
|
||||
All-flash clusters usually have enough RAM to use default csum_block_size,
|
||||
which uses 1 GB RAM per 1 TB of data. HDD clusters usually don't.
|
||||
|
||||
Thus, recommended setups are:
|
||||
1. All-flash, 1 GB RAM per 1 TB data: default (csum_block_size=4k)
|
||||
2. All-flash, less RAM: csum_block_size=4k + inmemory_metadata=false
|
||||
3. Hybrid HDD+SSD: csum_block_size=4k + inmemory_metadata=false
|
||||
4. HDD-only, faster random read: csum_block_size=32k
|
||||
5. HDD-only, faster random write: csum_block_size=4k +
|
||||
inmemory_metadata=false + meta_io=cached
|
||||
|
||||
See also [meta_io](osd.en.md#meta_io).
|
||||
|
@@ -25,6 +25,8 @@
|
||||
- [disable_journal_fsync](#disable_journal_fsync)
|
||||
- [disable_device_lock](#disable_device_lock)
|
||||
- [disk_alignment](#disk_alignment)
|
||||
- [data_csum_type](#data_csum_type)
|
||||
- [csum_block_size](#csum_block_size)
|
||||
|
||||
## data_device
|
||||
|
||||
@@ -183,3 +185,47 @@ journal_block_size и meta_block_size. Однако единственные SSD
|
||||
|
||||
Клиентам не обязательно знать про disk_alignment, так что помещать значение
|
||||
этого параметра в etcd в /vitastor/config/global не нужно.
|
||||
|
||||
## data_csum_type
|
||||
|
||||
- Тип: строка
|
||||
- Значение по умолчанию: none
|
||||
|
||||
Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
|
||||
Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
|
||||
|
||||
Следует понимать, что контрольные суммы в зависимости от размера блока их
|
||||
расчёта либо увеличивают потребление памяти, либо снижают производительность.
|
||||
Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
|
||||
|
||||
## csum_block_size
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 4096
|
||||
|
||||
Размер блока расчёта контрольных сумм.
|
||||
|
||||
Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
|
||||
(который обычно равен 4 КБ).
|
||||
|
||||
Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
|
||||
csum_block_size данных.
|
||||
|
||||
Контрольные суммы - это всегда компромисс:
|
||||
1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
|
||||
2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
|
||||
скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
|
||||
новых контрольных сумм
|
||||
3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
|
||||
жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
|
||||
с диска
|
||||
|
||||
Таким образом, рекомендуются следующие варианты настроек:
|
||||
1. All-flash, 1 ГБ памяти на 1 ТБ данных: по умолчанию (csum_block_size=4k)
|
||||
2. All-flash, меньше памяти: csum_block_size=4k + inmemory_metadata=false
|
||||
3. Гибридные HDD+SSD: csum_block_size=4k + inmemory_metadata=false
|
||||
4. Только HDD, быстрее случайное чтение: csum_block_size=32k
|
||||
5. Только HDD, быстрее случайная запись: csum_block_size=4k +
|
||||
inmemory_metadata=false + meta_io=cached
|
||||
|
||||
Смотрите также [meta_io](osd.ru.md#meta_io).
|
||||
|
@@ -30,7 +30,6 @@ between clients, OSDs and etcd.
|
||||
- [etcd_slow_timeout](#etcd_slow_timeout)
|
||||
- [etcd_keepalive_timeout](#etcd_keepalive_timeout)
|
||||
- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)
|
||||
- [client_dirty_limit](#client_dirty_limit)
|
||||
|
||||
## tcp_header_buffer_size
|
||||
|
||||
@@ -240,17 +239,3 @@ etcd_report_interval to guarantee that keepalive actually works.
|
||||
|
||||
etcd websocket ping interval required to keep the connection alive and
|
||||
detect disconnections quickly.
|
||||
|
||||
## client_dirty_limit
|
||||
|
||||
- Type: integer
|
||||
- Default: 33554432
|
||||
- Can be changed online: yes
|
||||
|
||||
Without immediate_commit=all this parameter sets the limit of "dirty"
|
||||
(not committed by fsync) data allowed by the client before forcing an
|
||||
additional fsync and committing the data. Also note that the client always
|
||||
holds a copy of uncommitted data in memory so this setting also affects
|
||||
RAM usage of clients.
|
||||
|
||||
This parameter doesn't affect OSDs themselves.
|
||||
|
@@ -30,7 +30,6 @@
|
||||
- [etcd_slow_timeout](#etcd_slow_timeout)
|
||||
- [etcd_keepalive_timeout](#etcd_keepalive_timeout)
|
||||
- [etcd_ws_keepalive_timeout](#etcd_ws_keepalive_timeout)
|
||||
- [client_dirty_limit](#client_dirty_limit)
|
||||
|
||||
## tcp_header_buffer_size
|
||||
|
||||
@@ -251,17 +250,3 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
|
||||
- Можно менять на лету: да
|
||||
|
||||
Интервал проверки живости вебсокет-подключений к etcd.
|
||||
|
||||
## client_dirty_limit
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 33554432
|
||||
- Можно менять на лету: да
|
||||
|
||||
При работе без immediate_commit=all - это лимит объёма "грязных" (не
|
||||
зафиксированных fsync-ом) данных, при достижении которого клиент будет
|
||||
принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
|
||||
что в этом случае до момента fsync клиент хранит копию незафиксированных
|
||||
данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
|
||||
|
||||
Параметр не влияет на сами OSD.
|
||||
|
@@ -11,6 +11,7 @@ initialization and can be changed - either with an OSD restart or, for some of
|
||||
them, even without restarting by updating configuration in etcd.
|
||||
|
||||
- [etcd_report_interval](#etcd_report_interval)
|
||||
- [etcd_stats_interval](#etcd_stats_interval)
|
||||
- [run_primary](#run_primary)
|
||||
- [osd_network](#osd_network)
|
||||
- [bind_address](#bind_address)
|
||||
@@ -31,6 +32,9 @@ them, even without restarting by updating configuration in etcd.
|
||||
- [max_flusher_count](#max_flusher_count)
|
||||
- [inmemory_metadata](#inmemory_metadata)
|
||||
- [inmemory_journal](#inmemory_journal)
|
||||
- [data_io](#data_io)
|
||||
- [meta_io](#meta_io)
|
||||
- [journal_io](#journal_io)
|
||||
- [journal_sector_buffer_count](#journal_sector_buffer_count)
|
||||
- [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
|
||||
- [throttle_small_writes](#throttle_small_writes)
|
||||
@@ -53,11 +57,21 @@ them, even without restarting by updating configuration in etcd.
|
||||
- Type: seconds
|
||||
- Default: 5
|
||||
|
||||
Interval at which OSDs report their state to etcd. Affects OSD lease time
|
||||
Interval at which OSDs report their liveness to etcd. Affects OSD lease time
|
||||
and thus the failover speed. Lease time is equal to this parameter value
|
||||
plus max_etcd_attempts * etcd_quick_timeout because it should be guaranteed
|
||||
that every OSD always refreshes its lease in time.
|
||||
|
||||
## etcd_stats_interval
|
||||
|
||||
- Type: seconds
|
||||
- Default: 30
|
||||
|
||||
Interval at which OSDs report their statistics to etcd. Highly affects the
|
||||
imposed load on etcd, because statistics include a key for every OSD and
|
||||
for every PG. At the same time, low statistic intervals make `vitastor-cli`
|
||||
statistics more responsive.
|
||||
|
||||
## run_primary
|
||||
|
||||
- Type: boolean
|
||||
@@ -255,6 +269,60 @@ is typically very small because it's sufficient to have 16-32 MB journal
|
||||
for SSD OSDs. However, in theory it's possible that you'll want to turn it
|
||||
off for hybrid (HDD+SSD) OSDs with large journals on quick devices.
|
||||
|
||||
## data_io
|
||||
|
||||
- Type: string
|
||||
- Default: direct
|
||||
|
||||
I/O mode for *data*. One of "direct", "cached" or "directsync". Corresponds
|
||||
to O_DIRECT, O_SYNC and O_DIRECT|O_SYNC, respectively.
|
||||
|
||||
Choose "cached" to use Linux page cache. This may improve read performance
|
||||
for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
|
||||
decrease write performance for fast disks because page cache is an overhead
|
||||
itself.
|
||||
|
||||
Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
|
||||
(which requires disable_data_fsync) with drives having write-back cache
|
||||
which can't be turned off, for example, Intel Optane. Also note that *some*
|
||||
desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
|
||||
disable_data_fsync unsafe even with "directsync".
|
||||
|
||||
## meta_io
|
||||
|
||||
- Type: string
|
||||
- Default: direct
|
||||
|
||||
I/O mode for *metadata*. One of "direct", "cached" or "directsync".
|
||||
|
||||
"cached" may improve read performance, but only under the following conditions:
|
||||
1. your drives are relatively slow (HDD, SATA SSD), and
|
||||
2. checksums are enabled, and
|
||||
3. [inmemory_metadata](#inmemory_metadata) is disabled.
|
||||
Under all these conditions, metadata blocks are read from disk on every
|
||||
read request to verify checksums and caching them may reduce this extra
|
||||
read load. Without (3) metadata is never read from the disk after starting,
|
||||
and without (2) metadata blocks are read from disk only during journal
|
||||
flushing.
|
||||
|
||||
"directsync" is the same as above.
|
||||
|
||||
If the same device is used for data and metadata, meta_io by default is set
|
||||
to the same value as [data_io](#data_io).
|
||||
|
||||
## journal_io
|
||||
|
||||
- Type: string
|
||||
- Default: direct
|
||||
|
||||
I/O mode for *journal*. One of "direct", "cached" or "directsync".
|
||||
|
||||
Here, "cached" may only improve read performance for recent writes and
|
||||
only if [inmemory_journal](#inmemory_journal) is turned off.
|
||||
|
||||
If the same device is used for metadata and journal, journal_io by default
|
||||
is set to the same value as [meta_io](#meta_io).
|
||||
|
||||
## journal_sector_buffer_count
|
||||
|
||||
- Type: integer
|
||||
|
@@ -12,6 +12,7 @@
|
||||
изменения конфигурации в etcd.
|
||||
|
||||
- [etcd_report_interval](#etcd_report_interval)
|
||||
- [etcd_stats_interval](#etcd_stats_interval)
|
||||
- [run_primary](#run_primary)
|
||||
- [osd_network](#osd_network)
|
||||
- [bind_address](#bind_address)
|
||||
@@ -32,6 +33,9 @@
|
||||
- [max_flusher_count](#max_flusher_count)
|
||||
- [inmemory_metadata](#inmemory_metadata)
|
||||
- [inmemory_journal](#inmemory_journal)
|
||||
- [data_io](#data_io)
|
||||
- [meta_io](#meta_io)
|
||||
- [journal_io](#journal_io)
|
||||
- [journal_sector_buffer_count](#journal_sector_buffer_count)
|
||||
- [journal_no_same_sector_overwrites](#journal_no_same_sector_overwrites)
|
||||
- [throttle_small_writes](#throttle_small_writes)
|
||||
@@ -54,11 +58,21 @@
|
||||
- Тип: секунды
|
||||
- Значение по умолчанию: 5
|
||||
|
||||
Интервал, с которым OSD обновляет своё состояние в etcd. Значение параметра
|
||||
влияет на время резервации (lease) OSD и поэтому на скорость переключения
|
||||
Интервал, с которым OSD сообщает о том, что жив, в etcd. Значение параметра
|
||||
влияет на время резервации (lease) OSD и поэтому - на скорость переключения
|
||||
при падении OSD. Время lease равняется значению этого параметра плюс
|
||||
max_etcd_attempts * etcd_quick_timeout.
|
||||
|
||||
## etcd_stats_interval
|
||||
|
||||
- Тип: секунды
|
||||
- Значение по умолчанию: 30
|
||||
|
||||
Интервал, с которым OSD обновляет свою статистику в etcd. Сильно влияет на
|
||||
создаваемую нагрузку на etcd, потому что статистика содержит по ключу на
|
||||
каждый OSD и на каждую PG. В то же время низкий интервал делает
|
||||
статистику, печатаемую `vitastor-cli`, отзывчивей.
|
||||
|
||||
## run_primary
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
@@ -263,6 +277,63 @@ Flusher - это микро-поток (корутина), которая коп
|
||||
параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
|
||||
журналами, расположенными на быстром по сравнению с HDD устройстве.
|
||||
|
||||
## data_io
|
||||
|
||||
- Тип: строка
|
||||
- Значение по умолчанию: direct
|
||||
|
||||
Режим ввода-вывода для *данных*. Одно из значений "direct", "cached" или
|
||||
"directsync", означающих O_DIRECT, O_SYNC и O_DIRECT|O_SYNC, соответственно.
|
||||
|
||||
Выберите "cached", чтобы использовать системный кэш Linux (page cache) при
|
||||
чтении и записи. Это может улучшить скорость чтения горячих данных с
|
||||
относительно медленных дисков - HDD и, возможно, SATA SSD - но немного
|
||||
снижает производительность записи для быстрых дисков, так как кэш сам по
|
||||
себе тоже добавляет накладные расходы.
|
||||
|
||||
Выберите "directsync", если хотите задействовать
|
||||
[immediate_commit](layout-cluster.ru.md#immediate_commit) (требующий
|
||||
включения disable_data_fsync) на дисках с неотключаемым кэшем. Пример таких
|
||||
дисков - Intel Optane. При этом также стоит иметь в виду, что *некоторые*
|
||||
настольные SSD (например, HP EX950) игнорируют флаг O_SYNC, делая отключение
|
||||
fsync небезопасным даже с режимом "directsync".
|
||||
|
||||
## meta_io
|
||||
|
||||
- Тип: строка
|
||||
- Значение по умолчанию: direct
|
||||
|
||||
Режим ввода-вывода для *метаданных*. Одно из значений "direct", "cached" или
|
||||
"directsync".
|
||||
|
||||
"cached" может улучшить скорость чтения, если:
|
||||
1. у вас медленные диски (HDD, SATA SSD)
|
||||
2. контрольные суммы включены
|
||||
3. параметр [inmemory_metadata](#inmemory_metadata) отключён.
|
||||
При этих условиях блоки метаданных читаются с диска при каждом запросе чтения
|
||||
для проверки контрольных сумм и их кэширование может снизить дополнительную
|
||||
нагрузку на диск. Без (3) метаданные никогда не читаются с диска после
|
||||
запуска OSD, а без (2) блоки метаданных читаются только при сбросе журнала.
|
||||
|
||||
Если одно и то же устройство используется для данных и метаданных, режим
|
||||
ввода-вывода метаданных по умолчанию устанавливается равным [data_io](#data_io).
|
||||
|
||||
## journal_io
|
||||
|
||||
- Тип: строка
|
||||
- Значение по умолчанию: direct
|
||||
|
||||
Режим ввода-вывода для *журнала*. Одно из значений "direct", "cached" или
|
||||
"directsync".
|
||||
|
||||
Здесь "cached" может улучшить скорость чтения только недавно записанных
|
||||
данных и только если параметр [inmemory_journal](#inmemory_journal)
|
||||
отключён.
|
||||
|
||||
Если одно и то же устройство используется для метаданных и журнала,
|
||||
режим ввода-вывода журнала по умолчанию устанавливается равным
|
||||
[meta_io](#meta_io).
|
||||
|
||||
## journal_sector_buffer_count
|
||||
|
||||
- Тип: целое число
|
||||
|
@@ -205,9 +205,8 @@ This parameter usually doesn't require to be changed.
|
||||
- Default: 131072
|
||||
|
||||
Block size for this pool. The value from /vitastor/config/global is used when
|
||||
unspecified. If your cluster has OSDs with different block sizes then pool must
|
||||
be restricted by [osd_tags](#osd_tags) to only include OSDs with matching block
|
||||
size.
|
||||
unspecified. Only OSDs with matching block_size are used for each pool. If you
|
||||
want to further restrict OSDs for the pool, use [osd_tags](#osd_tags).
|
||||
|
||||
Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#block_size).
|
||||
|
||||
@@ -216,10 +215,9 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
|
||||
- Type: integer
|
||||
- Default: 4096
|
||||
|
||||
"Sector" size of virtual disks in this pool. The value from
|
||||
/vitastor/config/global is used when unspecified. Similar to block_size, the
|
||||
pool must be restricted by [osd_tags](#osd_tags) to only include OSDs with
|
||||
matching bitmap_granularity.
|
||||
"Sector" size of virtual disks in this pool. The value from /vitastor/config/global
|
||||
is used when unspecified. Similarly to block_size, only OSDs with matching
|
||||
bitmap_granularity are used for each pool.
|
||||
|
||||
Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#bitmap_granularity).
|
||||
|
||||
@@ -229,10 +227,11 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
|
||||
- Default: none
|
||||
|
||||
Immediate commit setting for this pool. The value from /vitastor/config/global
|
||||
is used when unspecified. Similar to block_size, the pool must be restricted by
|
||||
[osd_tags](#osd_tags) to only include OSDs with compatible immediate_commit.
|
||||
Compatible means that a pool with non-immediate commit will work with OSDs with
|
||||
immediate commit enabled, but not vice versa.
|
||||
is used when unspecified. Similarly to block_size, only OSDs with compatible
|
||||
immediate_commit are used for each pool. "Compatible" means that a pool with
|
||||
non-immediate commit will use OSDs with immediate commit enabled, but not vice
|
||||
versa. I.e., pools with "none" use all OSDs, pools with "small" only use OSDs
|
||||
with "all" or "small", and pools with "all" only use OSDs with "all".
|
||||
|
||||
Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-cluster.en.md#immediate_commit).
|
||||
|
||||
|
@@ -208,8 +208,9 @@ PG в Vitastor эферемерны, то есть вы можете менят
|
||||
|
||||
Размер блока для данного пула. Если не задан, используется значение из
|
||||
/vitastor/config/global. Если в вашем кластере есть OSD с разными размерами
|
||||
блока, пул должен быть ограничен только OSD, блок которых равен блоку пула,
|
||||
с помощью [osd_tags](#osd_tags).
|
||||
блока, пул будет использовать только OSD с размером блока, равным размеру блока
|
||||
пула. Если вы хотите сильнее ограничить набор используемых для пула OSD -
|
||||
используйте [osd_tags](#osd_tags).
|
||||
|
||||
О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#block_size).
|
||||
|
||||
@@ -219,9 +220,8 @@ PG в Vitastor эферемерны, то есть вы можете менят
|
||||
- По умолчанию: 4096
|
||||
|
||||
Размер "сектора" виртуальных дисков в данном пуле. Если не задан, используется
|
||||
значение из /vitastor/config/global. Аналогично block_size, пул должен быть
|
||||
ограничен OSD со значением bitmap_granularity, равным значению пула, с помощью
|
||||
[osd_tags](#osd_tags).
|
||||
значение из /vitastor/config/global. Аналогично block_size, каждый пул будет
|
||||
использовать только OSD с совпадающей с пулом настройкой bitmap_granularity.
|
||||
|
||||
О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#bitmap_granularity).
|
||||
|
||||
@@ -231,11 +231,13 @@ PG в Vitastor эферемерны, то есть вы можете менят
|
||||
- По умолчанию: none
|
||||
|
||||
Настройка мгновенного коммита для данного пула. Если не задана, используется
|
||||
значение из /vitastor/config/global. Аналогично block_size, пул должен быть
|
||||
ограничен OSD со значением bitmap_granularity, совместимым со значением пула, с
|
||||
помощью [osd_tags](#osd_tags). Совместимость означает, что пул с отключенным
|
||||
мгновенным коммитом может работать на OSD с включённым мгновенным коммитом, но
|
||||
не наоборот.
|
||||
значение из /vitastor/config/global. Аналогично block_size, каждый пул будет
|
||||
использовать только OSD с *совместимыми* настройками immediate_commit.
|
||||
"Совместимыми" означает, что пул с отключенным мгновенным коммитом будет
|
||||
использовать OSD с включённым мгновенным коммитом, но не наоборот. То есть,
|
||||
пул со значением "none" будет использовать все OSD, пул со "small" будет
|
||||
использовать OSD с "all" или "small", а пул с "all" будет использовать только
|
||||
OSD с "all".
|
||||
|
||||
О самом параметре читайте в разделе [Дисковые параметры уровня кластера](layout-cluster.ru.md#immediate_commit).
|
||||
|
||||
|
4
docs/config/src/client.en.md
Normal file
4
docs/config/src/client.en.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Client Parameters
|
||||
|
||||
These parameters apply only to clients and affect their interaction with
|
||||
the cluster.
|
4
docs/config/src/client.ru.md
Normal file
4
docs/config/src/client.ru.md
Normal file
@@ -0,0 +1,4 @@
|
||||
# Параметры клиентского кода
|
||||
|
||||
Данные параметры применяются только к клиентам Vitastor (QEMU, fio, NBD) и
|
||||
затрагивают логику их работы с кластером.
|
124
docs/config/src/client.yml
Normal file
124
docs/config/src/client.yml
Normal file
@@ -0,0 +1,124 @@
|
||||
- name: client_max_dirty_bytes
|
||||
type: int
|
||||
default: 33554432
|
||||
online: true
|
||||
info: |
|
||||
Without [immediate_commit](layout-cluster.en.md#immediate_commit)=all this parameter sets the limit of "dirty"
|
||||
(not committed by fsync) data allowed by the client before forcing an
|
||||
additional fsync and committing the data. Also note that the client always
|
||||
holds a copy of uncommitted data in memory so this setting also affects
|
||||
RAM usage of clients.
|
||||
info_ru: |
|
||||
При работе без [immediate_commit](layout-cluster.ru.md#immediate_commit)=all - это лимит объёма "грязных" (не
|
||||
зафиксированных fsync-ом) данных, при достижении которого клиент будет
|
||||
принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
|
||||
что в этом случае до момента fsync клиент хранит копию незафиксированных
|
||||
данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
|
||||
- name: client_max_dirty_ops
|
||||
type: int
|
||||
default: 1024
|
||||
online: true
|
||||
info: |
|
||||
Same as client_max_dirty_bytes, but instead of total size, limits the number
|
||||
of uncommitted write operations.
|
||||
info_ru: |
|
||||
Аналогично client_max_dirty_bytes, но ограничивает количество
|
||||
незафиксированных операций записи вместо их общего объёма.
|
||||
- name: client_enable_writeback
|
||||
type: bool
|
||||
default: false
|
||||
online: true
|
||||
info: |
|
||||
This parameter enables client-side write buffering. This means that write
|
||||
requests are accumulated in memory for a short time before being sent to
|
||||
a Vitastor cluster which allows sending them in parallel and increases
|
||||
performance of some applications. Writes are buffered until client forces
|
||||
a flush with fsync() or until the amount of buffered writes exceeds the
|
||||
limit.
|
||||
|
||||
Write buffering significantly increases performance of some applications,
|
||||
for example, CrystalDiskMark under Windows (LOL :-D), but also any other
|
||||
applications if they do writes in one of two non-optimal ways: either if
|
||||
they do a lot of small (4 kb or so) sequential writes, or if they do a lot
|
||||
of small random writes, but without any parallelism or asynchrony, and also
|
||||
without calling fsync().
|
||||
|
||||
With write buffering enabled, you can expect around 22000 T1Q1 random write
|
||||
iops in QEMU more or less regardless of the quality of your SSDs, and this
|
||||
number is in fact bound by QEMU itself rather than Vitastor (check it
|
||||
yourself by adding a "driver=null-co" disk in QEMU). Without write
|
||||
buffering, the current record is 9900 iops, but the number is usually
|
||||
even lower with non-ideal hardware, for example, it may be 5000 iops.
|
||||
|
||||
Even when this parameter is enabled, write buffering isn't enabled until
|
||||
the client explicitly allows it, because enabling it without the client
|
||||
being aware of the fact that its writes may be buffered may lead to data
|
||||
loss. Because of this, older versions of clients don't support write
|
||||
buffering at all, newer versions of the QEMU driver allow write buffering
|
||||
only if it's enabled in disk settings with `-blockdev cache.direct=false`,
|
||||
and newer versions of FIO only allow write buffering if you don't specify
|
||||
`-direct=1`. NBD and NFS drivers allow write buffering by default.
|
||||
|
||||
You can overcome this restriction too with the `client_writeback_allowed`
|
||||
parameter, but you shouldn't do that unless you **really** know what you
|
||||
are doing.
|
||||
info_ru: |
|
||||
Данный параметр разрешает включать буферизацию записи в памяти. Буферизация
|
||||
означает, что операции записи отправляются на кластер Vitastor не сразу, а
|
||||
могут небольшое время накапливаться в памяти и сбрасываться сразу пакетами,
|
||||
до тех пор, пока либо не будет превышен лимит неотправленных записей, либо
|
||||
пока клиент не вызовет fsync.
|
||||
|
||||
Буферизация значительно повышает производительность некоторых приложений,
|
||||
например, CrystalDiskMark в Windows (ха-ха :-D), но также и любых других,
|
||||
которые пишут на диск неоптимально: либо последовательно, но мелкими блоками
|
||||
(например, по 4 кб), либо случайно, но без параллелизма и без fsync - то
|
||||
есть, например, отправляя 128 операций записи в разные места диска, но не
|
||||
все сразу с помощью асинхронного I/O, а по одной.
|
||||
|
||||
В QEMU с буферизацией записи можно ожидать показателя примерно 22000
|
||||
операций случайной записи в секунду в 1 поток и с глубиной очереди 1 (T1Q1)
|
||||
без fsync, почти вне зависимости от того, насколько хороши ваши диски - эта
|
||||
цифра упирается в сам QEMU. Без буферизации рекорд пока что - 9900 операций
|
||||
в секунду, но на железе похуже может быть и поменьше, например, 5000 операций
|
||||
в секунду.
|
||||
|
||||
При этом, даже если данный параметр включён, буферизация не включается, если
|
||||
явно не разрешена клиентом, т.к. если клиент не знает, что запросы записи
|
||||
буферизуются, это может приводить к потере данных. Поэтому в старых версиях
|
||||
клиентских драйверов буферизация записи не включается вообще, в новых
|
||||
версиях QEMU-драйвера включается, только если разрешена опцией диска
|
||||
`-blockdev cache.direct=false`, а в fio - только если нет опции `-direct=1`.
|
||||
В NBD и NFS драйверах буферизация записи разрешена по умолчанию.
|
||||
|
||||
Можно обойти и это ограничение с помощью параметра `client_writeback_allowed`,
|
||||
но делать так не надо, если только вы не уверены в том, что делаете, на все
|
||||
100%. :-)
|
||||
- name: client_max_buffered_bytes
|
||||
type: int
|
||||
default: 33554432
|
||||
online: true
|
||||
info: |
|
||||
Maximum total size of buffered writes which triggers write-back when reached.
|
||||
info_ru: |
|
||||
Максимальный общий размер буферизованных записей, при достижении которого
|
||||
начинается процесс сброса данных на сервер.
|
||||
- name: client_max_buffered_ops
|
||||
type: int
|
||||
default: 1024
|
||||
online: true
|
||||
info: |
|
||||
Maximum number of buffered writes which triggers write-back when reached.
|
||||
Multiple consecutive modified data regions are counted as 1 write here.
|
||||
info_ru: |
|
||||
Максимальное количество буферизованных записей, при достижении которого
|
||||
начинается процесс сброса данных на сервер. При этом несколько
|
||||
последовательных изменённых областей здесь считаются 1 записью.
|
||||
- name: client_max_writeback_iodepth
|
||||
type: int
|
||||
default: 256
|
||||
online: true
|
||||
info: |
|
||||
Maximum number of parallel writes when flushing buffered data to the server.
|
||||
info_ru: |
|
||||
Максимальное число параллельных операций записи при сбросе буферов на сервер.
|
145
docs/config/src/include.js
Executable file
145
docs/config/src/include.js
Executable file
@@ -0,0 +1,145 @@
|
||||
#!/usr/bin/nodejs
|
||||
|
||||
const fsp = require('fs').promises;
|
||||
|
||||
// Entry point: run the preprocessor; on failure print the error and make the
// process exit with a non-zero status so that calling scripts notice it.
run(process.argv).catch(e => { console.error(e); process.exitCode = 1; });
|
||||
|
||||
// Markdown preprocessor: expands {{include}} directives of an index file and
// prints the merged document to stdout.
//
// argv[2] is the path of the index markdown file. Directive forms handled:
//   {{path.md}}                - include the file (text before its first heading is dropped)
//   {{path.md#Section name}}   - include only the part starting at that heading
//                                and ending at the next heading of any level
//   {{path.md|indent=N}}       - prefix every included heading with N extra '#'
// Without |indent, included headings are prefixed with as many '#' as the level
// of the last heading seen in the index file.
// Relative links inside included files are resolved against the included file;
// links to other included files are finally replaced with '#anchor' links.
async function run(argv)
{
    if (argv.length < 3)
    {
        console.log('Markdown preprocessor\nUSAGE: ./include.js file.md');
        return;
    }
    const index_file = await fsp.realpath(argv[2]);
    // Tokenizer: split() with a capturing group keeps the delimiters, so every
    // {{directive}}, [text](link) and "#heading" line becomes its own array item
    const re = /(\{\{[\s\S]*?\}\}|\[[^\]]+\]\([^\)]+\)|(?:^|\n)#[^\n]+)/;
    let text = await fsp.readFile(index_file, { encoding: 'utf-8' });
    text = text.split(re);
    // Absolute path of an included file (plus '#section' if any) => '#anchor'
    // derived from the heading under which it appears in the merged document
    let included = {};
    let heading = 0, heading_name = '', m;
    for (let i = 0; i < text.length; i++)
    {
        if (text[i].substr(0, 2) == '{{')
        {
            // Inclusion
            let incfile = text[i].substr(2, text[i].length-4);
            let section = null;
            let indent = heading;
            // Strip the optional "|indent=N" suffix first, then the optional "#Section" suffix
            incfile = incfile.replace(/\s*\|\s*indent\s*=\s*(-?\d+)\s*$/, (m, m1) => { indent = parseInt(m1); return ''; });
            incfile = incfile.replace(/\s*#\s*([^#]+)$/, (m, m1) => { section = m1; return ''; });
            let inc_heading = section;
            incfile = rel2abs(index_file, incfile);
            let inc = await fsp.readFile(incfile, { encoding: 'utf-8' });
            // Remove everything before the first header. The (?!#) guard keeps the
            // header when it is the very first line of the file - without it the
            // pattern would skip that header and cut up to the second one.
            inc = inc.trim().replace(/^(?!#)[\s\S]+?\n#/, '#');
            inc = inc.split(re);
            // Clamp at 0: the directive syntax accepts negative numbers, and a
            // negative repeat count / array length would throw a RangeError
            const indent_str = '#'.repeat(Math.max(indent, 0));
            let section_start = -1, section_end = -1;
            for (let j = 0; j < inc.length; j++)
            {
                if ((m = /^(\n?)(#+\s*)([\s\S]+)$/.exec(inc[j])))
                {
                    // Heading of the included file
                    if (!inc_heading)
                    {
                        // The first heading names the whole inclusion when no section was requested
                        inc_heading = m[3].trim();
                    }
                    if (section)
                    {
                        if (m[3].trim() == section)
                            section_start = j;
                        else if (section_start >= 0)
                        {
                            // Next heading after the requested one ends the section
                            section_end = j;
                            break;
                        }
                    }
                    inc[j] = m[1] + indent_str + m[2] + m[3];
                }
                else if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(inc[j])) && !/^https?:(\/\/)|^#/.exec(m[2]))
                {
                    // Relative link (not http(s):// and not a pure #anchor)
                    const abs_m2 = rel2abs(incfile, m[2]);
                    const rel_m = abs2rel(__filename, abs_m2);
                    if (rel_m.substr(0, 9) == '../../../') // outside docs
                        inc[j] = m[1] + 'https://git.yourcmc.ru/vitalif/vitastor/src/branch/master/'+rel2abs('docs/config/src/include.js', rel_m) + m[3];
                    else
                        inc[j] = m[1] + abs_m2 + m[3];
                }
            }
            if (section)
            {
                // Keep only the requested section; nothing is included if it was not found
                inc = section_start >= 0 ? inc.slice(section_start, section_end < 0 ? inc.length : section_end) : [];
            }
            if (inc.length)
            {
                if (!inc_heading)
                    inc_heading = heading_name||'';
                // Anchor: lowercased heading with every run of non-letters replaced by '-'
                included[incfile+(section ? '#'+section : '')] = '#'+inc_heading.toLowerCase().replace(/\P{L}+/ug, '-').replace(/^-|-$/g, '');
                inc[0] = inc[0].replace(/^\s+/, '');
                inc[inc.length-1] = inc[inc.length-1].replace(/\s+$/, '');
            }
            // Replace the directive with the included items and continue after them
            text.splice(i, 1, ...inc);
            i = i + inc.length - 1;
        }
        else if ((m = /^\n?(#+)\s*([\s\S]+)$/.exec(text[i])))
        {
            // Heading
            heading = m[1].length;
            heading_name = m[2].trim();
        }
    }
    // Second pass: point links to included files at anchors inside the merged document
    for (let i = 0; i < text.length; i++)
    {
        if ((m = /^(\[[^\]]+\]\()([^\)]+)(\))$/.exec(text[i])) && !/^https?:(\/\/)|^#/.exec(m[2]))
        {
            const p = m[2].indexOf('#');
            if (included[m[2]])
            {
                // Link to an included file or section => anchor of its heading
                text[i] = m[1]+included[m[2]]+m[3];
            }
            else if (p >= 0 && included[m[2].substr(0, p)])
            {
                // Link to an anchor inside an included file => keep only the '#anchor' part
                text[i] = m[1]+m[2].substr(p)+m[3];
            }
        }
    }
    console.log(text.join(''));
}
|
||||
|
||||
// Resolves path `rel` against the directory of file `ref`: the last component
// of `ref` is dropped (when `ref` contains a '/'), both paths are split on
// slashes, concatenated and normalized with killdots().
function rel2abs(ref, rel)
{
    const base_dir = ref.replace(/^(.*)\/[^\/]+$/, '$1');
    const parts = base_dir.split(/\/+/).concat(rel.split(/\/+/));
    return killdots(parts).join('/');
}
|
||||
|
||||
// Builds a path to `abs` relative to the directory of file `ref`: the common
// leading components are skipped (the last component of `ref` is never
// skipped), then one '..' is emitted per remaining directory of `ref`.
function abs2rel(ref, abs)
{
    const from = ref.split(/\/+/);
    const to = abs.split(/\/+/);
    let common = 0;
    while (from.length - common > 1 && from[common] == to[common])
    {
        common++;
    }
    // Everything left in `from` except its last component has to be climbed out of
    const ups = new Array(from.length - common - 1).fill('..');
    return killdots(ups.concat(to.slice(common))).join('/');
}
|
||||
|
||||
// Normalizes an array of path components in place and returns that same array.
// '.' components are dropped; a '..' cancels the component kept right before it
// unless that component is itself '..' (so leading '..' are preserved).
function killdots(rel)
{
    const kept = [];
    for (const part of rel)
    {
        if (part == '.')
            continue;
        if (part == '..' && kept.length >= 1 && kept[kept.length-1] != '..')
            kept.pop();
        else
            kept.push(part);
    }
    // Write the result back into the caller's array
    rel.splice(0, rel.length, ...kept);
    return rel;
}
|
67
docs/config/src/included.en.md
Normal file
67
docs/config/src/included.en.md
Normal file
@@ -0,0 +1,67 @@
|
||||
# Vitastor
|
||||
|
||||
{{../../../README.md#The Idea}}
|
||||
|
||||
{{../../../README.md#Talks and presentations}}
|
||||
|
||||
{{../../intro/features.en.md}}
|
||||
|
||||
{{../../intro/quickstart.en.md}}
|
||||
|
||||
{{../../intro/architecture.en.md}}
|
||||
|
||||
## Installation
|
||||
|
||||
{{../../installation/packages.en.md}}
|
||||
|
||||
{{../../installation/proxmox.en.md}}
|
||||
|
||||
{{../../installation/openstack.en.md}}
|
||||
|
||||
{{../../installation/kubernetes.en.md}}
|
||||
|
||||
{{../../installation/source.en.md}}
|
||||
|
||||
{{../../config.en.md|indent=1}}
|
||||
|
||||
{{../../config/common.en.md|indent=2}}
|
||||
|
||||
{{../../config/network.en.md|indent=2}}
|
||||
|
||||
{{../../config/client.en.md|indent=2}}
|
||||
|
||||
{{../../config/layout-cluster.en.md|indent=2}}
|
||||
|
||||
{{../../config/layout-osd.en.md|indent=2}}
|
||||
|
||||
{{../../config/osd.en.md|indent=2}}
|
||||
|
||||
{{../../config/monitor.en.md|indent=2}}
|
||||
|
||||
{{../../config/pool.en.md|indent=2}}
|
||||
|
||||
{{../../config/inode.en.md|indent=2}}
|
||||
|
||||
## Usage
|
||||
|
||||
{{../../usage/cli.en.md}}
|
||||
|
||||
{{../../usage/disk.en.md}}
|
||||
|
||||
{{../../usage/fio.en.md}}
|
||||
|
||||
{{../../usage/nbd.en.md}}
|
||||
|
||||
{{../../usage/qemu.en.md}}
|
||||
|
||||
{{../../usage/nfs.en.md}}
|
||||
|
||||
## Performance
|
||||
|
||||
{{../../performance/understanding.en.md}}
|
||||
|
||||
{{../../performance/theoretical.en.md}}
|
||||
|
||||
{{../../performance/comparison1.en.md}}
|
||||
|
||||
{{../../intro/author.en.md|indent=1}}
|
67
docs/config/src/included.ru.md
Normal file
67
docs/config/src/included.ru.md
Normal file
@@ -0,0 +1,67 @@
|
||||
# Vitastor
|
||||
|
||||
{{../../../README-ru.md#Идея|indent=0}}
|
||||
|
||||
{{../../../README-ru.md#Презентации и записи докладов|indent=0}}
|
||||
|
||||
{{../../intro/features.ru.md}}
|
||||
|
||||
{{../../intro/quickstart.ru.md}}
|
||||
|
||||
{{../../intro/architecture.ru.md}}
|
||||
|
||||
## Установка
|
||||
|
||||
{{../../installation/packages.ru.md}}
|
||||
|
||||
{{../../installation/proxmox.ru.md}}
|
||||
|
||||
{{../../installation/openstack.ru.md}}
|
||||
|
||||
{{../../installation/kubernetes.ru.md}}
|
||||
|
||||
{{../../installation/source.ru.md}}
|
||||
|
||||
{{../../config.ru.md|indent=1}}
|
||||
|
||||
{{../../config/common.ru.md|indent=2}}
|
||||
|
||||
{{../../config/network.ru.md|indent=2}}
|
||||
|
||||
{{../../config/client.ru.md|indent=2}}
|
||||
|
||||
{{../../config/layout-cluster.ru.md|indent=2}}
|
||||
|
||||
{{../../config/layout-osd.ru.md|indent=2}}
|
||||
|
||||
{{../../config/osd.ru.md|indent=2}}
|
||||
|
||||
{{../../config/monitor.ru.md|indent=2}}
|
||||
|
||||
{{../../config/pool.ru.md|indent=2}}
|
||||
|
||||
{{../../config/inode.ru.md|indent=2}}
|
||||
|
||||
## Использование
|
||||
|
||||
{{../../usage/cli.ru.md}}
|
||||
|
||||
{{../../usage/disk.ru.md}}
|
||||
|
||||
{{../../usage/fio.ru.md}}
|
||||
|
||||
{{../../usage/nbd.ru.md}}
|
||||
|
||||
{{../../usage/qemu.ru.md}}
|
||||
|
||||
{{../../usage/nfs.ru.md}}
|
||||
|
||||
## Производительность
|
||||
|
||||
{{../../performance/understanding.ru.md}}
|
||||
|
||||
{{../../performance/theoretical.ru.md}}
|
||||
|
||||
{{../../performance/comparison1.ru.md}}
|
||||
|
||||
{{../../intro/author.ru.md|indent=1}}
|
@@ -7,26 +7,27 @@
|
||||
in Vitastor, affects memory usage, write amplification and I/O load
|
||||
distribution effectiveness.
|
||||
|
||||
Recommended default block size is 128 KB for SSD and 4 MB for HDD. In fact,
|
||||
it's possible to use 4 MB for SSD too - it will lower memory usage, but
|
||||
Recommended default block size is 128 KB for SSD and 1 MB for HDD. In fact,
|
||||
it's possible to use 1 MB for SSD too - it will lower memory usage, but
|
||||
may increase average WA and reduce linear performance.
|
||||
|
||||
OSD memory usage is roughly (SIZE / BLOCK * 68 bytes) which is roughly
|
||||
544 MB per 1 TB of used disk space with the default 128 KB block size.
|
||||
With 1 MB it's 8 times lower.
|
||||
info_ru: |
|
||||
Размер объектов (блоков данных), на которые делятся физические и виртуальные
|
||||
диски в Vitastor (в рамках каждого пула). Одна из ключевых на данный момент
|
||||
настроек, влияет на потребление памяти, объём избыточной записи (write
|
||||
amplification) и эффективность распределения нагрузки по OSD.
|
||||
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 4
|
||||
мегабайта для HDD. В принципе, для SSD можно тоже использовать 4 мегабайта,
|
||||
Рекомендуемые по умолчанию размеры блока - 128 килобайт для SSD и 1 мегабайт
|
||||
для HDD. В принципе, для SSD можно тоже использовать блок размером 1 мегабайт,
|
||||
это понизит использование памяти, но ухудшит распределение нагрузки и в
|
||||
среднем увеличит WA.
|
||||
|
||||
Потребление памяти OSD составляет примерно (РАЗМЕР / БЛОК * 68 байт),
|
||||
т.е. примерно 544 МБ памяти на 1 ТБ занятого места на диске при
|
||||
стандартном 128 КБ блоке.
|
||||
стандартном 128 КБ блоке. При 1 МБ блоке памяти нужно в 8 раз меньше.
|
||||
- name: bitmap_granularity
|
||||
type: int
|
||||
default: 4096
|
||||
@@ -86,8 +87,9 @@
|
||||
it (they have internal SSD cache even though it's not stated in datasheets).
|
||||
|
||||
Setting this parameter to "all" or "small" in OSD parameters requires enabling
|
||||
disable_journal_fsync and disable_meta_fsync, setting it to "all" also requires
|
||||
enabling disable_data_fsync.
|
||||
[disable_journal_fsync](layout-osd.en.md#disable_journal_fsync) and
|
||||
[disable_meta_fsync](layout-osd.en.md#disable_meta_fsync), setting it to
|
||||
"all" also requires enabling [disable_data_fsync](layout-osd.en.md#disable_data_fsync).
|
||||
|
||||
TLDR: For optimal performance, set immediate_commit to "all" if you only use
|
||||
SSDs with supercapacitor-based power loss protection (nonvolatile
|
||||
@@ -139,8 +141,9 @@
|
||||
указано в спецификациях).
|
||||
|
||||
Указание "all" или "small" в настройках / командной строке OSD требует
|
||||
включения disable_journal_fsync и disable_meta_fsync, значение "all" также
|
||||
требует включения disable_data_fsync.
|
||||
включения [disable_journal_fsync](layout-osd.ru.md#disable_journal_fsync) и
|
||||
[disable_meta_fsync](layout-osd.ru.md#disable_meta_fsync), значение "all"
|
||||
также требует включения [disable_data_fsync](layout-osd.ru.md#disable_data_fsync).
|
||||
|
||||
Итого, вкратце: для оптимальной производительности установите
|
||||
immediate_commit в значение "all", если вы используете в кластере только SSD
|
||||
|
@@ -204,3 +204,73 @@
|
||||
|
||||
Клиентам не обязательно знать про disk_alignment, так что помещать значение
|
||||
этого параметра в etcd в /vitastor/config/global не нужно.
|
||||
- name: data_csum_type
|
||||
type: string
|
||||
default: none
|
||||
info: |
|
||||
Data checksum type to use. May be "crc32c" or "none". Set to "crc32c" to
|
||||
enable data checksums.
|
||||
info_ru: |
|
||||
Тип используемых OSD контрольных сумм данных. Может быть "crc32c" или "none".
|
||||
Установите в "crc32c", чтобы включить расчёт и проверку контрольных сумм данных.
|
||||
|
||||
Следует понимать, что контрольные суммы в зависимости от размера блока их
|
||||
расчёта либо увеличивают потребление памяти, либо снижают производительность.
|
||||
Подробнее смотрите в описании параметра [csum_block_size](#csum_block_size).
|
||||
- name: csum_block_size
|
||||
type: int
|
||||
default: 4096
|
||||
info: |
|
||||
Checksum calculation block size.
|
||||
|
||||
Must be equal or a multiple of [bitmap_granularity](layout-cluster.en.md#bitmap_granularity)
|
||||
(which is usually 4 KB).
|
||||
|
||||
Checksums increase metadata size by 4 bytes per each csum_block_size of data.
|
||||
|
||||
Checksums are always a tradeoff:
|
||||
1. You either sacrifice +1 GB RAM per 1 TB of data
|
||||
2. Or you raise csum_block_size, for example, to 32k and sacrifice
|
||||
50% random write iops due to checksum read-modify-write
|
||||
3. Or you turn off [inmemory_metadata](osd.en.md#inmemory_metadata) and
|
||||
sacrifice 50% random read iops due to checksum reads
|
||||
|
||||
All-flash clusters usually have enough RAM to use default csum_block_size,
|
||||
which uses 1 GB RAM per 1 TB of data. HDD clusters usually don't.
|
||||
|
||||
Thus, recommended setups are:
|
||||
1. All-flash, 1 GB RAM per 1 TB data: default (csum_block_size=4k)
|
||||
2. All-flash, less RAM: csum_block_size=4k + inmemory_metadata=false
|
||||
3. Hybrid HDD+SSD: csum_block_size=4k + inmemory_metadata=false
|
||||
4. HDD-only, faster random read: csum_block_size=32k
|
||||
5. HDD-only, faster random write: csum_block_size=4k +
|
||||
inmemory_metadata=false + meta_io=cached
|
||||
|
||||
See also [meta_io](osd.en.md#meta_io).
|
||||
info_ru: |
|
||||
Размер блока расчёта контрольных сумм.
|
||||
|
||||
Должен быть равен или кратен [bitmap_granularity](layout-cluster.ru.md#bitmap_granularity)
|
||||
(который обычно равен 4 КБ).
|
||||
|
||||
Контрольные суммы увеличивают размер метаданных на 4 байта на каждые
|
||||
csum_block_size данных.
|
||||
|
||||
Контрольные суммы - это всегда компромисс:
|
||||
1. Вы либо жертвуете потреблением +1 ГБ памяти на 1 ТБ дискового пространства
|
||||
2. Либо вы повышаете csum_block_size до, скажем, 32k и жертвуете 50%
|
||||
скорости случайной записи из-за цикла чтения-изменения-записи для расчёта
|
||||
новых контрольных сумм
|
||||
3. Либо вы отключаете [inmemory_metadata](osd.ru.md#inmemory_metadata) и
|
||||
жертвуете 50% скорости случайного чтения из-за чтения контрольных сумм
|
||||
с диска
|
||||
|
||||
Таким образом, рекомендуются следующие варианты настроек:
|
||||
1. All-flash, 1 ГБ памяти на 1 ТБ данных: по умолчанию (csum_block_size=4k)
|
||||
2. All-flash, меньше памяти: csum_block_size=4k + inmemory_metadata=false
|
||||
3. Гибридные HDD+SSD: csum_block_size=4k + inmemory_metadata=false
|
||||
4. Только HDD, быстрее случайное чтение: csum_block_size=32k
|
||||
5. Только HDD, быстрее случайная запись: csum_block_size=4k +
|
||||
inmemory_metadata=false + meta_io=cached
|
||||
|
||||
Смотрите также [meta_io](osd.ru.md#meta_io).
|
||||
|
@@ -259,23 +259,3 @@
|
||||
detect disconnections quickly.
|
||||
info_ru: |
|
||||
Интервал проверки живости вебсокет-подключений к etcd.
|
||||
- name: client_dirty_limit
|
||||
type: int
|
||||
default: 33554432
|
||||
online: true
|
||||
info: |
|
||||
Without immediate_commit=all this parameter sets the limit of "dirty"
|
||||
(not committed by fsync) data allowed by the client before forcing an
|
||||
additional fsync and committing the data. Also note that the client always
|
||||
holds a copy of uncommitted data in memory so this setting also affects
|
||||
RAM usage of clients.
|
||||
|
||||
This parameter doesn't affect OSDs themselves.
|
||||
info_ru: |
|
||||
При работе без immediate_commit=all - это лимит объёма "грязных" (не
|
||||
зафиксированных fsync-ом) данных, при достижении которого клиент будет
|
||||
принудительно вызывать fsync и фиксировать данные. Также стоит иметь в виду,
|
||||
что в этом случае до момента fsync клиент хранит копию незафиксированных
|
||||
данных в памяти, то есть, настройка влияет на потребление памяти клиентами.
|
||||
|
||||
Параметр не влияет на сами OSD.
|
||||
|
@@ -2,15 +2,28 @@
|
||||
type: sec
|
||||
default: 5
|
||||
info: |
|
||||
Interval at which OSDs report their state to etcd. Affects OSD lease time
|
||||
Interval at which OSDs report their liveness to etcd. Affects OSD lease time
|
||||
and thus the failover speed. Lease time is equal to this parameter value
|
||||
plus max_etcd_attempts * etcd_quick_timeout because it should be guaranteed
|
||||
that every OSD always refreshes its lease in time.
|
||||
info_ru: |
|
||||
Интервал, с которым OSD обновляет своё состояние в etcd. Значение параметра
|
||||
влияет на время резервации (lease) OSD и поэтому на скорость переключения
|
||||
Интервал, с которым OSD сообщает о том, что жив, в etcd. Значение параметра
|
||||
влияет на время резервации (lease) OSD и поэтому - на скорость переключения
|
||||
при падении OSD. Время lease равняется значению этого параметра плюс
|
||||
max_etcd_attempts * etcd_quick_timeout.
|
||||
- name: etcd_stats_interval
|
||||
type: sec
|
||||
default: 30
|
||||
info: |
|
||||
Interval at which OSDs report their statistics to etcd. Highly affects the
|
||||
imposed load on etcd, because statistics include a key for every OSD and
|
||||
for every PG. At the same time, low statistic intervals make `vitastor-cli`
|
||||
statistics more responsive.
|
||||
info_ru: |
|
||||
Интервал, с которым OSD обновляет свою статистику в etcd. Сильно влияет на
|
||||
создаваемую нагрузку на etcd, потому что статистика содержит по ключу на
|
||||
каждый OSD и на каждую PG. В то же время низкий интервал делает
|
||||
статистику, печатаемую `vitastor-cli`, отзывчивей.
|
||||
- name: run_primary
|
||||
type: bool
|
||||
default: true
|
||||
@@ -260,6 +273,96 @@
|
||||
достаточно 16- или 32-мегабайтного журнала. Однако в теории отключение
|
||||
параметра может оказаться полезным для гибридных OSD (HDD+SSD) с большими
|
||||
журналами, расположенными на быстром по сравнению с HDD устройстве.
|
||||
- name: data_io
|
||||
type: string
|
||||
default: direct
|
||||
info: |
|
||||
I/O mode for *data*. One of "direct", "cached" or "directsync". Corresponds
|
||||
to O_DIRECT, O_SYNC and O_DIRECT|O_SYNC, respectively.
|
||||
|
||||
Choose "cached" to use Linux page cache. This may improve read performance
|
||||
for hot data and slower disks - HDDs and maybe SATA SSDs - but will slightly
|
||||
decrease write performance for fast disks because page cache is an overhead
|
||||
itself.
|
||||
|
||||
Choose "directsync" to use [immediate_commit](layout-cluster.en.md#immediate_commit)
|
||||
(which requires disable_data_fsync) with drives having write-back cache
|
||||
which can't be turned off, for example, Intel Optane. Also note that *some*
|
||||
desktop SSDs (for example, HP EX950) may ignore O_SYNC thus making
|
||||
disable_data_fsync unsafe even with "directsync".
|
||||
info_ru: |
|
||||
Режим ввода-вывода для *данных*. Одно из значений "direct", "cached" или
|
||||
"directsync", означающих O_DIRECT, O_SYNC и O_DIRECT|O_SYNC, соответственно.
|
||||
|
||||
Выберите "cached", чтобы использовать системный кэш Linux (page cache) при
|
||||
чтении и записи. Это может улучшить скорость чтения горячих данных с
|
||||
относительно медленных дисков - HDD и, возможно, SATA SSD - но немного
|
||||
снижает производительность записи для быстрых дисков, так как кэш сам по
|
||||
себе тоже добавляет накладные расходы.
|
||||
|
||||
Выберите "directsync", если хотите задействовать
|
||||
[immediate_commit](layout-cluster.ru.md#immediate_commit) (требующий
|
||||
включения disable_data_fsync) на дисках с неотключаемым кэшем. Пример таких
|
||||
дисков - Intel Optane. При этом также стоит иметь в виду, что *некоторые*
|
||||
настольные SSD (например, HP EX950) игнорируют флаг O_SYNC, делая отключение
|
||||
fsync небезопасным даже с режимом "directsync".
|
||||
- name: meta_io
|
||||
type: string
|
||||
default: direct
|
||||
info: |
|
||||
I/O mode for *metadata*. One of "direct", "cached" or "directsync".
|
||||
|
||||
"cached" may improve read performance, but only under the following conditions:
|
||||
1. your drives are relatively slow (HDD, SATA SSD), and
|
||||
2. checksums are enabled, and
|
||||
3. [inmemory_metadata](#inmemory_metadata) is disabled.
|
||||
Under all these conditions, metadata blocks are read from disk on every
|
||||
read request to verify checksums and caching them may reduce this extra
|
||||
read load. Without (3) metadata is never read from the disk after starting,
|
||||
and without (2) metadata blocks are read from disk only during journal
|
||||
flushing.
|
||||
|
||||
"directsync" is the same as above.
|
||||
|
||||
If the same device is used for data and metadata, meta_io by default is set
|
||||
to the same value as [data_io](#data_io).
|
||||
info_ru: |
|
||||
Режим ввода-вывода для *метаданных*. Одно из значений "direct", "cached" или
|
||||
"directsync".
|
||||
|
||||
"cached" может улучшить скорость чтения, если:
|
||||
1. у вас медленные диски (HDD, SATA SSD)
|
||||
2. контрольные суммы включены
|
||||
3. параметр [inmemory_metadata](#inmemory_metadata) отключён.
|
||||
При этих условиях блоки метаданных читаются с диска при каждом запросе чтения
|
||||
для проверки контрольных сумм и их кэширование может снизить дополнительную
|
||||
нагрузку на диск. Без (3) метаданные никогда не читаются с диска после
|
||||
запуска OSD, а без (2) блоки метаданных читаются только при сбросе журнала.
|
||||
|
||||
Если одно и то же устройство используется для данных и метаданных, режим
|
||||
ввода-вывода метаданных по умолчанию устанавливается равным [data_io](#data_io).
|
||||
- name: journal_io
|
||||
type: string
|
||||
default: direct
|
||||
info: |
|
||||
I/O mode for *journal*. One of "direct", "cached" or "directsync".
|
||||
|
||||
Here, "cached" may only improve read performance for recent writes and
|
||||
only if [inmemory_journal](#inmemory_journal) is turned off.
|
||||
|
||||
If the same device is used for metadata and journal, journal_io by default
|
||||
is set to the same value as [meta_io](#meta_io).
|
||||
info_ru: |
|
||||
Режим ввода-вывода для *журнала*. Одно из значений "direct", "cached" или
|
||||
"directsync".
|
||||
|
||||
Здесь "cached" может улучшить скорость чтения только недавно записанных
|
||||
данных и только если параметр [inmemory_journal](#inmemory_journal)
|
||||
отключён.
|
||||
|
||||
Если одно и то же устройство используется для метаданных и журнала,
|
||||
режим ввода-вывода журнала по умолчанию устанавливается равным
|
||||
[meta_io](#meta_io).
|
||||
- name: journal_sector_buffer_count
|
||||
type: int
|
||||
default: 32
|
||||
|
@@ -17,4 +17,15 @@ and apply all `NNN-*.yaml` manifests to your Kubernetes installation:
|
||||
for i in ./???-*.yaml; do kubectl apply -f $i; done
|
||||
```
|
||||
|
||||
After that you'll be able to create PersistentVolumes. See example in [csi/deploy/example-pvc.yaml](../../csi/deploy/example-pvc.yaml).
|
||||
After that you'll be able to create PersistentVolumes.
|
||||
|
||||
## Features
|
||||
|
||||
Vitastor CSI supports:
|
||||
- Kubernetes starting with 1.20 (or 1.17 for older vitastor-csi <= 1.1.0)
|
||||
- Filesystem RWO (ReadWriteOnce) volumes. Example: [PVC](../../csi/deploy/example-pvc.yaml), [pod](../../csi/deploy/example-test-pod.yaml)
|
||||
- Raw block RWX (ReadWriteMany) volumes. Example: [PVC](../../csi/deploy/example-pvc-block.yaml), [pod](../../csi/deploy/example-test-pod-block.yaml)
|
||||
- Volume expansion
|
||||
- Volume snapshots. Example: [snapshot class](../../csi/deploy/example-snapshot-class.yaml), [snapshot](../../csi/deploy/example-snapshot.yaml), [clone](../../csi/deploy/example-snapshot-clone.yaml)
|
||||
|
||||
Remember that to use snapshots with CSI you also have to install [Snapshot Controller and CRDs](https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment).
|
||||
|
@@ -8,13 +8,24 @@
|
||||
|
||||
У Vitastor есть CSI-плагин для Kubernetes, поддерживающий RWO, а также блочные RWX, тома.
|
||||
|
||||
Для установки возьмите манифесты из директории [csi/deploy/](../csi/deploy/), поместите
|
||||
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../csi/deploy/001-csi-config-map.yaml),
|
||||
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../csi/deploy/009-storage-class.yaml)
|
||||
Для установки возьмите манифесты из директории [csi/deploy/](../../csi/deploy/), поместите
|
||||
вашу конфигурацию подключения к Vitastor в [csi/deploy/001-csi-config-map.yaml](../../csi/deploy/001-csi-config-map.yaml),
|
||||
настройте StorageClass в [csi/deploy/009-storage-class.yaml](../../csi/deploy/009-storage-class.yaml)
|
||||
и примените все `NNN-*.yaml` к вашей инсталляции Kubernetes.
|
||||
|
||||
```
|
||||
for i in ./???-*.yaml; do kubectl apply -f $i; done
|
||||
```
|
||||
|
||||
После этого вы сможете создавать PersistentVolume. Пример смотрите в файле [csi/deploy/example-pvc.yaml](../csi/deploy/example-pvc.yaml).
|
||||
После этого вы сможете создавать PersistentVolume.
|
||||
|
||||
## Возможности
|
||||
|
||||
CSI-плагин Vitastor поддерживает:
|
||||
- Версии Kubernetes, начиная с 1.20 (или с 1.17 для более старых vitastor-csi <= 1.1.0)
|
||||
- Файловые RWO (ReadWriteOnce) тома. Пример: [PVC](../../csi/deploy/example-pvc.yaml), [под](../../csi/deploy/example-test-pod.yaml)
|
||||
- Сырые блочные RWX (ReadWriteMany) тома. Пример: [PVC](../../csi/deploy/example-pvc-block.yaml), [под](../../csi/deploy/example-test-pod-block.yaml)
|
||||
- Расширение размера томов
|
||||
- Снимки томов. Пример: [класс снимков](../../csi/deploy/example-snapshot-class.yaml), [снимок](../../csi/deploy/example-snapshot.yaml), [клон снимка](../../csi/deploy/example-snapshot-clone.yaml)
|
||||
|
||||
Не забывайте, что для использования снимков нужно сначала установить [контроллер снимков и CRD](https://kubernetes-csi.github.io/docs/snapshot-controller.html#deployment).
|
||||
|
@@ -36,5 +36,5 @@ vitastor_pool_id = 1
|
||||
image_upload_use_cinder_backend = True
|
||||
```
|
||||
|
||||
To put Glance images in Vitastor, use [https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](volume-backed images),
|
||||
To put Glance images in Vitastor, use [volume-backed images](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
||||
although the support has not been verified yet.
|
||||
|
@@ -36,5 +36,5 @@ image_upload_use_cinder_backend = True
|
||||
```
|
||||
|
||||
Чтобы помещать в Vitastor Glance-образы, нужно использовать
|
||||
[https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html](образы на основе томов Cinder),
|
||||
[образы на основе томов Cinder](https://docs.openstack.org/cinder/pike/admin/blockstorage-volume-backed-image.html),
|
||||
однако, поддержка этой функции ещё не проверялась.
|
||||
|
@@ -11,8 +11,11 @@
|
||||
- Trust Vitastor package signing key:
|
||||
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||
- Add Vitastor package repository to your /etc/apt/sources.list:
|
||||
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
|
||||
stable version from 0.9.x branch instead of 1.x
|
||||
- For Debian 10 (Buster) also enable backports repository:
|
||||
`deb http://deb.debian.org/debian buster-backports main`
|
||||
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu`
|
||||
|
@@ -11,8 +11,11 @@
|
||||
- Добавьте ключ репозитория Vitastor:
|
||||
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
||||
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
|
||||
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
|
||||
- Для Debian 10 (Buster) также включите репозиторий backports:
|
||||
`deb http://deb.debian.org/debian buster-backports main`
|
||||
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu`
|
||||
|
@@ -6,10 +6,10 @@
|
||||
|
||||
# Proxmox VE
|
||||
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-7.4 are supported):
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.0 are supported):
|
||||
|
||||
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
||||
buster for 6.4, bullseye for 7.4, pve7.1 for 7.1, pve7.2 for 7.2, pve7.3 for 7.3
|
||||
bookworm for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
||||
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
||||
- Define storage in `/etc/pve/storage.cfg` (see below)
|
||||
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
||||
@@ -35,5 +35,5 @@ vitastor: vitastor
|
||||
vitastor_nbd 0
|
||||
```
|
||||
|
||||
\* Note: you can also manually copy [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to Proxmox hosts
|
||||
\* Note: you can also manually copy [patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) to Proxmox hosts
|
||||
as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.
|
||||
|
@@ -1,15 +1,15 @@
|
||||
[Документация](../../README-ru.md#документация) → Установка → Proxmox
|
||||
[Документация](../../README-ru.md#документация) → Установка → Proxmox VE
|
||||
|
||||
-----
|
||||
|
||||
[Read in English](proxmox.en.md)
|
||||
|
||||
# Proxmox
|
||||
# Proxmox VE
|
||||
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-7.4):
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.0):
|
||||
|
||||
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
||||
buster для 6.4, bullseye для 7.4, pve7.1 для 7.1, pve7.2 для 7.2, pve7.3 для 7.3
|
||||
bookworm для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
||||
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
||||
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
||||
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
||||
@@ -35,5 +35,5 @@ vitastor: vitastor
|
||||
```
|
||||
|
||||
\* Примечание: вместо установки пакета pve-storage-vitastor вы можете вручную скопировать файл
|
||||
[patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) на хосты Proxmox как
|
||||
[patches/VitastorPlugin.pm](../../patches/VitastorPlugin.pm) на хосты Proxmox как
|
||||
`/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.
|
||||
|
@@ -21,7 +21,7 @@
|
||||
|
||||
## Basic instructions
|
||||
|
||||
Download source, for example using git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
|
||||
Download source, for example using git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
|
||||
|
||||
Get `fio` source and symlink it into `<vitastor>/fio`. If you don't want to build fio engine,
|
||||
you can disable it by passing `-DWITH_FIO=no` to cmake.
|
||||
@@ -41,7 +41,7 @@ It's recommended to build the QEMU driver (qemu_driver.c) in-tree, as a part of
|
||||
QEMU build process. To do that:
|
||||
- Install vitastor client library headers (from source or from vitastor-client-dev package)
|
||||
- Take a corresponding patch from `patches/qemu-*-vitastor.patch` and apply it to QEMU source
|
||||
- Copy `src/qemu_driver.c` to QEMU source directory as `block/block-vitastor.c`
|
||||
- Copy `src/qemu_driver.c` to QEMU source directory as `block/vitastor.c`
|
||||
- Build QEMU as usual
|
||||
|
||||
But it is also possible to build it out-of-tree. To do that:
|
||||
|
@@ -21,7 +21,7 @@
|
||||
|
||||
## Базовая инструкция
|
||||
|
||||
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://yourcmc.ru/git/vitalif/vitastor/`
|
||||
Скачайте исходные коды, например, из git: `git clone --recurse-submodules https://git.yourcmc.ru/vitalif/vitastor/`
|
||||
|
||||
Скачайте исходные коды пакета `fio`, распакуйте их и создайте символическую ссылку на них
|
||||
в директории исходников Vitastor: `<vitastor>/fio`. Либо, если вы не хотите собирать плагин fio,
|
||||
@@ -41,7 +41,7 @@ cmake .. && make -j8 install
|
||||
Драйвер QEMU (qemu_driver.c) рекомендуется собирать вместе с самим QEMU. Для этого:
|
||||
- Установите заголовки клиентской библиотеки Vitastor (из исходников или из пакета vitastor-client-dev)
|
||||
- Возьмите соответствующий патч из `patches/qemu-*-vitastor.patch` и примените его к исходникам QEMU
|
||||
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/block-vitastor.c`
|
||||
- Скопируйте [src/qemu_driver.c](../../src/qemu_driver.c) в директорию исходников QEMU как `block/vitastor.c`
|
||||
- Соберите QEMU как обычно
|
||||
|
||||
Однако в целях отладки драйвер также можно собирать отдельно от QEMU. Для этого:
|
||||
@@ -60,7 +60,7 @@ cmake .. && make -j8 install
|
||||
* Для QEMU 2.0+: `<qemu>/qapi-types.h` → `<vitastor>/qemu/b/qemu/qapi-types.h`
|
||||
- `config-host.h` и `qapi` нужны, т.к. в них содержатся автогенерируемые заголовки
|
||||
- Сконфигурируйте cmake Vitastor с `WITH_QEMU=yes` (`cmake .. -DWITH_QEMU=yes`) и, если вы
|
||||
используете RHEL-подобый дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
|
||||
используете RHEL-подобный дистрибутив, также с `QEMU_PLUGINDIR=qemu-kvm`.
|
||||
- После этого в процессе сборки Vitastor также будет собираться подходящий для вашей
|
||||
версии QEMU `block-vitastor.so`.
|
||||
- Таким образом можно использовать драйвер даже с немодифицированным QEMU, но в этом случае
|
||||
|
@@ -44,7 +44,7 @@
|
||||
depends linearly on drive capacity and data store block size which is 128 KB by default.
|
||||
With 128 KB blocks metadata takes around 512 MB per 1 TB (which is still less than Ceph wants).
|
||||
Journal is also kept in memory by default, but in SSD-only clusters it's only 32 MB, and in SSD+HDD
|
||||
clusters, where it's beneficial to increase it, [inmemory_journal](docs/config/osd.en.md#inmemory_journal) can be disabled.
|
||||
clusters, where it's beneficial to increase it, [inmemory_journal](../config/osd.en.md#inmemory_journal) can be disabled.
|
||||
- Vitastor storage layer doesn't have internal copy-on-write or redirect-write. I know that maybe
|
||||
it's possible to create a good copy-on-write storage, but it's much harder and makes performance
|
||||
less deterministic, so CoW isn't used in Vitastor.
|
||||
|
@@ -156,7 +156,7 @@
|
||||
блока хранилища (block_size, по умолчанию 128 КБ). С 128 КБ блоком потребление памяти
|
||||
составляет примерно 512 МБ на 1 ТБ данных. Журналы по умолчанию тоже хранятся в памяти,
|
||||
но в SSD-кластерах нужный размер журнала составляет всего 32 МБ, а в гибридных (SSD+HDD)
|
||||
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../docs/config/osd.ru.md#inmemory_journal).
|
||||
кластерах, в которых есть смысл делать журналы больше, можно отключить [inmemory_journal](../config/osd.ru.md#inmemory_journal).
|
||||
- В Vitastor нет внутреннего copy-on-write. Я считаю, что реализация CoW-хранилища гораздо сложнее,
|
||||
поэтому сложнее добиться устойчиво хороших результатов. Возможно, в один прекрасный день
|
||||
я придумаю красивый алгоритм для CoW-хранилища, но пока нет — внутреннего CoW в Vitastor не будет.
|
||||
|
@@ -29,13 +29,15 @@
|
||||
- Snapshots and copy-on-write image clones
|
||||
- [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
|
||||
- [RDMA/RoCEv2 support via libibverbs](../config/network.en.md#rdma_device)
|
||||
- [Scrubbing without checksums](../config/osd.en.md#auto_scrub) (verification of copies)
|
||||
- [Scrubbing](../config/osd.en.md#auto_scrub) (verification of copies)
|
||||
- [Checksums](../config/layout-osd.en.md#data_csum_type)
|
||||
- [Client write-back cache](../config/client.en.md#client_enable_writeback)
|
||||
|
||||
## Plugins and tools
|
||||
|
||||
- [Debian and CentOS packages](../installation/packages.en.md)
|
||||
- [Image management CLI (vitastor-cli)](../usage/cli.en.md)
|
||||
- [Disk management CLI (vitastor-disk)](docs/usage/disk.en.md)
|
||||
- [Disk management CLI (vitastor-disk)](../usage/disk.en.md)
|
||||
- Generic user-space client library
|
||||
- [Native QEMU driver](../usage/qemu.en.md)
|
||||
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
|
||||
@@ -49,14 +51,15 @@
|
||||
|
||||
The following features are planned for the future:
|
||||
|
||||
- File system
|
||||
- Control plane optimisation
|
||||
- Other administrative tools
|
||||
- Web GUI
|
||||
- OpenNebula plugin
|
||||
- iSCSI proxy
|
||||
- iSCSI and NVMeoF gateways
|
||||
- Multi-threaded client
|
||||
- Faster failover
|
||||
- Checksums
|
||||
- S3
|
||||
- Tiered storage (SSD caching)
|
||||
- NVDIMM support
|
||||
- Compression (possibly)
|
||||
- Read caching using system page cache (possibly)
|
||||
|
@@ -13,7 +13,7 @@
|
||||
## Серверные функции
|
||||
|
||||
- Базовая часть - надёжное кластерное блочное хранилище без единой точки отказа
|
||||
- [Производительность](../comparison1.ru.md) ;-D
|
||||
- [Производительность](../performance/comparison1.ru.md) ;-D
|
||||
- [Несколько схем отказоустойчивости](../config/pool.ru.md#scheme): репликация, XOR n+1 (1 диск чётности), коды коррекции ошибок
|
||||
Рида-Соломона на основе библиотек jerasure и ISA-L с любым числом дисков данных и чётности в группе
|
||||
- Конфигурация через простые человекочитаемые JSON-структуры в etcd
|
||||
@@ -31,13 +31,15 @@
|
||||
- Снапшоты и copy-on-write клоны
|
||||
- [Сглаживание производительности случайной записи в SSD+HDD конфигурациях](../config/osd.ru.md#throttle_small_writes)
|
||||
- [Поддержка RDMA/RoCEv2 через libibverbs](../config/network.ru.md#rdma_device)
|
||||
- [Фоновая проверка целостности без контрольных сумм](../config/osd.ru.md#auto_scrub) (сверка копий)
|
||||
- [Фоновая проверка целостности](../config/osd.ru.md#auto_scrub) (сверка копий)
|
||||
- [Контрольные суммы](../config/layout-osd.ru.md#data_csum_type)
|
||||
- [Буферизация записи на стороне клиента](../config/client.ru.md#client_enable_writeback)
|
||||
|
||||
## Драйверы и инструменты
|
||||
|
||||
- [Пакеты для Debian и CentOS](../installation/packages.ru.md)
|
||||
- [Консольный интерфейс управления образами (vitastor-cli)](../usage/cli.ru.md)
|
||||
- [Инструмент управления дисками (vitastor-disk)](docs/usage/disk.ru.md)
|
||||
- [Инструмент управления дисками (vitastor-disk)](../usage/disk.ru.md)
|
||||
- Общая пользовательская клиентская библиотека для работы с кластером
|
||||
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
|
||||
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
|
||||
@@ -49,13 +51,15 @@
|
||||
|
||||
## Планы развития
|
||||
|
||||
- Файловая система
|
||||
- Оптимизация слоя управления
|
||||
- Другие инструменты администрирования
|
||||
- Web-интерфейс
|
||||
- Плагин для OpenNebula
|
||||
- iSCSI-прокси
|
||||
- iSCSI и NVMeoF прокси
|
||||
- Многопоточный клиент
|
||||
- Более быстрое переключение при отказах
|
||||
- Контрольные суммы
|
||||
- S3
|
||||
- Поддержка SSD-кэширования (tiered storage)
|
||||
- Поддержка NVDIMM
|
||||
- Возможно, сжатие
|
||||
|
@@ -7,6 +7,7 @@
|
||||
# Quick Start
|
||||
|
||||
- [Preparation](#preparation)
|
||||
- [Recommended drives](#recommended-drives)
|
||||
- [Configure monitors](#configure-monitors)
|
||||
- [Configure OSDs](#configure-osds)
|
||||
- [Create a pool](#create-a-pool)
|
||||
@@ -19,10 +20,20 @@
|
||||
- Get some SATA or NVMe SSDs with capacitors (server-grade drives). You can use desktop SSDs
|
||||
with lazy fsync, but prepare for inferior single-thread latency. Read more about capacitors
|
||||
[here](../config/layout-cluster.en.md#immediate_commit).
|
||||
- If you want to use HDDs, get modern HDDs with Media Cache or SSD Cache: HGST Ultrastar,
|
||||
Toshiba MG08, Seagate EXOS or something similar. If your drives don't have such cache then
|
||||
you also need small SSDs for journal and metadata (even 2 GB per 1 TB of HDD space is enough).
|
||||
- Get a fast network (at least 10 Gbit/s). Something like Mellanox ConnectX-4 with RoCEv2 is ideal.
|
||||
- Disable CPU powersaving: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||
- [Install Vitastor packages](../installation/packages.en.md).
|
||||
|
||||
## Recommended drives
|
||||
|
||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
||||
|
||||
## Configure monitors
|
||||
|
||||
On the monitor hosts:
|
||||
@@ -45,9 +56,10 @@ On the monitor hosts:
|
||||
}
|
||||
```
|
||||
- Initialize OSDs:
|
||||
- SSD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. You can add
|
||||
`--disable_data_fsync off` to leave disk cache enabled if you use desktop
|
||||
SSDs without capacitors.
|
||||
- SSD-only or HDD-only: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Add `--disable_data_fsync off` to leave disk write cache enabled if you use
|
||||
desktop SSDs without capacitors. Do NOT add `--disable_data_fsync off` if you
|
||||
use HDDs or SSD+HDD.
|
||||
- Hybrid, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Pass all your devices (HDD and SSD) to this script — it will partition disks and initialize journals on its own.
|
||||
This script skips HDDs which are already partitioned so if you want to use non-empty disks for
|
||||
|
@@ -7,6 +7,7 @@
|
||||
# Быстрый старт
|
||||
|
||||
- [Подготовка](#подготовка)
|
||||
- [Рекомендуемые диски](#рекомендуемые-диски)
|
||||
- [Настройте мониторы](#настройте-мониторы)
|
||||
- [Настройте OSD](#настройте-osd)
|
||||
- [Создайте пул](#создайте-пул)
|
||||
@@ -19,10 +20,20 @@
|
||||
- Возьмите серверы с SSD (SATA или NVMe), желательно с конденсаторами (серверные SSD). Можно
|
||||
использовать и десктопные SSD, включив режим отложенного fsync, но производительность будет хуже.
|
||||
О конденсаторах читайте [здесь](../config/layout-cluster.ru.md#immediate_commit).
|
||||
- Если хотите использовать HDD, берите современные модели с Media или SSD кэшем - HGST Ultrastar,
|
||||
Toshiba MG08, Seagate EXOS или что-то похожее. Если такого кэша у ваших дисков нет,
|
||||
обязательно возьмите SSD под метаданные и журнал (маленькие, буквально 2 ГБ на 1 ТБ HDD-места).
|
||||
- Возьмите быструю сеть, минимум 10 гбит/с. Идеал - что-то вроде Mellanox ConnectX-4 с RoCEv2.
|
||||
- Для лучшей производительности отключите энергосбережение CPU: `cpupower idle-set -D 0 && cpupower frequency-set -g performance`.
|
||||
- [Установите пакеты Vitastor](../installation/packages.ru.md).
|
||||
|
||||
## Рекомендуемые диски
|
||||
|
||||
- SATA SSD: Micron 5100/5200/5300/5400, Samsung PM863/PM883/PM893, Intel D3-S4510/4520/4610/4620, Kingston DC500M
|
||||
- NVMe: Micron 9100/9200/9300/9400, Micron 7300/7450, Samsung PM983/PM9A3, Samsung PM1723/1735/1743,
|
||||
Intel DC-P3700/P4500/P4600, Intel D7-P5500/P5600, Intel Optane, Kingston DC1000B/DC1500M
|
||||
- HDD: HGST Ultrastar, Toshiba MG06/MG07/MG08, Seagate EXOS
|
||||
|
||||
## Настройте мониторы
|
||||
|
||||
На хостах, выделенных под мониторы:
|
||||
@@ -45,9 +56,10 @@
|
||||
}
|
||||
```
|
||||
- Инициализируйте OSD:
|
||||
- SSD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`. Если вы используете
|
||||
десктопные SSD без конденсаторов, можете оставить кэш включённым, добавив
|
||||
опцию `--disable_data_fsync off`.
|
||||
- Только SSD или только HDD: `vitastor-disk prepare /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Если вы используете десктопные SSD без конденсаторов, добавьте опцию `--disable_data_fsync off`,
|
||||
чтобы оставить кэш записи диска включённым. НЕ добавляйте эту опцию, если используете
|
||||
жёсткие диски (HDD).
|
||||
- Гибридные, SSD+HDD: `vitastor-disk prepare --hybrid /dev/sdXXX [/dev/sdYYY ...]`.
|
||||
Передайте все ваши SSD и HDD скрипту в командной строке подряд, скрипт автоматически выделит
|
||||
разделы под журналы на SSD и данные на HDD. Скрипт пропускает HDD, на которых уже есть разделы
|
||||
|
@@ -86,6 +86,8 @@ Options (both modes):
|
||||
--journal_size 1G/32M Set journal size (area or partition size)
|
||||
--block_size 1M/128k Set blockstore object size
|
||||
--bitmap_granularity 4k Set bitmap granularity
|
||||
--data_csum_type none Set data checksum type (crc32c or none)
|
||||
--csum_block_size 4k Set data checksum block size
|
||||
--data_device_block 4k Override data device block size
|
||||
--meta_device_block 4k Override metadata device block size
|
||||
--journal_device_block 4k Override journal device block size
|
||||
@@ -100,8 +102,9 @@ checks the device cache status on start and tries to disable cache for SATA/SAS
|
||||
If it doesn't succeed it issues a warning in the system log.
|
||||
|
||||
You can also pass other OSD options here as arguments and they'll be persisted
|
||||
to the superblock: max_write_iodepth, max_write_iodepth, min_flusher_count,
|
||||
max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,
|
||||
in the superblock: cached_io_data, cached_io_meta, cached_io_journal,
|
||||
inmemory_metadata, inmemory_journal, max_write_iodepth,
|
||||
min_flusher_count, max_flusher_count, journal_sector_buffer_count,
|
||||
journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,
|
||||
throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
|
||||
See [Runtime OSD Parameters](../config/osd.en.md) for details.
|
||||
@@ -249,7 +252,9 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
|
||||
```
|
||||
--object_size 128k Set blockstore block size
|
||||
--bitmap_granularity 4k Set bitmap granularity
|
||||
--journal_size 32M Set journal size
|
||||
--journal_size 16M Set journal size
|
||||
--data_csum_type none Set data checksum type (crc32c or none)
|
||||
--csum_block_size 4k Set data checksum block size
|
||||
--device_block_size 4k Set device block size
|
||||
--journal_offset 0 Set journal offset
|
||||
--device_size 0 Set device size
|
||||
|
@@ -87,6 +87,8 @@ vitastor-disk - инструмент командной строки для уп
|
||||
--journal_size 1G/32M Задать размер журнала (области или раздела журнала)
|
||||
--block_size 1M/128k Задать размер объекта хранилища
|
||||
--bitmap_granularity 4k Задать гранулярность битовых карт
|
||||
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
|
||||
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
|
||||
--data_device_block 4k Задать размер блока устройства данных
|
||||
--meta_device_block 4k Задать размер блока метаданных
|
||||
--journal_device_block 4k Задать размер блока журнала
|
||||
@@ -101,8 +103,9 @@ vitastor-disk - инструмент командной строки для уп
|
||||
это не удаётся, в системный журнал выводится предупреждение.
|
||||
|
||||
Вы можете передать данной команде и некоторые другие опции OSD в качестве аргументов
|
||||
и они тоже будут сохранены в суперблок: max_write_iodepth, max_write_iodepth, min_flusher_count,
|
||||
max_flusher_count, inmemory_metadata, inmemory_journal, journal_sector_buffer_count,
|
||||
и они тоже будут сохранены в суперблок: cached_io_data, cached_io_meta,
|
||||
cached_io_journal, inmemory_metadata, inmemory_journal, max_write_iodepth,
|
||||
min_flusher_count, max_flusher_count, journal_sector_buffer_count,
|
||||
journal_no_same_sector_overwrites, throttle_small_writes, throttle_target_iops,
|
||||
throttle_target_mbs, throttle_target_parallelism, throttle_threshold_us.
|
||||
Читайте об этих параметрах подробнее в разделе [Изменяемые параметры OSD](../config/osd.ru.md).
|
||||
@@ -254,7 +257,9 @@ OSD отключены fsync-и.
|
||||
```
|
||||
--object_size 128k Размер блока хранилища
|
||||
--bitmap_granularity 4k Гранулярность битовых карт
|
||||
--journal_size 32M Размер журнала
|
||||
--journal_size 16M Размер журнала
|
||||
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
|
||||
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
|
||||
--device_block_size 4k Размер блока устройства
|
||||
--journal_offset 0 Смещение журнала
|
||||
--device_size 0 Размер устройства
|
||||
|
@@ -13,6 +13,8 @@ remains decent (see an example [here](../performance/comparison1.en.md#vitastor-
|
||||
|
||||
Vitastor Kubernetes CSI driver is based on NBD.
|
||||
|
||||
See also [VDUSE](qemu.en.md#vduse).
|
||||
|
||||
## Map image
|
||||
|
||||
To create a local block device for a Vitastor image run:
|
||||
|
@@ -16,6 +16,8 @@ NBD немного снижает производительность из-за
|
||||
|
||||
CSI-драйвер Kubernetes Vitastor основан на NBD.
|
||||
|
||||
Смотрите также [VDUSE](qemu.ru.md#vduse).
|
||||
|
||||
## Подключить устройство
|
||||
|
||||
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
||||
|
@@ -29,7 +29,7 @@ vitastor-nfs [--etcd_address ADDR] [ДРУГИЕ ОПЦИИ]
|
||||
--bind <IP> принимать соединения по адресу <IP> (по умолчанию 0.0.0.0 - на всех)
|
||||
--nfspath <PATH> установить путь NFS-экспорта в <PATH> (по умолчанию /)
|
||||
--port <PORT> использовать порт <PORT> для NFS-сервисов (по умолчанию 2049)
|
||||
--pool <POOL> исползовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
|
||||
--pool <POOL> использовать пул <POOL> для новых образов (обязательно, если пул в кластере не один)
|
||||
--foreground 1 не уходить в фон после запуска
|
||||
```
|
||||
|
||||
|
@@ -34,6 +34,20 @@ qemu-system-x86_64 -enable-kvm -m 1024 \
|
||||
-vnc 0.0.0.0:0
|
||||
```
|
||||
|
||||
With a separate I/O thread:
|
||||
|
||||
```
|
||||
qemu-system-x86_64 -enable-kvm -m 1024 \
|
||||
-object iothread,id=vitastor1 \
|
||||
-blockdev '{"node-name":"drive-virtio-disk0","driver":"vitastor","image":"debian9",
|
||||
"cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
|
||||
-device 'virtio-blk-pci,iothread=vitastor1,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,
|
||||
id=virtio-disk0,bootindex=1,write-cache=off' \
|
||||
-vnc 0.0.0.0:0
|
||||
```
|
||||
|
||||
You can also specify inode ID, pool and size manually instead of `:image=<IMAGE>` option: `:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
|
||||
|
||||
## qemu-img
|
||||
|
||||
For qemu-img, you should use `vitastor:etcd_host=<HOST>:image=<IMAGE>` as filename.
|
||||
@@ -83,3 +97,67 @@ qemu-img rebase -u -b '' testimg.qcow2
|
||||
This can be used for backups. Just note that exporting an image that is currently being written to
|
||||
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
|
||||
on a live VM.
|
||||
|
||||
## vhost-user-blk
|
||||
|
||||
QEMU, starting with 6.0, includes support for attaching disks via a separate
|
||||
userspace worker process, called `vhost-user-blk`. It usually has slightly (20-30 us)
|
||||
lower latency.
|
||||
|
||||
Example commands to use it with Vitastor:
|
||||
|
||||
```
|
||||
qemu-storage-daemon \
|
||||
--daemonize \
|
||||
--blockdev '{"node-name":"drive-virtio-disk1","driver":"vitastor","image":"testosd1","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
|
||||
--export type=vhost-user-blk,id=vitastor1,node-name=drive-virtio-disk1,addr.type=unix,addr.path=/run/vitastor1-user-blk.sock,writable=on,num-queues=1
|
||||
|
||||
qemu-system-x86_64 -enable-kvm -m 2048 -M accel=kvm,memory-backend=mem \
|
||||
-object memory-backend-memfd,id=mem,size=2G,share=on \
|
||||
-chardev socket,id=vitastor1,reconnect=1,path=/run/vitastor1-user-blk.sock \
|
||||
-device vhost-user-blk-pci,chardev=vitastor1,num-queues=1,config-wce=off \
|
||||
-vnc 0.0.0.0:0
|
||||
```
|
||||
|
||||
memfd memory-backend is crucial, vhost-user-blk does not work without it.
|
||||
|
||||
## VDUSE
|
||||
|
||||
Linux kernel, starting with version 5.15, supports a new interface for attaching virtual disks
|
||||
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
|
||||
exporting QEMU block devices over this protocol using qemu-storage-daemon.
|
||||
|
||||
VDUSE has the same problem as other FUSE-like interfaces in Linux: if a userspace process hangs,
|
||||
for example, if it loses connectivity with the Vitastor cluster, active processes doing I/O may
|
||||
hang in the D state (uninterruptible sleep) and you won't be able to kill them even with kill -9.
|
||||
In this case a reboot will be the only way to remove VDUSE devices from the system.
|
||||
|
||||
On the other hand, VDUSE is faster than [NBD](nbd.en.md), so you may prefer to use it if
|
||||
performance is important for you. Approximate performance numbers:
|
||||
direct fio benchmark - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
|
||||
|
||||
To try VDUSE you need at least Linux 5.15, built with VDUSE support
|
||||
(CONFIG_VIRTIO_VDPA=m and CONFIG_VDPA_USER=m). Debian Linux kernels have these options
|
||||
disabled for now, so if you want to try it on Debian, use a kernel from Ubuntu
|
||||
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) or Proxmox.
|
||||
|
||||
Commands to attach Vitastor image as a VDUSE device:
|
||||
|
||||
```
|
||||
modprobe vduse
|
||||
modprobe virtio-vdpa
|
||||
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
|
||||
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
|
||||
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
|
||||
vdpa dev add name test1 mgmtdev vduse
|
||||
```
|
||||
|
||||
After running these commands /dev/vda device will appear in the system and you'll be able to
|
||||
use it as a normal disk.
|
||||
|
||||
To remove the device:
|
||||
|
||||
```
|
||||
vdpa dev del test1
|
||||
kill <qemu-storage-daemon_process_PID>
|
||||
```
|
||||
|
@@ -36,6 +36,18 @@ qemu-system-x86_64 -enable-kvm -m 1024 \
|
||||
-vnc 0.0.0.0:0
|
||||
```
|
||||
|
||||
С отдельным потоком ввода-вывода:
|
||||
|
||||
```
|
||||
qemu-system-x86_64 -enable-kvm -m 1024 \
|
||||
-object iothread,id=vitastor1 \
|
||||
-blockdev '{"node-name":"drive-virtio-disk0","driver":"vitastor","image":"debian9",
|
||||
"cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
|
||||
-device 'virtio-blk-pci,iothread=vitastor1,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,
|
||||
id=virtio-disk0,bootindex=1,write-cache=off' \
|
||||
-vnc 0.0.0.0:0
|
||||
```
|
||||
|
||||
Вместо `:image=<IMAGE>` также можно указывать номер инода, пул и размер: `:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
|
||||
|
||||
## qemu-img
|
||||
@@ -87,3 +99,67 @@ qemu-img rebase -u -b '' testimg.qcow2
|
||||
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
|
||||
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
|
||||
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
|
||||
|
||||
## vhost-user-blk
|
||||
|
||||
QEMU, начиная с 6.0, позволяет подключать диски через отдельный рабочий процесс.
|
||||
Этот метод подключения называется `vhost-user-blk` и обычно имеет чуть меньшую
|
||||
задержку (ниже на 20-30 микросекунд, чем при обычном методе).
|
||||
|
||||
Пример команд для использования vhost-user-blk с Vitastor:
|
||||
|
||||
```
|
||||
qemu-storage-daemon \
|
||||
--daemonize \
|
||||
--blockdev '{"node-name":"drive-virtio-disk1","driver":"vitastor","image":"testosd1","cache":{"direct":true,"no-flush":false},"auto-read-only":true,"discard":"unmap"}' \
|
||||
--export type=vhost-user-blk,id=vitastor1,node-name=drive-virtio-disk1,addr.type=unix,addr.path=/run/vitastor1-user-blk.sock,writable=on,num-queues=1
|
||||
|
||||
qemu-system-x86_64 -enable-kvm -m 2048 -M accel=kvm,memory-backend=mem \
|
||||
-object memory-backend-memfd,id=mem,size=2G,share=on \
|
||||
-chardev socket,id=vitastor1,reconnect=1,path=/run/vitastor1-user-blk.sock \
|
||||
-device vhost-user-blk-pci,chardev=vitastor1,num-queues=1,config-wce=off \
|
||||
-vnc 0.0.0.0:0
|
||||
```
|
||||
|
||||
Здесь критична опция memory-backend-memfd, vhost-user-blk без неё не работает.
|
||||
|
||||
## VDUSE
|
||||
|
||||
В Linux, начиная с версии ядра 5.15, доступен новый интерфейс для подключения виртуальных дисков
|
||||
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
|
||||
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
|
||||
|
||||
VDUSE страдает общей проблемой FUSE-подобных интерфейсов в Linux: если пользовательский процесс
|
||||
подвиснет, например, если будет потеряна связь с кластером Vitastor - читающие/пишущие в кластер
|
||||
процессы могут "залипнуть" в состоянии D (непрерываемый сон) и их будет невозможно убить даже
|
||||
через kill -9. В этом случае удалить из системы устройство можно только перезагрузившись.
|
||||
|
||||
С другой стороны, VDUSE быстрее по сравнению с [NBD](nbd.ru.md), поэтому его может
|
||||
быть предпочтительно использовать там, где производительность важнее. Порядок показателей:
|
||||
прямое тестирование через fio - 115000 iops, NBD - 60000 iops, VDUSE - 90000 iops.
|
||||
|
||||
Чтобы использовать VDUSE, вам нужно ядро Linux версии хотя бы 5.15, собранное с поддержкой
|
||||
VDUSE (CONFIG_VIRTIO_VDPA=m и CONFIG_VDPA_USER=m). В ядрах в Debian Linux поддержка пока
|
||||
отключена - если хотите попробовать эту функцию на Debian, поставьте ядро из Ubuntu
|
||||
[kernel-ppa/mainline](https://kernel.ubuntu.com/~kernel-ppa/mainline/) или из Proxmox.
|
||||
|
||||
Команды для подключения виртуального диска через VDUSE:
|
||||
|
||||
```
|
||||
modprobe vduse
|
||||
modprobe virtio-vdpa
|
||||
qemu-storage-daemon --daemonize --blockdev '{"node-name":"test1","driver":"vitastor",\
|
||||
"etcd-host":"192.168.7.2:2379/v3","image":"testosd1","cache":{"direct":true,"no-flush":false},"discard":"unmap"}' \
|
||||
--export vduse-blk,id=test1,node-name=test1,name=test1,num-queues=16,queue-size=128,writable=true
|
||||
vdpa dev add name test1 mgmtdev vduse
|
||||
```
|
||||
|
||||
После этого в системе появится устройство /dev/vda, которое можно будет использовать как
|
||||
обычный диск.
|
||||
|
||||
Для удаления устройства из системы:
|
||||
|
||||
```
|
||||
vdpa dev del test1
|
||||
kill <PID_процесса_qemu-storage-daemon>
|
||||
```
|
||||
|
2
json11
2
json11
Submodule json11 updated: fd37016cf8...52a3af664f
@@ -3,5 +3,5 @@ SUBSYSTEM=="block", ENV{ID_PART_ENTRY_TYPE}=="e7009fac-a5a1-4d72-af72-53de130599
|
||||
IMPORT{program}="/usr/bin/vitastor-disk udev $devnode", \
|
||||
SYMLINK+="vitastor/$env{VITASTOR_ALIAS}"
|
||||
|
||||
ENV{VITASTOR_OSD_NUM}!="", ACTION=="add", RUN{program}+="/usr/bin/systemctl enable --now vitastor-osd@$env{VITASTOR_OSD_NUM}"
|
||||
ENV{VITASTOR_OSD_NUM}!="", ACTION=="remove", RUN{program}+="/usr/bin/systemctl disable --now vitastor-osd@$env{VITASTOR_OSD_NUM}"
|
||||
ENV{VITASTOR_OSD_NUM}!="", ACTION=="add", RUN{program}+="/usr/bin/systemctl enable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
|
||||
ENV{VITASTOR_OSD_NUM}!="", ACTION=="remove", RUN{program}+="/usr/bin/systemctl disable --now --no-block vitastor-osd@$env{VITASTOR_OSD_NUM}"
|
||||
|
@@ -63,8 +63,9 @@ Wants=network-online.target local-fs.target time-sync.target
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
ExecStart=/usr/local/bin/etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
|
||||
--advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
|
||||
Environment=GOGC=50
|
||||
ExecStart=etcd -name etcd${num} --data-dir /var/lib/etcd${num}.etcd \\
|
||||
--snapshot-count 10000 --advertise-client-urls http://${etcds[num]}:2379 --listen-client-urls http://${etcds[num]}:2379 \\
|
||||
--initial-advertise-peer-urls http://${etcds[num]}:2380 --listen-peer-urls http://${etcds[num]}:2380 \\
|
||||
--initial-cluster-token vitastor-etcd-1 --initial-cluster ${etcd_cluster} \\
|
||||
--initial-cluster-state new --max-txn-ops=100000 --max-request-bytes=104857600 \\
|
||||
|
228
mon/mon.js
228
mon/mon.js
@@ -78,9 +78,15 @@ const etcd_tree = {
|
||||
disk_alignment: 4096,
|
||||
bitmap_granularity: 4096,
|
||||
immediate_commit: false, // 'all' or 'small'
|
||||
// client - configurable online
|
||||
client_max_dirty_bytes: 33554432,
|
||||
client_max_dirty_ops: 1024,
|
||||
client_enable_writeback: false,
|
||||
client_max_buffered_bytes: 33554432,
|
||||
client_max_buffered_ops: 1024,
|
||||
client_max_writeback_iodepth: 256,
|
||||
// client and osd - configurable online
|
||||
log_level: 0,
|
||||
client_dirty_limit: 33554432,
|
||||
peer_connect_interval: 5, // seconds. min: 1
|
||||
peer_connect_timeout: 5, // seconds. min: 1
|
||||
osd_idle_timeout: 5, // seconds. min: 1
|
||||
@@ -93,6 +99,7 @@ const etcd_tree = {
|
||||
etcd_ws_keepalive_interval: 30, // seconds
|
||||
// osd
|
||||
etcd_report_interval: 5, // seconds
|
||||
etcd_stats_interval: 30, // seconds
|
||||
run_primary: true,
|
||||
osd_network: null, // "192.168.7.0/24" or an array of masks
|
||||
bind_address: "0.0.0.0",
|
||||
@@ -390,12 +397,13 @@ class Mon
|
||||
this.etcd_prefix = this.etcd_prefix.replace(/\/\/+/g, '/').replace(/^\/?(.*[^\/])\/?$/, '/$1');
|
||||
this.etcd_start_timeout = (config.etcd_start_timeout || 5) * 1000;
|
||||
this.state = JSON.parse(JSON.stringify(this.constructor.etcd_tree));
|
||||
this.prev_stats = { osd_stats: {}, osd_diff: {} };
|
||||
this.signals_set = false;
|
||||
this.stat_time = Date.now();
|
||||
this.ws = null;
|
||||
this.ws_alive = false;
|
||||
this.ws_keepalive_timer = null;
|
||||
this.on_stop_cb = () => this.on_stop(0).catch(console.error);
|
||||
this.recheck_pgs_active = false;
|
||||
}
|
||||
|
||||
parse_etcd_addresses(addrs)
|
||||
@@ -539,10 +547,18 @@ class Mon
|
||||
{
|
||||
retries = 1;
|
||||
}
|
||||
const tried = {};
|
||||
while (retries < 0 || retry < retries)
|
||||
{
|
||||
const cur_addr = this.pick_next_etcd();
|
||||
const base = 'ws'+cur_addr.substr(4);
|
||||
let now = Date.now();
|
||||
if (tried[base] && now-tried[base] < this.etcd_start_timeout)
|
||||
{
|
||||
await new Promise(ok => setTimeout(ok, this.etcd_start_timeout-(now-tried[base])));
|
||||
now = Date.now();
|
||||
}
|
||||
tried[base] = now;
|
||||
const ok = await new Promise((ok, no) =>
|
||||
{
|
||||
const timer_id = setTimeout(() =>
|
||||
@@ -677,8 +693,27 @@ class Mon
|
||||
});
|
||||
}
|
||||
|
||||
// Schedule save_last_clean() to run after a small timeout (1s) (to not spam etcd)
|
||||
schedule_save_last_clean()
|
||||
{
|
||||
if (!this.save_last_clean_timer)
|
||||
{
|
||||
this.save_last_clean_timer = setTimeout(() =>
|
||||
{
|
||||
this.save_last_clean_timer = null;
|
||||
this.save_last_clean().catch(this.die);
|
||||
}, this.config.mon_change_timeout || 1000);
|
||||
}
|
||||
}
|
||||
|
||||
async save_last_clean()
|
||||
{
|
||||
if (this.save_last_clean_running)
|
||||
{
|
||||
this.schedule_save_last_clean();
|
||||
return;
|
||||
}
|
||||
this.save_last_clean_running = true;
|
||||
// last_clean_pgs is used to avoid extra data move when observing a series of changes in the cluster
|
||||
const new_clean_pgs = { items: {} };
|
||||
next_pool:
|
||||
@@ -715,6 +750,7 @@ class Mon
|
||||
value: b64(JSON.stringify(this.state.history.last_clean_pgs))
|
||||
} } ],
|
||||
}, this.etcd_start_timeout, 0);
|
||||
this.save_last_clean_running = false;
|
||||
}
|
||||
|
||||
get_mon_state()
|
||||
@@ -1148,6 +1184,33 @@ class Mon
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from flat_tree (host => { osd => ... }) every OSD whose reported stats
// conflict with the requested block layout, modifying flat_tree in place.
// An OSD is dropped when its stats entry in this.state.osd.stats:
//  - has a non-empty bs_block_size different from block_size, or
//  - has a non-empty bitmap_granularity different from bitmap_granularity, or
//  - reports immediate_commit 'small' while 'all' is requested, or
//  - reports immediate_commit 'none' while anything but 'none' is requested.
// OSDs without a stats entry are kept. Hosts left with no kept OSDs are removed too.
filter_osds_by_block_layout(flat_tree, block_size, bitmap_granularity, immediate_commit)
{
    const is_incompatible = (st) =>
    {
        if (!st)
        {
            return false;
        }
        if (st.bs_block_size && st.bs_block_size != block_size)
        {
            return true;
        }
        if (st.bitmap_granularity && st.bitmap_granularity != bitmap_granularity)
        {
            return true;
        }
        if (st.immediate_commit == 'small' && immediate_commit == 'all')
        {
            return true;
        }
        return st.immediate_commit == 'none' && immediate_commit != 'none';
    };
    for (const host in flat_tree)
    {
        const host_osds = flat_tree[host];
        let kept = 0;
        for (const osd_num in host_osds)
        {
            if (is_incompatible(this.state.osd.stats[osd_num]))
            {
                delete host_osds[osd_num];
            }
            else
            {
                kept++;
            }
        }
        if (kept === 0)
        {
            delete flat_tree[host];
        }
    }
}
|
||||
|
||||
get_affinity_osds(pool_cfg, up_osds, osd_tree)
|
||||
{
|
||||
let aff_osds = up_osds;
|
||||
@@ -1161,6 +1224,12 @@ class Mon
|
||||
|
||||
async recheck_pgs()
|
||||
{
|
||||
if (this.recheck_pgs_active)
|
||||
{
|
||||
this.schedule_recheck();
|
||||
return;
|
||||
}
|
||||
this.recheck_pgs_active = true;
|
||||
// Take configuration and state, check it against the stored configuration hash
|
||||
// Recalculate PGs and save them to etcd if the configuration is changed
|
||||
// FIXME: Do not change anything if the distribution is good and random enough and no PGs are degraded
|
||||
@@ -1182,6 +1251,7 @@ class Mon
|
||||
// Pool deleted. Delete all PGs, but first stop them.
|
||||
if (!await this.stop_all_pgs(pool_id))
|
||||
{
|
||||
this.recheck_pgs_active = false;
|
||||
this.schedule_recheck();
|
||||
return;
|
||||
}
|
||||
@@ -1208,6 +1278,12 @@ class Mon
|
||||
pool_tree = pool_tree ? pool_tree.children : [];
|
||||
pool_tree = LPOptimizer.flatten_tree(pool_tree, levels, pool_cfg.failure_domain, 'osd');
|
||||
this.filter_osds_by_tags(osd_tree, pool_tree, pool_cfg.osd_tags);
|
||||
this.filter_osds_by_block_layout(
|
||||
pool_tree,
|
||||
pool_cfg.block_size || this.config.block_size || 131072,
|
||||
pool_cfg.bitmap_granularity || this.config.bitmap_granularity || 4096,
|
||||
pool_cfg.immediate_commit || this.config.immediate_commit || 'none'
|
||||
);
|
||||
// These are for the purpose of building history.osd_sets
|
||||
const real_prev_pgs = [];
|
||||
let pg_history = [];
|
||||
@@ -1244,9 +1320,16 @@ class Mon
|
||||
// PG count changed. Need to bring all PGs down.
|
||||
if (!await this.stop_all_pgs(pool_id))
|
||||
{
|
||||
this.recheck_pgs_active = false;
|
||||
this.schedule_recheck();
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (prev_pgs.length != pool_cfg.pg_count)
|
||||
{
|
||||
// Scale PG count
|
||||
// Do it even if old_pg_count is already equal to pool_cfg.pg_count,
|
||||
// because last_clean_pgs may still contain the old number of PGs
|
||||
const new_pg_history = [];
|
||||
PGUtil.scale_pg_count(prev_pgs, real_prev_pgs, pg_history, new_pg_history, pool_cfg.pg_count);
|
||||
pg_history = new_pg_history;
|
||||
@@ -1348,6 +1431,7 @@ class Mon
|
||||
await this.save_pg_config(new_config_pgs);
|
||||
}
|
||||
}
|
||||
this.recheck_pgs_active = false;
|
||||
}
|
||||
|
||||
async save_pg_config(new_config_pgs, etcd_request = { compare: [], success: [] })
|
||||
@@ -1397,7 +1481,6 @@ class Mon
|
||||
}
|
||||
|
||||
// Schedule a recheck to run after a small timeout (1s)
|
||||
// If already scheduled, cancel previous timer and schedule it again
|
||||
// This is required for multiple change events to trigger at most 1 recheck in 1s
|
||||
schedule_recheck()
|
||||
{
|
||||
@@ -1411,15 +1494,15 @@ class Mon
|
||||
}
|
||||
}
|
||||
|
||||
derive_osd_stats(st, prev)
|
||||
derive_osd_stats(st, prev, prev_diff)
|
||||
{
|
||||
const zero_stats = { op: { bps: 0n, iops: 0n, lat: 0n }, subop: { iops: 0n, lat: 0n }, recovery: { bps: 0n, iops: 0n } };
|
||||
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
|
||||
if (!st || !st.time || prev && (prev.time || this.stat_time/1000) >= st.time)
|
||||
const diff = { op_stats: {}, subop_stats: {}, recovery_stats: {}, inode_stats: {} };
|
||||
if (!st || !st.time || !prev || prev.time >= st.time)
|
||||
{
|
||||
return diff;
|
||||
return prev_diff || diff;
|
||||
}
|
||||
const timediff = BigInt(st.time*1000 - (prev && prev.time*1000 || this.stat_time));
|
||||
const timediff = BigInt(st.time*1000 - prev.time*1000);
|
||||
for (const op in st.op_stats||{})
|
||||
{
|
||||
const pr = prev && prev.op_stats && prev.op_stats[op];
|
||||
@@ -1451,25 +1534,47 @@ class Mon
|
||||
if (n > 0)
|
||||
diff.recovery_stats[op] = { ...c, bps: b*1000n/timediff, iops: n*1000n/timediff };
|
||||
}
|
||||
for (const pool_id in st.inode_stats||{})
|
||||
{
|
||||
const pool_diff = diff.inode_stats[pool_id] = {};
|
||||
for (const inode_num in st.inode_stats[pool_id])
|
||||
{
|
||||
const inode_diff = diff.inode_stats[pool_id][inode_num] = {};
|
||||
for (const op of [ 'read', 'write', 'delete' ])
|
||||
{
|
||||
const c = st.inode_stats[pool_id][inode_num][op];
|
||||
const pr = prev && prev.inode_stats && prev.inode_stats[pool_id] &&
|
||||
prev.inode_stats[pool_id][inode_num] && prev.inode_stats[pool_id][inode_num][op];
|
||||
const n = BigInt(c.count||0) - BigInt(pr && pr.count||0);
|
||||
inode_diff[op] = {
|
||||
bps: (BigInt(c.bytes||0) - BigInt(pr && pr.bytes||0))*1000n/timediff,
|
||||
iops: n*1000n/timediff,
|
||||
lat: (BigInt(c.usec||0) - BigInt(pr && pr.usec||0))/(n || 1n),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
sum_op_stats(timestamp, prev_stats)
|
||||
sum_op_stats()
|
||||
{
|
||||
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
|
||||
if (!prev_stats || prev_stats.timestamp >= timestamp)
|
||||
for (const osd in this.state.osd.stats)
|
||||
{
|
||||
return sum_diff;
|
||||
const cur = { ...this.state.osd.stats[osd], inode_stats: this.state.osd.inodestats[osd]||{} };
|
||||
this.prev_stats.osd_diff[osd] = this.derive_osd_stats(
|
||||
cur, this.prev_stats.osd_stats[osd], this.prev_stats.osd_diff[osd]
|
||||
);
|
||||
this.prev_stats.osd_stats[osd] = cur;
|
||||
}
|
||||
const tm = BigInt(timestamp - (prev_stats.timestamp || 0));
|
||||
const sum_diff = { op_stats: {}, subop_stats: {}, recovery_stats: {} };
|
||||
// Sum derived values instead of deriving summed
|
||||
for (const osd in this.state.osd.stats)
|
||||
{
|
||||
const derived = this.derive_osd_stats(this.state.osd.stats[osd],
|
||||
this.prev_stats && this.prev_stats.osd_stats && this.prev_stats.osd_stats[osd]);
|
||||
for (const type in derived)
|
||||
const derived = this.prev_stats.osd_diff[osd];
|
||||
for (const type in sum_diff)
|
||||
{
|
||||
for (const op in derived[type])
|
||||
for (const op in derived[type]||{})
|
||||
{
|
||||
for (const k in derived[type][op])
|
||||
{
|
||||
@@ -1497,10 +1602,14 @@ class Mon
|
||||
break;
|
||||
}
|
||||
}
|
||||
const pool_cfg = (this.state.config.pools[pool_id]||{});
|
||||
if (!object_size)
|
||||
{
|
||||
object_size = (this.state.config.pools[pool_id]||{}).block_size ||
|
||||
this.config.block_size || 131072;
|
||||
object_size = pool_cfg.block_size || this.config.block_size || 131072;
|
||||
}
|
||||
if (pool_cfg.scheme !== 'replicated')
|
||||
{
|
||||
object_size *= ((pool_cfg.pg_size||0) - (pool_cfg.parity_chunks||0));
|
||||
}
|
||||
object_size = BigInt(object_size);
|
||||
for (const pg_num in this.state.pg.stats[pool_id])
|
||||
@@ -1522,14 +1631,14 @@ class Mon
|
||||
return { object_counts, object_bytes };
|
||||
}
|
||||
|
||||
sum_inode_stats(prev_stats, timestamp, prev_timestamp)
|
||||
sum_inode_stats()
|
||||
{
|
||||
const inode_stats = {};
|
||||
const inode_stub = () => ({
|
||||
raw_used: 0n,
|
||||
read: { count: 0n, usec: 0n, bytes: 0n },
|
||||
write: { count: 0n, usec: 0n, bytes: 0n },
|
||||
delete: { count: 0n, usec: 0n, bytes: 0n },
|
||||
read: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
|
||||
write: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
|
||||
delete: { count: 0n, usec: 0n, bytes: 0n, bps: 0n, iops: 0n, lat: 0n },
|
||||
});
|
||||
const seen_pools = {};
|
||||
for (const pool_id in this.state.config.pools)
|
||||
@@ -1581,11 +1690,25 @@ class Mon
|
||||
}
|
||||
}
|
||||
}
|
||||
if (prev_stats && prev_timestamp >= timestamp)
|
||||
for (const osd in this.prev_stats.osd_diff)
|
||||
{
|
||||
prev_stats = null;
|
||||
for (const pool_id in this.prev_stats.osd_diff[osd].inode_stats)
|
||||
{
|
||||
for (const inode_num in this.prev_stats.osd_diff[osd].inode_stats[pool_id])
|
||||
{
|
||||
inode_stats[pool_id][inode_num] = inode_stats[pool_id][inode_num] || inode_stub();
|
||||
for (const op of [ 'read', 'write', 'delete' ])
|
||||
{
|
||||
const op_diff = this.prev_stats.osd_diff[osd].inode_stats[pool_id][inode_num][op] || {};
|
||||
const op_st = inode_stats[pool_id][inode_num][op];
|
||||
op_st.bps += op_diff.bps;
|
||||
op_st.iops += op_diff.iops;
|
||||
op_st.lat += op_diff.lat;
|
||||
op_st.n_osd = (op_st.n_osd || 0) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
const tm = prev_stats ? BigInt(timestamp - prev_timestamp) : 0;
|
||||
for (const pool_id in inode_stats)
|
||||
{
|
||||
for (const inode_num in inode_stats[pool_id])
|
||||
@@ -1594,11 +1717,12 @@ class Mon
|
||||
for (const op of [ 'read', 'write', 'delete' ])
|
||||
{
|
||||
const op_st = inode_stats[pool_id][inode_num][op];
|
||||
const prev_st = prev_stats && prev_stats[pool_id] && prev_stats[pool_id][inode_num] && prev_stats[pool_id][inode_num][op];
|
||||
op_st.bps = prev_st ? (op_st.bytes - prev_st.bytes) * 1000n / tm : 0;
|
||||
op_st.iops = prev_st ? (op_st.count - prev_st.count) * 1000n / tm : 0;
|
||||
op_st.lat = prev_st ? (op_st.usec - prev_st.usec) / ((op_st.count - prev_st.count) || 1n) : 0;
|
||||
if (op_st.bps > 0 || op_st.iops > 0 || op_st.lat > 0)
|
||||
if (op_st.n_osd)
|
||||
{
|
||||
op_st.lat /= BigInt(op_st.n_osd);
|
||||
delete op_st.n_osd;
|
||||
}
|
||||
if (op_st.bps > 0 || op_st.iops > 0)
|
||||
nonzero = true;
|
||||
}
|
||||
if (!nonzero && (!this.state.config.inode[pool_id] || !this.state.config.inode[pool_id][inode_num]))
|
||||
@@ -1608,7 +1732,7 @@ class Mon
|
||||
}
|
||||
}
|
||||
}
|
||||
return inode_stats;
|
||||
return { inode_stats, seen_pools };
|
||||
}
|
||||
|
||||
serialize_bigints(obj)
|
||||
@@ -1631,15 +1755,9 @@ class Mon
|
||||
async update_total_stats()
|
||||
{
|
||||
const txn = [];
|
||||
const timestamp = Date.now();
|
||||
const { object_counts, object_bytes } = this.sum_object_counts();
|
||||
let stats = this.sum_op_stats(timestamp, this.prev_stats);
|
||||
let inode_stats = this.sum_inode_stats(
|
||||
this.prev_stats ? this.prev_stats.inode_stats : null,
|
||||
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
|
||||
);
|
||||
this.prev_stats = { timestamp, inode_stats, osd_stats: { ...this.state.osd.stats } };
|
||||
this.stat_time = Date.now();
|
||||
let stats = this.sum_op_stats();
|
||||
let { inode_stats, seen_pools } = this.sum_inode_stats();
|
||||
stats.object_counts = object_counts;
|
||||
stats.object_bytes = object_bytes;
|
||||
stats = this.serialize_bigints(stats);
|
||||
@@ -1669,12 +1787,22 @@ class Mon
|
||||
}
|
||||
for (const pool_id in this.state.pool.stats)
|
||||
{
|
||||
const pool_stats = { ...this.state.pool.stats[pool_id] };
|
||||
this.serialize_bigints(pool_stats);
|
||||
txn.push({ requestPut: {
|
||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(pool_stats)),
|
||||
} });
|
||||
if (!seen_pools[pool_id])
|
||||
{
|
||||
txn.push({ requestDeleteRange: {
|
||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
} });
|
||||
delete this.state.pool.stats[pool_id];
|
||||
}
|
||||
else
|
||||
{
|
||||
const pool_stats = { ...this.state.pool.stats[pool_id] };
|
||||
this.serialize_bigints(pool_stats);
|
||||
txn.push({ requestPut: {
|
||||
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
|
||||
value: b64(JSON.stringify(pool_stats)),
|
||||
} });
|
||||
}
|
||||
}
|
||||
if (txn.length)
|
||||
{
|
||||
@@ -1774,10 +1902,18 @@ class Mon
|
||||
{
|
||||
retries = 1;
|
||||
}
|
||||
const tried = {};
|
||||
while (retries < 0 || retry < retries)
|
||||
{
|
||||
retry++;
|
||||
const base = this.pick_next_etcd();
|
||||
let now = Date.now();
|
||||
if (tried[base] && now-tried[base] < timeout)
|
||||
{
|
||||
await new Promise(ok => setTimeout(ok, timeout-(now-tried[base])));
|
||||
now = Date.now();
|
||||
}
|
||||
tried[base] = now;
|
||||
const res = await POST(base+path, body, timeout);
|
||||
if (res.error)
|
||||
{
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "1.0.0",
|
||||
"version": "1.2.0",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '0.9.2'
|
||||
VERSION = '1.2.0'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
644
patches/libvirt-9.0-vitastor.diff
Normal file
644
patches/libvirt-9.0-vitastor.diff
Normal file
@@ -0,0 +1,644 @@
|
||||
commit e6f935157944279c2c0634915c3c00feeec748c9
|
||||
Author: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Date: Mon Jun 19 00:58:19 2023 +0300
|
||||
|
||||
Add Vitastor support
|
||||
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3..5f5daa8 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index 45965fa..b7c23d3 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7103,7 +7103,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -30121,6 +30122,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index 5a9bf20..05058b8 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -494,6 +494,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -541,7 +542,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -550,11 +551,15 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
"only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
-
|
||||
+ }
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
_("<config> element is currently supported "
|
||||
- "only with 'rbd' disks"));
|
||||
+ "only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index 6cb0a20..8bf7de9 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -1972,6 +1972,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2264,6 +2293,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index f5a9636..8339bc4 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -542,6 +554,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1132,6 +1149,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957..720c07e 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index cecd7e8..d7b79a4 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -87,6 +87,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1286,6 +1287,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index 14a6825..eb4acac 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -128,6 +128,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index e6c187e..035b423 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1433,6 +1433,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1918,6 +1919,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index 8490034..ab2cdaa 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index 17ac880..59711b5 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -970,6 +970,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index 6919325..55ffc32 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1445,6 +1445,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index e865aa1..40162af 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -604,6 +604,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValue *
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValue *ret = NULL;
|
||||
+ virStorageNetHostDef *host;
|
||||
+ size_t i;
|
||||
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
|
||||
+ g_autofree char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectAdd(&ret,
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->query,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSheepdogProps(virStorageSource *src)
|
||||
{
|
||||
@@ -917,6 +949,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
@@ -1860,6 +1898,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2242,6 +2281,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
driver = "sheepdog";
|
||||
if (!(location = qemuBlockStorageSourceGetSheepdogProps(src)))
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index 2eb5653..60ee82d 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -4958,7 +4958,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -10129,6 +10130,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index b841680..a6be771 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -373,6 +373,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -578,6 +579,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index d90c1c9..e853457 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1627,6 +1627,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index e48ae72..2017ccc 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -284,6 +284,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSource *src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -396,6 +465,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -984,6 +1058,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
|
||||
+ virJSONValue *json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1162,6 +1284,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index bd6f063..cce34e1 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7338,6 +7338,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af..8bd0a57 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a..852df0d 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index e8e40d6..db55fe5 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -65,6 +65,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index 8a98c6a..4b1bbd4 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1221,6 +1221,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
190
patches/pve-qemu-8.0-vitastor.patch
Normal file
190
patches/pve-qemu-8.0-vitastor.patch
Normal file
@@ -0,0 +1,190 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index 382bec0e7d..af6207dbce 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -114,6 +114,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index c44d05a13f..ebedb42843 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1028,6 +1028,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -1882,6 +1902,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||
@@ -4020,6 +4041,7 @@ if spice_protocol.found()
|
||||
summary_info += {' spice server support': spice}
|
||||
endif
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index fc9447d267..c4ac55c283 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -173,6 +173,8 @@ option('lzo', type : 'feature', value : 'auto',
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index c05ad0c07e..f5eb701604 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3308,7 +3308,7 @@
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
'pbs',
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4338,6 +4338,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4787,6 +4809,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5187,6 +5210,17 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5385,6 +5419,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
index 6e8983f39c..1b0b9fcf3e 100755
|
||||
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ b/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -32,7 +32,7 @@
|
||||
--with-git=meson \
|
||||
--with-git-submodules=update \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -179,6 +179,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 009fab1515..95914e6ebc 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -144,6 +144,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qed qed image format support'
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' sdl SDL user interface'
|
||||
@@ -392,6 +393,8 @@ _meson_option_parse() {
|
||||
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-replication) printf "%s" -Dreplication=enabled ;;
|
176
patches/qemu-2.12-vitastor.patch
Normal file
176
patches/qemu-2.12-vitastor.patch
Normal file
@@ -0,0 +1,176 @@
|
||||
diff --git a/block/Makefile.objs b/block/Makefile.objs
|
||||
index d644bac60a..e404236291 100644
|
||||
--- a/block/Makefile.objs
|
||||
+++ b/block/Makefile.objs
|
||||
@@ -19,6 +19,7 @@ block-obj-$(if $(CONFIG_LIBISCSI),y,n) += iscsi-opts.o
|
||||
block-obj-$(CONFIG_LIBNFS) += nfs.o
|
||||
block-obj-$(CONFIG_CURL) += curl.o
|
||||
block-obj-$(CONFIG_RBD) += rbd.o
|
||||
+block-obj-$(CONFIG_VITASTOR) += vitastor.o
|
||||
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
|
||||
block-obj-$(CONFIG_VXHS) += vxhs.o
|
||||
block-obj-$(CONFIG_LIBSSH2) += ssh.o
|
||||
@@ -39,6 +40,8 @@ curl.o-cflags := $(CURL_CFLAGS)
|
||||
curl.o-libs := $(CURL_LIBS)
|
||||
rbd.o-cflags := $(RBD_CFLAGS)
|
||||
rbd.o-libs := $(RBD_LIBS)
|
||||
+vitastor.o-cflags := $(VITASTOR_CFLAGS)
|
||||
+vitastor.o-libs := $(VITASTOR_LIBS)
|
||||
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
|
||||
gluster.o-libs := $(GLUSTERFS_LIBS)
|
||||
vxhs.o-libs := $(VXHS_LIBS)
|
||||
diff --git a/configure b/configure
|
||||
index 0a19b033bc..58b7fbf24c 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -398,6 +398,7 @@ trace_backends="log"
|
||||
trace_file="trace"
|
||||
spice=""
|
||||
rbd=""
|
||||
+vitastor=""
|
||||
smartcard=""
|
||||
libusb=""
|
||||
usb_redir=""
|
||||
@@ -1213,6 +1214,10 @@ for opt do
|
||||
;;
|
||||
--enable-rbd) rbd="yes"
|
||||
;;
|
||||
+ --disable-vitastor) vitastor="no"
|
||||
+ ;;
|
||||
+ --enable-vitastor) vitastor="yes"
|
||||
+ ;;
|
||||
--disable-xfsctl) xfs="no"
|
||||
;;
|
||||
--enable-xfsctl) xfs="yes"
|
||||
@@ -1601,6 +1606,7 @@ disabled with --disable-FEATURE, default is enabled if available:
|
||||
vhost-crypto vhost-crypto acceleration support
|
||||
spice spice
|
||||
rbd rados block device (rbd)
|
||||
+ vitastor vitastor block device
|
||||
libiscsi iscsi support
|
||||
libnfs nfs support
|
||||
smartcard smartcard support (libcacard)
|
||||
@@ -3594,6 +3600,27 @@ EOF
|
||||
fi
|
||||
fi
|
||||
|
||||
+##########################################
|
||||
+# vitastor probe
|
||||
+if test "$vitastor" != "no" ; then
|
||||
+ cat > $TMPC <<EOF
|
||||
+#include <vitastor_c.h>
|
||||
+int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+}
|
||||
+EOF
|
||||
+ vitastor_libs="-lvitastor_client"
|
||||
+ if compile_prog "" "$vitastor_libs" ; then
|
||||
+ vitastor=yes
|
||||
+ else
|
||||
+ if test "$vitastor" = "yes" ; then
|
||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
||||
+ fi
|
||||
+ vitastor=no
|
||||
+ fi
|
||||
+fi
|
||||
+
|
||||
##########################################
|
||||
# libssh2 probe
|
||||
min_libssh2_version=1.2.8
|
||||
@@ -5837,6 +5864,7 @@ echo "Trace output file $trace_file-<pid>"
|
||||
fi
|
||||
echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
|
||||
echo "rbd support $rbd"
|
||||
+echo "vitastor support $vitastor"
|
||||
echo "xfsctl support $xfs"
|
||||
echo "smartcard support $smartcard"
|
||||
echo "libusb $libusb"
|
||||
@@ -6416,6 +6444,11 @@ if test "$rbd" = "yes" ; then
|
||||
echo "RBD_CFLAGS=$rbd_cflags" >> $config_host_mak
|
||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
||||
fi
|
||||
+if test "$vitastor" = "yes" ; then
|
||||
+ echo "CONFIG_VITASTOR=m" >> $config_host_mak
|
||||
+ echo "VITASTOR_CFLAGS=$vitastor_cflags" >> $config_host_mak
|
||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
||||
+fi
|
||||
|
||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
||||
if test "$coroutine_pool" = "yes" ; then
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index c50517bff3..c780bb2c1c 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -2514,7 +2514,7 @@
|
||||
'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
|
||||
'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
|
||||
'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
|
||||
- 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
|
||||
+ 'quorum', 'raw', 'rbd', 'vitastor', 'replication', 'sheepdog', 'ssh',
|
||||
'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
|
||||
|
||||
##
|
||||
@@ -3217,6 +3217,28 @@
|
||||
'*snap-id': 'uint32',
|
||||
'*tag': 'str' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -3547,6 +3569,7 @@
|
||||
'rbd': 'BlockdevOptionsRbd',
|
||||
'replication':'BlockdevOptionsReplication',
|
||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'ssh': 'BlockdevOptionsSsh',
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
@@ -3991,6 +4014,17 @@
|
||||
'*subformat': 'BlockdevVhdxSubformat',
|
||||
'*block-state-zero': 'bool' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVpcSubformat:
|
||||
#
|
||||
@@ -4074,6 +4108,7 @@
|
||||
'rbd': 'BlockdevCreateOptionsRbd',
|
||||
'replication': 'BlockdevCreateNotSupported',
|
||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'throttle': 'BlockdevCreateNotSupported',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
181
patches/qemu-5.2-vitastor.patch
Normal file
181
patches/qemu-5.2-vitastor.patch
Normal file
@@ -0,0 +1,181 @@
|
||||
Index: qemu-5.2+dfsg/qapi/block-core.json
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/qapi/block-core.json
|
||||
+++ qemu-5.2+dfsg/qapi/block-core.json
|
||||
@@ -2831,7 +2831,7 @@
|
||||
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
|
||||
'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'defined(CONFIG_REPLICATION)' },
|
||||
- 'sheepdog',
|
||||
+ 'sheepdog', 'vitastor',
|
||||
'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||
|
||||
##
|
||||
@@ -3668,6 +3668,28 @@
|
||||
'*tag': 'str' } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -4015,6 +4037,7 @@
|
||||
'replication': { 'type': 'BlockdevOptionsReplication',
|
||||
'if': 'defined(CONFIG_REPLICATION)' },
|
||||
'sheepdog': 'BlockdevOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'ssh': 'BlockdevOptionsSsh',
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
@@ -4404,6 +4427,17 @@
|
||||
'*cluster-size' : 'size' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -4665,6 +4699,7 @@
|
||||
'qed': 'BlockdevCreateOptionsQed',
|
||||
'rbd': 'BlockdevCreateOptionsRbd',
|
||||
'sheepdog': 'BlockdevCreateOptionsSheepdog',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
Index: qemu-5.2+dfsg/block/meson.build
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/block/meson.build
|
||||
+++ qemu-5.2+dfsg/block/meson.build
|
||||
@@ -76,6 +76,7 @@ foreach m : [
|
||||
['CONFIG_LIBNFS', 'nfs', libnfs, 'nfs.c'],
|
||||
['CONFIG_LIBSSH', 'ssh', libssh, 'ssh.c'],
|
||||
['CONFIG_RBD', 'rbd', rbd, 'rbd.c'],
|
||||
+ ['CONFIG_VITASTOR', 'vitastor', vitastor, 'vitastor.c'],
|
||||
]
|
||||
if config_host.has_key(m[0])
|
||||
if enable_modules
|
||||
Index: qemu-5.2+dfsg/configure
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/configure
|
||||
+++ qemu-5.2+dfsg/configure
|
||||
@@ -372,6 +372,7 @@ trace_backends="log"
|
||||
trace_file="trace"
|
||||
spice=""
|
||||
rbd=""
|
||||
+vitastor=""
|
||||
smartcard=""
|
||||
u2f="auto"
|
||||
libusb=""
|
||||
@@ -1263,6 +1264,10 @@ for opt do
|
||||
;;
|
||||
--enable-rbd) rbd="yes"
|
||||
;;
|
||||
+ --disable-vitastor) vitastor="no"
|
||||
+ ;;
|
||||
+ --enable-vitastor) vitastor="yes"
|
||||
+ ;;
|
||||
--disable-xfsctl) xfs="no"
|
||||
;;
|
||||
--enable-xfsctl) xfs="yes"
|
||||
@@ -1827,6 +1832,7 @@ disabled with --disable-FEATURE, default
|
||||
vhost-vdpa vhost-vdpa kernel backend support
|
||||
spice spice
|
||||
rbd rados block device (rbd)
|
||||
+ vitastor vitastor block device
|
||||
libiscsi iscsi support
|
||||
libnfs nfs support
|
||||
smartcard smartcard support (libcacard)
|
||||
@@ -3719,6 +3725,27 @@ EOF
|
||||
fi
|
||||
|
||||
##########################################
|
||||
+# vitastor probe
|
||||
+if test "$vitastor" != "no" ; then
|
||||
+ cat > $TMPC <<EOF
|
||||
+#include <vitastor_c.h>
|
||||
+int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+}
|
||||
+EOF
|
||||
+ vitastor_libs="-lvitastor_client"
|
||||
+ if compile_prog "" "$vitastor_libs" ; then
|
||||
+ vitastor=yes
|
||||
+ else
|
||||
+ if test "$vitastor" = "yes" ; then
|
||||
+ feature_not_found "vitastor block device" "Install vitastor-client-dev"
|
||||
+ fi
|
||||
+ vitastor=no
|
||||
+ fi
|
||||
+fi
|
||||
+
|
||||
+##########################################
|
||||
# libssh probe
|
||||
if test "$libssh" != "no" ; then
|
||||
if $pkg_config --exists libssh; then
|
||||
@@ -6456,6 +6483,10 @@ if test "$rbd" = "yes" ; then
|
||||
echo "CONFIG_RBD=y" >> $config_host_mak
|
||||
echo "RBD_LIBS=$rbd_libs" >> $config_host_mak
|
||||
fi
|
||||
+if test "$vitastor" = "yes" ; then
|
||||
+ echo "CONFIG_VITASTOR=y" >> $config_host_mak
|
||||
+ echo "VITASTOR_LIBS=$vitastor_libs" >> $config_host_mak
|
||||
+fi
|
||||
|
||||
echo "CONFIG_COROUTINE_BACKEND=$coroutine" >> $config_host_mak
|
||||
if test "$coroutine_pool" = "yes" ; then
|
||||
Index: qemu-5.2+dfsg/meson.build
|
||||
===================================================================
|
||||
--- qemu-5.2+dfsg.orig/meson.build
|
||||
+++ qemu-5.2+dfsg/meson.build
|
||||
@@ -596,6 +596,10 @@ rbd = not_found
|
||||
if 'CONFIG_RBD' in config_host
|
||||
rbd = declare_dependency(link_args: config_host['RBD_LIBS'].split())
|
||||
endif
|
||||
+vitastor = not_found
|
||||
+if 'CONFIG_VITASTOR' in config_host
|
||||
+ vitastor = declare_dependency(link_args: config_host['VITASTOR_LIBS'].split())
|
||||
+endif
|
||||
glusterfs = not_found
|
||||
if 'CONFIG_GLUSTERFS' in config_host
|
||||
glusterfs = declare_dependency(compile_args: config_host['GLUSTERFS_CFLAGS'].split(),
|
||||
@@ -2145,6 +2149,7 @@ endif
|
||||
# TODO: add back protocol and server version
|
||||
summary_info += {'spice support': config_host.has_key('CONFIG_SPICE')}
|
||||
summary_info += {'rbd support': config_host.has_key('CONFIG_RBD')}
|
||||
+summary_info += {'vitastor support': config_host.has_key('CONFIG_VITASTOR')}
|
||||
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
|
||||
summary_info += {'smartcard support': config_host.has_key('CONFIG_SMARTCARD')}
|
||||
summary_info += {'U2F support': u2f.found()}
|
@@ -24,4 +24,4 @@ rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-0.9.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.9.2$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform 's#^#vitastor-1.2.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.2.0$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -22,7 +22,7 @@
|
||||
Name: qemu-kvm
|
||||
Version: 4.2.0
|
||||
-Release: 29.vitastor%{?dist}.6
|
||||
+Release: 32.vitastor%{?dist}.6
|
||||
+Release: 34.vitastor%{?dist}.6
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
Epoch: 15
|
||||
License: GPLv2 and GPLv2+ and CC-BY
|
||||
|
@@ -13,7 +13,7 @@
|
||||
Name: qemu-kvm
|
||||
Version: 4.2.0
|
||||
-Release: 29%{?dist}.6
|
||||
+Release: 32.vitastor%{?dist}.6
|
||||
+Release: 33.vitastor%{?dist}.6
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
Epoch: 15
|
||||
License: GPLv2 and GPLv2+ and CC-BY
|
||||
|
103
rpm/qemu-kvm-6.2-el8.spec.patch
Normal file
103
rpm/qemu-kvm-6.2-el8.spec.patch
Normal file
@@ -0,0 +1,103 @@
|
||||
--- qemu-kvm-6.2.spec.orig 2023-07-18 13:52:57.636625440 +0000
|
||||
+++ qemu-kvm-6.2.spec 2023-07-18 13:52:19.011683886 +0000
|
||||
@@ -73,6 +73,7 @@ Requires: %{name}-hw-usbredir = %{epoch}
|
||||
%endif \
|
||||
Requires: %{name}-block-iscsi = %{epoch}:%{version}-%{release} \
|
||||
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
||||
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
||||
Requires: %{name}-block-ssh = %{epoch}:%{version}-%{release}
|
||||
|
||||
# Macro to properly setup RHEL/RHEV conflict handling
|
||||
@@ -83,7 +84,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version
|
||||
Summary: QEMU is a machine emulator and virtualizer
|
||||
Name: qemu-kvm
|
||||
Version: 6.2.0
|
||||
-Release: 32%{?rcrel}%{?dist}
|
||||
+Release: 32.vitastor%{?rcrel}%{?dist}
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
Epoch: 15
|
||||
License: GPLv2 and GPLv2+ and CC-BY
|
||||
@@ -122,6 +123,7 @@ Source37: tests_data_acpi_pc_SSDT.dimmpx
|
||||
Source38: tests_data_acpi_q35_FACP.slic
|
||||
Source39: tests_data_acpi_q35_SSDT.dimmpxm
|
||||
Source40: tests_data_acpi_virt_SSDT.memhp
|
||||
+Source41: qemu-vitastor.c
|
||||
|
||||
Patch0001: 0001-redhat-Adding-slirp-to-the-exploded-tree.patch
|
||||
Patch0005: 0005-Initial-redhat-build.patch
|
||||
@@ -652,6 +654,7 @@ Patch255: kvm-scsi-protect-req-aiocb-wit
|
||||
Patch256: kvm-dma-helpers-prevent-dma_blk_cb-vs-dma_aio_cancel-rac.patch
|
||||
# For bz#2090990 - qemu crash with error scsi_req_unref(SCSIRequest *): Assertion `req->refcount > 0' failed or scsi_dma_complete(void *, int): Assertion `r->req.aiocb != NULL' failed [8.7.0]
|
||||
Patch257: kvm-virtio-scsi-reset-SCSI-devices-from-main-loop-thread.patch
|
||||
+Patch258: qemu-6.2-vitastor.patch
|
||||
|
||||
BuildRequires: wget
|
||||
BuildRequires: rpm-build
|
||||
@@ -689,6 +692,7 @@ BuildRequires: libcurl-devel
|
||||
BuildRequires: libssh-devel
|
||||
BuildRequires: librados-devel
|
||||
BuildRequires: librbd-devel
|
||||
+BuildRequires: vitastor-client-devel
|
||||
%if %{have_gluster}
|
||||
# For gluster block driver
|
||||
BuildRequires: glusterfs-api-devel
|
||||
@@ -926,6 +930,14 @@ Install this package if you want to acce
|
||||
using the rbd protocol.
|
||||
|
||||
|
||||
+%package block-vitastor
|
||||
+Summary: QEMU Vitastor block driver
|
||||
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
+
|
||||
+%description block-vitastor
|
||||
+This package provides the additional Vitastor block driver for QEMU.
|
||||
+
|
||||
+
|
||||
%package block-ssh
|
||||
Summary: QEMU SSH block driver
|
||||
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
@@ -979,6 +991,7 @@ This package provides usbredir support.
|
||||
rm -fr slirp
|
||||
mkdir slirp
|
||||
%autopatch -p1
|
||||
+cp %{SOURCE41} ./block/vitastor.c
|
||||
|
||||
%global qemu_kvm_build qemu_kvm_build
|
||||
mkdir -p %{qemu_kvm_build}
|
||||
@@ -994,7 +1007,7 @@ cp -f %{SOURCE40} tests/data/acpi/virt/S
|
||||
# --build-id option is used for giving info to the debug packages.
|
||||
buildldflags="VL_LDFLAGS=-Wl,--build-id"
|
||||
|
||||
-%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle
|
||||
+%global block_drivers_list qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle
|
||||
|
||||
%if 0%{have_gluster}
|
||||
%global block_drivers_list %{block_drivers_list},gluster
|
||||
@@ -1149,9 +1162,7 @@ pushd %{qemu_kvm_build}
|
||||
--firmwarepath=%{_prefix}/share/qemu-firmware \
|
||||
--meson="git" \
|
||||
--target-list="%{buildarch}" \
|
||||
- --block-drv-rw-whitelist=%{block_drivers_list} \
|
||||
--audio-drv-list= \
|
||||
- --block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \
|
||||
--with-coroutine=ucontext \
|
||||
--with-git=git \
|
||||
--tls-priority=@QEMU,SYSTEM \
|
||||
@@ -1197,6 +1208,7 @@ pushd %{qemu_kvm_build}
|
||||
%endif
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+ --enable-vitastor \
|
||||
%if 0%{have_librdma}
|
||||
--enable-rdma \
|
||||
%endif
|
||||
@@ -1794,6 +1806,9 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.
|
||||
%files block-rbd
|
||||
%{_libdir}/qemu-kvm/block-rbd.so
|
||||
|
||||
+%files block-vitastor
|
||||
+%{_libdir}/qemu-kvm/block-vitastor.so
|
||||
+
|
||||
%files block-ssh
|
||||
%{_libdir}/qemu-kvm/block-ssh.so
|
||||
|
93
rpm/qemu-kvm-7.2-el9.spec.patch
Normal file
93
rpm/qemu-kvm-7.2-el9.spec.patch
Normal file
@@ -0,0 +1,93 @@
|
||||
--- qemu-kvm-7.2.spec.orig 2023-06-22 13:56:19.000000000 +0000
|
||||
+++ qemu-kvm-7.2.spec 2023-07-18 07:55:22.347090196 +0000
|
||||
@@ -100,8 +100,6 @@
|
||||
%endif
|
||||
|
||||
%global target_list %{kvm_target}-softmmu
|
||||
-%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress
|
||||
-%global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https
|
||||
%define qemudocdir %{_docdir}/%{name}
|
||||
%global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios"
|
||||
|
||||
@@ -126,6 +124,7 @@ Requires: %{name}-device-usb-host = %{ep
|
||||
Requires: %{name}-device-usb-redirect = %{epoch}:%{version}-%{release} \
|
||||
%endif \
|
||||
Requires: %{name}-block-rbd = %{epoch}:%{version}-%{release} \
|
||||
+Requires: %{name}-block-vitastor = %{epoch}:%{version}-%{release}\
|
||||
Requires: %{name}-audio-pa = %{epoch}:%{version}-%{release}
|
||||
|
||||
# Since SPICE is removed from RHEL-9, the following Obsoletes:
|
||||
@@ -148,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}
|
||||
Summary: QEMU is a machine emulator and virtualizer
|
||||
Name: qemu-kvm
|
||||
Version: 7.2.0
|
||||
-Release: 14%{?rcrel}%{?dist}%{?cc_suffix}.1
|
||||
+Release: 14.vitastor%{?rcrel}%{?dist}%{?cc_suffix}.1
|
||||
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
|
||||
# Epoch 15 used for RHEL 8
|
||||
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
|
||||
@@ -171,6 +170,7 @@ Source28: 95-kvm-memlock.conf
|
||||
Source30: kvm-s390x.conf
|
||||
Source31: kvm-x86.conf
|
||||
Source36: README.tests
|
||||
+Source37: qemu-vitastor.c
|
||||
|
||||
|
||||
Patch0004: 0004-Initial-redhat-build.patch
|
||||
@@ -418,6 +418,7 @@ Patch134: kvm-target-i386-Fix-BZHI-instr
|
||||
Patch135: kvm-intel-iommu-fail-DEVIOTLB_UNMAP-without-dt-mode.patch
|
||||
# For bz#2203745 - Disk detach is unsuccessful while the guest is still booting [rhel-9.2.0.z]
|
||||
Patch136: kvm-acpi-pcihp-allow-repeating-hot-unplug-requests.patch
|
||||
+Patch137: qemu-7.2-vitastor.patch
|
||||
|
||||
%if %{have_clang}
|
||||
BuildRequires: clang
|
||||
@@ -449,6 +450,7 @@ BuildRequires: libcurl-devel
|
||||
%if %{have_block_rbd}
|
||||
BuildRequires: librbd-devel
|
||||
%endif
|
||||
+BuildRequires: vitastor-client-devel
|
||||
# We need both because the 'stap' binary is probed for by configure
|
||||
BuildRequires: systemtap
|
||||
BuildRequires: systemtap-sdt-devel
|
||||
@@ -642,6 +644,14 @@ using the rbd protocol.
|
||||
%endif
|
||||
|
||||
|
||||
+%package block-vitastor
|
||||
+Summary: QEMU Vitastor block driver
|
||||
+Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
+
|
||||
+%description block-vitastor
|
||||
+This package provides the additional Vitastor block driver for QEMU.
|
||||
+
|
||||
+
|
||||
%package audio-pa
|
||||
Summary: QEMU PulseAudio audio driver
|
||||
Requires: %{name}-common%{?_isa} = %{epoch}:%{version}-%{release}
|
||||
@@ -719,6 +729,7 @@ This package provides usbredir support.
|
||||
%prep
|
||||
%setup -q -n qemu-%{version}%{?rcstr}
|
||||
%autopatch -p1
|
||||
+cp %{SOURCE37} ./block/vitastor.c
|
||||
|
||||
%global qemu_kvm_build qemu_kvm_build
|
||||
mkdir -p %{qemu_kvm_build}
|
||||
@@ -946,6 +957,7 @@ run_configure \
|
||||
%if %{have_block_rbd}
|
||||
--enable-rbd \
|
||||
%endif
|
||||
+ --enable-vitastor \
|
||||
%if %{have_librdma}
|
||||
--enable-rdma \
|
||||
%endif
|
||||
@@ -1426,6 +1438,9 @@ useradd -r -u 107 -g qemu -G kvm -d / -s
|
||||
%files block-rbd
|
||||
%{_libdir}/%{name}/block-rbd.so
|
||||
%endif
|
||||
+%files block-vitastor
|
||||
+%{_libdir}/%{name}/block-vitastor.so
|
||||
+
|
||||
%files audio-pa
|
||||
%{_libdir}/%{name}/audio-pa.so
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.9.2.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-1.2.0.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.9.2
|
||||
Version: 1.2.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.9.2.el7.tar.gz
|
||||
Source0: vitastor-1.2.0.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.9.2.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-1.2.0.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.9.2
|
||||
Version: 1.2.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.9.2.el8.tar.gz
|
||||
Source0: vitastor-1.2.0.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -18,7 +18,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.9.2.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-1.2.0.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.9.2
|
||||
Version: 1.2.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.9.2.el9.tar.gz
|
||||
Source0: vitastor-1.2.0.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="0.9.2")
|
||||
add_definitions(-DVERSION="1.2.0")
|
||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||
if (${WITH_ASAN})
|
||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||
@@ -137,6 +137,7 @@ endif (${WITH_FIO})
|
||||
add_library(vitastor_client SHARED
|
||||
cluster_client.cpp
|
||||
cluster_client_list.cpp
|
||||
cluster_client_wb.cpp
|
||||
vitastor_c.cpp
|
||||
cli_common.cpp
|
||||
cli_alloc_osd.cpp
|
||||
@@ -300,7 +301,7 @@ target_link_libraries(test_crc32
|
||||
add_executable(test_cluster_client
|
||||
EXCLUDE_FROM_ALL
|
||||
test_cluster_client.cpp
|
||||
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
|
||||
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp cluster_client_wb.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
|
||||
etcd_state_client.cpp timerfd_manager.cpp str_util.cpp ../json11/json11.cpp
|
||||
)
|
||||
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
|
||||
|
@@ -19,8 +19,8 @@ bool string_to_addr(std::string str, bool parse_port, int default_port, struct s
|
||||
if (p != std::string::npos && !(str.length() > 0 && str[p-1] == ']')) // "[ipv6]" which contains ':'
|
||||
{
|
||||
char null_byte = 0;
|
||||
int n = sscanf(str.c_str()+p+1, "%d%c", &default_port, &null_byte);
|
||||
if (n != 1 || default_port >= 0x10000)
|
||||
int scanned = sscanf(str.c_str()+p+1, "%d%c", &default_port, &null_byte);
|
||||
if (scanned != 1 || default_port >= 0x10000)
|
||||
return false;
|
||||
str = str.substr(0, p);
|
||||
}
|
||||
|
@@ -143,34 +143,83 @@ uint64_t allocator::get_free_count()
|
||||
return free;
|
||||
}
|
||||
|
||||
// FIXME: Move to utils?
|
||||
void bitmap_set(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
|
||||
{
|
||||
if (start == 0)
|
||||
if (start == 0 && len == 32*bitmap_granularity)
|
||||
*((uint32_t*)bitmap) = UINT32_MAX;
|
||||
else if (start == 0 && len == 64*bitmap_granularity)
|
||||
*((uint64_t*)bitmap) = UINT64_MAX;
|
||||
else
|
||||
{
|
||||
if (len == 32*bitmap_granularity)
|
||||
unsigned bit_start = start / bitmap_granularity;
|
||||
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
|
||||
while (bit_start < bit_end)
|
||||
{
|
||||
*((uint32_t*)bitmap) = UINT32_MAX;
|
||||
return;
|
||||
}
|
||||
else if (len == 64*bitmap_granularity)
|
||||
{
|
||||
*((uint64_t*)bitmap) = UINT64_MAX;
|
||||
return;
|
||||
}
|
||||
}
|
||||
unsigned bit_start = start / bitmap_granularity;
|
||||
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
|
||||
while (bit_start < bit_end)
|
||||
{
|
||||
if (!(bit_start & 7) && bit_end >= bit_start+8)
|
||||
{
|
||||
((uint8_t*)bitmap)[bit_start / 8] = UINT8_MAX;
|
||||
bit_start += 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
((uint8_t*)bitmap)[bit_start / 8] |= 1 << (bit_start % 8);
|
||||
bit_start++;
|
||||
if (!(bit_start & 7) && bit_end >= bit_start+8)
|
||||
{
|
||||
((uint8_t*)bitmap)[bit_start / 8] = UINT8_MAX;
|
||||
bit_start += 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
((uint8_t*)bitmap)[bit_start / 8] |= 1 << (bit_start % 8);
|
||||
bit_start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void bitmap_clear(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
|
||||
{
|
||||
if (start == 0 && len == 32*bitmap_granularity)
|
||||
*((uint32_t*)bitmap) = 0;
|
||||
else if (start == 0 && len == 64*bitmap_granularity)
|
||||
*((uint64_t*)bitmap) = 0;
|
||||
else
|
||||
{
|
||||
unsigned bit_start = start / bitmap_granularity;
|
||||
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
|
||||
while (bit_start < bit_end)
|
||||
{
|
||||
if (!(bit_start & 7) && bit_end >= bit_start+8)
|
||||
{
|
||||
((uint8_t*)bitmap)[bit_start / 8] = 0;
|
||||
bit_start += 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
((uint8_t*)bitmap)[bit_start / 8] &= (0xFF ^ (1 << (bit_start % 8)));
|
||||
bit_start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool bitmap_check(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity)
|
||||
{
|
||||
bool r = false;
|
||||
if (start == 0 && len == 32*bitmap_granularity)
|
||||
r = !!*((uint32_t*)bitmap);
|
||||
else if (start == 0 && len == 64*bitmap_granularity)
|
||||
r = !!*((uint64_t*)bitmap);
|
||||
else
|
||||
{
|
||||
unsigned bit_start = start / bitmap_granularity;
|
||||
unsigned bit_end = ((start + len) + bitmap_granularity - 1) / bitmap_granularity;
|
||||
while (bit_start < bit_end)
|
||||
{
|
||||
if (!(bit_start & 7) && bit_end >= bit_start+8)
|
||||
{
|
||||
r = r || !!((uint8_t*)bitmap)[bit_start / 8];
|
||||
bit_start += 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
r = r || (((uint8_t*)bitmap)[bit_start / 8] & (1 << (bit_start % 8)));
|
||||
bit_start++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
@@ -23,3 +23,5 @@ public:
|
||||
};
|
||||
|
||||
void bitmap_set(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
|
||||
void bitmap_clear(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
|
||||
bool bitmap_check(void *bitmap, uint64_t start, uint64_t len, uint64_t bitmap_granularity);
|
||||
|
@@ -77,6 +77,7 @@ Output:
|
||||
-EINVAL = invalid input parameters
|
||||
-ENOENT = requested object/version does not exist for reads
|
||||
-ENOSPC = no space left in the store for writes
|
||||
-EDOM = checksum error.
|
||||
- version = the version actually read or written
|
||||
|
||||
## BS_OP_DELETE
|
||||
|
@@ -40,10 +40,49 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
|
||||
data_block_size = parse_size(config["block_size"]);
|
||||
journal_device = config["journal_device"];
|
||||
journal_offset = parse_size(config["journal_offset"]);
|
||||
disk_alignment = strtoull(config["disk_alignment"].c_str(), NULL, 10);
|
||||
journal_block_size = strtoull(config["journal_block_size"].c_str(), NULL, 10);
|
||||
meta_block_size = strtoull(config["meta_block_size"].c_str(), NULL, 10);
|
||||
bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10);
|
||||
disk_alignment = parse_size(config["disk_alignment"]);
|
||||
journal_block_size = parse_size(config["journal_block_size"]);
|
||||
meta_block_size = parse_size(config["meta_block_size"]);
|
||||
bitmap_granularity = parse_size(config["bitmap_granularity"]);
|
||||
meta_format = stoull_full(config["meta_format"]);
|
||||
if (config.find("data_io") == config.end() &&
|
||||
config.find("meta_io") == config.end() &&
|
||||
config.find("journal_io") == config.end())
|
||||
{
|
||||
bool cached_io_data = config["cached_io_data"] == "true" || config["cached_io_data"] == "yes" || config["cached_io_data"] == "1";
|
||||
bool cached_io_meta = cached_io_data && (meta_device == data_device || meta_device == "") &&
|
||||
config.find("cached_io_meta") == config.end() ||
|
||||
config["cached_io_meta"] == "true" || config["cached_io_meta"] == "yes" || config["cached_io_meta"] == "1";
|
||||
bool cached_io_journal = cached_io_meta && (journal_device == meta_device || journal_device == "") &&
|
||||
config.find("cached_io_journal") == config.end() ||
|
||||
config["cached_io_journal"] == "true" || config["cached_io_journal"] == "yes" || config["cached_io_journal"] == "1";
|
||||
data_io = cached_io_data ? "cached" : "direct";
|
||||
meta_io = cached_io_meta ? "cached" : "direct";
|
||||
journal_io = cached_io_journal ? "cached" : "direct";
|
||||
}
|
||||
else
|
||||
{
|
||||
data_io = config.find("data_io") != config.end() ? config["data_io"] : "direct";
|
||||
meta_io = config.find("meta_io") != config.end()
|
||||
? config["meta_io"]
|
||||
: (meta_device == data_device || meta_device == "" ? data_io : "direct");
|
||||
journal_io = config.find("journal_io") != config.end()
|
||||
? config["journal_io"]
|
||||
: (journal_device == meta_device || journal_device == "" ? meta_io : "direct");
|
||||
}
|
||||
if (config["data_csum_type"] == "crc32c")
|
||||
{
|
||||
data_csum_type = BLOCKSTORE_CSUM_CRC32C;
|
||||
}
|
||||
else if (config["data_csum_type"] == "" || config["data_csum_type"] == "none")
|
||||
{
|
||||
data_csum_type = BLOCKSTORE_CSUM_NONE;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error("data_csum_type="+config["data_csum_type"]+" is unsupported, only \"crc32c\" and \"none\" are supported");
|
||||
}
|
||||
csum_block_size = parse_size(config["csum_block_size"]);
|
||||
// Validate
|
||||
if (!data_block_size)
|
||||
{
|
||||
@@ -91,7 +130,23 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
|
||||
}
|
||||
if (data_block_size % bitmap_granularity)
|
||||
{
|
||||
throw std::runtime_error("Block size must be a multiple of sparse write tracking granularity");
|
||||
throw std::runtime_error("Data block size must be a multiple of sparse write tracking granularity");
|
||||
}
|
||||
if (!data_csum_type)
|
||||
{
|
||||
csum_block_size = 0;
|
||||
}
|
||||
else if (!csum_block_size)
|
||||
{
|
||||
csum_block_size = bitmap_granularity;
|
||||
}
|
||||
if (csum_block_size && (csum_block_size % bitmap_granularity))
|
||||
{
|
||||
throw std::runtime_error("Checksum block size must be a multiple of sparse write tracking granularity");
|
||||
}
|
||||
if (csum_block_size && (data_block_size % csum_block_size))
|
||||
{
|
||||
throw std::runtime_error("Checksum block size must be a divisor of data block size");
|
||||
}
|
||||
if (meta_device == "")
|
||||
{
|
||||
@@ -110,7 +165,9 @@ void blockstore_disk_t::parse_config(std::map<std::string, std::string> & config
|
||||
throw std::runtime_error("journal_offset must be a multiple of journal_block_size = "+std::to_string(journal_block_size));
|
||||
}
|
||||
clean_entry_bitmap_size = data_block_size / bitmap_granularity / 8;
|
||||
clean_entry_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
|
||||
clean_dyn_size = clean_entry_bitmap_size*2 + (csum_block_size
|
||||
? data_block_size/csum_block_size*(data_csum_type & 0xFF) : 0);
|
||||
clean_entry_size = sizeof(clean_disk_entry) + clean_dyn_size + 4 /*entry_csum*/;
|
||||
}
|
||||
|
||||
void blockstore_disk_t::calc_lengths(bool skip_meta_check)
|
||||
@@ -160,6 +217,25 @@ void blockstore_disk_t::calc_lengths(bool skip_meta_check)
|
||||
// required metadata size
|
||||
block_count = data_len / data_block_size;
|
||||
meta_len = (1 + (block_count - 1 + meta_block_size / clean_entry_size) / (meta_block_size / clean_entry_size)) * meta_block_size;
|
||||
if (meta_format == BLOCKSTORE_META_FORMAT_V1 ||
|
||||
!meta_format && !skip_meta_check && meta_area_size < meta_len && !data_csum_type)
|
||||
{
|
||||
uint64_t clean_entry_v0_size = sizeof(clean_disk_entry) + 2*clean_entry_bitmap_size;
|
||||
uint64_t meta_v0_len = (1 + (block_count - 1 + meta_block_size / clean_entry_v0_size)
|
||||
/ (meta_block_size / clean_entry_v0_size)) * meta_block_size;
|
||||
if (meta_format == BLOCKSTORE_META_FORMAT_V1 || meta_area_size >= meta_v0_len)
|
||||
{
|
||||
// Old metadata fits.
|
||||
printf("Warning: Using old metadata format without checksums because the new format doesn't fit into provided area\n");
|
||||
clean_entry_size = clean_entry_v0_size;
|
||||
meta_len = meta_v0_len;
|
||||
meta_format = BLOCKSTORE_META_FORMAT_V1;
|
||||
}
|
||||
else
|
||||
meta_format = BLOCKSTORE_META_FORMAT_V2;
|
||||
}
|
||||
else
|
||||
meta_format = BLOCKSTORE_META_FORMAT_V2;
|
||||
if (!skip_meta_check && meta_area_size < meta_len)
|
||||
{
|
||||
throw std::runtime_error("Metadata area is too small, need at least "+std::to_string(meta_len)+" bytes");
|
||||
@@ -214,9 +290,19 @@ static void check_size(int fd, uint64_t *size, uint64_t *sectsize, std::string n
|
||||
}
|
||||
}
|
||||
|
||||
static int bs_openmode(const std::string & mode)
|
||||
{
|
||||
if (mode == "directsync")
|
||||
return O_DIRECT|O_SYNC;
|
||||
else if (mode == "cached")
|
||||
return O_SYNC;
|
||||
else
|
||||
return O_DIRECT;
|
||||
}
|
||||
|
||||
void blockstore_disk_t::open_data()
|
||||
{
|
||||
data_fd = open(data_device.c_str(), O_DIRECT|O_RDWR);
|
||||
data_fd = open(data_device.c_str(), bs_openmode(data_io) | O_RDWR);
|
||||
if (data_fd == -1)
|
||||
{
|
||||
throw std::runtime_error("Failed to open data device "+data_device+": "+std::string(strerror(errno)));
|
||||
@@ -241,9 +327,9 @@ void blockstore_disk_t::open_data()
|
||||
|
||||
void blockstore_disk_t::open_meta()
|
||||
{
|
||||
if (meta_device != data_device)
|
||||
if (meta_device != data_device || meta_io != data_io)
|
||||
{
|
||||
meta_fd = open(meta_device.c_str(), O_DIRECT|O_RDWR);
|
||||
meta_fd = open(meta_device.c_str(), bs_openmode(meta_io) | O_RDWR);
|
||||
if (meta_fd == -1)
|
||||
{
|
||||
throw std::runtime_error("Failed to open metadata device "+meta_device+": "+std::string(strerror(errno)));
|
||||
@@ -253,7 +339,7 @@ void blockstore_disk_t::open_meta()
|
||||
{
|
||||
throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_device_size));
|
||||
}
|
||||
if (!disable_flock && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
|
||||
if (!disable_flock && meta_device != data_device && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
|
||||
{
|
||||
throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno));
|
||||
}
|
||||
@@ -279,15 +365,15 @@ void blockstore_disk_t::open_meta()
|
||||
|
||||
void blockstore_disk_t::open_journal()
|
||||
{
|
||||
if (journal_device != meta_device)
|
||||
if (journal_device != meta_device || journal_io != meta_io)
|
||||
{
|
||||
journal_fd = open(journal_device.c_str(), O_DIRECT|O_RDWR);
|
||||
journal_fd = open(journal_device.c_str(), bs_openmode(journal_io) | O_RDWR);
|
||||
if (journal_fd == -1)
|
||||
{
|
||||
throw std::runtime_error("Failed to open journal device "+journal_device+": "+std::string(strerror(errno)));
|
||||
}
|
||||
check_size(journal_fd, &journal_device_size, &journal_device_sect, "journal device");
|
||||
if (!disable_flock && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
|
||||
if (!disable_flock && journal_device != meta_device && flock(journal_fd, LOCK_EX|LOCK_NB) != 0)
|
||||
{
|
||||
throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno));
|
||||
}
|
||||
|
@@ -8,6 +8,10 @@
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
#define BLOCKSTORE_CSUM_NONE 0
|
||||
// Lower byte of checksum type is its length
|
||||
#define BLOCKSTORE_CSUM_CRC32C 0x104
|
||||
|
||||
struct blockstore_disk_t
|
||||
{
|
||||
std::string data_device, meta_device, journal_device;
|
||||
@@ -21,17 +25,24 @@ struct blockstore_disk_t
|
||||
uint64_t meta_block_size = 4096;
|
||||
// Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
|
||||
uint64_t bitmap_granularity = 4096;
|
||||
// Data checksum type, BLOCKSTORE_CSUM_NONE or BLOCKSTORE_CSUM_CRC32C
|
||||
uint32_t data_csum_type = BLOCKSTORE_CSUM_NONE;
|
||||
// Checksum block size, must be a multiple of bitmap_granularity
|
||||
uint32_t csum_block_size = 4096;
|
||||
// By default, Blockstore locks all opened devices exclusively. This option can be used to disable locking
|
||||
bool disable_flock = false;
|
||||
// I/O modes for data, metadata and journal: direct or "" = O_DIRECT, cached = O_SYNC, directsync = O_DIRECT|O_SYNC
|
||||
// O_SYNC without O_DIRECT = use Linux page cache for reads and writes
|
||||
std::string data_io, meta_io, journal_io;
|
||||
|
||||
int meta_fd = -1, data_fd = -1, journal_fd = -1;
|
||||
uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len;
|
||||
uint64_t meta_offset, meta_device_sect, meta_device_size, meta_len, meta_format = 0;
|
||||
uint64_t data_offset, data_device_sect, data_device_size, data_len;
|
||||
uint64_t journal_offset, journal_device_sect, journal_device_size, journal_len;
|
||||
|
||||
uint32_t block_order;
|
||||
uint64_t block_count;
|
||||
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0;
|
||||
uint32_t clean_entry_bitmap_size = 0, clean_entry_size = 0, clean_dyn_size = 0;
|
||||
|
||||
void parse_config(std::map<std::string, std::string> & config);
|
||||
void open_data();
|
||||
@@ -39,4 +50,13 @@ struct blockstore_disk_t
|
||||
void open_journal();
|
||||
void calc_lengths(bool skip_meta_check = false);
|
||||
void close_all();
|
||||
|
||||
inline uint64_t dirty_dyn_size(uint64_t offset, uint64_t len)
|
||||
{
|
||||
// Checksums may be partial if write is not aligned with csum_block_size
|
||||
return clean_entry_bitmap_size + (csum_block_size && len > 0
|
||||
? ((offset+len+csum_block_size-1)/csum_block_size - offset/csum_block_size)
|
||||
* (data_csum_type & 0xFF)
|
||||
: 0);
|
||||
}
|
||||
};
|
||||
|
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user