Compare commits

...

6 Commits

Author SHA1 Message Date
45490f4e51 Try to catch "data lost during self-heal"
All checks were successful
Test / buildenv (push) Successful in 19s
Test / build (push) Successful in 2m35s
Test / make_test (push) Successful in 44s
Test / test_heal_pg_size_2 (push) Successful in 5m24s
Test / test_heal_csum_32k_dmj (push) Successful in 5m19s
Test / test_heal_csum_32k_dj (push) Successful in 5m15s
Test / test_heal_ec (push) Successful in 6m10s
Test / test_heal_csum_32k (push) Successful in 5m49s
Test / test_heal_csum_4k_dj (push) Successful in 5m45s
Test / test_heal_csum_4k_dmj (push) Successful in 5m48s
Test / test_heal_csum_4k (push) Successful in 6m34s
2024-02-21 19:24:36 +03:00
b3c15db331 32M journal by default in simple-offsets
All checks were successful
Test / test_snapshot_ec (push) Successful in 30s
Test / test_rm (push) Successful in 18s
Test / test_move_reappear (push) Successful in 24s
Test / test_snapshot_down (push) Successful in 26s
Test / test_snapshot_down_ec (push) Successful in 30s
Test / test_splitbrain (push) Successful in 23s
Test / test_snapshot_chain (push) Successful in 2m17s
Test / test_snapshot_chain_ec (push) Successful in 2m55s
Test / test_rebalance_verify_imm (push) Successful in 2m46s
Test / test_rebalance_verify (push) Successful in 3m9s
Test / test_switch_primary (push) Successful in 39s
Test / test_write (push) Successful in 43s
Test / test_write_no_same (push) Successful in 19s
Test / test_write_xor (push) Successful in 55s
Test / test_rebalance_verify_ec (push) Successful in 3m35s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m37s
Test / test_heal_pg_size_2 (push) Successful in 3m36s
Test / test_heal_ec (push) Successful in 5m47s
Test / test_heal_csum_32k_dmj (push) Successful in 5m21s
Test / test_heal_csum_32k_dj (push) Successful in 6m16s
Test / test_heal_csum_32k (push) Successful in 6m45s
Test / test_scrub (push) Successful in 1m56s
Test / test_heal_csum_4k_dj (push) Successful in 6m39s
Test / test_heal_csum_4k_dmj (push) Successful in 6m42s
Test / test_scrub_zero_osd_2 (push) Successful in 1m16s
Test / test_scrub_xor (push) Successful in 47s
Test / test_scrub_pg_size_3 (push) Successful in 1m26s
Test / test_heal_csum_4k (push) Successful in 6m32s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 48s
Test / test_scrub_ec (push) Successful in 49s
2024-02-21 15:25:02 +03:00
685bcd6ef9 Do not reserve extra space for big_writes during sync - sync itself is needed to commit and clear them 2024-02-21 13:00:14 +03:00
3eb389b321 Supposed fix for "unexpected state during flush: 0x51" with EC
Some checks failed
Test / test_move_reappear (push) Successful in 22s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s
Test / test_rm (push) Successful in 16s
Test / test_snapshot_down (push) Successful in 31s
Test / test_snapshot_down_ec (push) Successful in 32s
Test / test_splitbrain (push) Successful in 25s
Test / test_snapshot_chain (push) Successful in 2m4s
Test / test_snapshot_chain_ec (push) Successful in 2m51s
Test / test_rebalance_verify_imm (push) Successful in 2m47s
Test / test_rebalance_verify (push) Successful in 3m30s
Test / test_switch_primary (push) Successful in 38s
Test / test_write (push) Successful in 51s
Test / test_write_no_same (push) Successful in 16s
Test / test_write_xor (push) Successful in 52s
Test / test_rebalance_verify_ec (push) Successful in 3m32s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m7s
Test / test_scrub_zero_osd_2 (push) Successful in 59s
Test / test_scrub (push) Successful in 1m2s
Test / test_scrub_xor (push) Successful in 36s
Test / test_scrub_ec (push) Successful in 38s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s
Test / test_scrub_pg_size_3 (push) Successful in 49s
Test / test_heal_csum_32k_dmj (push) Successful in 5m12s
Test / test_heal_csum_32k_dj (push) Successful in 5m8s
Test / test_heal_csum_32k (push) Successful in 4m55s
Test / test_heal_ec (push) Failing after 10m14s
Test / test_heal_csum_4k_dmj (push) Successful in 4m59s
Test / test_heal_csum_4k_dj (push) Successful in 5m5s
Test / test_heal_pg_size_2 (push) Successful in 3m54s
Test / test_heal_csum_4k (push) Successful in 3m49s
2024-02-21 01:32:06 +03:00
3d16cde23c Fix assertions, add small sequential write test
Some checks failed
Test / test_snapshot_down_ec (push) Successful in 32s
Test / test_splitbrain (push) Successful in 22s
Test / test_snapshot_chain (push) Successful in 2m8s
Test / test_snapshot_chain_ec (push) Successful in 2m48s
Test / test_rebalance_verify_imm (push) Successful in 2m57s
Test / test_rebalance_verify (push) Successful in 3m29s
Test / test_switch_primary (push) Successful in 36s
Test / test_write (push) Successful in 54s
Test / test_write_xor (push) Successful in 51s
Test / test_write_no_same (push) Successful in 16s
Test / test_rebalance_verify_ec (push) Successful in 3m40s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m20s
Test / test_scrub (push) Successful in 1m1s
Test / test_scrub_zero_osd_2 (push) Successful in 46s
Test / test_scrub_xor (push) Successful in 41s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m0s
Test / test_scrub_ec (push) Successful in 58s
Test / test_scrub_pg_size_3 (push) Successful in 1m45s
Test / test_heal_pg_size_2 (push) Failing after 4m52s
Test / test_heal_csum_32k_dmj (push) Successful in 5m36s
Test / test_heal_csum_32k_dj (push) Successful in 5m33s
Test / test_interrupted_rebalance_imm (push) Successful in 1m35s
Test / test_interrupted_rebalance (push) Successful in 2m28s
Test / test_interrupted_rebalance_ec (push) Successful in 2m30s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m41s
Test / test_heal_ec (push) Failing after 10m20s
Test / test_heal_csum_4k_dmj (push) Successful in 4m21s
Test / test_heal_csum_32k (push) Successful in 5m15s
Test / test_heal_csum_4k_dj (push) Successful in 5m48s
Test / test_heal_csum_4k (push) Successful in 5m32s
2024-02-20 19:41:48 +03:00
c6406d67fc Fix journal space_check incorrectly checking for space at the beginning 2024-02-20 19:40:56 +03:00
11 changed files with 72 additions and 691 deletions

View File

@@ -64,546 +64,6 @@ jobs:
# leak sanitizer sometimes crashes # leak sanitizer sometimes crashes
- run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test - run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test
test_add_osd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_add_osd.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_cas:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_cas.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_size:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_size.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_create_nomaxid:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_create_nomaxid.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_etcd_fail:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_etcd_fail.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_failure_domain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_failure_domain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_minsize_1:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_minsize_1.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_move_reappear:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_move_reappear.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 6
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_splitbrain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_splitbrain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_switch_primary:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_switch_primary.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_no_same:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write_no_same.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_pg_size_2: test_heal_pg_size_2:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: build needs: build
@@ -611,7 +71,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: PG_SIZE=2 /root/vitastor/tests/test_heal.sh run: PG_SIZE=2 /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -629,7 +89,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: SCHEME=ec /root/vitastor/tests/test_heal.sh run: SCHEME=ec /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -647,7 +107,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh run: TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -665,7 +125,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: TEST_NAME=csum_32k_dj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh run: TEST_NAME=csum_32k_dj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -683,7 +143,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: TEST_NAME=csum_32k OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh run: TEST_NAME=csum_32k OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -701,7 +161,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh run: TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -719,7 +179,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh run: TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -737,7 +197,7 @@ jobs:
steps: steps:
- name: Run test - name: Run test
id: test id: test
timeout-minutes: 10 timeout-minutes: 1000
run: TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh run: TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs - name: Print logs
if: always() && steps.test.outcome == 'failure' if: always() && steps.test.outcome == 'failure'
@@ -747,112 +207,3 @@ jobs:
cat $i cat $i
echo "" echo ""
done done
test_scrub:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_zero_osd_2:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: ZERO_OSD=2 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_3:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=3 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done

View File

@@ -261,7 +261,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
``` ```
--object_size 128k Set blockstore block size --object_size 128k Set blockstore block size
--bitmap_granularity 4k Set bitmap granularity --bitmap_granularity 4k Set bitmap granularity
--journal_size 16M Set journal size --journal_size 32M Set journal size
--data_csum_type none Set data checksum type (crc32c or none) --data_csum_type none Set data checksum type (crc32c or none)
--csum_block_size 4k Set data checksum block size --csum_block_size 4k Set data checksum block size
--device_block_size 4k Set device block size --device_block_size 4k Set device block size

View File

@@ -267,7 +267,7 @@ OSD отключены fsync-и.
``` ```
--object_size 128k Размер блока хранилища --object_size 128k Размер блока хранилища
--bitmap_granularity 4k Гранулярность битовых карт --bitmap_granularity 4k Гранулярность битовых карт
--journal_size 16M Размер журнала --journal_size 32M Размер журнала
--data_csum_type none Задать тип контрольных сумм (crc32c или none) --data_csum_type none Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k Задать размер блока расчёта контрольных сумм --csum_block_size 4k Задать размер блока расчёта контрольных сумм
--device_block_size 4k Размер блока устройства --device_block_size 4k Размер блока устройства

View File

@@ -103,7 +103,7 @@ int blockstore_journal_check_t::check_available(blockstore_op_t *op, int entries
if (data_after > 0) if (data_after > 0)
{ {
next_pos = next_pos + data_after; next_pos = next_pos + data_after;
if (next_pos > bs->journal.len) if (next_pos >= bs->journal.len)
{ {
if (right_dir) if (right_dir)
next_pos = bs->journal.block_size + data_after; next_pos = bs->journal.block_size + data_after;
@@ -146,7 +146,7 @@ journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type,
journal.in_sector_pos = 0; journal.in_sector_pos = 0;
auto next_next_free = (journal.next_free+journal.block_size) < journal.len ? journal.next_free + journal.block_size : journal.block_size; auto next_next_free = (journal.next_free+journal.block_size) < journal.len ? journal.next_free + journal.block_size : journal.block_size;
// double check that next_free doesn't cross used_start from the left // double check that next_free doesn't cross used_start from the left
assert(journal.next_free >= journal.used_start || next_next_free < journal.used_start); assert(journal.next_free >= journal.used_start && next_next_free >= journal.next_free || next_next_free < journal.used_start);
journal.next_free = next_next_free; journal.next_free = next_next_free;
memset(journal.inmemory memset(journal.inmemory
? (uint8_t*)journal.buffer + journal.sector_info[journal.cur_sector].offset ? (uint8_t*)journal.buffer + journal.sector_info[journal.cur_sector].offset

View File

@@ -307,35 +307,49 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
return STAB_SPLIT_DONE; return STAB_SPLIT_DONE;
} }
} }
else if (IS_IN_FLIGHT(dirty_it->second.state))
{
// Object write is still in progress. Wait until the write request completes
return STAB_SPLIT_WAIT;
}
else if (!IS_SYNCED(dirty_it->second.state))
{
// Object not synced yet - sync it
// In previous versions we returned EBUSY here and required
// the caller (OSD) to issue a global sync first. But a global sync
// waits for all writes in the queue including inflight writes. And
// inflight writes may themselves be blocked by unstable writes being
// still present in the journal and not flushed away from it.
// So we must sync specific objects here.
//
// Even more, we have to process "stabilize" request in parts. That is,
// we must stabilize all objects which are already synced. Otherwise
// they may block objects which are NOT synced yet.
return STAB_SPLIT_SYNC;
}
else if (IS_STABLE(dirty_it->second.state)) else if (IS_STABLE(dirty_it->second.state))
{ {
// Already stable // Already stable
return STAB_SPLIT_DONE; return STAB_SPLIT_DONE;
} }
else while (true)
{ {
return STAB_SPLIT_TODO; if (IS_IN_FLIGHT(dirty_it->second.state))
{
// Object write is still in progress. Wait until the write request completes
return STAB_SPLIT_WAIT;
}
else if (!IS_SYNCED(dirty_it->second.state))
{
// Object not synced yet - sync it
// In previous versions we returned EBUSY here and required
// the caller (OSD) to issue a global sync first. But a global sync
// waits for all writes in the queue including inflight writes. And
// inflight writes may themselves be blocked by unstable writes being
// still present in the journal and not flushed away from it.
// So we must sync specific objects here.
//
// Even more, we have to process "stabilize" request in parts. That is,
// we must stabilize all objects which are already synced. Otherwise
// they may block objects which are NOT synced yet.
return STAB_SPLIT_SYNC;
}
else if (IS_STABLE(dirty_it->second.state))
{
break;
}
// Check previous versions too
if (dirty_it == dirty_db.begin())
{
break;
}
dirty_it--;
if (dirty_it->first.oid != ov.oid)
{
break;
}
} }
return STAB_SPLIT_TODO;
}); });
if (r != 1) if (r != 1)
{ {

View File

@@ -76,7 +76,6 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
// 2nd step: Data device is synced, prepare & write journal entries // 2nd step: Data device is synced, prepare & write journal entries
// Check space in the journal and journal memory buffers // Check space in the journal and journal memory buffers
blockstore_journal_check_t space_check(this); blockstore_journal_check_t space_check(this);
auto reservation = (unstable_writes.size()+unstable_unsynced+PRIV(op)->sync_big_writes.size())*journal.block_size;
if (dsk.csum_block_size) if (dsk.csum_block_size)
{ {
// More complex check because all journal entries have different lengths // More complex check because all journal entries have different lengths
@@ -86,14 +85,14 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
left--; left--;
auto & dirty_entry = dirty_db.at(sbw); auto & dirty_entry = dirty_db.at(sbw);
uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len); uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, left ? 0 : reservation)) if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, 0))
{ {
return 0; return 0;
} }
} }
} }
else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(), else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, reservation)) sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0))
{ {
return 0; return 0;
} }

View File

@@ -475,7 +475,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
} }
} }
// double check that next_free doesn't cross used_start from the left // double check that next_free doesn't cross used_start from the left
assert(journal.next_free >= journal.used_start || next_next_free < journal.used_start); assert(journal.next_free >= journal.used_start && next_next_free >= journal.next_free || next_next_free < journal.used_start);
journal.next_free = next_next_free; journal.next_free = next_next_free;
je->oid = op->oid; je->oid = op->oid;
je->version = op->version; je->version = op->version;
@@ -517,7 +517,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
if (next_next_free >= journal.len) if (next_next_free >= journal.len)
next_next_free = dsk.journal_block_size; next_next_free = dsk.journal_block_size;
// double check that next_free doesn't cross used_start from the left // double check that next_free doesn't cross used_start from the left
assert(journal.next_free >= journal.used_start || next_next_free < journal.used_start); assert(journal.next_free >= journal.used_start && next_next_free >= journal.next_free || next_next_free < journal.used_start);
journal.next_free = next_next_free; journal.next_free = next_next_free;
if (!(dirty_it->second.state & BS_ST_INSTANT)) if (!(dirty_it->second.state & BS_ST_INSTANT))
{ {

View File

@@ -47,7 +47,7 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
if (!bitmap_granularity) if (!bitmap_granularity)
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY; bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
if (!journal_size) if (!journal_size)
journal_size = 16*1024*1024; journal_size = 32*1024*1024;
if (!device_block_size) if (!device_block_size)
device_block_size = 4096; device_block_size = 4096;
if (!data_csum_type) if (!data_csum_type)

View File

@@ -167,7 +167,7 @@ static const char *help_text =
" Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n" " Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n"
" --object_size 128k Set blockstore block size\n" " --object_size 128k Set blockstore block size\n"
" --bitmap_granularity 4k Set bitmap granularity\n" " --bitmap_granularity 4k Set bitmap granularity\n"
" --journal_size 16M Set journal size\n" " --journal_size 32M Set journal size\n"
" --data_csum_type none Set data checksum type (crc32c or none)\n" " --data_csum_type none Set data checksum type (crc32c or none)\n"
" --csum_block_size 4k Set data checksum block size\n" " --csum_block_size 4k Set data checksum block size\n"
" --device_block_size 4k Set device block size\n" " --device_block_size 4k Set device block size\n"

View File

@@ -33,7 +33,7 @@ kill_osds()
for kill_osd in $(seq 2 $OSD_COUNT); do for kill_osd in $(seq 2 $OSD_COUNT); do
sleep 15 sleep 15
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies # Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\ wait_condition 600 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT" jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1)) echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
p=OSD${kill_osd}_PID p=OSD${kill_osd}_PID
@@ -60,6 +60,7 @@ qemu-img convert -S 4096 -p \
-O raw ./testdata/read.bin -O raw ./testdata/read.bin
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
sleep 100000
format_error Data lost during self-heal format_error Data lost during self-heal
fi fi

View File

@@ -6,21 +6,37 @@ check_qemu
#LD_PRELOAD=libasan.so.5 \ #LD_PRELOAD=libasan.so.5 \
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M # fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
# Small sequential writes were causing various bugs at different moments
echo Small sequential writes
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -numjobs=1 -iodepth=16 \
-rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
# Random writes without immediate_commit were stalling OSDs # Random writes without immediate_commit were stalling OSDs
echo 68k random writes
LD_PRELOAD="build/src/libfio_vitastor.so" \ LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=68k -direct=1 -numjobs=16 -iodepth=4 \ fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=68k -direct=1 -numjobs=16 -iodepth=4 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10 -rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
# A lot of parallel syncs was crashing the primary OSD at some point # A lot of parallel syncs was crashing the primary OSD at some point
echo T64Q1 writes with fsync
LD_PRELOAD="build/src/libfio_vitastor.so" \ LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -numjobs=64 -iodepth=1 -fsync=1 \ fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -numjobs=64 -iodepth=1 -fsync=1 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=100 -rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=100
echo Linear write
LD_PRELOAD="build/src/libfio_vitastor.so" \ LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -cluster_log_level=10 fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -cluster_log_level=10
echo T1Q1 writes with fsync=32
LD_PRELOAD="build/src/libfio_vitastor.so" \ LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface \ fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=1024 -rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=1024