Compare commits

...

19 Commits

Author SHA1 Message Date
45490f4e51 Try to catch "data lost during self-heal"
All checks were successful
Test / buildenv (push) Successful in 19s
Test / build (push) Successful in 2m35s
Test / make_test (push) Successful in 44s
Test / test_heal_pg_size_2 (push) Successful in 5m24s
Test / test_heal_csum_32k_dmj (push) Successful in 5m19s
Test / test_heal_csum_32k_dj (push) Successful in 5m15s
Test / test_heal_ec (push) Successful in 6m10s
Test / test_heal_csum_32k (push) Successful in 5m49s
Test / test_heal_csum_4k_dj (push) Successful in 5m45s
Test / test_heal_csum_4k_dmj (push) Successful in 5m48s
Test / test_heal_csum_4k (push) Successful in 6m34s
2024-02-21 19:24:36 +03:00
b3c15db331 32M journal by default in simple-offsets
All checks were successful
Test / test_snapshot_ec (push) Successful in 30s
Test / test_rm (push) Successful in 18s
Test / test_move_reappear (push) Successful in 24s
Test / test_snapshot_down (push) Successful in 26s
Test / test_snapshot_down_ec (push) Successful in 30s
Test / test_splitbrain (push) Successful in 23s
Test / test_snapshot_chain (push) Successful in 2m17s
Test / test_snapshot_chain_ec (push) Successful in 2m55s
Test / test_rebalance_verify_imm (push) Successful in 2m46s
Test / test_rebalance_verify (push) Successful in 3m9s
Test / test_switch_primary (push) Successful in 39s
Test / test_write (push) Successful in 43s
Test / test_write_no_same (push) Successful in 19s
Test / test_write_xor (push) Successful in 55s
Test / test_rebalance_verify_ec (push) Successful in 3m35s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m37s
Test / test_heal_pg_size_2 (push) Successful in 3m36s
Test / test_heal_ec (push) Successful in 5m47s
Test / test_heal_csum_32k_dmj (push) Successful in 5m21s
Test / test_heal_csum_32k_dj (push) Successful in 6m16s
Test / test_heal_csum_32k (push) Successful in 6m45s
Test / test_scrub (push) Successful in 1m56s
Test / test_heal_csum_4k_dj (push) Successful in 6m39s
Test / test_heal_csum_4k_dmj (push) Successful in 6m42s
Test / test_scrub_zero_osd_2 (push) Successful in 1m16s
Test / test_scrub_xor (push) Successful in 47s
Test / test_scrub_pg_size_3 (push) Successful in 1m26s
Test / test_heal_csum_4k (push) Successful in 6m32s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 48s
Test / test_scrub_ec (push) Successful in 49s
2024-02-21 15:25:02 +03:00
685bcd6ef9 Do not reserve extra space for big_writes during sync - sync itself is needed to commit and clear them 2024-02-21 13:00:14 +03:00
3eb389b321 Supposed fix for "unexpected state during flush: 0x51" with EC
Some checks failed
Test / test_move_reappear (push) Successful in 22s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s
Test / test_rm (push) Successful in 16s
Test / test_snapshot_down (push) Successful in 31s
Test / test_snapshot_down_ec (push) Successful in 32s
Test / test_splitbrain (push) Successful in 25s
Test / test_snapshot_chain (push) Successful in 2m4s
Test / test_snapshot_chain_ec (push) Successful in 2m51s
Test / test_rebalance_verify_imm (push) Successful in 2m47s
Test / test_rebalance_verify (push) Successful in 3m30s
Test / test_switch_primary (push) Successful in 38s
Test / test_write (push) Successful in 51s
Test / test_write_no_same (push) Successful in 16s
Test / test_write_xor (push) Successful in 52s
Test / test_rebalance_verify_ec (push) Successful in 3m32s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m7s
Test / test_scrub_zero_osd_2 (push) Successful in 59s
Test / test_scrub (push) Successful in 1m2s
Test / test_scrub_xor (push) Successful in 36s
Test / test_scrub_ec (push) Successful in 38s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s
Test / test_scrub_pg_size_3 (push) Successful in 49s
Test / test_heal_csum_32k_dmj (push) Successful in 5m12s
Test / test_heal_csum_32k_dj (push) Successful in 5m8s
Test / test_heal_csum_32k (push) Successful in 4m55s
Test / test_heal_ec (push) Failing after 10m14s
Test / test_heal_csum_4k_dmj (push) Successful in 4m59s
Test / test_heal_csum_4k_dj (push) Successful in 5m5s
Test / test_heal_pg_size_2 (push) Successful in 3m54s
Test / test_heal_csum_4k (push) Successful in 3m49s
2024-02-21 01:32:06 +03:00
3d16cde23c Fix assertions, add small sequential write test
Some checks failed
Test / test_snapshot_down_ec (push) Successful in 32s
Test / test_splitbrain (push) Successful in 22s
Test / test_snapshot_chain (push) Successful in 2m8s
Test / test_snapshot_chain_ec (push) Successful in 2m48s
Test / test_rebalance_verify_imm (push) Successful in 2m57s
Test / test_rebalance_verify (push) Successful in 3m29s
Test / test_switch_primary (push) Successful in 36s
Test / test_write (push) Successful in 54s
Test / test_write_xor (push) Successful in 51s
Test / test_write_no_same (push) Successful in 16s
Test / test_rebalance_verify_ec (push) Successful in 3m40s
Test / test_rebalance_verify_ec_imm (push) Successful in 4m20s
Test / test_scrub (push) Successful in 1m1s
Test / test_scrub_zero_osd_2 (push) Successful in 46s
Test / test_scrub_xor (push) Successful in 41s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m0s
Test / test_scrub_ec (push) Successful in 58s
Test / test_scrub_pg_size_3 (push) Successful in 1m45s
Test / test_heal_pg_size_2 (push) Failing after 4m52s
Test / test_heal_csum_32k_dmj (push) Successful in 5m36s
Test / test_heal_csum_32k_dj (push) Successful in 5m33s
Test / test_interrupted_rebalance_imm (push) Successful in 1m35s
Test / test_interrupted_rebalance (push) Successful in 2m28s
Test / test_interrupted_rebalance_ec (push) Successful in 2m30s
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m41s
Test / test_heal_ec (push) Failing after 10m20s
Test / test_heal_csum_4k_dmj (push) Successful in 4m21s
Test / test_heal_csum_32k (push) Successful in 5m15s
Test / test_heal_csum_4k_dj (push) Successful in 5m48s
Test / test_heal_csum_4k (push) Successful in 5m32s
2024-02-20 19:41:48 +03:00
c6406d67fc Fix journal space_check incorrectly checking for space at the beginning 2024-02-20 19:40:56 +03:00
f87964861d Release 1.4.6
All checks were successful
Test / test_snapshot_ec (push) Successful in 29s
Test / test_rm (push) Successful in 18s
Test / test_move_reappear (push) Successful in 26s
Test / test_snapshot_down (push) Successful in 28s
Test / test_snapshot_down_ec (push) Successful in 32s
Test / test_splitbrain (push) Successful in 23s
Test / test_snapshot_chain (push) Successful in 2m3s
Test / test_snapshot_chain_ec (push) Successful in 2m46s
Test / test_rebalance_verify_imm (push) Successful in 3m1s
Test / test_rebalance_verify (push) Successful in 3m30s
Test / test_switch_primary (push) Successful in 38s
Test / test_write (push) Successful in 32s
Test / test_write_no_same (push) Successful in 17s
Test / test_write_xor (push) Successful in 38s
Test / test_rebalance_verify_ec (push) Successful in 4m38s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m57s
Test / test_heal_csum_32k_dj (push) Successful in 5m14s
Test / test_heal_csum_32k_dmj (push) Successful in 5m21s
Test / test_heal_csum_32k (push) Successful in 5m45s
Test / test_heal_csum_4k_dmj (push) Successful in 5m27s
Test / test_scrub (push) Successful in 1m30s
Test / test_heal_csum_4k_dj (push) Successful in 5m26s
Test / test_scrub_zero_osd_2 (push) Successful in 38s
Test / test_scrub_xor (push) Successful in 40s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m8s
Test / test_scrub_ec (push) Successful in 1m5s
Test / test_scrub_pg_size_3 (push) Successful in 1m49s
Test / test_heal_csum_4k (push) Successful in 5m41s
Test / test_heal_ec (push) Successful in 4m11s
Test / test_heal_pg_size_2 (push) Successful in 4m22s
Unwavering stabilization of 1.4.x, continued :-)

- Include the accidentally lost part of 1.4.5 journal trimming fix
- Fix a possible OSD crash with "BUG: Attempt to overwrite used offset"
  which was probably present for long time, but became apparent after
  fixing flapping tests in CI
- Fix remaining flapping tests in CI. It was the first time when tests
  actually passed without retries :-)
2024-02-20 17:01:26 +03:00
62a4f45160 Raise test_scrub waiting timeout
Some checks failed
Test / test_snapshot_ec (push) Successful in 27s
Test / test_rm (push) Successful in 19s
Test / test_move_reappear (push) Successful in 25s
Test / test_snapshot_down (push) Successful in 28s
Test / test_snapshot_down_ec (push) Successful in 33s
Test / test_splitbrain (push) Successful in 28s
Test / test_snapshot_chain (push) Successful in 2m17s
Test / test_snapshot_chain_ec (push) Successful in 3m5s
Test / test_rebalance_verify_imm (push) Successful in 3m0s
Test / test_rebalance_verify (push) Successful in 3m43s
Test / test_switch_primary (push) Successful in 40s
Test / test_write (push) Successful in 41s
Test / test_write_no_same (push) Successful in 14s
Test / test_write_xor (push) Successful in 42s
Test / test_rebalance_verify_ec (push) Successful in 3m55s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m6s
Test / test_heal_pg_size_2 (push) Successful in 4m51s
Test / test_heal_csum_32k_dj (push) Successful in 5m47s
Test / test_heal_csum_32k_dmj (push) Successful in 5m50s
Test / test_heal_csum_32k (push) Successful in 5m42s
Test / test_heal_ec (push) Failing after 10m30s
Test / test_heal_csum_4k_dmj (push) Successful in 5m22s
Test / test_scrub (push) Successful in 1m21s
Test / test_scrub_xor (push) Successful in 46s
Test / test_scrub_zero_osd_2 (push) Successful in 53s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m21s
Test / test_scrub_pg_size_3 (push) Successful in 1m56s
Test / test_scrub_ec (push) Successful in 55s
Test / test_heal_csum_4k (push) Successful in 4m28s
Test / test_heal_csum_4k_dj (push) Failing after 10m15s
2024-02-20 16:26:09 +03:00
7048228678 Supposed fix for "BUG: Attempt to overwrite used offset" 2024-02-20 15:56:48 +03:00
ea73857450 Add asserts to catch "BUG: Attempt to overwrite used offset" 2024-02-20 15:56:48 +03:00
6cfe38ec04 Followup to empty cur.oid as stop condition for forced trim fix 2024-02-20 15:56:38 +03:00
7ae5766fdb Wait to clear has_degraded in test_heal - should fix flaps of test_heal_* in CI 2024-02-20 15:56:27 +03:00
f882c7dd87 Release 1.4.5
All checks were successful
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m23s
Test / test_rm (push) Successful in 15s
Test / test_move_reappear (push) Successful in 21s
Test / test_snapshot_down (push) Successful in 26s
Test / test_snapshot_down_ec (push) Successful in 30s
Test / test_splitbrain (push) Successful in 29s
Test / test_snapshot_chain (push) Successful in 2m17s
Test / test_snapshot_chain_ec (push) Successful in 3m14s
Test / test_rebalance_verify_imm (push) Successful in 3m24s
Test / test_rebalance_verify (push) Successful in 3m59s
Test / test_switch_primary (push) Successful in 35s
Test / test_write_xor (push) Successful in 32s
Test / test_write_no_same (push) Successful in 13s
Test / test_rebalance_verify_ec (push) Successful in 3m46s
Test / test_rebalance_verify_ec_imm (push) Successful in 3m13s
Test / test_heal_pg_size_2 (push) Successful in 3m52s
Test / test_heal_ec (push) Successful in 5m25s
Test / test_heal_csum_32k_dj (push) Successful in 4m24s
Test / test_heal_csum_4k_dmj (push) Successful in 4m23s
Test / test_heal_csum_4k_dj (push) Successful in 4m17s
Test / test_scrub (push) Successful in 38s
Test / test_scrub_zero_osd_2 (push) Successful in 29s
Test / test_scrub_xor (push) Successful in 30s
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 43s
Test / test_scrub_ec (push) Successful in 32s
Test / test_scrub_pg_size_3 (push) Successful in 1m46s
Test / test_heal_csum_4k (push) Successful in 4m4s
Test / test_write (push) Successful in 1m38s
Test / test_heal_csum_32k_dmj (push) Successful in 4m5s
Test / test_heal_csum_32k (push) Successful in 4m15s
- Fix a write stall caused by incorrect journal trimming introduced in 1.4.4 :)
- Fix PGs sometimes hanging in "starting" state on mass OSD restarts
- Fix a rare crash with "map::at" during OSD pings
- Use new defaults for non-capacitor (desktop) SSDs - improves T1Q256 random write from ~6k iops to ~45k iops
- Make journal_trim_interval configurable
2024-02-16 10:13:33 +03:00
26dd863c8d Fix sometimes possible crash on clients.at() during pings 2024-02-16 10:13:33 +03:00
2ae859fbc6 Use min/max_flusher_count=32/256, 128M journal and autosync_writes=512 for non-capacitor SSDs by default 2024-02-16 10:13:33 +03:00
f6cd9f9153 Add a note about pg_minsize 2024-02-15 23:38:52 +03:00
8389c0f33b Fix PGs sometimes hanging in "starting" state on mass OSD restarts 2024-02-15 23:38:52 +03:00
9db2196aef Make journal_trim_interval configurable 2024-02-15 23:38:51 +03:00
8d6ae662fe Use empty cur.oid as stop condition for forced trim, not journal_trim_counter 2024-02-15 23:27:17 +03:00
42 changed files with 195 additions and 758 deletions

View File

@@ -64,546 +64,6 @@ jobs:
# leak sanitizer sometimes crashes
- run: cd /root/vitastor/build && ASAN_OPTIONS=detect_leaks=0 make -j16 test
test_add_osd:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_add_osd.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_cas:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_cas.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_count_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_change_pg_count.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_change_pg_size:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_change_pg_size.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_create_nomaxid:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_create_nomaxid.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_etcd_fail:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_etcd_fail.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_interrupted_rebalance_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_interrupted_rebalance.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_failure_domain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_failure_domain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_minsize_1:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_minsize_1.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_move_reappear:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_move_reappear.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_rm.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_chain_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 6
run: SCHEME=ec /root/vitastor/tests/test_snapshot_chain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_snapshot_down_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_splitbrain:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_splitbrain.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_rebalance_verify_ec_imm:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 10
run: SCHEME=ec IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_rebalance_verify.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_switch_primary:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_switch_primary.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_write.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write_no_same:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_write_no_same.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_heal_pg_size_2:
runs-on: ubuntu-latest
needs: build
@@ -611,7 +71,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: PG_SIZE=2 /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -629,7 +89,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: SCHEME=ec /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -647,7 +107,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: TEST_NAME=csum_32k_dmj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -665,7 +125,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: TEST_NAME=csum_32k_dj OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -683,7 +143,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: TEST_NAME=csum_32k OSD_ARGS="--data_csum_type crc32c --csum_block_size 32k" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -701,7 +161,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata false --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -719,7 +179,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -737,7 +197,7 @@ jobs:
steps:
- name: Run test
id: test
timeout-minutes: 10
timeout-minutes: 1000
run: TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS /root/vitastor/tests/test_heal.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
@@ -747,112 +207,3 @@ jobs:
cat $i
echo ""
done
test_scrub:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_zero_osd_2:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: ZERO_OSD=2 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_xor:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=xor /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_3:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=3 /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: PG_SIZE=6 PG_MINSIZE=4 OSD_COUNT=6 SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_scrub_ec:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: SCHEME=ec /root/vitastor/tests/test_scrub.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
project(vitastor)
set(VERSION "1.4.4")
set(VERSION "1.4.6")
add_subdirectory(src)

View File

@@ -1,4 +1,4 @@
VERSION ?= v1.4.4
VERSION ?= v1.4.6
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v1.4.4
image: vitalif/vitastor-csi:v1.4.6
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -121,7 +121,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v1.4.4
image: vitalif/vitastor-csi:v1.4.6
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "1.4.4"
vitastorCSIDriverVersion = "1.4.6"
)
// Config struct fills the parameters of request or user input

2
debian/changelog vendored
View File

@@ -1,4 +1,4 @@
vitastor (1.4.4-1) unstable; urgency=medium
vitastor (1.4.6-1) unstable; urgency=medium
* Bugfixes

View File

@@ -35,8 +35,8 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-1.4.4; \
cd vitastor-1.4.4; \
cp -r /root/vitastor vitastor-1.4.6; \
cd vitastor-1.4.6; \
ln -s /root/fio-build/fio-*/ ./fio; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -49,8 +49,8 @@ RUN set -e -x; \
rm -rf a b; \
echo "dep:fio=$FIO" > debian/fio_version; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.4.orig.tar.xz vitastor-1.4.4; \
cd vitastor-1.4.4; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_1.4.6.orig.tar.xz vitastor-1.4.6; \
cd vitastor-1.4.6; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@@ -154,6 +154,9 @@ That is, if it becomes impossible to place PG data on at least (pg_minsize)
OSDs, PG is deactivated for both read and write. So you know that a fresh
write always goes to at least (pg_minsize) OSDs (disks).
That is, pg_size minus pg_minsize sets the number of disk failures to tolerate
without temporary downtime (for [osd_out_time](monitor.en.md#osd_out_time)).
FIXME: pg_minsize behaviour may be changed in the future to only make PGs
read-only instead of deactivating them.

View File

@@ -157,6 +157,10 @@
OSD, PG деактивируется на чтение и запись. Иными словами, всегда известно,
что новые блоки данных всегда записываются как минимум на pg_minsize дисков.
По сути, разница pg_size и pg_minsize задаёт число отказов дисков, которые пул
может пережить без временной (на [osd_out_time](monitor.ru.md#osd_out_time))
остановки обслуживания.
FIXME: Поведение pg_minsize может быть изменено в будущем с полной деактивации
PG на перевод их в режим только для чтения.

View File

@@ -261,7 +261,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
```
--object_size 128k Set blockstore block size
--bitmap_granularity 4k Set bitmap granularity
--journal_size 16M Set journal size
--journal_size 32M Set journal size
--data_csum_type none Set data checksum type (crc32c or none)
--csum_block_size 4k Set data checksum block size
--device_block_size 4k Set device block size

View File

@@ -267,7 +267,7 @@ OSD отключены fsync-и.
```
--object_size 128k Размер блока хранилища
--bitmap_granularity 4k Гранулярность битовых карт
--journal_size 16M Размер журнала
--journal_size 32M Размер журнала
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
--device_block_size 4k Размер блока устройства

View File

@@ -1,6 +1,6 @@
{
"name": "vitastor-mon",
"version": "1.4.4",
"version": "1.4.6",
"description": "Vitastor SDS monitor service",
"main": "mon-main.js",
"scripts": {

View File

@@ -50,7 +50,7 @@ from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '1.4.4'
VERSION = '1.4.6'
LOG = logging.getLogger(__name__)

View File

@@ -24,4 +24,4 @@ rm fio
mv fio-copy fio
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-1.4.4/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.4$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-1.4.6/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-1.4.6$(rpm --eval '%dist').tar.gz *

View File

@@ -36,7 +36,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.4.4.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.4.6.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.4.4
Version: 1.4.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.4.4.el7.tar.gz
Source0: vitastor-1.4.6.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -35,7 +35,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.4.4.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.4.6.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.4.4
Version: 1.4.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.4.4.el8.tar.gz
Source0: vitastor-1.4.6.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -18,7 +18,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-1.4.4.el9.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-1.4.6.el9.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 1.4.4
Version: 1.4.6
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-1.4.4.el9.tar.gz
Source0: vitastor-1.4.6.el9.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel

View File

@@ -16,7 +16,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="1.4.4")
add_definitions(-DVERSION="1.4.6")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
add_link_options(-fno-omit-frame-pointer)
if (${WITH_ASAN})

View File

@@ -19,7 +19,6 @@ journal_flusher_t::journal_flusher_t(blockstore_impl_t *bs)
syncing_flushers = 0;
// FIXME: allow to configure flusher_start_threshold and journal_trim_interval
flusher_start_threshold = bs->dsk.journal_block_size / sizeof(journal_entry_stable);
journal_trim_interval = 512;
journal_trim_counter = bs->journal.flush_journal ? 1 : 0;
trim_wanted = bs->journal.flush_journal ? 1 : 0;
journal_superblock = bs->journal.inmemory ? bs->journal.buffer : memalign_or_die(MEM_ALIGNMENT, bs->dsk.journal_block_size);
@@ -365,9 +364,10 @@ resume_0:
!flusher->flush_queue.size() || !flusher->dequeuing)
{
stop_flusher:
if (flusher->trim_wanted > 0 && !flusher->journal_trim_counter)
if (flusher->trim_wanted > 0 && cur.oid.inode != 0)
{
// Attempt forced trim
cur.oid = {};
flusher->active_flushers++;
goto trim_journal;
}
@@ -415,6 +415,7 @@ stop_flusher:
flusher->sync_to_repeat.erase(cur.oid);
if (!flusher->try_find_other(dirty_end, cur))
{
cur.oid = {};
goto stop_flusher;
}
}
@@ -583,7 +584,8 @@ resume_2:
flusher->sync_to_repeat.erase(repeat_it);
trim_journal:
// Clear unused part of the journal every <journal_trim_interval> flushes
if (!((++flusher->journal_trim_counter) % flusher->journal_trim_interval) || flusher->trim_wanted > 0)
if (bs->journal_trim_interval && !((++flusher->journal_trim_counter) % bs->journal_trim_interval) ||
flusher->trim_wanted > 0)
{
resume_26:
resume_27:

View File

@@ -107,7 +107,7 @@ class journal_flusher_t
blockstore_impl_t *bs;
friend class journal_flusher_co;
int journal_trim_counter, journal_trim_interval;
int journal_trim_counter;
bool trimming;
void* journal_superblock;

View File

@@ -253,6 +253,7 @@ class blockstore_impl_t
bool inmemory_meta = false;
// Maximum and minimum flusher count
unsigned max_flusher_count, min_flusher_count;
unsigned journal_trim_interval;
// Maximum queue depth
unsigned max_write_iodepth = 128;
// Enable small (journaled) write throttling, useful for the SSD+HDD case

View File

@@ -103,7 +103,7 @@ int blockstore_journal_check_t::check_available(blockstore_op_t *op, int entries
if (data_after > 0)
{
next_pos = next_pos + data_after;
if (next_pos > bs->journal.len)
if (next_pos >= bs->journal.len)
{
if (right_dir)
next_pos = bs->journal.block_size + data_after;
@@ -146,7 +146,7 @@ journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type,
journal.in_sector_pos = 0;
auto next_next_free = (journal.next_free+journal.block_size) < journal.len ? journal.next_free + journal.block_size : journal.block_size;
// double check that next_free doesn't cross used_start from the left
assert(journal.next_free >= journal.used_start || next_next_free < journal.used_start);
assert(journal.next_free >= journal.used_start && next_next_free >= journal.next_free || next_next_free < journal.used_start);
journal.next_free = next_next_free;
memset(journal.inmemory
? (uint8_t*)journal.buffer + journal.sector_info[journal.cur_sector].offset

View File

@@ -13,6 +13,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
max_flusher_count = strtoull(config["flusher_count"].c_str(), NULL, 10);
}
min_flusher_count = strtoull(config["min_flusher_count"].c_str(), NULL, 10);
journal_trim_interval = strtoull(config["journal_trim_interval"].c_str(), NULL, 10);
max_write_iodepth = strtoull(config["max_write_iodepth"].c_str(), NULL, 10);
throttle_small_writes = config["throttle_small_writes"] == "true" || config["throttle_small_writes"] == "1" || config["throttle_small_writes"] == "yes";
throttle_target_iops = strtoull(config["throttle_target_iops"].c_str(), NULL, 10);
@@ -31,6 +32,10 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config, bool init)
{
min_flusher_count = 1;
}
if (!journal_trim_interval)
{
journal_trim_interval = 512;
}
if (!max_write_iodepth)
{
max_write_iodepth = 128;

View File

@@ -505,7 +505,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
for (auto & rv: PRIV(read_op)->read_vec)
{
if (rv.journal_sector)
journal.used_sectors[rv.journal_sector-1]++;
journal.used_sectors.at(rv.journal_sector-1)++;
}
}
read_op->retval = 0;
@@ -966,7 +966,7 @@ void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op
{
if (rv.journal_sector)
{
auto used = --journal.used_sectors[rv.journal_sector-1];
auto used = --journal.used_sectors.at(rv.journal_sector-1);
if (used == 0)
{
journal.used_sectors.erase(rv.journal_sector-1);

View File

@@ -215,7 +215,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
#endif
data_alloc->set(dirty_it->second.location >> dsk.block_order, false);
}
auto used = --journal.used_sectors[dirty_it->second.journal_sector];
auto used = --journal.used_sectors.at(dirty_it->second.journal_sector);
#ifdef BLOCKSTORE_DEBUG
printf(
"remove usage of journal offset %08lx by %lx:%lx v%lu (%lu refs)\n", dirty_it->second.journal_sector,
@@ -225,6 +225,11 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
if (used == 0)
{
journal.used_sectors.erase(dirty_it->second.journal_sector);
if (dirty_it->second.journal_sector == journal.sector_info[journal.cur_sector].offset)
{
// Mark current sector as "full" to select the new one
journal.in_sector_pos = dsk.journal_block_size;
}
flusher->mark_trim_possible();
}
free_dirty_dyn_data(dirty_it->second);

View File

@@ -307,35 +307,49 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
return STAB_SPLIT_DONE;
}
}
else if (IS_IN_FLIGHT(dirty_it->second.state))
{
// Object write is still in progress. Wait until the write request completes
return STAB_SPLIT_WAIT;
}
else if (!IS_SYNCED(dirty_it->second.state))
{
// Object not synced yet - sync it
// In previous versions we returned EBUSY here and required
// the caller (OSD) to issue a global sync first. But a global sync
// waits for all writes in the queue including inflight writes. And
// inflight writes may themselves be blocked by unstable writes being
// still present in the journal and not flushed away from it.
// So we must sync specific objects here.
//
// Even more, we have to process "stabilize" request in parts. That is,
// we must stabilize all objects which are already synced. Otherwise
// they may block objects which are NOT synced yet.
return STAB_SPLIT_SYNC;
}
else if (IS_STABLE(dirty_it->second.state))
{
// Already stable
return STAB_SPLIT_DONE;
}
else
while (true)
{
return STAB_SPLIT_TODO;
if (IS_IN_FLIGHT(dirty_it->second.state))
{
// Object write is still in progress. Wait until the write request completes
return STAB_SPLIT_WAIT;
}
else if (!IS_SYNCED(dirty_it->second.state))
{
// Object not synced yet - sync it
// In previous versions we returned EBUSY here and required
// the caller (OSD) to issue a global sync first. But a global sync
// waits for all writes in the queue including inflight writes. And
// inflight writes may themselves be blocked by unstable writes being
// still present in the journal and not flushed away from it.
// So we must sync specific objects here.
//
// Even more, we have to process "stabilize" request in parts. That is,
// we must stabilize all objects which are already synced. Otherwise
// they may block objects which are NOT synced yet.
return STAB_SPLIT_SYNC;
}
else if (IS_STABLE(dirty_it->second.state))
{
break;
}
// Check previous versions too
if (dirty_it == dirty_db.begin())
{
break;
}
dirty_it--;
if (dirty_it->first.oid != ov.oid)
{
break;
}
}
return STAB_SPLIT_TODO;
});
if (r != 1)
{

View File

@@ -76,7 +76,6 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
// 2nd step: Data device is synced, prepare & write journal entries
// Check space in the journal and journal memory buffers
blockstore_journal_check_t space_check(this);
auto reservation = (unstable_writes.size()+unstable_unsynced+PRIV(op)->sync_big_writes.size())*journal.block_size;
if (dsk.csum_block_size)
{
// More complex check because all journal entries have different lengths
@@ -86,14 +85,14 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
left--;
auto & dirty_entry = dirty_db.at(sbw);
uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, left ? 0 : reservation))
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, 0))
{
return 0;
}
}
}
else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, reservation))
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0))
{
return 0;
}
@@ -116,7 +115,10 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
journal, (dirty_entry.state & BS_ST_INSTANT) ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
sizeof(journal_entry_big_write) + dyn_size
);
dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
auto jsec = dirty_entry.journal_sector = journal.sector_info[journal.cur_sector].offset;
assert(journal.next_free >= journal.used_start
? (jsec >= journal.used_start && jsec < journal.next_free)
: (jsec >= journal.used_start || jsec < journal.next_free));
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
#ifdef BLOCKSTORE_DEBUG
printf(

View File

@@ -436,7 +436,19 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_SMALL_WRITE_INSTANT : JE_SMALL_WRITE,
sizeof(journal_entry_small_write) + dyn_size
);
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
auto jsec = dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
if (!(journal.next_free >= journal.used_start
? (jsec >= journal.used_start && jsec < journal.next_free)
: (jsec >= journal.used_start || jsec < journal.next_free)))
{
printf(
"BUG: journal offset %08lx is used by %lx:%lx v%lu (%lu refs) BUT used_start=%lx next_free=%lx\n",
dirty_it->second.journal_sector, dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
journal.used_sectors[journal.sector_info[journal.cur_sector].offset],
journal.used_start, journal.next_free
);
abort();
}
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
#ifdef BLOCKSTORE_DEBUG
printf(
@@ -463,7 +475,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
}
}
// double check that next_free doesn't cross used_start from the left
assert(journal.next_free >= journal.used_start || next_next_free < journal.used_start);
assert(journal.next_free >= journal.used_start && next_next_free >= journal.next_free || next_next_free < journal.used_start);
journal.next_free = next_next_free;
je->oid = op->oid;
je->version = op->version;
@@ -505,7 +517,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
if (next_next_free >= journal.len)
next_next_free = dsk.journal_block_size;
// double check that next_free doesn't cross used_start from the left
assert(journal.next_free >= journal.used_start || next_next_free < journal.used_start);
assert(journal.next_free >= journal.used_start && next_next_free >= journal.next_free || next_next_free < journal.used_start);
journal.next_free = next_next_free;
if (!(dirty_it->second.state & BS_ST_INSTANT))
{
@@ -558,7 +570,19 @@ resume_2:
journal, op->opcode == BS_OP_WRITE_STABLE ? JE_BIG_WRITE_INSTANT : JE_BIG_WRITE,
sizeof(journal_entry_big_write) + dyn_size
);
dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
auto jsec = dirty_it->second.journal_sector = journal.sector_info[journal.cur_sector].offset;
if (!(journal.next_free >= journal.used_start
? (jsec >= journal.used_start && jsec < journal.next_free)
: (jsec >= journal.used_start || jsec < journal.next_free)))
{
printf(
"BUG: journal offset %08lx is used by %lx:%lx v%lu (%lu refs) BUT used_start=%lx next_free=%lx\n",
dirty_it->second.journal_sector, dirty_it->first.oid.inode, dirty_it->first.oid.stripe, dirty_it->first.version,
journal.used_sectors[journal.sector_info[journal.cur_sector].offset],
journal.used_start, journal.next_free
);
abort();
}
journal.used_sectors[journal.sector_info[journal.cur_sector].offset]++;
#ifdef BLOCKSTORE_DEBUG
printf(

View File

@@ -47,7 +47,7 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
if (!bitmap_granularity)
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
if (!journal_size)
journal_size = 16*1024*1024;
journal_size = 32*1024*1024;
if (!device_block_size)
device_block_size = 4096;
if (!data_csum_type)

View File

@@ -167,7 +167,7 @@ static const char *help_text =
" Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n"
" --object_size 128k Set blockstore block size\n"
" --bitmap_granularity 4k Set bitmap granularity\n"
" --journal_size 16M Set journal size\n"
" --journal_size 32M Set journal size\n"
" --data_csum_type none Set data checksum type (crc32c or none)\n"
" --csum_block_size 4k Set data checksum block size\n"
" --device_block_size 4k Set device block size\n"

View File

@@ -8,6 +8,7 @@
int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_hdd)
{
static const char *allow_additional_params[] = {
"autosync_writes",
"data_io",
"meta_io",
"journal_io",
@@ -99,12 +100,9 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
options["disable_journal_fsync"] = options["disable_data_fsync"];
}
// Calculate offsets if the same device is used for two or more of data, meta, and journal
if (options["journal_size"] == "")
if (options["journal_size"] == "" && (options["journal_device"] == "" || options["journal_device"] == options["data_device"]))
{
if (options["journal_device"] == "")
options["journal_size"] = is_hdd ? "128M" : "32M";
else if (is_hdd)
options["journal_size"] = DEFAULT_HYBRID_JOURNAL;
options["journal_size"] = is_hdd || !json_is_true(options["disable_data_fsync"]) ? "128M" : "32M";
}
bool is_hybrid = is_hdd && options["journal_device"] != "" && options["journal_device"] != options["data_device"];
if (is_hdd)
@@ -114,6 +112,15 @@ int disk_tool_t::prepare_one(std::map<std::string, std::string> options, int is_
if (is_hybrid && options["throttle_small_writes"] == "")
options["throttle_small_writes"] = "1";
}
else if (!json_is_true(options["disable_data_fsync"]))
{
if (options.find("min_flusher_count") == options.end())
options["min_flusher_count"] = "32";
if (options.find("max_flusher_count") == options.end())
options["max_flusher_count"] = "256";
if (options.find("autosync_writes") == options.end())
options["autosync_writes"] = "512";
}
json11::Json::object sb;
blockstore_disk_t dsk;
try
@@ -616,6 +623,7 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
options.erase("disable_meta_fsync");
options.erase("disable_journal_fsync");
}
auto journal_size = options["journal_size"];
for (auto & dev: devinfo)
{
if (!hybrid || dev.is_hdd)
@@ -633,11 +641,13 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
{
return 1;
}
options.erase("journal_size");
}
// Treat all disks as SSDs if not in the hybrid mode
prepare_one(options, dev.is_hdd ? 1 : 0);
if (hybrid)
{
options["journal_size"] = journal_size;
options.erase("journal_device");
options.erase("meta_device");
}

View File

@@ -45,11 +45,12 @@ void osd_messenger_t::init()
#endif
keepalive_timer_id = tfd->set_timer(1000, true, [this](int)
{
std::vector<int> to_stop;
std::vector<osd_op_t*> to_ping;
for (auto cl_it = clients.begin(); cl_it != clients.end(); cl_it++)
auto cl_it = clients.begin();
while (cl_it != clients.end())
{
auto cl = cl_it->second;
cl_it++;
auto peer_fd = cl->peer_fd;
if (!cl->osd_num || cl->peer_state != PEER_CONNECTED && cl->peer_state != PEER_RDMA)
{
// Do not run keepalive on regular clients
@@ -62,7 +63,9 @@ void osd_messenger_t::init()
{
// Ping timed out, stop the client
fprintf(stderr, "Ping timed out for OSD %lu (client %d), disconnecting peer\n", cl->osd_num, cl->peer_fd);
to_stop.push_back(cl->peer_fd);
stop_client(peer_fd, true);
// Restart iterator because it may be invalidated
cl_it = clients.upper_bound(peer_fd);
}
}
else if (cl->idle_time_remaining > 0)
@@ -100,9 +103,11 @@ void osd_messenger_t::init()
stop_client(fail_fd, true);
}
};
to_ping.push_back(op);
cl->ping_time_remaining = osd_ping_timeout;
cl->idle_time_remaining = osd_idle_timeout;
outbox_push(op);
// Restart iterator because it may be invalidated
cl_it = clients.upper_bound(peer_fd);
}
}
else
@@ -110,15 +115,6 @@ void osd_messenger_t::init()
cl->idle_time_remaining = osd_idle_timeout;
}
}
// Don't stop clients while a 'clients' iterator is still active
for (int peer_fd: to_stop)
{
stop_client(peer_fd, true);
}
for (auto op: to_ping)
{
outbox_push(op);
}
});
}

View File

@@ -843,7 +843,13 @@ void osd_t::report_pg_states()
pg_state_exists = true;
if (pg.state == PG_OFFLINE && pg_it->second.cur_primary != this->osd_num)
{
// Nothing to check or report, PG is already taken over by another OSD
// Nothing to report, PG is already taken over by another OSD
checks.push_back(json11::Json::object {
{ "target", "MOD" },
{ "key", state_key_base64 },
{ "result", "LESS" },
{ "mod_revision", st_cli.etcd_watch_revision+1 },
});
continue;
}
}
@@ -851,11 +857,6 @@ void osd_t::report_pg_states()
}
if (!pg_state_exists)
{
if (pg.state == PG_OFFLINE)
{
// Nothing to check or report, PG is already stopped
continue;
}
// Check that the PG key does not exist
// Failed check indicates an unsuccessful PG lock attempt in this case
checks.push_back(json11::Json::object {

View File

@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
Name: Vitastor
Description: Vitastor client library
Version: 1.4.4
Version: 1.4.6
Libs: -L${libdir} -lvitastor_client
Cflags: -I${includedir}

View File

@@ -22,7 +22,7 @@ if [ "$IMMEDIATE_COMMIT" != "" ]; then
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
else
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5"
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5 --min_flusher_count 16"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
fi

View File

@@ -30,14 +30,16 @@ kill_osds()
kill -9 $OSD1_PID
$ETCDCTL del /vitastor/osd/state/1
for i in $(seq 2 $OSD_COUNT); do
for kill_osd in $(seq 2 $OSD_COUNT); do
sleep 15
echo Killing OSD $i and starting OSD $((i-1))
p=OSD${i}_PID
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
wait_condition 600 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
p=OSD${kill_osd}_PID
kill -9 ${!p}
$ETCDCTL del /vitastor/osd/state/$i
start_osd $((i-1))
sleep 15
$ETCDCTL del /vitastor/osd/state/$kill_osd
start_osd $((kill_osd-1))
done
sleep 5
@@ -58,6 +60,7 @@ qemu-img convert -S 4096 -p \
-O raw ./testdata/read.bin
if ! diff -q ./testdata/read.bin ./testdata/mirror.bin; then
sleep 100000
format_error Data lost during self-heal
fi

View File

@@ -44,7 +44,7 @@ wait_condition 10 "$ETCDCTL"$' get --print-value-only /vitastor/config/pgs | jq
$ETCDCTL put /vitastor/pg/history/1/1 `$ETCDCTL get --print-value-only /vitastor/pg/history/1/1 | jq -s -c '(.[0] // {}) + {"next_scrub":1}'`
# Wait for scrub to finish
wait_condition 60 "$ETCDCTL get --prefix /vitastor/pg/history/ --print-value-only | jq -s -e '([ .[] | select(.next_scrub == 0 or .next_scrub == null) ] | length) == $PG_COUNT'" Scrubbing
wait_condition 300 "$ETCDCTL get --prefix /vitastor/pg/history/ --print-value-only | jq -s -e '([ .[] | select(.next_scrub == 0 or .next_scrub == null) ] | length) == $PG_COUNT'" Scrubbing
if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) < 2) ]]; then
# Check that objects are marked as inconsistent if 2 replicas or EC/XOR 2+1
@@ -56,7 +56,7 @@ if [[ ($SCHEME = replicated && $PG_SIZE < 3) || ($SCHEME != replicated && $((PG_
build/src/vitastor-cli fix --etcd_address $ETCD_URL --bad_osds $ZERO_OSD
elif [[ ($SCHEME = replicated && $PG_SIZE > 2) || ($SCHEME != replicated && $((PG_SIZE-PG_DATA_SIZE)) > 1) ]]; then
# Check that everything heals
wait_finish_rebalance 60
wait_finish_rebalance 300
build/src/vitastor-cli describe --etcd_address $ETCD_URL --json | jq -e '. | length == 0'
fi

View File

@@ -6,21 +6,37 @@ check_qemu
#LD_PRELOAD=libasan.so.5 \
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
# Small sequential writes were causing various bugs at different moments
echo Small sequential writes
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -numjobs=1 -iodepth=16 \
-rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
# Random writes without immediate_commit were stalling OSDs
echo 68k random writes
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=68k -direct=1 -numjobs=16 -iodepth=4 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
# A lot of parallel syncs was crashing the primary OSD at some point
echo T64Q1 writes with fsync
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -numjobs=64 -iodepth=1 -fsync=1 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=100
echo Linear write
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -cluster_log_level=10
echo T1Q1 writes with fsync=32
LD_PRELOAD="build/src/libfio_vitastor.so" \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=1024