From e094c309599331c61e80fe30390018133c88d3f5 Mon Sep 17 00:00:00 2001
From: Vitaliy Filippov
Date: Sun, 7 Apr 2024 18:58:44 +0300
Subject: [PATCH] Add ENOSPC handling tests

---
Review notes (this area is commentary, not part of the commit message):

* src/cluster_client.{h,cpp}: adds the client_retry_enospc option. It is read
  from the config next to log_level and is true when the key is absent (null).
  When it is false, an op that finishes with -ENOSPC is no longer excluded
  from the existing "Fatal error" branch (the one that calls erase_op).
* tests/test_enospc.sh: sets OSD_SIZE=200 and "client_retry_enospc":false,
  sources run_3osds.sh, expects two 500M sequential fio writes to fail and a
  following 4k random-write run to succeed.
* tests/run_tests.sh and .gitea/workflows/test.yml: run the new test in four
  variants (default, SCHEME=xor, IMMEDIATE_COMMIT=1, and both together).
* NOTE(review): the unit of OSD_SIZE and the way SCHEME / IMMEDIATE_COMMIT /
  GLOBAL_CONFIG are consumed are defined outside this patch (presumably in
  run_3osds.sh) - confirm there.

 .gitea/workflows/test.yml | 72 +++++++++++++++++++++++++++++++++++++++
 src/cluster_client.cpp    |  4 ++-
 src/cluster_client.h      |  1 +
 tests/run_tests.sh        |  5 +++
 tests/test_enospc.sh      | 28 +++++++++++++++
 5 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100755 tests/test_enospc.sh

diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml
index 0635c3bd..d77d0946 100644
--- a/.gitea/workflows/test.yml
+++ b/.gitea/workflows/test.yml
@@ -748,6 +748,78 @@ jobs:
           echo ""
         done
 
+  test_enospc:
+    runs-on: ubuntu-latest
+    needs: build
+    container: ${{env.TEST_IMAGE}}:${{github.sha}}
+    steps:
+    - name: Run test
+      id: test
+      timeout-minutes: 3
+      run: /root/vitastor/tests/test_enospc.sh
+    - name: Print logs
+      if: always() && steps.test.outcome == 'failure'
+      run: |
+        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
+          echo "-------- $i --------"
+          cat $i
+          echo ""
+        done
+
+  test_enospc_xor:
+    runs-on: ubuntu-latest
+    needs: build
+    container: ${{env.TEST_IMAGE}}:${{github.sha}}
+    steps:
+    - name: Run test
+      id: test
+      timeout-minutes: 3
+      run: SCHEME=xor /root/vitastor/tests/test_enospc.sh
+    - name: Print logs
+      if: always() && steps.test.outcome == 'failure'
+      run: |
+        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
+          echo "-------- $i --------"
+          cat $i
+          echo ""
+        done
+
+  test_enospc_imm:
+    runs-on: ubuntu-latest
+    needs: build
+    container: ${{env.TEST_IMAGE}}:${{github.sha}}
+    steps:
+    - name: Run test
+      id: test
+      timeout-minutes: 3
+      run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_enospc.sh
+    - name: Print logs
+      if: always() && steps.test.outcome == 'failure'
+      run: |
+        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
+          echo "-------- $i --------"
+          cat $i
+          echo ""
+        done
+
+  test_enospc_imm_xor:
+    runs-on: ubuntu-latest
+    needs: build
+    container: ${{env.TEST_IMAGE}}:${{github.sha}}
+    steps:
+    - name: Run test
+      id: test
+      timeout-minutes: 3
+      run: IMMEDIATE_COMMIT=1 SCHEME=xor /root/vitastor/tests/test_enospc.sh
+    - name: Print logs
+      if: always() && steps.test.outcome == 'failure'
+      run: |
+        for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
+          echo "-------- $i --------"
+          cat $i
+          echo ""
+        done
+
   test_scrub:
     runs-on: ubuntu-latest
     needs: build
diff --git a/src/cluster_client.cpp b/src/cluster_client.cpp
index 1cc35138..ef12a2e4 100644
--- a/src/cluster_client.cpp
+++ b/src/cluster_client.cpp
@@ -397,6 +397,8 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
             client_eio_retry_interval = 10;
         }
     }
+    // client_retry_enospc: a missing (null) value means true; otherwise the JSON boolean is used
+    client_retry_enospc = config["client_retry_enospc"].is_null() ? true : config["client_retry_enospc"].bool_value();
     // log_level
     log_level = config["log_level"].uint64_value();
     msgr.parse_config(config);
@@ -817,7 +819,7 @@ resume_2:
             return 1;
         }
         else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
-            op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC)
+            op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && (op->retval != -ENOSPC || !client_retry_enospc))
         {
             // Fatal error (neither -EPIPE, -EIO nor -ENOSPC)
             erase_op(op);
diff --git a/src/cluster_client.h b/src/cluster_client.h
index 7f9d8e3f..c77c328a 100644
--- a/src/cluster_client.h
+++ b/src/cluster_client.h
@@ -94,6 +94,7 @@ class cluster_client_t
     int log_level = 0;
     int client_retry_interval = 50; // ms
     int client_eio_retry_interval = 1000; // ms
+    bool client_retry_enospc = true; // false: -ENOSPC takes the fatal-error branch
 
     int retry_timeout_id = 0;
     int retry_timeout_duration = 0;
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 5212792e..36775989 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -62,6 +62,11 @@ TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata fal
 TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
 TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
 
+./test_enospc.sh
+SCHEME=xor ./test_enospc.sh
+IMMEDIATE_COMMIT=1 ./test_enospc.sh
+IMMEDIATE_COMMIT=1 SCHEME=xor ./test_enospc.sh
+
 ./test_scrub.sh
 ZERO_OSD=2 ./test_scrub.sh
 SCHEME=xor ./test_scrub.sh
diff --git a/tests/test_enospc.sh b/tests/test_enospc.sh
new file mode 100755
index 00000000..9652b3da
--- /dev/null
+++ b/tests/test_enospc.sh
@@ -0,0 +1,28 @@
+#!/bin/bash -ex
+
+OSD_SIZE=200
+GLOBAL_CONFIG=',"client_retry_enospc":false'
+
+. `dirname $0`/run_3osds.sh
+
+export LD_PRELOAD="build/src/libfio_vitastor.so"
+
+# Sequential 1M writes over 500M at iodepth 4: should fail with ENOSPC
+if fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
+    -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=500M -cluster_log_level=10; then
+    format_error "Should get ENOSPC, but didn't"
+fi
+
+# Same write at iodepth 32: should fail with ENOSPC too (the idea is to try to overwrite first objects to check their rollback)
+if fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=32 \
+    -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=500M -cluster_log_level=10; then
+    format_error "Should get ENOSPC, but didn't"
+fi
+
+# 4096 random 4k writes over a 100M range of the same inode: should complete OK
+if ! fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=4 \
+    -rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=100M -cluster_log_level=10 -number_ios=4096; then
+    format_error "Should do random writes over ENOSPC correctly, but got an error"
+fi
+
+format_green OK