Add ENOSPC handling tests
parent
3bf4dd5abd
commit
2b863fb715
|
@ -748,6 +748,78 @@ jobs:
|
||||||
echo ""
|
echo ""
|
||||||
done
|
done
|
||||||
|
|
||||||
|
# Out-of-space handling test: runs tests/test_enospc.sh with its default
# settings inside the test image built for this commit.
test_enospc:
  runs-on: ubuntu-latest
  needs: build
  container: ${{env.TEST_IMAGE}}:${{github.sha}}
  steps:
  - name: Run test
    id: test
    timeout-minutes: 3
    run: /root/vitastor/tests/test_enospc.sh
  # Dump every collected log/text file, but only when the test step failed.
  - name: Print logs
    if: always() && steps.test.outcome == 'failure'
    run: |
      for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
        # A glob that matches nothing stays literal; skip it so that a failing
        # `cat` does not abort the (-e) step before the other files are printed.
        [ -e "$i" ] || continue
        echo "-------- $i --------"
        cat "$i"
        echo ""
      done
|
||||||
|
|
||||||
|
# Out-of-space handling test: runs tests/test_enospc.sh with SCHEME=xor
# inside the test image built for this commit.
test_enospc_xor:
  runs-on: ubuntu-latest
  needs: build
  container: ${{env.TEST_IMAGE}}:${{github.sha}}
  steps:
  - name: Run test
    id: test
    timeout-minutes: 3
    run: SCHEME=xor /root/vitastor/tests/test_enospc.sh
  # Dump every collected log/text file, but only when the test step failed.
  - name: Print logs
    if: always() && steps.test.outcome == 'failure'
    run: |
      for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
        # A glob that matches nothing stays literal; skip it so that a failing
        # `cat` does not abort the (-e) step before the other files are printed.
        [ -e "$i" ] || continue
        echo "-------- $i --------"
        cat "$i"
        echo ""
      done
|
||||||
|
|
||||||
|
# Out-of-space handling test: runs tests/test_enospc.sh with
# IMMEDIATE_COMMIT=1 inside the test image built for this commit.
test_enospc_imm:
  runs-on: ubuntu-latest
  needs: build
  container: ${{env.TEST_IMAGE}}:${{github.sha}}
  steps:
  - name: Run test
    id: test
    timeout-minutes: 3
    run: IMMEDIATE_COMMIT=1 /root/vitastor/tests/test_enospc.sh
  # Dump every collected log/text file, but only when the test step failed.
  - name: Print logs
    if: always() && steps.test.outcome == 'failure'
    run: |
      for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
        # A glob that matches nothing stays literal; skip it so that a failing
        # `cat` does not abort the (-e) step before the other files are printed.
        [ -e "$i" ] || continue
        echo "-------- $i --------"
        cat "$i"
        echo ""
      done
|
||||||
|
|
||||||
|
# Out-of-space handling test: runs tests/test_enospc.sh with both
# IMMEDIATE_COMMIT=1 and SCHEME=xor inside the test image built for this commit.
test_enospc_imm_xor:
  runs-on: ubuntu-latest
  needs: build
  container: ${{env.TEST_IMAGE}}:${{github.sha}}
  steps:
  - name: Run test
    id: test
    timeout-minutes: 3
    run: IMMEDIATE_COMMIT=1 SCHEME=xor /root/vitastor/tests/test_enospc.sh
  # Dump every collected log/text file, but only when the test step failed.
  - name: Print logs
    if: always() && steps.test.outcome == 'failure'
    run: |
      for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
        # A glob that matches nothing stays literal; skip it so that a failing
        # `cat` does not abort the (-e) step before the other files are printed.
        [ -e "$i" ] || continue
        echo "-------- $i --------"
        cat "$i"
        echo ""
      done
|
||||||
|
|
||||||
test_scrub:
|
test_scrub:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: build
|
needs: build
|
||||||
|
|
|
@ -11,6 +11,7 @@ affect their interaction with the cluster.
|
||||||
|
|
||||||
- [client_retry_interval](#client_retry_interval)
|
- [client_retry_interval](#client_retry_interval)
|
||||||
- [client_eio_retry_interval](#client_eio_retry_interval)
|
- [client_eio_retry_interval](#client_eio_retry_interval)
|
||||||
|
- [client_retry_enospc](#client_retry_enospc)
|
||||||
- [client_max_dirty_bytes](#client_max_dirty_bytes)
|
- [client_max_dirty_bytes](#client_max_dirty_bytes)
|
||||||
- [client_max_dirty_ops](#client_max_dirty_ops)
|
- [client_max_dirty_ops](#client_max_dirty_ops)
|
||||||
- [client_enable_writeback](#client_enable_writeback)
|
- [client_enable_writeback](#client_enable_writeback)
|
||||||
|
@ -41,6 +42,15 @@ Retry time for I/O requests failed due to data corruption or unfinished
|
||||||
EC object deletions (has_incomplete PG state). 0 disables such retries
|
EC object deletions (has_incomplete PG state). 0 disables such retries
|
||||||
and clients are not blocked and just get EIO error code instead.
|
and clients are not blocked and just get EIO error code instead.
|
||||||
|
|
||||||
|
## client_retry_enospc
|
||||||
|
|
||||||
|
- Type: boolean
|
||||||
|
- Default: true
|
||||||
|
- Can be changed online: yes
|
||||||
|
|
||||||
|
Retry writes that fail with out-of-space (ENOSPC) errors, i.e. wait until
some space is freed on OSDs.
|
||||||
|
|
||||||
## client_max_dirty_bytes
|
## client_max_dirty_bytes
|
||||||
|
|
||||||
- Type: integer
|
- Type: integer
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
|
|
||||||
- [client_retry_interval](#client_retry_interval)
|
- [client_retry_interval](#client_retry_interval)
|
||||||
- [client_eio_retry_interval](#client_eio_retry_interval)
|
- [client_eio_retry_interval](#client_eio_retry_interval)
|
||||||
|
- [client_retry_enospc](#client_retry_enospc)
|
||||||
- [client_max_dirty_bytes](#client_max_dirty_bytes)
|
- [client_max_dirty_bytes](#client_max_dirty_bytes)
|
||||||
- [client_max_dirty_ops](#client_max_dirty_ops)
|
- [client_max_dirty_ops](#client_max_dirty_ops)
|
||||||
- [client_enable_writeback](#client_enable_writeback)
|
- [client_enable_writeback](#client_enable_writeback)
|
||||||
|
@ -42,6 +43,15 @@
|
||||||
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
|
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
|
||||||
этого просто получают код ошибки EIO.
|
этого просто получают код ошибки EIO.
|
||||||
|
|
||||||
|
## client_retry_enospc
|
||||||
|
|
||||||
|
- Тип: булево (да/нет)
|
||||||
|
- Значение по умолчанию: true
|
||||||
|
- Можно менять на лету: да
|
||||||
|
|
||||||
|
Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
|
||||||
|
ожидать, пока на OSD не освободится место.
|
||||||
|
|
||||||
## client_max_dirty_bytes
|
## client_max_dirty_bytes
|
||||||
|
|
||||||
- Тип: целое число
|
- Тип: целое число
|
||||||
|
|
|
@ -22,6 +22,16 @@
|
||||||
или незавершённых удалений EC-объектов (состояния PG has_incomplete).
|
или незавершённых удалений EC-объектов (состояния PG has_incomplete).
|
||||||
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
|
0 отключает повторы таких запросов и клиенты не блокируются, а вместо
|
||||||
этого просто получают код ошибки EIO.
|
этого просто получают код ошибки EIO.
|
||||||
|
- name: client_retry_enospc
|
||||||
|
type: bool
|
||||||
|
default: true
|
||||||
|
online: true
|
||||||
|
info: |
|
||||||
|
Retry writes that fail with out-of-space (ENOSPC) errors, i.e. wait until
some space is freed on OSDs.
|
||||||
|
info_ru: |
|
||||||
|
Повторять запросы записи, завершившиеся с ошибками нехватки места, т.е.
|
||||||
|
ожидать, пока на OSD не освободится место.
|
||||||
- name: client_max_dirty_bytes
|
- name: client_max_dirty_bytes
|
||||||
type: int
|
type: int
|
||||||
default: 33554432
|
default: 33554432
|
||||||
|
|
|
@ -91,6 +91,7 @@ const etcd_tree = {
|
||||||
client_max_writeback_iodepth: 256,
|
client_max_writeback_iodepth: 256,
|
||||||
client_retry_interval: 50, // ms. min: 10
|
client_retry_interval: 50, // ms. min: 10
|
||||||
client_eio_retry_interval: 1000, // ms
|
client_eio_retry_interval: 1000, // ms
|
||||||
|
client_retry_enospc: true,
|
||||||
// client and osd - configurable online
|
// client and osd - configurable online
|
||||||
log_level: 0,
|
log_level: 0,
|
||||||
peer_connect_interval: 5, // seconds. min: 1
|
peer_connect_interval: 5, // seconds. min: 1
|
||||||
|
|
|
@ -398,6 +398,8 @@ void cluster_client_t::on_load_config_hook(json11::Json::object & etcd_global_co
|
||||||
client_eio_retry_interval = 10;
|
client_eio_retry_interval = 10;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// client_retry_enospc
|
||||||
|
client_retry_enospc = config["client_retry_enospc"].is_null() ? true : config["client_retry_enospc"].bool_value();
|
||||||
// log_level
|
// log_level
|
||||||
log_level = config["log_level"].uint64_value();
|
log_level = config["log_level"].uint64_value();
|
||||||
msgr.parse_config(config);
|
msgr.parse_config(config);
|
||||||
|
@ -818,7 +820,7 @@ resume_2:
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
|
else if (op->retval != 0 && !(op->flags & OP_FLUSH_BUFFER) &&
|
||||||
op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && op->retval != -ENOSPC)
|
op->retval != -EPIPE && (op->retval != -EIO || !client_eio_retry_interval) && (op->retval != -ENOSPC || !client_retry_enospc))
|
||||||
{
|
{
|
||||||
// Fatal error (neither -EPIPE, -EIO nor -ENOSPC)
|
// Fatal error (not -EPIPE, and not an -EIO or -ENOSPC for which retries are enabled)
|
||||||
erase_op(op);
|
erase_op(op);
|
||||||
|
|
|
@ -94,6 +94,7 @@ class cluster_client_t
|
||||||
int log_level = 0;
|
int log_level = 0;
|
||||||
int client_retry_interval = 50; // ms
|
int client_retry_interval = 50; // ms
|
||||||
int client_eio_retry_interval = 1000; // ms
|
int client_eio_retry_interval = 1000; // ms
|
||||||
|
bool client_retry_enospc = true;
|
||||||
|
|
||||||
int retry_timeout_id = 0;
|
int retry_timeout_id = 0;
|
||||||
int retry_timeout_duration = 0;
|
int retry_timeout_duration = 0;
|
||||||
|
|
|
@ -62,6 +62,11 @@ TEST_NAME=csum_4k_dmj OSD_ARGS="--data_csum_type crc32c --inmemory_metadata fal
|
||||||
TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
|
TEST_NAME=csum_4k_dj OSD_ARGS="--data_csum_type crc32c --inmemory_journal false" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
|
||||||
TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
|
TEST_NAME=csum_4k OSD_ARGS="--data_csum_type crc32c" OFFSET_ARGS=$OSD_ARGS ./test_heal.sh
|
||||||
|
|
||||||
|
./test_enospc.sh
|
||||||
|
SCHEME=xor ./test_enospc.sh
|
||||||
|
IMMEDIATE_COMMIT=1 ./test_enospc.sh
|
||||||
|
IMMEDIATE_COMMIT=1 SCHEME=xor ./test_enospc.sh
|
||||||
|
|
||||||
./test_scrub.sh
|
./test_scrub.sh
|
||||||
ZERO_OSD=2 ./test_scrub.sh
|
ZERO_OSD=2 ./test_scrub.sh
|
||||||
SCHEME=xor ./test_scrub.sh
|
SCHEME=xor ./test_scrub.sh
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
#!/bin/bash -ex

# Out-of-space (ENOSPC) handling test.
#
# Overfills the pool with sequential writes while client-side ENOSPC retries
# are disabled, so fio is expected to fail instead of blocking. Then checks
# that small random writes over the same inode still succeed.
#
# NOTE(review): ETCD_URL, format_error and format_green are not defined here;
# presumably they come from run_3osds.sh, which also presumably consumes
# OSD_SIZE and GLOBAL_CONFIG - confirm against that file.

OSD_SIZE=200
# Disable ENOSPC retries so that writes return the error to fio
GLOBAL_CONFIG=',"client_retry_enospc":false'

# Quoted so that a script path containing spaces still resolves correctly
. "$(dirname "$0")/run_3osds.sh"

export LD_PRELOAD="build/src/libfio_vitastor.so"

# Should fail with ENOSPC
# NOTE(review): any fio failure satisfies this check, not only ENOSPC
if fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 \
    -rw=write -etcd="$ETCD_URL" -pool=1 -inode=1 -size=500M -cluster_log_level=10; then
    format_error "Should get ENOSPC, but didn't"
fi

# Should fail with ENOSPC too (the idea is to try to overwrite first objects to check their rollback)
if fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=32 \
    -rw=write -etcd="$ETCD_URL" -pool=1 -inode=1 -size=500M -cluster_log_level=10; then
    format_error "Should get ENOSPC, but didn't"
fi

# Should complete OK
if ! fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=4 \
    -rw=randwrite -etcd="$ETCD_URL" -pool=1 -inode=1 -size=100M -cluster_log_level=10 -number_ios=4096; then
    format_error "Should do random writes over ENOSPC correctly, but got an error"
fi

# Stop exporting LD_PRELOAD so that later commands do not inherit it
export -n LD_PRELOAD

format_green OK
|
Loading…
Reference in New Issue