Add a test for simple master switching without PG reconfiguration
Test / buildenv (push) Successful in 12s Details
Test / build (push) Successful in 2m48s Details
Test / test_cas (push) Successful in 11s Details
Test / make_test (push) Successful in 39s Details
Test / test_change_pg_size (push) Successful in 9s Details
Test / test_change_pg_count (push) Successful in 44s Details
Test / test_create_nomaxid (push) Successful in 9s Details
Test / test_change_pg_count_ec (push) Successful in 47s Details
Test / test_etcd_fail (push) Successful in 56s Details
Test / test_interrupted_rebalance_imm (push) Successful in 1m36s Details
Test / test_add_osd (push) Successful in 2m44s Details
Test / test_failure_domain (push) Successful in 46s Details
Test / test_snapshot (push) Successful in 22s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m29s Details
Test / test_snapshot_ec (push) Successful in 24s Details
Test / test_minsize_1 (push) Successful in 14s Details
Test / test_rm (push) Successful in 13s Details
Test / test_move_reappear (push) Successful in 20s Details
Test / test_snapshot_chain (push) Successful in 1m27s Details
Test / test_snapshot_down (push) Successful in 23s Details
Test / test_snapshot_chain_ec (push) Successful in 1m56s Details
Test / test_snapshot_down_ec (push) Successful in 23s Details
Test / test_splitbrain (push) Successful in 17s Details
Test / test_interrupted_rebalance_ec (push) Successful in 6m40s Details
Test / test_interrupted_rebalance (push) Successful in 8m12s Details
Test / test_rebalance_verify_imm (push) Successful in 3m12s Details
Test / test_switch_primary (push) Successful in 34s Details
Test / test_write (push) Successful in 46s Details
Test / test_rebalance_verify_ec (push) Successful in 3m18s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 2m42s Details
Test / test_write_no_same (push) Successful in 15s Details
Test / test_rebalance_verify (push) Successful in 6m36s Details
Test / test_heal_ec (push) Successful in 5m2s Details
Test / test_heal_csum_32k_dmj (push) Successful in 4m33s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m58s Details
Test / test_heal_csum_32k (push) Successful in 6m6s Details
Test / test_scrub (push) Successful in 47s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m17s Details
Test / test_scrub_zero_osd_2 (push) Successful in 43s Details
Test / test_scrub_xor (push) Successful in 47s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m44s Details
Test / test_scrub_ec (push) Successful in 41s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m18s Details
Test / test_scrub_pg_size_3 (push) Successful in 2m11s Details
Test / test_heal_csum_4k (push) Successful in 6m12s Details
Test / test_heal_pg_size_2 (push) Successful in 3m16s Details
Test / test_write_xor (push) Successful in 34s Details

Also use osd_out_time:1 only in select tests and restart mon in tests only on connection errors
test-fix-ec-unknown-state-51
Vitaliy Filippov 2024-01-17 00:16:56 +03:00
parent 3ca3b8a8d8
commit 2aa5aa7ab6
9 changed files with 53 additions and 10 deletions

View File

@ -532,6 +532,24 @@ jobs:
echo ""
done
test_switch_primary:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
timeout-minutes: 3
run: /root/vitastor/tests/test_switch_primary.sh
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_write:
runs-on: ubuntu-latest
needs: build

View File

@ -390,7 +390,8 @@ class Mon
{
constructor(config)
{
this.die = (e) => this._die(e);
this.failconnect = (e) => this._die(e, 2);
this.die = (e) => this._die(e, 1);
if (fs.existsSync(config.config_path||'/etc/vitastor/vitastor.conf'))
{
config = {
@ -604,7 +605,7 @@ class Mon
}
if (!this.ws)
{
this.die('Failed to open etcd watch websocket');
this.failconnect('Failed to open etcd watch websocket');
}
const cur_addr = this.selected_etcd_url;
this.ws_alive = true;
@ -791,7 +792,7 @@ class Mon
const res = await this.etcd_call('/lease/keepalive', { ID: this.etcd_lease_id }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
if (!res.result.TTL)
{
this.die('Lease expired');
this.failconnect('Lease expired');
}
}, this.config.etcd_mon_timeout);
if (!this.signals_set)
@ -1997,14 +1998,14 @@ class Mon
return res.json;
}
}
this.die();
this.failconnect();
}
_die(err)
_die(err, code)
{
// In fact we can just try to rejoin
console.error(new Error(err || 'Cluster connection failed'));
process.exit(1);
process.exit(code || 2);
}
local_ips(all)

View File

@ -10,6 +10,7 @@ SCHEME=${SCHEME:-replicated}
# OFFSET_ARGS
# PG_SIZE
# PG_MINSIZE
# GLOBAL_CONFIG
if [ "$SCHEME" = "ec" ]; then
OSD_COUNT=${OSD_COUNT:-5}
@ -19,10 +20,10 @@ fi
if [ "$IMMEDIATE_COMMIT" != "" ]; then
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"osd_out_time":1,"immediate_commit":"all","client_enable_writeback":true}'
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"immediate_commit":"all","client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
else
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"osd_out_time":1,"client_enable_writeback":true}'
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"recovery_tune_util_low":1,"client_enable_writeback":true,"client_max_writeback_iodepth":32'$GLOBAL_CONFIG'}'
fi
start_osd_on()
@ -53,7 +54,7 @@ for i in $(seq 1 $OSD_COUNT); do
start_osd $i
done
(while true; do node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1 || true; done) >>./testdata/mon.log 2>&1 &
(while true; do set +e; node mon/mon-main.js --etcd_address $ETCD_URL --etcd_prefix "/vitastor" --verbose 1; if [[ $? -ne 2 ]]; then break; fi; done) >>./testdata/mon.log 2>&1 &
MON_PID=$!
if [ "$SCHEME" = "ec" ]; then

View File

@ -45,6 +45,8 @@ IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
SCHEME=ec ./test_rebalance_verify.sh
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_rebalance_verify.sh
./test_switch_primary.sh
./test_write.sh
SCHEME=xor ./test_write.sh

View File

@ -1,7 +1,7 @@
#!/bin/bash -ex
PG_COUNT=2048
GLOBAL_CONFIG=',"osd_out_time":1'
. `dirname $0`/run_3osds.sh
LD_PRELOAD="build/src/libfio_vitastor.so" \

View File

@ -9,6 +9,7 @@ if [[ "$SCHEME" = "ec" ]]; then
fi
OSD_COUNT=${OSD_COUNT:-7}
PG_COUNT=32
GLOBAL_CONFIG=',"osd_out_time":1'
. `dirname $0`/run_3osds.sh
check_qemu

View File

@ -2,6 +2,7 @@
PG_MINSIZE=1
SCHEME=replicated
GLOBAL_CONFIG=',"osd_out_time":1'
. `dirname $0`/run_3osds.sh

View File

@ -4,6 +4,7 @@ OSD_COUNT=2
PG_SIZE=2
PG_MINSIZE=1
SCHEME=replicated
GLOBAL_CONFIG=',"osd_out_time":1'
. `dirname $0`/run_3osds.sh

18
tests/test_switch_primary.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash -ex
. `dirname $0`/run_3osds.sh
primary=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
primary_pid=OSD${primary}_PID
kill -9 ${!primary_pid}
sleep 15
wait_condition 10 "$ETCDCTL get --print-value-only /vitastor/config/pgs | jq -s -e '.[0].items[\"1\"][\"1\"].primary != \"$primary\"'"
newprim=$($ETCDCTL get --print-value-only /vitastor/config/pgs | jq -r '.items["1"]["1"].primary')
if [ "$newprim" = "$primary" ]; then
format_error Primary not switched
fi
format_green OK