Split etcd_stats_interval out of etcd_report_interval
Test / buildenv (push) Successful in 8s Details
Test / build (push) Successful in 2m43s Details
Test / test_cas (push) Successful in 8s Details
Test / make_test (push) Successful in 34s Details
Test / test_change_pg_size (push) Successful in 7s Details
Test / test_change_pg_count (push) Successful in 35s Details
Test / test_create_nomaxid (push) Successful in 6s Details
Test / test_change_pg_count_ec (push) Successful in 1m8s Details
Test / test_etcd_fail (push) Successful in 1m29s Details
Test / test_interrupted_rebalance (push) Successful in 1m40s Details
Test / test_add_osd (push) Successful in 2m35s Details
Test / test_failure_domain (push) Successful in 41s Details
Test / test_interrupted_rebalance_imm (push) Successful in 2m3s Details
Test / test_snapshot (push) Successful in 29s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m26s Details
Test / test_minsize_1 (push) Successful in 14s Details
Test / test_interrupted_rebalance_ec (push) Successful in 1m46s Details
Test / test_snapshot_ec (push) Successful in 36s Details
Test / test_move_reappear (push) Successful in 19s Details
Test / test_rm (push) Successful in 15s Details
Test / test_snapshot_down (push) Successful in 29s Details
Test / test_snapshot_down_ec (push) Successful in 30s Details
Test / test_splitbrain (push) Successful in 26s Details
Test / test_snapshot_chain (push) Successful in 2m15s Details
Test / test_snapshot_chain_ec (push) Successful in 2m57s Details
Test / test_rebalance_verify_imm (push) Successful in 2m29s Details
Test / test_rebalance_verify (push) Successful in 3m40s Details
Test / test_write (push) Successful in 1m0s Details
Test / test_write_no_same (push) Successful in 13s Details
Test / test_write_xor (push) Successful in 50s Details
Test / test_rebalance_verify_ec (push) Successful in 4m58s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 4m14s Details
Test / test_heal_pg_size_2 (push) Successful in 4m21s Details
Test / test_heal_ec (push) Successful in 4m5s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m36s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m28s Details
Test / test_heal_csum_32k (push) Successful in 6m38s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m46s Details
Test / test_scrub_zero_osd_2 (push) Successful in 59s Details
Test / test_scrub (push) Successful in 1m16s Details
Test / test_scrub_xor (push) Successful in 53s Details
Test / test_scrub_pg_size_3 (push) Successful in 1m57s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m18s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m7s Details
Test / test_heal_csum_4k (push) Successful in 5m43s Details
Test / test_scrub_ec (push) Successful in 32s Details

hotfix-1.1.0
Vitaliy Filippov 2023-10-27 01:24:33 +03:00
parent 6fd2cf5df6
commit be7e76f849
7 changed files with 30 additions and 9 deletions

View File

@ -2,15 +2,28 @@
type: sec
default: 5
info: |
Interval at which OSDs report their state to etcd. Affects OSD lease time
Interval at which OSDs report their liveness to etcd. Affects OSD lease time
and thus the failover speed. Lease time is equal to this parameter value
plus max_etcd_attempts * etcd_quick_timeout because it should be guaranteed
that every OSD always refreshes its lease in time.
info_ru: |
Интервал, с которым OSD обновляет своё состояние в etcd. Значение параметра
влияет на время резервации (lease) OSD и поэтому на скорость переключения
Интервал, с которым OSD сообщает о том, что жив, в etcd. Значение параметра
влияет на время резервации (lease) OSD и поэтому - на скорость переключения
при падении OSD. Время lease равняется значению этого параметра плюс
max_etcd_attempts * etcd_quick_timeout.
- name: etcd_stats_interval
type: sec
default: 30
info: |
Interval at which OSDs report their statistics to etcd. Strongly affects the
load imposed on etcd, because statistics include a key for every OSD and
for every PG. At the same time, shorter statistics intervals make
`vitastor-cli` statistics more responsive.
info_ru: |
Интервал, с которым OSD обновляет свою статистику в etcd. Сильно влияет на
создаваемую нагрузку на etcd, потому что статистика содержит по ключу на
каждый OSD и на каждую PG. В то же время низкий интервал делает
статистику, печатаемую `vitastor-cli`, отзывчивей.
- name: run_primary
type: bool
default: true

View File

@ -99,6 +99,7 @@ const etcd_tree = {
etcd_ws_keepalive_interval: 30, // seconds
// osd
etcd_report_interval: 5, // seconds
etcd_stats_interval: 30, // seconds
run_primary: true,
osd_network: null, // "192.168.7.0/24" or an array of masks
bind_address: "0.0.0.0",

View File

@ -160,6 +160,9 @@ void osd_t::parse_config(bool init)
etcd_report_interval = config["etcd_report_interval"].uint64_value();
if (etcd_report_interval <= 0)
etcd_report_interval = 5;
etcd_stats_interval = config["etcd_stats_interval"].uint64_value();
if (etcd_stats_interval <= 0)
etcd_stats_interval = 30;
readonly = json_is_true(config["readonly"]);
run_primary = !json_is_false(config["run_primary"]);
allow_test_ops = json_is_true(config["allow_test_ops"]);

View File

@ -93,6 +93,7 @@ class osd_t
json11::Json::object cli_config, file_config, etcd_global_config, etcd_osd_config, config;
int etcd_report_interval = 5;
int etcd_stats_interval = 30;
bool readonly = false;
osd_num_t osd_num = 1; // OSD numbers start with 1

View File

@ -429,14 +429,18 @@ void osd_t::acquire_lease()
create_osd_state();
});
printf(
"[OSD %lu] reporting to etcd at %s every %d seconds\n", this->osd_num,
"[OSD %lu] reporting to etcd at %s every %d seconds (statistics every %d seconds)\n", this->osd_num,
(config["etcd_address"].is_string() ? config["etcd_address"].string_value() : config["etcd_address"].dump()).c_str(),
etcd_report_interval
etcd_report_interval, etcd_stats_interval
);
tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id)
{
renew_lease(false);
});
tfd->set_timer(etcd_stats_interval*1000, true, [this](int timer_id)
{
report_statistics();
});
}
// Report "up" state once, then keep it alive using the lease
@ -541,7 +545,6 @@ void osd_t::renew_lease(bool reload)
else
{
etcd_failed_attempts = 0;
report_statistics();
// Reload PGs
if (reload && run_primary)
{

View File

@ -18,10 +18,10 @@ else
fi
if [ "$IMMEDIATE_COMMIT" != "" ]; then
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10"
NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1,"immediate_commit":"all","client_enable_writeback":true}'
else
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10"
NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5"
$ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1,"client_enable_writeback":true}'
fi

View File

@ -7,7 +7,7 @@ OSD_COUNT=5
OSD_ARGS="$OSD_ARGS"
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 &
eval OSD${i}_PID=$!
done