diff --git a/docs/config/src/osd.yml b/docs/config/src/osd.yml index 7ca498d2..53a2f7c8 100644 --- a/docs/config/src/osd.yml +++ b/docs/config/src/osd.yml @@ -2,15 +2,28 @@ type: sec default: 5 info: | - Interval at which OSDs report their state to etcd. Affects OSD lease time + Interval at which OSDs report their liveness to etcd. Affects OSD lease time and thus the failover speed. Lease time is equal to this parameter value plus max_etcd_attempts * etcd_quick_timeout because it should be guaranteed that every OSD always refreshes its lease in time. info_ru: | - Интервал, с которым OSD обновляет своё состояние в etcd. Значение параметра - влияет на время резервации (lease) OSD и поэтому на скорость переключения + Интервал, с которым OSD сообщает о том, что жив, в etcd. Значение параметра + влияет на время резервации (lease) OSD и поэтому - на скорость переключения при падении OSD. Время lease равняется значению этого параметра плюс max_etcd_attempts * etcd_quick_timeout. +- name: etcd_stats_interval + type: sec + default: 30 + info: | + Interval at which OSDs report their statistics to etcd. Strongly affects the + load imposed on etcd, because statistics include a key for every OSD and + for every PG. At the same time, shorter statistics intervals make `vitastor-cli` + statistics more responsive. + info_ru: | + Интервал, с которым OSD обновляет свою статистику в etcd. Сильно влияет на + создаваемую нагрузку на etcd, потому что статистика содержит по ключу на + каждый OSD и на каждую PG. В то же время низкий интервал делает + статистику, печатаемую `vitastor-cli`, отзывчивей. 
- name: run_primary type: bool default: true diff --git a/mon/mon.js b/mon/mon.js index c6b981bf..453300c6 100644 --- a/mon/mon.js +++ b/mon/mon.js @@ -99,6 +99,7 @@ const etcd_tree = { etcd_ws_keepalive_interval: 30, // seconds // osd etcd_report_interval: 5, // seconds + etcd_stats_interval: 30, // seconds run_primary: true, osd_network: null, // "192.168.7.0/24" or an array of masks bind_address: "0.0.0.0", diff --git a/src/osd.cpp b/src/osd.cpp index 32157ab1..d4ab0ef2 100644 --- a/src/osd.cpp +++ b/src/osd.cpp @@ -160,6 +160,9 @@ void osd_t::parse_config(bool init) etcd_report_interval = config["etcd_report_interval"].uint64_value(); if (etcd_report_interval <= 0) etcd_report_interval = 5; + etcd_stats_interval = config["etcd_stats_interval"].uint64_value(); + if (etcd_stats_interval <= 0) + etcd_stats_interval = 30; readonly = json_is_true(config["readonly"]); run_primary = !json_is_false(config["run_primary"]); allow_test_ops = json_is_true(config["allow_test_ops"]); diff --git a/src/osd.h b/src/osd.h index c7a67de9..d9a7a2ae 100644 --- a/src/osd.h +++ b/src/osd.h @@ -93,6 +93,7 @@ class osd_t json11::Json::object cli_config, file_config, etcd_global_config, etcd_osd_config, config; int etcd_report_interval = 5; + int etcd_stats_interval = 30; bool readonly = false; osd_num_t osd_num = 1; // OSD numbers start with 1 diff --git a/src/osd_cluster.cpp b/src/osd_cluster.cpp index 2df59d3e..110646b8 100644 --- a/src/osd_cluster.cpp +++ b/src/osd_cluster.cpp @@ -429,14 +429,18 @@ void osd_t::acquire_lease() create_osd_state(); }); printf( - "[OSD %lu] reporting to etcd at %s every %d seconds\n", this->osd_num, + "[OSD %lu] reporting to etcd at %s every %d seconds (statistics every %d seconds)\n", this->osd_num, (config["etcd_address"].is_string() ? 
config["etcd_address"].string_value() : config["etcd_address"].dump()).c_str(), - etcd_report_interval + etcd_report_interval, etcd_stats_interval ); tfd->set_timer(etcd_report_interval*1000, true, [this](int timer_id) { renew_lease(false); }); + tfd->set_timer(etcd_stats_interval*1000, true, [this](int timer_id) + { + report_statistics(); + }); } // Report "up" state once, then keep it alive using the lease @@ -541,7 +545,6 @@ void osd_t::renew_lease(bool reload) else { etcd_failed_attempts = 0; - report_statistics(); // Reload PGs if (reload && run_primary) { diff --git a/tests/run_3osds.sh b/tests/run_3osds.sh index b9a3e64c..ebead60e 100644 --- a/tests/run_3osds.sh +++ b/tests/run_3osds.sh @@ -18,10 +18,10 @@ else fi if [ "$IMMEDIATE_COMMIT" != "" ]; then - NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10" + NO_SAME="--journal_no_same_sector_overwrites true --journal_sector_buffer_count 1024 --disable_data_fsync 1 --immediate_commit all --log_level 10 --etcd_stats_interval 5" $ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1,"immediate_commit":"all","client_enable_writeback":true}' else - NO_SAME="--journal_sector_buffer_count 1024 --log_level 10" + NO_SAME="--journal_sector_buffer_count 1024 --log_level 10 --etcd_stats_interval 5" $ETCDCTL put /vitastor/config/global '{"recovery_queue_depth":1,"osd_out_time":1,"client_enable_writeback":true}' fi diff --git a/tests/test_move_reappear.sh b/tests/test_move_reappear.sh index 27aa55d6..e8b949bd 100755 --- a/tests/test_move_reappear.sh +++ b/tests/test_move_reappear.sh @@ -7,7 +7,7 @@ OSD_COUNT=5 OSD_ARGS="$OSD_ARGS" for i in $(seq 1 $OSD_COUNT); do dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1)) - build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-disk simple-offsets --format options 
./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 & + build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 --etcd_stats_interval 5 $OSD_ARGS --etcd_address $ETCD_URL $(build/src/vitastor-disk simple-offsets --format options ./testdata/test_osd$i.bin 2>/dev/null) >>./testdata/osd$i.log 2>&1 & eval OSD${i}_PID=$! done