Compare commits

...

2 Commits

Author SHA1 Message Date
Vitaliy Filippov 0f80c87b43 Add a minimum interval for etcd_state_client to reload state
(To prevent excessive load on etcd during outages)
2025-03-19 02:36:09 +03:00
Vitaliy Filippov e0953fd502 Wait for all "up" OSDs to be connected before starting PG 2025-03-19 02:36:09 +03:00
7 changed files with 77 additions and 0 deletions

View File

@ -30,6 +30,7 @@ between clients, OSDs and etcd.
- [etcd_slow_timeout](#etcd_slow_timeout)
- [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
## tcp_header_buffer_size
@ -261,3 +262,13 @@ etcd_report_interval to guarantee that keepalive actually works.
etcd websocket ping interval required to keep the connection alive and
detect disconnections quickly.
## etcd_min_reload_interval
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
Minimum interval for full etcd state reload. Introduced to prevent
excessive load on etcd during outages when etcd can't keep up with event
streams and cancels them.

View File

@ -30,6 +30,7 @@
- [etcd_slow_timeout](#etcd_slow_timeout)
- [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
## tcp_header_buffer_size
@ -271,3 +272,13 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
- Можно менять на лету: да
Интервал проверки живости вебсокет-подключений к etcd.
## etcd_min_reload_interval
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
успевает рассылать потоки событий и отменяет их.

View File

@ -306,3 +306,15 @@
detect disconnections quickly.
info_ru: |
Интервал проверки живости вебсокет-подключений к etcd.
- name: etcd_min_reload_interval
type: ms
default: 1000
online: true
info: |
Minimum interval for full etcd state reload. Introduced to prevent
excessive load on etcd during outages when etcd can't keep up with event
streams and cancels them.
info_ru: |
Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
успевает рассылать потоки событий и отменяет их.

View File

@ -22,6 +22,8 @@ vitastor-cli - интерфейс командной строки для адм
- [flatten](#flatten)
- [rm-data](#rm-data)
- [merge-data](#merge-data)
- [describe](#describe)
- [fix](#fix)
- [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd)
- [osd-tree](#osd-tree)

View File

@ -31,6 +31,11 @@ etcd_state_client_t::~etcd_state_client_t()
keepalive_client = NULL;
}
#endif
if (load_pgs_timer_id >= 0)
{
tfd->clear_timer(load_pgs_timer_id);
load_pgs_timer_id = -1;
}
}
#ifndef __MOCK__
@ -143,6 +148,7 @@ void etcd_state_client_t::etcd_call(std::string api, json11::Json payload, int t
}
if (interval > 0)
{
// FIXME: Prevent destruction of etcd_state_client if timers or requests are active
tfd->set_timer(interval, false, [this, api, payload, timeout, retries, interval, callback](int)
{
etcd_call(api, payload, timeout, retries, interval, callback);
@ -271,6 +277,11 @@ void etcd_state_client_t::parse_config(const json11::Json & config)
{
this->etcd_quick_timeout = 1000;
}
this->etcd_min_reload_interval = config["etcd_min_reload_interval"].uint64_value();
if (this->etcd_min_reload_interval <= 0)
{
this->etcd_min_reload_interval = 50;
}
if (this->etcd_ws_keepalive_interval != old_etcd_ws_keepalive_interval && ws_keepalive_timer >= 0)
{
#ifndef __MOCK__
@ -603,6 +614,23 @@ void etcd_state_client_t::load_global_config()
void etcd_state_client_t::load_pgs()
{
timespec tv;
clock_gettime(CLOCK_REALTIME, &tv);
uint64_t ms_passed = (tv.tv_sec-etcd_last_reload.tv_sec)*1000 + (tv.tv_nsec-etcd_last_reload.tv_nsec)/1000000;
if (ms_passed < etcd_min_reload_interval)
{
if (load_pgs_timer_id < 0)
{
load_pgs_timer_id = tfd->set_timer(etcd_min_reload_interval+50-ms_passed, false, [this](int) { load_pgs(); });
}
return;
}
etcd_last_reload = tv;
if (load_pgs_timer_id >= 0)
{
tfd->clear_timer(load_pgs_timer_id);
load_pgs_timer_id = -1;
}
json11::Json::array txn = {
json11::Json::object {
{ "request_range", json11::Json::object {

View File

@ -108,6 +108,7 @@ public:
int max_etcd_attempts = 5;
int etcd_quick_timeout = 1000;
int etcd_slow_timeout = 5000;
int etcd_min_reload_interval = 1000;
bool infinite_start = true;
uint64_t global_block_size = DEFAULT_BLOCK_SIZE;
uint32_t global_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
@ -122,6 +123,8 @@ public:
uint64_t etcd_watch_revision_config = 0;
uint64_t etcd_watch_revision_osd = 0;
uint64_t etcd_watch_revision_pg = 0;
timespec etcd_last_reload = {};
int load_pgs_timer_id = -1;
std::map<pool_id_t, pool_config_t> pool_config;
std::map<osd_num_t, json11::Json> peer_states;
std::set<osd_num_t> seen_peers;

View File

@ -199,6 +199,7 @@ void osd_t::start_pg_peering(pg_t & pg)
drop_dirty_pg_connections({ .pool_id = pg.pool_id, .pg_num = pg.pg_num });
// Try to connect with current peers if they're up, but we don't have connections to them
// Otherwise we may erroneously decide that the pg is incomplete :-)
bool all_connected = true;
for (auto pg_osd: pg.all_peers)
{
if (pg_osd != this->osd_num &&
@ -206,8 +207,17 @@ void osd_t::start_pg_peering(pg_t & pg)
msgr.wanted_peers.find(pg_osd) == msgr.wanted_peers.end())
{
msgr.connect_peer(pg_osd, st_cli.peer_states[pg_osd]);
if (!st_cli.peer_states[pg_osd].is_null())
all_connected = false;
}
}
if (!all_connected)
{
// Wait until all OSDs are either connected or their /osd/state disappears from etcd
pg.state = PG_INCOMPLETE;
report_pg_state(pg);
return;
}
// Calculate current write OSD set
pg.pg_cursize = 0;
pg.cur_set.resize(pg.target_set.size());