Compare commits

...

2 Commits

Author SHA1 Message Date
Vitaliy Filippov 0f80c87b43 Add a minimum interval for etcd_state_client to reload state
(To prevent excessive load on etcd during outages)
2025-03-19 02:36:09 +03:00
Vitaliy Filippov e0953fd502 Wait for all "up" OSDs to be connected before starting PG 2025-03-19 02:36:09 +03:00
7 changed files with 77 additions and 0 deletions

View File

@ -30,6 +30,7 @@ between clients, OSDs and etcd.
- [etcd_slow_timeout](#etcd_slow_timeout) - [etcd_slow_timeout](#etcd_slow_timeout)
- [etcd_keepalive_timeout](#etcd_keepalive_timeout) - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval) - [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
## tcp_header_buffer_size ## tcp_header_buffer_size
@ -261,3 +262,13 @@ etcd_report_interval to guarantee that keepalive actually works.
etcd websocket ping interval required to keep the connection alive and etcd websocket ping interval required to keep the connection alive and
detect disconnections quickly. detect disconnections quickly.
## etcd_min_reload_interval
- Type: milliseconds
- Default: 1000
- Can be changed online: yes
Minimum interval between full etcd state reloads. Introduced to prevent
excessive load on etcd during outages, when etcd can't keep up with event
streams and cancels them.

View File

@ -30,6 +30,7 @@
- [etcd_slow_timeout](#etcd_slow_timeout) - [etcd_slow_timeout](#etcd_slow_timeout)
- [etcd_keepalive_timeout](#etcd_keepalive_timeout) - [etcd_keepalive_timeout](#etcd_keepalive_timeout)
- [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval) - [etcd_ws_keepalive_interval](#etcd_ws_keepalive_interval)
- [etcd_min_reload_interval](#etcd_min_reload_interval)
## tcp_header_buffer_size ## tcp_header_buffer_size
@ -271,3 +272,13 @@ etcd_report_interval, чтобы keepalive гарантированно рабо
- Можно менять на лету: да - Можно менять на лету: да
Интервал проверки живости вебсокет-подключений к etcd. Интервал проверки живости вебсокет-подключений к etcd.
## etcd_min_reload_interval
- Тип: миллисекунды
- Значение по умолчанию: 1000
- Можно менять на лету: да
Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
успевает рассылать потоки событий и отменяет их.

View File

@ -306,3 +306,15 @@
detect disconnections quickly. detect disconnections quickly.
info_ru: | info_ru: |
Интервал проверки живости вебсокет-подключений к etcd. Интервал проверки живости вебсокет-подключений к etcd.
- name: etcd_min_reload_interval
type: ms
default: 1000
online: true
info: |
Minimum interval between full etcd state reloads. Introduced to prevent
excessive load on etcd during outages, when etcd can't keep up with event
streams and cancels them.
info_ru: |
Минимальный интервал полной перезагрузки состояния из etcd. Добавлено для
предотвращения избыточной нагрузки на etcd во время отказов, когда etcd не
успевает рассылать потоки событий и отменяет их.

View File

@ -22,6 +22,8 @@ vitastor-cli - интерфейс командной строки для адм
- [flatten](#flatten) - [flatten](#flatten)
- [rm-data](#rm-data) - [rm-data](#rm-data)
- [merge-data](#merge-data) - [merge-data](#merge-data)
- [describe](#describe)
- [fix](#fix)
- [alloc-osd](#alloc-osd) - [alloc-osd](#alloc-osd)
- [rm-osd](#rm-osd) - [rm-osd](#rm-osd)
- [osd-tree](#osd-tree) - [osd-tree](#osd-tree)

View File

@ -31,6 +31,11 @@ etcd_state_client_t::~etcd_state_client_t()
keepalive_client = NULL; keepalive_client = NULL;
} }
#endif #endif
if (load_pgs_timer_id >= 0)
{
tfd->clear_timer(load_pgs_timer_id);
load_pgs_timer_id = -1;
}
} }
#ifndef __MOCK__ #ifndef __MOCK__
@ -143,6 +148,7 @@ void etcd_state_client_t::etcd_call(std::string api, json11::Json payload, int t
} }
if (interval > 0) if (interval > 0)
{ {
// FIXME: Prevent destruction of etcd_state_client if timers or requests are active
tfd->set_timer(interval, false, [this, api, payload, timeout, retries, interval, callback](int) tfd->set_timer(interval, false, [this, api, payload, timeout, retries, interval, callback](int)
{ {
etcd_call(api, payload, timeout, retries, interval, callback); etcd_call(api, payload, timeout, retries, interval, callback);
@ -271,6 +277,11 @@ void etcd_state_client_t::parse_config(const json11::Json & config)
{ {
this->etcd_quick_timeout = 1000; this->etcd_quick_timeout = 1000;
} }
this->etcd_min_reload_interval = config["etcd_min_reload_interval"].uint64_value();
if (this->etcd_min_reload_interval <= 0)
{
this->etcd_min_reload_interval = 50;
}
if (this->etcd_ws_keepalive_interval != old_etcd_ws_keepalive_interval && ws_keepalive_timer >= 0) if (this->etcd_ws_keepalive_interval != old_etcd_ws_keepalive_interval && ws_keepalive_timer >= 0)
{ {
#ifndef __MOCK__ #ifndef __MOCK__
@ -603,6 +614,23 @@ void etcd_state_client_t::load_global_config()
void etcd_state_client_t::load_pgs() void etcd_state_client_t::load_pgs()
{ {
timespec tv;
clock_gettime(CLOCK_REALTIME, &tv);
uint64_t ms_passed = (tv.tv_sec-etcd_last_reload.tv_sec)*1000 + (tv.tv_nsec-etcd_last_reload.tv_nsec)/1000000;
if (ms_passed < etcd_min_reload_interval)
{
if (load_pgs_timer_id < 0)
{
load_pgs_timer_id = tfd->set_timer(etcd_min_reload_interval+50-ms_passed, false, [this](int) { load_pgs(); });
}
return;
}
etcd_last_reload = tv;
if (load_pgs_timer_id >= 0)
{
tfd->clear_timer(load_pgs_timer_id);
load_pgs_timer_id = -1;
}
json11::Json::array txn = { json11::Json::array txn = {
json11::Json::object { json11::Json::object {
{ "request_range", json11::Json::object { { "request_range", json11::Json::object {

View File

@ -108,6 +108,7 @@ public:
int max_etcd_attempts = 5; int max_etcd_attempts = 5;
int etcd_quick_timeout = 1000; int etcd_quick_timeout = 1000;
int etcd_slow_timeout = 5000; int etcd_slow_timeout = 5000;
int etcd_min_reload_interval = 1000;
bool infinite_start = true; bool infinite_start = true;
uint64_t global_block_size = DEFAULT_BLOCK_SIZE; uint64_t global_block_size = DEFAULT_BLOCK_SIZE;
uint32_t global_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY; uint32_t global_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
@ -122,6 +123,8 @@ public:
uint64_t etcd_watch_revision_config = 0; uint64_t etcd_watch_revision_config = 0;
uint64_t etcd_watch_revision_osd = 0; uint64_t etcd_watch_revision_osd = 0;
uint64_t etcd_watch_revision_pg = 0; uint64_t etcd_watch_revision_pg = 0;
timespec etcd_last_reload = {};
int load_pgs_timer_id = -1;
std::map<pool_id_t, pool_config_t> pool_config; std::map<pool_id_t, pool_config_t> pool_config;
std::map<osd_num_t, json11::Json> peer_states; std::map<osd_num_t, json11::Json> peer_states;
std::set<osd_num_t> seen_peers; std::set<osd_num_t> seen_peers;

View File

@ -199,6 +199,7 @@ void osd_t::start_pg_peering(pg_t & pg)
drop_dirty_pg_connections({ .pool_id = pg.pool_id, .pg_num = pg.pg_num }); drop_dirty_pg_connections({ .pool_id = pg.pool_id, .pg_num = pg.pg_num });
// Try to connect with current peers if they're up, but we don't have connections to them // Try to connect with current peers if they're up, but we don't have connections to them
// Otherwise we may erroneously decide that the pg is incomplete :-) // Otherwise we may erroneously decide that the pg is incomplete :-)
bool all_connected = true;
for (auto pg_osd: pg.all_peers) for (auto pg_osd: pg.all_peers)
{ {
if (pg_osd != this->osd_num && if (pg_osd != this->osd_num &&
@ -206,8 +207,17 @@ void osd_t::start_pg_peering(pg_t & pg)
msgr.wanted_peers.find(pg_osd) == msgr.wanted_peers.end()) msgr.wanted_peers.find(pg_osd) == msgr.wanted_peers.end())
{ {
msgr.connect_peer(pg_osd, st_cli.peer_states[pg_osd]); msgr.connect_peer(pg_osd, st_cli.peer_states[pg_osd]);
if (!st_cli.peer_states[pg_osd].is_null())
all_connected = false;
} }
} }
if (!all_connected)
{
// Wait until all OSDs are either connected or their /osd/state disappears from etcd
pg.state = PG_INCOMPLETE;
report_pg_state(pg);
return;
}
// Calculate current write OSD set // Calculate current write OSD set
pg.pg_cursize = 0; pg.pg_cursize = 0;
pg.cur_set.resize(pg.target_set.size()); pg.cur_set.resize(pg.target_set.size());