Aggregate recovery delay using a simple mean over the last recovery_tune_agg_interval (default 10) observations, replacing the EWMA smoothing
parent
5ca7cde612
commit
1edf86ed26
|
@ -117,7 +117,7 @@ const etcd_tree = {
|
|||
recovery_tune_max_util: 1.0,
|
||||
recovery_tune_max_client_util: 0.5,
|
||||
recovery_tune_interval: 1,
|
||||
recovery_tune_ewma_rate: 0.5,
|
||||
recovery_tune_agg_interval: 10, // 10 times recovery_tune_interval
|
||||
recovery_tune_sleep_min_us: 10, // 10 microseconds
|
||||
recovery_pg_switch: 128,
|
||||
recovery_sync_batch: 16,
|
||||
|
|
|
@ -220,8 +220,8 @@ void osd_t::parse_config(bool init)
|
|||
auto old_recovery_tune_interval = recovery_tune_interval;
|
||||
recovery_tune_interval = config["recovery_tune_interval"].is_null()
|
||||
? 1 : config["recovery_tune_interval"].uint64_value();
|
||||
recovery_tune_ewma_rate = config["recovery_tune_ewma_rate"].is_null()
|
||||
? 0.5 : config["recovery_tune_ewma_rate"].number_value();
|
||||
recovery_tune_agg_interval = config["recovery_tune_agg_interval"].is_null()
|
||||
? 10 : config["recovery_tune_agg_interval"].uint64_value();
|
||||
recovery_tune_sleep_min_us = config["recovery_tune_sleep_min_us"].is_null()
|
||||
? 10 : config["recovery_tune_sleep_min_us"].uint64_value();
|
||||
recovery_pg_switch = config["recovery_pg_switch"].uint64_value();
|
||||
|
@ -494,11 +494,12 @@ void osd_t::print_stats()
|
|||
{
|
||||
uint64_t bw = (recovery_stat[i].bytes - recovery_print_prev[i].bytes) / print_stats_interval;
|
||||
printf(
|
||||
"[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s, avg lat %ld us\n", osd_num, recovery_stat_names[i],
|
||||
"[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s, avg latency %ld us, delay %ld us\n", osd_num, recovery_stat_names[i],
|
||||
(recovery_stat[i].count - recovery_print_prev[i].count) * 1.0 / print_stats_interval,
|
||||
(bw > 1024*1024*1024 ? bw/1024.0/1024/1024 : (bw > 1024*1024 ? bw/1024.0/1024 : bw/1024.0)),
|
||||
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s")),
|
||||
(recovery_stat[i].usec - recovery_print_prev[i].usec) / (recovery_stat[i].count - recovery_print_prev[i].count)
|
||||
(recovery_stat[i].usec - recovery_print_prev[i].usec) / (recovery_stat[i].count - recovery_print_prev[i].count),
|
||||
recovery_target_sleep_us
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -123,7 +123,7 @@ class osd_t
|
|||
double recovery_tune_max_util = 1.0;
|
||||
double recovery_tune_max_client_util = 0.5;
|
||||
int recovery_tune_interval = 1;
|
||||
double recovery_tune_ewma_rate = 0.2;
|
||||
int recovery_tune_agg_interval = 10;
|
||||
int recovery_tune_sleep_min_us = 10;
|
||||
int recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH;
|
||||
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
|
||||
|
@ -210,8 +210,10 @@ class osd_t
|
|||
uint64_t rtune_avg_lat = 0;
|
||||
double rtune_client_util = 0, rtune_target_util = 1;
|
||||
osd_op_stats_t rtune_prev_stats, rtune_prev_recovery_stats;
|
||||
uint64_t recovery_target_queue_depth = 1;
|
||||
std::vector<uint64_t> recovery_target_sleep_items;
|
||||
uint64_t recovery_target_sleep_us = 0;
|
||||
uint64_t recovery_target_sleep_total = 0;
|
||||
int recovery_target_sleep_cur = 0, recovery_target_sleep_count = 0;
|
||||
|
||||
// cluster connection
|
||||
void parse_config(bool init);
|
||||
|
|
|
@ -346,7 +346,6 @@ void osd_t::apply_recovery_tune_interval()
|
|||
}
|
||||
else
|
||||
{
|
||||
recovery_target_queue_depth = recovery_queue_depth;
|
||||
recovery_target_sleep_us = recovery_sleep_us;
|
||||
}
|
||||
}
|
||||
|
@ -412,8 +411,7 @@ void osd_t::tune_recovery()
|
|||
// = rtune_avg_lat * rtune_avg_lat * rtune_avg_iops / target_util
|
||||
// = 0.0625
|
||||
// recovery utilisation will be 1
|
||||
auto client_util = total_client_usec/1000000.0/recovery_tune_interval;
|
||||
rtune_client_util = rtune_client_util*(1-recovery_tune_ewma_rate) + client_util*recovery_tune_ewma_rate;
|
||||
rtune_client_util = total_client_usec/1000000.0/recovery_tune_interval;
|
||||
rtune_target_util = (rtune_client_util < recovery_tune_min_client_util
|
||||
? recovery_tune_max_util
|
||||
: recovery_tune_min_util + (rtune_client_util >= recovery_tune_max_client_util
|
||||
|
@ -421,15 +419,31 @@ void osd_t::tune_recovery()
|
|||
(recovery_tune_max_client_util-rtune_client_util)/(recovery_tune_max_client_util-recovery_tune_min_client_util)
|
||||
)
|
||||
);
|
||||
rtune_avg_lat = total_recovery_usec/recovery_count*recovery_tune_ewma_rate + rtune_avg_lat*(1-recovery_tune_ewma_rate);
|
||||
recovery_target_queue_depth = (int)rtune_target_util + (rtune_target_util < 1 || rtune_target_util-(int)rtune_target_util >= 0.1 ? 1 : 0);
|
||||
rtune_avg_lat = total_recovery_usec/recovery_count;
|
||||
uint64_t target_lat = rtune_avg_lat * rtune_avg_lat/1000000.0 * recovery_count/recovery_tune_interval / rtune_target_util;
|
||||
recovery_target_sleep_us = target_lat > rtune_avg_lat+recovery_tune_sleep_min_us ? target_lat-rtune_avg_lat : 0;
|
||||
if (log_level > 3)
|
||||
auto sleep_us = target_lat > rtune_avg_lat+recovery_tune_sleep_min_us ? target_lat-rtune_avg_lat : 0;
|
||||
if (recovery_target_sleep_items.size() != recovery_tune_agg_interval)
|
||||
{
|
||||
recovery_target_sleep_items.resize(recovery_tune_agg_interval);
|
||||
for (int i = 0; i < recovery_tune_agg_interval; i++)
|
||||
recovery_target_sleep_items[i] = 0;
|
||||
recovery_target_sleep_total = 0;
|
||||
recovery_target_sleep_cur = 0;
|
||||
recovery_target_sleep_count = 0;
|
||||
}
|
||||
recovery_target_sleep_total -= recovery_target_sleep_items[recovery_target_sleep_cur];
|
||||
recovery_target_sleep_items[recovery_target_sleep_cur] = sleep_us;
|
||||
recovery_target_sleep_cur = (recovery_target_sleep_cur+1) % recovery_tune_agg_interval;
|
||||
recovery_target_sleep_total += sleep_us;
|
||||
if (recovery_target_sleep_count < recovery_tune_agg_interval)
|
||||
recovery_target_sleep_count++;
|
||||
recovery_target_sleep_us = recovery_target_sleep_total / recovery_target_sleep_count;
|
||||
if (log_level > 4)
|
||||
{
|
||||
printf(
|
||||
"recovery tune: cli %lu us, recovery %lu us / %lu ops, target util %.2f -> queue %ld, lat %lu us, real %lu us, delay %lu us\n",
|
||||
total_client_usec, total_recovery_usec, recovery_count, rtune_target_util, recovery_target_queue_depth, target_lat, rtune_avg_lat, recovery_target_sleep_us
|
||||
"[OSD %lu] auto-tune: client util: %.2f, recovery util: %.2f, lat: %lu us -> target util %.2f, delay %lu us\n",
|
||||
osd_num, rtune_client_util, total_recovery_usec/1000000.0/recovery_tune_interval,
|
||||
rtune_avg_lat, rtune_target_util, recovery_target_sleep_us
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -437,7 +451,7 @@ void osd_t::tune_recovery()
|
|||
// Just trigger write requests for degraded objects. They'll be recovered during writing
|
||||
bool osd_t::continue_recovery()
|
||||
{
|
||||
while (recovery_ops.size() < recovery_target_queue_depth)
|
||||
while (recovery_ops.size() < recovery_queue_depth)
|
||||
{
|
||||
osd_recovery_op_t op;
|
||||
if (pick_next_recovery(op))
|
||||
|
|
Loading…
Reference in New Issue