Measure & report recovery op count and bandwidth

trace-sqes
Vitaliy Filippov 2020-05-11 16:12:18 +03:00
parent 47b6f64106
commit 5084ff7c6c
4 changed files with 43 additions and 6 deletions

15
osd.cpp
View File

@ -546,6 +546,21 @@ void osd_t::print_stats()
subop_stat_sum[1][i] = subop_stat_sum[0][i];
}
}
for (int i = 0; i < 2; i++)
{
if (recovery_stat_count[0][i] != recovery_stat_count[1][i])
{
uint64_t bw = (recovery_stat_bytes[0][i] - recovery_stat_bytes[1][i]) / print_stats_interval;
printf(
"%s recovery: %.1f op/s, B/W: %.2f %s\n", recovery_stat_names[i],
(recovery_stat_count[0][i] - recovery_stat_count[1][i]) * 1.0 / print_stats_interval,
(bw > 1024*1024*1024 ? bw/1024.0/1024/1024 : (bw > 1024*1024 ? bw/1024.0/1024 : bw/1024.0)),
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s"))
);
recovery_stat_count[1][i] = recovery_stat_count[0][i];
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
}
}
if (incomplete_objects > 0)
{
printf("%lu object(s) incomplete\n", incomplete_objects);

3
osd.h
View File

@ -288,6 +288,9 @@ class osd_t
uint64_t op_stat_bytes[2][OSD_OP_MAX+1] = { 0 };
uint64_t subop_stat_sum[2][OSD_OP_MAX+1] = { 0 };
uint64_t subop_stat_count[2][OSD_OP_MAX+1] = { 0 };
// Recovery statistics. The last array index selects the recovery type
// and matches recovery_stat_names: 0 = "degraded", 1 = "misplaced".
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
// Number of recovery operations. First index: [0] = running total,
// [1] = copy of the total taken when print_stats() last reported it.
uint64_t recovery_stat_count[2][2] = { 0 };
// Recovered bytes, indexed the same way as recovery_stat_count.
uint64_t recovery_stat_bytes[2][2] = { 0 };
// cluster connection
void http_request(const std::string & host, const std::string & request,

View File

@ -145,21 +145,22 @@ json11::Json osd_t::get_statistics()
json11::Json::object st;
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
st["time"] = std::to_string(ts.tv_sec)+"."+std::to_string(ts.tv_nsec/1000000);
char time_str[50] = { 0 };
sprintf(time_str, "%ld.%03ld", ts.tv_sec, ts.tv_nsec/1000000);
st["time"] = time_str;
st["blockstore_ready"] = bs->is_started();
if (bs)
{
st["size"] = bs->get_block_count() * bs->get_block_size();
st["free"] = bs->get_free_block_count() * bs->get_block_size();
}
// FIXME: report recovery ops and bandwidth
// FIXME: handle integer overflow
json11::Json::object op_stats, subop_stats;
for (int i = 0; i <= OSD_OP_MAX; i++)
{
op_stats[osd_op_names[i]] = json11::Json::object {
{ "count", op_stat_count[0][i] },
{ "sum", op_stat_sum[0][i] },
{ "usec", op_stat_sum[0][i] },
{ "bytes", op_stat_bytes[0][i] },
};
}
@ -167,11 +168,21 @@ json11::Json osd_t::get_statistics()
{
subop_stats[osd_op_names[i]] = json11::Json::object {
{ "count", subop_stat_count[0][i] },
{ "sum", subop_stat_sum[0][i] },
{ "usec", subop_stat_sum[0][i] },
};
}
st["op_latency"] = op_stats;
st["subop_latency"] = subop_stats;
st["op_stats"] = op_stats;
st["subop_stats"] = subop_stats;
st["recovery_stats"] = json11::Json::object {
{ recovery_stat_names[0], json11::Json::object {
{ "count", recovery_stat_count[0][0] },
{ "bytes", recovery_stat_bytes[0][0] },
} },
{ recovery_stat_names[1], json11::Json::object {
{ "count", recovery_stat_count[0][1] },
{ "bytes", recovery_stat_bytes[0][1] },
} },
};
return st;
}

View File

@ -244,6 +244,14 @@ resume_5:
}
if (op_data->object_state)
{
{
    // Account this write in the recovery statistics.
    // The braces keep the locals below scoped to this block
    // (presumably so jumps to the resume_N labels never cross their initialization).
    // Slot 0 is used when the object state has OBJ_DEGRADED or OBJ_INCOMPLETE set, slot 1 otherwise.
    const bool degraded_or_incomplete = (op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE)) != 0;
    const int stat_idx = degraded_or_incomplete ? 0 : 1;
    recovery_stat_count[0][stat_idx] += 1;
    // Add the written range length (write_end - write_start) of every stripe of the PG
    for (int role = 0; role < pg.pg_size; role++)
    {
        const auto & stripe = op_data->stripes[role];
        recovery_stat_bytes[0][stat_idx] += stripe.write_end - stripe.write_start;
    }
}
if (op_data->object_state->state & OBJ_MISPLACED)
{
// Remove extra chunks