Track recovery op latencies + refactor into a structure

kv-update
Vitaliy Filippov 2023-12-09 15:36:00 +03:00
parent dcc76eee15
commit d84dee7098
5 changed files with 44 additions and 30 deletions

View File

@ -421,14 +421,6 @@ void osd_t::exec_op(osd_op_t *cur_op)
}
}
// Clears all accumulated statistics: both slots of the recovery op/byte
// counters, the previously saved stats snapshot and the messenger stats.
void osd_t::reset_stats()
{
    // Recovery counters are plain 2x2 integer arrays, so zero them bytewise
    memset(recovery_stat_bytes, 0, sizeof(recovery_stat_bytes));
    memset(recovery_stat_count, 0, sizeof(recovery_stat_count));
    // Value-initialize the stats snapshot and the live messenger stats
    prev_stats = {};
    msgr.stats = {};
}
void osd_t::print_stats()
{
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
@ -466,19 +458,19 @@ void osd_t::print_stats()
}
for (int i = 0; i < 2; i++)
{
if (recovery_stat_count[0][i] != recovery_stat_count[1][i])
if (recovery_stat[i].count > recovery_print_prev[i].count)
{
uint64_t bw = (recovery_stat_bytes[0][i] - recovery_stat_bytes[1][i]) / print_stats_interval;
uint64_t bw = (recovery_stat[i].bytes - recovery_print_prev[i].bytes) / print_stats_interval;
printf(
"[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s\n", osd_num, recovery_stat_names[i],
(recovery_stat_count[0][i] - recovery_stat_count[1][i]) * 1.0 / print_stats_interval,
"[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s, avg lat %ld us\n", osd_num, recovery_stat_names[i],
(recovery_stat[i].count - recovery_print_prev[i].count) * 1.0 / print_stats_interval,
(bw > 1024*1024*1024 ? bw/1024.0/1024/1024 : (bw > 1024*1024 ? bw/1024.0/1024 : bw/1024.0)),
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s"))
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s")),
(recovery_stat[i].usec - recovery_print_prev[i].usec) / (recovery_stat[i].count - recovery_print_prev[i].count)
);
recovery_stat_count[1][i] = recovery_stat_count[0][i];
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
}
}
memcpy(recovery_print_prev, recovery_stat, sizeof(recovery_stat));
if (corrupted_objects > 0)
{
printf("[OSD %lu] %lu object(s) corrupted\n", osd_num, corrupted_objects);

View File

@ -87,6 +87,11 @@ struct osd_chain_read_t
struct osd_rmw_stripe_t;
// Cumulative counters for one recovery category (the OSD keeps one entry per
// name in recovery_stat_names, i.e. "degraded" and "misplaced").
//
// Every member defaults to zero so that instances declared without an explicit
// `= {}` initializer never start with indeterminate counters.
// The type remains trivially copyable, so snapshotting it with memcpy() is still valid.
// NOTE(review): GCC's -Wclass-memaccess may now flag memset() on this type;
// assigning `recovery_stat_t()` is the equivalent reset.
struct recovery_stat_t
{
    // Number of recovery operations accounted for
    uint64_t count = 0;
    // Total duration of those operations, in microseconds
    uint64_t usec = 0;
    // Total number of bytes written by those operations
    uint64_t bytes = 0;
};
class osd_t
{
// config
@ -189,8 +194,8 @@ class osd_t
std::map<uint64_t, inode_stats_t> inode_stats;
std::map<uint64_t, timespec> vanishing_inodes;
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
uint64_t recovery_stat_count[2][2] = {};
uint64_t recovery_stat_bytes[2][2] = {};
recovery_stat_t recovery_stat[2];
recovery_stat_t recovery_print_prev[2];
// cluster connection
void parse_config(bool init);
@ -209,7 +214,6 @@ class osd_t
void renew_lease(bool reload);
void print_stats();
void print_slow();
void reset_stats();
json11::Json get_statistics();
void report_statistics();
void report_pg_state(pg_t & pg);

View File

@ -213,12 +213,14 @@ json11::Json osd_t::get_statistics()
st["subop_stats"] = subop_stats;
st["recovery_stats"] = json11::Json::object {
{ recovery_stat_names[0], json11::Json::object {
{ "count", recovery_stat_count[0][0] },
{ "bytes", recovery_stat_bytes[0][0] },
{ "count", recovery_stat[0].count },
{ "bytes", recovery_stat[0].bytes },
{ "usec", recovery_stat[0].usec },
} },
{ recovery_stat_names[1], json11::Json::object {
{ "count", recovery_stat_count[0][1] },
{ "bytes", recovery_stat_bytes[0][1] },
{ "count", recovery_stat[1].count },
{ "bytes", recovery_stat[1].bytes },
{ "usec", recovery_stat[1].usec },
} },
};
return st;

View File

@ -3,13 +3,15 @@
#include "osd_primary.h"
#define SELF_FD -1
void osd_t::autosync()
{
if (immediate_commit != IMMEDIATE_ALL && !autosync_op)
{
autosync_op = new osd_op_t();
autosync_op->op_type = OSD_OP_IN;
autosync_op->peer_fd = -1;
autosync_op->peer_fd = SELF_FD;
autosync_op->req = (osd_any_op_t){
.sync = {
.header = {
@ -85,9 +87,13 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
cur_op->reply.hdr.id = cur_op->req.hdr.id;
cur_op->reply.hdr.opcode = cur_op->req.hdr.opcode;
cur_op->reply.hdr.retval = retval;
if (cur_op->peer_fd == -1)
if (cur_op->peer_fd == SELF_FD)
{
msgr.measure_exec(cur_op);
// Do not include internal primary writes (recovery/rebalance) in client op statistics
if (cur_op->req.hdr.opcode != OSD_OP_WRITE)
{
msgr.measure_exec(cur_op);
}
// Copy lambda to be unaffected by `delete op`
std::function<void(osd_op_t*)>(cur_op->callback)(cur_op);
}

View File

@ -292,16 +292,26 @@ resume_7:
{
{
int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1;
recovery_stat_count[0][recovery_type]++;
if (!recovery_stat_count[0][recovery_type])
recovery_stat[recovery_type].count++;
if (!recovery_stat[recovery_type].count) // wrapped
{
recovery_stat_count[0][recovery_type]++;
recovery_stat_bytes[0][recovery_type] = 0;
memset(&recovery_print_prev[recovery_type], 0, sizeof(recovery_print_prev[recovery_type]));
memset(&recovery_stat[recovery_type], 0, sizeof(recovery_stat[recovery_type]));
recovery_stat[recovery_type].count++;
}
for (int role = 0; role < (op_data->scheme == POOL_SCHEME_REPLICATED ? 1 : pg.pg_size); role++)
{
recovery_stat_bytes[0][recovery_type] += op_data->stripes[role].write_end - op_data->stripes[role].write_start;
recovery_stat[recovery_type].bytes += op_data->stripes[role].write_end - op_data->stripes[role].write_start;
}
if (!cur_op->tv_end.tv_sec)
{
clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
}
uint64_t usec = (
(cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
(cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
);
recovery_stat[recovery_type].usec += usec;
}
// Any kind of a non-clean object can have extra chunks, because we don't record objects
// as degraded & misplaced or incomplete & misplaced at the same time. So try to remove extra chunks