Track recovery op latencies + refactor into a structure
parent
dcc76eee15
commit
d84dee7098
22
src/osd.cpp
22
src/osd.cpp
|
@ -421,14 +421,6 @@ void osd_t::exec_op(osd_op_t *cur_op)
|
|||
}
|
||||
}
|
||||
|
||||
void osd_t::reset_stats()
|
||||
{
|
||||
msgr.stats = {};
|
||||
prev_stats = {};
|
||||
memset(recovery_stat_count, 0, sizeof(recovery_stat_count));
|
||||
memset(recovery_stat_bytes, 0, sizeof(recovery_stat_bytes));
|
||||
}
|
||||
|
||||
void osd_t::print_stats()
|
||||
{
|
||||
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
||||
|
@ -466,19 +458,19 @@ void osd_t::print_stats()
|
|||
}
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
if (recovery_stat_count[0][i] != recovery_stat_count[1][i])
|
||||
if (recovery_stat[i].count > recovery_print_prev[i].count)
|
||||
{
|
||||
uint64_t bw = (recovery_stat_bytes[0][i] - recovery_stat_bytes[1][i]) / print_stats_interval;
|
||||
uint64_t bw = (recovery_stat[i].bytes - recovery_print_prev[i].bytes) / print_stats_interval;
|
||||
printf(
|
||||
"[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s\n", osd_num, recovery_stat_names[i],
|
||||
(recovery_stat_count[0][i] - recovery_stat_count[1][i]) * 1.0 / print_stats_interval,
|
||||
"[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s, avg lat %ld us\n", osd_num, recovery_stat_names[i],
|
||||
(recovery_stat[i].count - recovery_print_prev[i].count) * 1.0 / print_stats_interval,
|
||||
(bw > 1024*1024*1024 ? bw/1024.0/1024/1024 : (bw > 1024*1024 ? bw/1024.0/1024 : bw/1024.0)),
|
||||
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s"))
|
||||
(bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s")),
|
||||
(recovery_stat[i].usec - recovery_print_prev[i].usec) / (recovery_stat[i].count - recovery_print_prev[i].count)
|
||||
);
|
||||
recovery_stat_count[1][i] = recovery_stat_count[0][i];
|
||||
recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i];
|
||||
}
|
||||
}
|
||||
memcpy(recovery_print_prev, recovery_stat, sizeof(recovery_stat));
|
||||
if (corrupted_objects > 0)
|
||||
{
|
||||
printf("[OSD %lu] %lu object(s) corrupted\n", osd_num, corrupted_objects);
|
||||
|
|
10
src/osd.h
10
src/osd.h
|
@ -87,6 +87,11 @@ struct osd_chain_read_t
|
|||
|
||||
struct osd_rmw_stripe_t;
|
||||
|
||||
struct recovery_stat_t
|
||||
{
|
||||
uint64_t count, usec, bytes;
|
||||
};
|
||||
|
||||
class osd_t
|
||||
{
|
||||
// config
|
||||
|
@ -189,8 +194,8 @@ class osd_t
|
|||
std::map<uint64_t, inode_stats_t> inode_stats;
|
||||
std::map<uint64_t, timespec> vanishing_inodes;
|
||||
const char* recovery_stat_names[2] = { "degraded", "misplaced" };
|
||||
uint64_t recovery_stat_count[2][2] = {};
|
||||
uint64_t recovery_stat_bytes[2][2] = {};
|
||||
recovery_stat_t recovery_stat[2];
|
||||
recovery_stat_t recovery_print_prev[2];
|
||||
|
||||
// cluster connection
|
||||
void parse_config(bool init);
|
||||
|
@ -209,7 +214,6 @@ class osd_t
|
|||
void renew_lease(bool reload);
|
||||
void print_stats();
|
||||
void print_slow();
|
||||
void reset_stats();
|
||||
json11::Json get_statistics();
|
||||
void report_statistics();
|
||||
void report_pg_state(pg_t & pg);
|
||||
|
|
|
@ -213,12 +213,14 @@ json11::Json osd_t::get_statistics()
|
|||
st["subop_stats"] = subop_stats;
|
||||
st["recovery_stats"] = json11::Json::object {
|
||||
{ recovery_stat_names[0], json11::Json::object {
|
||||
{ "count", recovery_stat_count[0][0] },
|
||||
{ "bytes", recovery_stat_bytes[0][0] },
|
||||
{ "count", recovery_stat[0].count },
|
||||
{ "bytes", recovery_stat[0].bytes },
|
||||
{ "usec", recovery_stat[0].usec },
|
||||
} },
|
||||
{ recovery_stat_names[1], json11::Json::object {
|
||||
{ "count", recovery_stat_count[0][1] },
|
||||
{ "bytes", recovery_stat_bytes[0][1] },
|
||||
{ "count", recovery_stat[1].count },
|
||||
{ "bytes", recovery_stat[1].bytes },
|
||||
{ "usec", recovery_stat[1].usec },
|
||||
} },
|
||||
};
|
||||
return st;
|
||||
|
|
|
@ -3,13 +3,15 @@
|
|||
|
||||
#include "osd_primary.h"
|
||||
|
||||
#define SELF_FD -1
|
||||
|
||||
void osd_t::autosync()
|
||||
{
|
||||
if (immediate_commit != IMMEDIATE_ALL && !autosync_op)
|
||||
{
|
||||
autosync_op = new osd_op_t();
|
||||
autosync_op->op_type = OSD_OP_IN;
|
||||
autosync_op->peer_fd = -1;
|
||||
autosync_op->peer_fd = SELF_FD;
|
||||
autosync_op->req = (osd_any_op_t){
|
||||
.sync = {
|
||||
.header = {
|
||||
|
@ -85,9 +87,13 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
|
|||
cur_op->reply.hdr.id = cur_op->req.hdr.id;
|
||||
cur_op->reply.hdr.opcode = cur_op->req.hdr.opcode;
|
||||
cur_op->reply.hdr.retval = retval;
|
||||
if (cur_op->peer_fd == -1)
|
||||
if (cur_op->peer_fd == SELF_FD)
|
||||
{
|
||||
msgr.measure_exec(cur_op);
|
||||
// Do not include internal primary writes (recovery/rebalance) into client op statistics
|
||||
if (cur_op->req.hdr.opcode != OSD_OP_WRITE)
|
||||
{
|
||||
msgr.measure_exec(cur_op);
|
||||
}
|
||||
// Copy lambda to be unaffected by `delete op`
|
||||
std::function<void(osd_op_t*)>(cur_op->callback)(cur_op);
|
||||
}
|
||||
|
|
|
@ -292,16 +292,26 @@ resume_7:
|
|||
{
|
||||
{
|
||||
int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1;
|
||||
recovery_stat_count[0][recovery_type]++;
|
||||
if (!recovery_stat_count[0][recovery_type])
|
||||
recovery_stat[recovery_type].count++;
|
||||
if (!recovery_stat[recovery_type].count) // wrapped
|
||||
{
|
||||
recovery_stat_count[0][recovery_type]++;
|
||||
recovery_stat_bytes[0][recovery_type] = 0;
|
||||
memset(&recovery_print_prev[recovery_type], 0, sizeof(recovery_print_prev[recovery_type]));
|
||||
memset(&recovery_stat[recovery_type], 0, sizeof(recovery_stat[recovery_type]));
|
||||
recovery_stat[recovery_type].count++;
|
||||
}
|
||||
for (int role = 0; role < (op_data->scheme == POOL_SCHEME_REPLICATED ? 1 : pg.pg_size); role++)
|
||||
{
|
||||
recovery_stat_bytes[0][recovery_type] += op_data->stripes[role].write_end - op_data->stripes[role].write_start;
|
||||
recovery_stat[recovery_type].bytes += op_data->stripes[role].write_end - op_data->stripes[role].write_start;
|
||||
}
|
||||
if (!cur_op->tv_end.tv_sec)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
|
||||
}
|
||||
uint64_t usec = (
|
||||
(cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
|
||||
(cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
|
||||
);
|
||||
recovery_stat[recovery_type].usec += usec;
|
||||
}
|
||||
// Any kind of a non-clean object can have extra chunks, because we don't record objects
|
||||
// as degraded & misplaced or incomplete & misplaced at the same time. So try to remove extra chunks
|
||||
|
|
Loading…
Reference in New Issue