diff --git a/src/osd.cpp b/src/osd.cpp index 7116146e..6513e417 100644 --- a/src/osd.cpp +++ b/src/osd.cpp @@ -421,14 +421,6 @@ void osd_t::exec_op(osd_op_t *cur_op) } } -void osd_t::reset_stats() -{ - msgr.stats = {}; - prev_stats = {}; - memset(recovery_stat_count, 0, sizeof(recovery_stat_count)); - memset(recovery_stat_bytes, 0, sizeof(recovery_stat_bytes)); -} - void osd_t::print_stats() { for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++) @@ -466,19 +458,19 @@ void osd_t::print_stats() } for (int i = 0; i < 2; i++) { - if (recovery_stat_count[0][i] != recovery_stat_count[1][i]) + if (recovery_stat[i].count > recovery_print_prev[i].count) { - uint64_t bw = (recovery_stat_bytes[0][i] - recovery_stat_bytes[1][i]) / print_stats_interval; + uint64_t bw = (recovery_stat[i].bytes - recovery_print_prev[i].bytes) / print_stats_interval; printf( - "[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s\n", osd_num, recovery_stat_names[i], - (recovery_stat_count[0][i] - recovery_stat_count[1][i]) * 1.0 / print_stats_interval, + "[OSD %lu] %s recovery: %.1f op/s, B/W: %.2f %s, avg lat %ld us\n", osd_num, recovery_stat_names[i], + (recovery_stat[i].count - recovery_print_prev[i].count) * 1.0 / print_stats_interval, (bw > 1024*1024*1024 ? bw/1024.0/1024/1024 : (bw > 1024*1024 ? bw/1024.0/1024 : bw/1024.0)), - (bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s")) + (bw > 1024*1024*1024 ? "GB/s" : (bw > 1024*1024 ? "MB/s" : "KB/s")), + (recovery_stat[i].usec - recovery_print_prev[i].usec) / (recovery_stat[i].count - recovery_print_prev[i].count) ); - recovery_stat_count[1][i] = recovery_stat_count[0][i]; - recovery_stat_bytes[1][i] = recovery_stat_bytes[0][i]; } } + memcpy(recovery_print_prev, recovery_stat, sizeof(recovery_stat)); if (corrupted_objects > 0) { printf("[OSD %lu] %lu object(s) corrupted\n", osd_num, corrupted_objects); diff --git a/src/osd.h b/src/osd.h index d9a7a2ae..36a62423 100644 --- a/src/osd.h +++ b/src/osd.h @@ -87,6 +87,11 @@ struct osd_chain_read_t struct osd_rmw_stripe_t; +struct recovery_stat_t +{ + uint64_t count, usec, bytes; +}; + class osd_t { // config @@ -189,8 +194,8 @@ class osd_t std::map inode_stats; std::map vanishing_inodes; const char* recovery_stat_names[2] = { "degraded", "misplaced" }; - uint64_t recovery_stat_count[2][2] = {}; - uint64_t recovery_stat_bytes[2][2] = {}; + recovery_stat_t recovery_stat[2]; + recovery_stat_t recovery_print_prev[2]; // cluster connection void parse_config(bool init); @@ -209,7 +214,6 @@ class osd_t void renew_lease(bool reload); void print_stats(); void print_slow(); - void reset_stats(); json11::Json get_statistics(); void report_statistics(); void report_pg_state(pg_t & pg); diff --git a/src/osd_cluster.cpp b/src/osd_cluster.cpp index a98c6c84..2ecee460 100644 --- a/src/osd_cluster.cpp +++ b/src/osd_cluster.cpp @@ -213,12 +213,14 @@ json11::Json osd_t::get_statistics() st["subop_stats"] = subop_stats; st["recovery_stats"] = json11::Json::object { { recovery_stat_names[0], json11::Json::object { - { "count", recovery_stat_count[0][0] }, - { "bytes", recovery_stat_bytes[0][0] }, + { "count", recovery_stat[0].count }, + { "bytes", recovery_stat[0].bytes }, + { "usec", recovery_stat[0].usec }, } }, { recovery_stat_names[1], json11::Json::object { - { "count", recovery_stat_count[0][1] }, - { "bytes", recovery_stat_bytes[0][1] }, + { "count", recovery_stat[1].count }, + { "bytes", recovery_stat[1].bytes }, + { "usec", recovery_stat[1].usec }, } }, }; return st; diff --git a/src/osd_primary_subops.cpp b/src/osd_primary_subops.cpp index 6a05fd9b..6cfa2331 100644 --- a/src/osd_primary_subops.cpp +++ b/src/osd_primary_subops.cpp @@ -3,13 +3,15 @@ #include "osd_primary.h" +#define SELF_FD -1 + void osd_t::autosync() { if (immediate_commit != IMMEDIATE_ALL && !autosync_op) { autosync_op = new osd_op_t(); autosync_op->op_type = OSD_OP_IN; - autosync_op->peer_fd = -1; + autosync_op->peer_fd = SELF_FD; autosync_op->req = (osd_any_op_t){ .sync = { .header = { @@ -85,9 +87,13 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval) cur_op->reply.hdr.id = cur_op->req.hdr.id; cur_op->reply.hdr.opcode = cur_op->req.hdr.opcode; cur_op->reply.hdr.retval = retval; - if (cur_op->peer_fd == -1) + if (cur_op->peer_fd == SELF_FD) { - msgr.measure_exec(cur_op); + // Do not include internal primary writes (recovery/rebalance) into client op statistics + if (cur_op->req.hdr.opcode != OSD_OP_WRITE) + { + msgr.measure_exec(cur_op); + } // Copy lambda to be unaffected by `delete op` std::function(cur_op->callback)(cur_op); } diff --git a/src/osd_primary_write.cpp b/src/osd_primary_write.cpp index 9da63aa3..dee2da01 100644 --- a/src/osd_primary_write.cpp +++ b/src/osd_primary_write.cpp @@ -292,16 +292,26 @@ resume_7: { { int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1; - recovery_stat_count[0][recovery_type]++; - if (!recovery_stat_count[0][recovery_type]) + recovery_stat[recovery_type].count++; + if (!recovery_stat[recovery_type].count) // wrapped { - recovery_stat_count[0][recovery_type]++; - recovery_stat_bytes[0][recovery_type] = 0; + memset(&recovery_print_prev[recovery_type], 0, sizeof(recovery_print_prev[recovery_type])); + memset(&recovery_stat[recovery_type], 0, sizeof(recovery_stat[recovery_type])); + recovery_stat[recovery_type].count++; } for (int role = 0; role < (op_data->scheme == POOL_SCHEME_REPLICATED ? 1 : pg.pg_size); role++) { - recovery_stat_bytes[0][recovery_type] += op_data->stripes[role].write_end - op_data->stripes[role].write_start; + recovery_stat[recovery_type].bytes += op_data->stripes[role].write_end - op_data->stripes[role].write_start; } + if (!cur_op->tv_end.tv_sec) + { + clock_gettime(CLOCK_REALTIME, &cur_op->tv_end); + } + uint64_t usec = ( + (cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 + + (cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000 + ); + recovery_stat[recovery_type].usec += usec; } // Any kind of a non-clean object can have extra chunks, because we don't record objects // as degraded & misplaced or incomplete & misplaced at the same time. So try to remove extra chunks