Experiment/WIP: Try to track "secondary" recovery ops separately

kv-update
Vitaliy Filippov 2023-12-17 00:03:21 +03:00
parent 751935ddd8
commit 5ca7cde612
10 changed files with 137 additions and 75 deletions

View File

@ -149,7 +149,7 @@ public:
std::map<osd_num_t, osd_wanted_peer_t> wanted_peers; std::map<osd_num_t, osd_wanted_peer_t> wanted_peers;
std::map<uint64_t, int> osd_peer_fds; std::map<uint64_t, int> osd_peer_fds;
// op statistics // op statistics
osd_op_stats_t stats; osd_op_stats_t stats, recovery_stats;
void init(); void init();
void parse_config(const json11::Json & config); void parse_config(const json11::Json & config);
@ -175,6 +175,7 @@ public:
bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg); bool connect_rdma(int peer_fd, std::string rdma_address, uint64_t client_max_msg);
#endif #endif
void inc_op_stats(osd_op_stats_t & stats, uint64_t opcode, timespec & tv_begin, timespec & tv_end, uint64_t len);
void measure_exec(osd_op_t *cur_op); void measure_exec(osd_op_t *cur_op);
protected: protected:

View File

@ -24,3 +24,17 @@ osd_op_t::~osd_op_t()
free(buf); free(buf);
} }
} }
// Tells whether this request is one of the secondary (OSD_OP_SEC_*) operations
// listed below AND has the OSD_OP_RECOVERY_RELATED bit set in its flags field.
// The flags field lives in a different request struct depending on the opcode,
// so every opcode group checks its own member. Any other opcode yields false.
bool osd_op_t::is_recovery_related()
{
    const auto opcode = req.hdr.opcode;
    // Secondary read / write / stable write share the sec_rw request layout
    if (opcode == OSD_OP_SEC_READ ||
        opcode == OSD_OP_SEC_WRITE ||
        opcode == OSD_OP_SEC_WRITE_STABLE)
    {
        return (req.sec_rw.flags & OSD_OP_RECOVERY_RELATED) != 0;
    }
    if (opcode == OSD_OP_SEC_DELETE)
    {
        return (req.sec_del.flags & OSD_OP_RECOVERY_RELATED) != 0;
    }
    if (opcode == OSD_OP_SEC_STABILIZE)
    {
        return (req.sec_stab.flags & OSD_OP_RECOVERY_RELATED) != 0;
    }
    if (opcode == OSD_OP_SEC_SYNC)
    {
        return (req.sec_sync.flags & OSD_OP_RECOVERY_RELATED) != 0;
    }
    return false;
}

View File

@ -173,4 +173,6 @@ struct osd_op_t
osd_op_buf_list_t iov; osd_op_buf_list_t iov;
~osd_op_t(); ~osd_op_t();
bool is_recovery_related();
}; };

View File

@ -131,6 +131,23 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
} }
} }
// Accounts one finished operation in <stats>.
//
// stats    - statistics accumulator to update (per-opcode count / latency sum / byte sum)
// opcode   - index of the operation in the per-opcode arrays
// tv_begin - timestamp taken when the operation started
// tv_end   - timestamp taken when the operation finished
// len      - payload size in bytes to add to the byte counter (0 if not applicable)
//
// The latency added to op_stat_sum is (tv_end - tv_begin) in microseconds.
void osd_messenger_t::inc_op_stats(osd_op_stats_t & stats, uint64_t opcode, timespec & tv_begin, timespec & tv_end, uint64_t len)
{
    // Compute the interval in signed 64-bit arithmetic: the nanosecond part of the
    // difference is negative whenever tv_end.tv_nsec < tv_begin.tv_nsec.
    int64_t elapsed_us =
        (int64_t)(tv_end.tv_sec - tv_begin.tv_sec)*1000000 +
        (int64_t)(tv_end.tv_nsec - tv_begin.tv_nsec)/1000;
    // Callers take these timestamps with CLOCK_REALTIME, which may be stepped
    // backwards. A negative interval converted to uint64_t would become a value
    // close to 2^64 and poison the latency sum, so treat it as zero instead.
    uint64_t usecs = elapsed_us > 0 ? (uint64_t)elapsed_us : 0;
    stats.op_stat_count[opcode]++;
    if (!stats.op_stat_count[opcode])
    {
        // The counter wrapped around to 0: restart all counters of this opcode
        // so that count, latency sum and byte sum stay consistent with each other
        stats.op_stat_count[opcode] = 1;
        stats.op_stat_sum[opcode] = 0;
        stats.op_stat_bytes[opcode] = 0;
    }
    stats.op_stat_sum[opcode] += usecs;
    stats.op_stat_bytes[opcode] += len;
}
void osd_messenger_t::measure_exec(osd_op_t *cur_op) void osd_messenger_t::measure_exec(osd_op_t *cur_op)
{ {
// Measure execution latency // Measure execution latency
@ -142,29 +159,24 @@ void osd_messenger_t::measure_exec(osd_op_t *cur_op)
{ {
clock_gettime(CLOCK_REALTIME, &cur_op->tv_end); clock_gettime(CLOCK_REALTIME, &cur_op->tv_end);
} }
stats.op_stat_count[cur_op->req.hdr.opcode]++; uint64_t len = 0;
if (!stats.op_stat_count[cur_op->req.hdr.opcode])
{
stats.op_stat_count[cur_op->req.hdr.opcode]++;
stats.op_stat_sum[cur_op->req.hdr.opcode] = 0;
stats.op_stat_bytes[cur_op->req.hdr.opcode] = 0;
}
stats.op_stat_sum[cur_op->req.hdr.opcode] += (
(cur_op->tv_end.tv_sec - cur_op->tv_begin.tv_sec)*1000000 +
(cur_op->tv_end.tv_nsec - cur_op->tv_begin.tv_nsec)/1000
);
if (cur_op->req.hdr.opcode == OSD_OP_READ || if (cur_op->req.hdr.opcode == OSD_OP_READ ||
cur_op->req.hdr.opcode == OSD_OP_WRITE || cur_op->req.hdr.opcode == OSD_OP_WRITE ||
cur_op->req.hdr.opcode == OSD_OP_SCRUB) cur_op->req.hdr.opcode == OSD_OP_SCRUB)
{ {
// req.rw.len is internally set to the full object size for scrubs // req.rw.len is internally set to the full object size for scrubs
stats.op_stat_bytes[cur_op->req.hdr.opcode] += cur_op->req.rw.len; len = cur_op->req.rw.len;
} }
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ || else if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ ||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
{ {
stats.op_stat_bytes[cur_op->req.hdr.opcode] += cur_op->req.sec_rw.len; len = cur_op->req.sec_rw.len;
}
inc_op_stats(stats, cur_op->req.hdr.opcode, cur_op->tv_begin, cur_op->tv_end, len);
if (cur_op->is_recovery_related())
{
inc_op_stats(recovery_stats, cur_op->req.hdr.opcode, cur_op->tv_begin, cur_op->tv_end, len);
} }
} }

View File

@ -123,7 +123,7 @@ class osd_t
double recovery_tune_max_util = 1.0; double recovery_tune_max_util = 1.0;
double recovery_tune_max_client_util = 0.5; double recovery_tune_max_client_util = 0.5;
int recovery_tune_interval = 1; int recovery_tune_interval = 1;
double recovery_tune_ewma_rate = 0.5; double recovery_tune_ewma_rate = 0.2;
int recovery_tune_sleep_min_us = 10; int recovery_tune_sleep_min_us = 10;
int recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH; int recovery_pg_switch = DEFAULT_RECOVERY_PG_SWITCH;
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH; int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
@ -208,10 +208,8 @@ class osd_t
// recovery auto-tuning // recovery auto-tuning
int rtune_timer_id = -1; int rtune_timer_id = -1;
uint64_t rtune_avg_lat = 0; uint64_t rtune_avg_lat = 0;
double rtune_avg_count = 0;
double rtune_client_util = 0, rtune_target_util = 1; double rtune_client_util = 0, rtune_target_util = 1;
osd_op_stats_t rtune_prev_stats; osd_op_stats_t rtune_prev_stats, rtune_prev_recovery_stats;
recovery_stat_t rtune_prev_recovery[2];
uint64_t recovery_target_queue_depth = 1; uint64_t recovery_target_queue_depth = 1;
uint64_t recovery_target_sleep_us = 0; uint64_t recovery_target_sleep_us = 0;
@ -304,7 +302,7 @@ class osd_t
bool remember_unstable_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state); bool remember_unstable_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state);
void handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op); void handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op);
void handle_primary_bs_subop(osd_op_t *subop); void handle_primary_bs_subop(osd_op_t *subop);
void add_bs_subop_stats(osd_op_t *subop); void add_bs_subop_stats(osd_op_t *subop, bool recovery_related = false);
void pg_cancel_write_queue(pg_t & pg, osd_op_t *first_op, object_id oid, int retval); void pg_cancel_write_queue(pg_t & pg, osd_op_t *first_op, object_id oid, int retval);
void submit_primary_subops(int submit_type, uint64_t op_version, const uint64_t* osd_set, osd_op_t *cur_op); void submit_primary_subops(int submit_type, uint64_t op_version, const uint64_t* osd_set, osd_op_t *cur_op);

View File

@ -325,17 +325,7 @@ void osd_t::submit_recovery_op(osd_recovery_op_t *op)
{ {
printf("Recovery operation done for %lx:%lx\n", op->oid.inode, op->oid.stripe); printf("Recovery operation done for %lx:%lx\n", op->oid.inode, op->oid.stripe);
} }
if (recovery_target_sleep_us) finish_recovery_op(op);
{
this->tfd->set_timer_us(recovery_target_sleep_us, false, [this, op](int timer_id)
{
finish_recovery_op(op);
});
}
else
{
finish_recovery_op(op);
}
}; };
exec_op(op->osd_op); exec_op(op->osd_op);
} }
@ -383,29 +373,46 @@ void osd_t::finish_recovery_op(osd_recovery_op_t *op)
void osd_t::tune_recovery() void osd_t::tune_recovery()
{ {
static int total_client_ops[] = { OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE }; static int accounted_ops[] = {
uint64_t total_client_usec = 0; OSD_OP_SEC_READ, OSD_OP_SEC_WRITE, OSD_OP_SEC_WRITE_STABLE,
for (int i = 0; i < sizeof(total_client_ops)/sizeof(total_client_ops[0]); i++) OSD_OP_SEC_STABILIZE, OSD_OP_SEC_SYNC, OSD_OP_SEC_DELETE
};
uint64_t total_client_usec = 0, total_recovery_usec = 0, recovery_count = 0;
for (int i = 0; i < sizeof(accounted_ops)/sizeof(accounted_ops[0]); i++)
{ {
total_client_usec += (msgr.stats.op_stat_sum[total_client_ops[i]] - rtune_prev_stats.op_stat_sum[total_client_ops[i]]); total_client_usec += (msgr.stats.op_stat_sum[accounted_ops[i]]
rtune_prev_stats.op_stat_sum[total_client_ops[i]] = msgr.stats.op_stat_sum[total_client_ops[i]]; - rtune_prev_stats.op_stat_sum[accounted_ops[i]]);
total_recovery_usec += (msgr.recovery_stats.op_stat_sum[accounted_ops[i]]
- rtune_prev_recovery_stats.op_stat_sum[accounted_ops[i]]);
recovery_count += (msgr.recovery_stats.op_stat_count[accounted_ops[i]]
- rtune_prev_recovery_stats.op_stat_count[accounted_ops[i]]);
rtune_prev_stats.op_stat_sum[accounted_ops[i]] = msgr.stats.op_stat_sum[accounted_ops[i]];
rtune_prev_recovery_stats.op_stat_sum[accounted_ops[i]] = msgr.recovery_stats.op_stat_sum[accounted_ops[i]];
rtune_prev_recovery_stats.op_stat_count[accounted_ops[i]] = msgr.recovery_stats.op_stat_count[accounted_ops[i]];
} }
uint64_t total_recovery_usec = 0, recovery_count = 0; total_client_usec -= total_recovery_usec;
total_recovery_usec += recovery_stat[0].usec-rtune_prev_recovery[0].usec;
total_recovery_usec += recovery_stat[1].usec-rtune_prev_recovery[1].usec;
recovery_count += recovery_stat[0].count-rtune_prev_recovery[0].count;
recovery_count += recovery_stat[1].count-rtune_prev_recovery[1].count;
memcpy(rtune_prev_recovery, recovery_stat, sizeof(recovery_stat));
if (recovery_count == 0) if (recovery_count == 0)
{ {
return; return;
} }
rtune_avg_lat = total_recovery_usec/recovery_count*recovery_tune_ewma_rate + // example:
rtune_avg_lat*(1-recovery_tune_ewma_rate); // total 3 GB/s
rtune_avg_count = recovery_count*recovery_tune_ewma_rate + // recovery queue 1
rtune_avg_count*(1-recovery_tune_ewma_rate); // 120 OSDs
// client_util = count/interval * usec/1000000.0/count = usec/1000000.0/interval :-) // EC 5+3
double client_util = total_client_usec/1000000.0/recovery_tune_interval; // 128kb block_size => 640kb object
// 3000*1024/640/120 = 40 MB/s per OSD = 64 recovered objects per OSD
// = 64*8*2 subops = 1024 recovery subop iops
// 8 recovery subop queue
// => subop avg latency = 0.0078125 sec
// utilisation = 8
// target util 1
// intuitively target latency should be 8x of real
// target_lat = rtune_avg_lat * utilisation / target_util
// = rtune_avg_lat * rtune_avg_lat * rtune_avg_iops / target_util
// = 0.0625
// recovery utilisation will be 1
auto client_util = total_client_usec/1000000.0/recovery_tune_interval;
rtune_client_util = rtune_client_util*(1-recovery_tune_ewma_rate) + client_util*recovery_tune_ewma_rate; rtune_client_util = rtune_client_util*(1-recovery_tune_ewma_rate) + client_util*recovery_tune_ewma_rate;
rtune_target_util = (rtune_client_util < recovery_tune_min_client_util rtune_target_util = (rtune_client_util < recovery_tune_min_client_util
? recovery_tune_max_util ? recovery_tune_max_util
@ -414,19 +421,15 @@ void osd_t::tune_recovery()
(recovery_tune_max_client_util-rtune_client_util)/(recovery_tune_max_client_util-recovery_tune_min_client_util) (recovery_tune_max_client_util-rtune_client_util)/(recovery_tune_max_client_util-recovery_tune_min_client_util)
) )
); );
// for example: utilisation = 8, target = 1 rtune_avg_lat = total_recovery_usec/recovery_count*recovery_tune_ewma_rate + rtune_avg_lat*(1-recovery_tune_ewma_rate);
// intuitively target latency should be 8x of real
// target_lat = rtune_avg_lat * utilisation / target_util
// = rtune_avg_lat * rtune_avg_lat * rtune_avg_iops / target_util
// = 0.0625
recovery_target_queue_depth = (int)rtune_target_util + (rtune_target_util < 1 || rtune_target_util-(int)rtune_target_util >= 0.1 ? 1 : 0); recovery_target_queue_depth = (int)rtune_target_util + (rtune_target_util < 1 || rtune_target_util-(int)rtune_target_util >= 0.1 ? 1 : 0);
uint64_t target_lat = rtune_avg_lat * rtune_avg_lat/1000000.0*rtune_avg_count/recovery_tune_interval/rtune_target_util; uint64_t target_lat = rtune_avg_lat * rtune_avg_lat/1000000.0 * recovery_count/recovery_tune_interval / rtune_target_util;
recovery_target_sleep_us = target_lat > rtune_avg_lat+recovery_tune_sleep_min_us ? target_lat-rtune_avg_lat : 0; recovery_target_sleep_us = target_lat > rtune_avg_lat+recovery_tune_sleep_min_us ? target_lat-rtune_avg_lat : 0;
if (log_level > 3) if (log_level > 3)
{ {
printf( printf(
"recovery tune: client util %.2f (ewma %.2f), target util %.2f -> queue %ld, lat %lu us, real %lu us, pause %lu us\n", "recovery tune: cli %lu us, recovery %lu us / %lu ops, target util %.2f -> queue %ld, lat %lu us, real %lu us, delay %lu us\n",
client_util, rtune_client_util, rtune_target_util, recovery_target_queue_depth, target_lat, rtune_avg_lat, recovery_target_sleep_us total_client_usec, total_recovery_usec, recovery_count, rtune_target_util, recovery_target_queue_depth, target_lat, rtune_avg_lat, recovery_target_sleep_us
); );
} }
} }

View File

@ -34,6 +34,7 @@
#define OSD_OP_MAX 18 #define OSD_OP_MAX 18
#define OSD_RW_MAX 64*1024*1024 #define OSD_RW_MAX 64*1024*1024
#define OSD_PROTOCOL_VERSION 1 #define OSD_PROTOCOL_VERSION 1
#define OSD_OP_RECOVERY_RELATED (uint32_t)1
// Memory alignment for direct I/O (usually 512 bytes) // Memory alignment for direct I/O (usually 512 bytes)
#ifndef DIRECT_IO_ALIGNMENT #ifndef DIRECT_IO_ALIGNMENT
@ -88,7 +89,8 @@ struct __attribute__((__packed__)) osd_op_sec_rw_t
uint32_t len; uint32_t len;
// bitmap/attribute length - bitmap comes after header, but before data // bitmap/attribute length - bitmap comes after header, but before data
uint32_t attr_len; uint32_t attr_len;
uint32_t pad0; // the only possible flag is OSD_OP_RECOVERY_RELATED
uint32_t flags;
}; };
struct __attribute__((__packed__)) osd_reply_sec_rw_t struct __attribute__((__packed__)) osd_reply_sec_rw_t
@ -109,6 +111,9 @@ struct __attribute__((__packed__)) osd_op_sec_del_t
object_id oid; object_id oid;
// delete version (automatic or specific) // delete version (automatic or specific)
uint64_t version; uint64_t version;
// the only possible flag is OSD_OP_RECOVERY_RELATED
uint32_t flags;
uint32_t pad0;
}; };
struct __attribute__((__packed__)) osd_reply_sec_del_t struct __attribute__((__packed__)) osd_reply_sec_del_t
@ -121,6 +126,9 @@ struct __attribute__((__packed__)) osd_reply_sec_del_t
struct __attribute__((__packed__)) osd_op_sec_sync_t struct __attribute__((__packed__)) osd_op_sec_sync_t
{ {
osd_op_header_t header; osd_op_header_t header;
// the only possible flag is OSD_OP_RECOVERY_RELATED
uint32_t flags;
uint32_t pad0;
}; };
struct __attribute__((__packed__)) osd_reply_sec_sync_t struct __attribute__((__packed__)) osd_reply_sec_sync_t
@ -134,6 +142,9 @@ struct __attribute__((__packed__)) osd_op_sec_stab_t
osd_op_header_t header; osd_op_header_t header;
// obj_ver_id array length in bytes // obj_ver_id array length in bytes
uint64_t len; uint64_t len;
// the only possible flag is OSD_OP_RECOVERY_RELATED
uint32_t flags;
uint32_t pad0;
}; };
typedef osd_op_sec_stab_t osd_op_sec_rollback_t; typedef osd_op_sec_stab_t osd_op_sec_rollback_t;

View File

@ -221,6 +221,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
.offset = wr ? si->write_start : si->read_start, .offset = wr ? si->write_start : si->read_start,
.len = subop_len, .len = subop_len,
.attr_len = wr ? clean_entry_bitmap_size : 0, .attr_len = wr ? clean_entry_bitmap_size : 0,
.flags = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB ? OSD_OP_RECOVERY_RELATED : 0,
}; };
#ifdef OSD_DEBUG #ifdef OSD_DEBUG
printf( printf(
@ -300,7 +301,8 @@ void osd_t::handle_primary_bs_subop(osd_op_t *subop)
" retval = "+std::to_string(bs_op->retval)+")" " retval = "+std::to_string(bs_op->retval)+")"
); );
} }
add_bs_subop_stats(subop); bool recovery_related = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB;
add_bs_subop_stats(subop, recovery_related);
subop->req.hdr.opcode = bs_op_to_osd_op[bs_op->opcode]; subop->req.hdr.opcode = bs_op_to_osd_op[bs_op->opcode];
subop->reply.hdr.retval = bs_op->retval; subop->reply.hdr.retval = bs_op->retval;
if (bs_op->opcode == BS_OP_READ || bs_op->opcode == BS_OP_WRITE || bs_op->opcode == BS_OP_WRITE_STABLE) if (bs_op->opcode == BS_OP_READ || bs_op->opcode == BS_OP_WRITE || bs_op->opcode == BS_OP_WRITE_STABLE)
@ -312,30 +314,33 @@ void osd_t::handle_primary_bs_subop(osd_op_t *subop)
} }
delete bs_op; delete bs_op;
subop->bs_op = NULL; subop->bs_op = NULL;
subop->peer_fd = -1; subop->peer_fd = SELF_FD;
handle_primary_subop(subop, cur_op); if (recovery_related && recovery_target_sleep_us)
{
tfd->set_timer_us(recovery_target_sleep_us, false, [=](int timer_id)
{
handle_primary_subop(subop, cur_op);
});
}
else
{
handle_primary_subop(subop, cur_op);
}
} }
void osd_t::add_bs_subop_stats(osd_op_t *subop) void osd_t::add_bs_subop_stats(osd_op_t *subop, bool recovery_related)
{ {
// Include local blockstore ops in statistics // Include local blockstore ops in statistics
uint64_t opcode = bs_op_to_osd_op[subop->bs_op->opcode]; uint64_t opcode = bs_op_to_osd_op[subop->bs_op->opcode];
timespec tv_end; timespec tv_end;
clock_gettime(CLOCK_REALTIME, &tv_end); clock_gettime(CLOCK_REALTIME, &tv_end);
msgr.stats.op_stat_count[opcode]++; uint64_t len = (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE)
if (!msgr.stats.op_stat_count[opcode]) ? subop->bs_op->len : 0;
msgr.inc_op_stats(msgr.stats, opcode, subop->tv_begin, tv_end, len);
if (recovery_related)
{ {
msgr.stats.op_stat_count[opcode] = 1; // It is OSD_OP_RECOVERY_RELATED
msgr.stats.op_stat_sum[opcode] = 0; msgr.inc_op_stats(msgr.recovery_stats, opcode, subop->tv_begin, tv_end, len);
msgr.stats.op_stat_bytes[opcode] = 0;
}
msgr.stats.op_stat_sum[opcode] += (
(tv_end.tv_sec - subop->tv_begin.tv_sec)*1000000 +
(tv_end.tv_nsec - subop->tv_begin.tv_nsec)/1000
);
if (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE)
{
msgr.stats.op_stat_bytes[opcode] += subop->bs_op->len;
} }
} }
@ -558,6 +563,7 @@ void osd_t::submit_primary_del_batch(osd_op_t *cur_op, obj_ver_osd_t *chunks_to_
}, },
.oid = chunk.oid, .oid = chunk.oid,
.version = chunk.version, .version = chunk.version,
.flags = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB ? OSD_OP_RECOVERY_RELATED : 0,
} }; } };
subops[i].callback = [cur_op, this](osd_op_t *subop) subops[i].callback = [cur_op, this](osd_op_t *subop)
{ {
@ -615,6 +621,7 @@ int osd_t::submit_primary_sync_subops(osd_op_t *cur_op)
.id = msgr.next_subop_id++, .id = msgr.next_subop_id++,
.opcode = OSD_OP_SEC_SYNC, .opcode = OSD_OP_SEC_SYNC,
}, },
.flags = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB ? OSD_OP_RECOVERY_RELATED : 0,
} }; } };
subops[i].callback = [cur_op, this](osd_op_t *subop) subops[i].callback = [cur_op, this](osd_op_t *subop)
{ {
@ -674,6 +681,7 @@ void osd_t::submit_primary_stab_subops(osd_op_t *cur_op)
.opcode = OSD_OP_SEC_STABILIZE, .opcode = OSD_OP_SEC_STABILIZE,
}, },
.len = (uint64_t)(stab_osd.len * sizeof(obj_ver_id)), .len = (uint64_t)(stab_osd.len * sizeof(obj_ver_id)),
.flags = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB ? OSD_OP_RECOVERY_RELATED : 0,
} }; } };
subops[i].iov.push_back(op_data->unstable_writes + stab_osd.start, stab_osd.len * sizeof(obj_ver_id)); subops[i].iov.push_back(op_data->unstable_writes + stab_osd.start, stab_osd.len * sizeof(obj_ver_id));
subops[i].callback = [cur_op, this](osd_op_t *subop) subops[i].callback = [cur_op, this](osd_op_t *subop)

View File

@ -296,7 +296,6 @@ resume_7:
if (!recovery_stat[recovery_type].count) // wrapped if (!recovery_stat[recovery_type].count) // wrapped
{ {
memset(&recovery_print_prev[recovery_type], 0, sizeof(recovery_print_prev[recovery_type])); memset(&recovery_print_prev[recovery_type], 0, sizeof(recovery_print_prev[recovery_type]));
memset(&rtune_prev_recovery[recovery_type], 0, sizeof(rtune_prev_recovery[recovery_type]));
memset(&recovery_stat[recovery_type], 0, sizeof(recovery_stat[recovery_type])); memset(&recovery_stat[recovery_type], 0, sizeof(recovery_stat[recovery_type]));
recovery_stat[recovery_type].count++; recovery_stat[recovery_type].count++;
} }

View File

@ -42,7 +42,21 @@ void osd_t::secondary_op_callback(osd_op_t *op)
int retval = op->bs_op->retval; int retval = op->bs_op->retval;
delete op->bs_op; delete op->bs_op;
op->bs_op = NULL; op->bs_op = NULL;
finish_op(op, retval); if (op->is_recovery_related() && recovery_target_sleep_us)
{
if (!op->tv_end.tv_sec)
{
clock_gettime(CLOCK_REALTIME, &op->tv_end);
}
tfd->set_timer_us(recovery_target_sleep_us, false, [this, op, retval](int timer_id)
{
finish_op(op, retval);
});
}
else
{
finish_op(op, retval);
}
} }
void osd_t::exec_secondary(osd_op_t *cur_op) void osd_t::exec_secondary(osd_op_t *cur_op)