Compare commits

...

2 Commits

Author SHA1 Message Date
Vitaliy Filippov 42eebfc1bd Fix OSDs still crashing when the cluster is full with EC
Test / test_rebalance_verify_ec (push) Successful in 1m37s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m39s Details
Test / test_write_no_same (push) Successful in 9s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 34s Details
Test / test_write_xor (push) Successful in 35s Details
Test / test_heal_pg_size_2 (push) Successful in 2m16s Details
Test / test_heal_ec (push) Successful in 2m19s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m13s Details
Test / test_heal_antietcd (push) Successful in 2m18s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m23s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m20s Details
Test / test_heal_csum_32k (push) Successful in 2m22s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m22s Details
Test / test_resize_auto (push) Successful in 10s Details
Test / test_resize (push) Successful in 14s Details
Test / test_snapshot_pool2 (push) Successful in 16s Details
Test / test_osd_tags (push) Successful in 9s Details
Test / test_enospc (push) Successful in 11s Details
Test / test_enospc_imm (push) Successful in 11s Details
Test / test_enospc_xor (push) Successful in 14s Details
Test / test_enospc_imm_xor (push) Successful in 15s Details
Test / test_scrub (push) Successful in 16s Details
Test / test_scrub_zero_osd_2 (push) Successful in 16s Details
Test / test_scrub_xor (push) Successful in 15s Details
Test / test_scrub_pg_size_3 (push) Successful in 18s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 16s Details
Test / test_scrub_ec (push) Successful in 15s Details
Test / test_nfs (push) Successful in 13s Details
Test / test_heal_csum_4k (push) Successful in 2m26s Details
ENOSPC handling was introduced in 1.6.0 but was incomplete; it is now complete.

P.S. See also client_retry_enospc (true by default).
2024-12-26 01:56:33 +03:00
Vitaliy Filippov cef98052f5 Improve logging of subop failures 2024-12-26 01:54:40 +03:00
2 changed files with 38 additions and 20 deletions

View File

@ -304,11 +304,23 @@ void osd_t::handle_primary_bs_subop(osd_op_t *subop)
(bs_op->opcode != BS_OP_WRITE && bs_op->opcode != BS_OP_WRITE_STABLE || (bs_op->opcode != BS_OP_WRITE && bs_op->opcode != BS_OP_WRITE_STABLE ||
bs_op->retval != -ENOSPC)) bs_op->retval != -ENOSPC))
{ {
// die on any error except ENOSPC // die on any error except ENOSPC during write
throw std::runtime_error( if (bs_op->opcode == BS_OP_WRITE || bs_op->opcode == BS_OP_WRITE_STABLE)
"local blockstore modification failed (opcode = "+std::to_string(bs_op->opcode)+ {
" retval = "+std::to_string(bs_op->retval)+")" printf(
); "%s subop to %jx:%jx v%ju failed locally: retval = %d (expected %d)\n",
osd_op_names[bs_op_to_osd_op[bs_op->opcode]],
bs_op->oid.inode, bs_op->oid.stripe, bs_op->version, bs_op->retval, expected
);
}
else
{
printf(
"%s subop failed locally: retval = %d (expected %d)\n",
osd_op_names[bs_op_to_osd_op[bs_op->opcode]], bs_op->retval, expected
);
}
throw std::runtime_error("local blockstore modification failed");
} }
bool recovery_related = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB; bool recovery_related = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB;
add_bs_subop_stats(subop, recovery_related); add_bs_subop_stats(subop, recovery_related);
@ -383,9 +395,11 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
{ {
uint64_t version = subop->reply.sec_rw.version; uint64_t version = subop->reply.sec_rw.version;
#ifdef OSD_DEBUG #ifdef OSD_DEBUG
uint64_t peer_osd = msgr.clients.find(subop->peer_fd) != msgr.clients.end() int64_t peer_osd = subop->peer_fd == SELF_FD ? osd_num :
? msgr.clients[subop->peer_fd]->osd_num : osd_num; (msgr.clients.find(subop->peer_fd) != msgr.clients.end()
printf("subop %s %jx:%jx from osd %ju: version = %ju\n", osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, peer_osd, version); ? msgr.clients[subop->peer_fd]->osd_num : -subop->peer_fd);
printf("subop %s %jx:%jx from osd %jd: version = %ju\n", osd_op_names[opcode],
subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, peer_osd, version);
#endif #endif
if (op_data->fact_ver != UINT64_MAX) if (op_data->fact_ver != UINT64_MAX)
{ {
@ -403,21 +417,23 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
} }
if (retval != expected) if (retval != expected)
{ {
int64_t peer_osd = (msgr.clients.find(subop->peer_fd) != msgr.clients.end()
? msgr.clients[subop->peer_fd]->osd_num : -subop->peer_fd);
if (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE) if (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE)
{ {
printf( printf(
subop->peer_fd >= 0 subop->peer_fd >= 0
? "%1$s subop to %2$jx:%3$jx v%4$ju failed on peer %7$d: retval = %5$d (expected %6$d)\n" ? "%1$s subop to %2$jx:%3$jx v%4$ju failed on osd %7$jd: retval = %5$d (expected %6$d)\n"
: "%1$s subop to %2$jx:%3$jx v%4$ju failed locally: retval = %5$d (expected %6$d)\n", : "%1$s subop to %2$jx:%3$jx v%4$ju failed locally: retval = %5$d (expected %6$d)\n",
osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, subop->req.sec_rw.version, osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, subop->req.sec_rw.version,
retval, expected, subop->peer_fd retval, expected, peer_osd
); );
} }
else else
{ {
printf( printf(
"%s subop failed on peer %d: retval = %d (expected %d)\n", "%s subop failed on osd %jd: retval = %d (expected %d)\n",
osd_op_names[opcode], subop->peer_fd, retval, expected osd_op_names[opcode], peer_osd, retval, expected
); );
} }
subop->rmw_buf = NULL; subop->rmw_buf = NULL;
@ -728,7 +744,7 @@ void osd_t::submit_primary_rollback_subops(osd_op_t *cur_op, const uint64_t* osd
for (int role = 0; role < op_data->pg_size; role++) for (int role = 0; role < op_data->pg_size; role++)
{ {
if (osd_set[role] != 0 && !stripes[role].read_error && if (osd_set[role] != 0 && !stripes[role].read_error &&
msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end()) (osd_set[role] == this->osd_num || msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end()))
{ {
n_subops++; n_subops++;
} }
@ -745,7 +761,7 @@ void osd_t::submit_primary_rollback_subops(osd_op_t *cur_op, const uint64_t* osd
for (int role = 0; role < op_data->pg_size; role++) for (int role = 0; role < op_data->pg_size; role++)
{ {
if (osd_set[role] != 0 && !stripes[role].read_error && if (osd_set[role] != 0 && !stripes[role].read_error &&
msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end()) (osd_set[role] == this->osd_num || msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end()))
{ {
osd_op_t *subop = &op_data->subops[i]; osd_op_t *subop = &op_data->subops[i];
op_data->unstable_writes[i] = (obj_ver_id){ op_data->unstable_writes[i] = (obj_ver_id){

View File

@ -260,11 +260,6 @@ resume_4:
op_data->st = 4; op_data->st = 4;
return; return;
resume_5: resume_5:
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
// Remove version override just after the write, but before stabilizing
pg.ver_override.erase(op_data->oid);
}
if (op_data->errors > 0) if (op_data->errors > 0)
{ {
// Handle ENOSPC/EDOM/ERANGE/EIO. If some subops fail, but others succeed, // Handle ENOSPC/EDOM/ERANGE/EIO. If some subops fail, but others succeed,
@ -276,7 +271,7 @@ resume_5:
{ {
if (op_data->scheme != POOL_SCHEME_REPLICATED) if (op_data->scheme != POOL_SCHEME_REPLICATED)
{ {
submit_primary_rollback_subops(cur_op, op_data->prev_set); submit_primary_rollback_subops(cur_op, pg.cur_set.data());
resume_11: resume_11:
op_data->st = 11; op_data->st = 11;
return; return;
@ -287,15 +282,22 @@ resume_12:
} }
else else
{ {
pg.ver_override.erase(op_data->oid);
mark_partial_write(pg, op_data->oid, op_data->object_state, op_data->stripes, true); mark_partial_write(pg, op_data->oid, op_data->object_state, op_data->stripes, true);
pg_cancel_write_queue(pg, cur_op, op_data->oid, op_data->errcode); pg_cancel_write_queue(pg, cur_op, op_data->oid, op_data->errcode);
return; return;
} }
} }
pg.ver_override.erase(op_data->oid);
deref_object_state(pg, &op_data->object_state, true); deref_object_state(pg, &op_data->object_state, true);
pg_cancel_write_queue(pg, cur_op, op_data->oid, op_data->errcode); pg_cancel_write_queue(pg, cur_op, op_data->oid, op_data->errcode);
return; return;
} }
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
// Remove version override just after the write, but before stabilizing
pg.ver_override.erase(op_data->oid);
}
if (op_data->object_state) if (op_data->object_state)
{ {
// We must forget the unclean state of the object before deleting it // We must forget the unclean state of the object before deleting it