Compare commits

...

2 Commits

Author SHA1 Message Date
Vitaliy Filippov 42eebfc1bd Fix OSDs still crashing when the cluster is full with EC
Test / test_rebalance_verify_ec (push) Successful in 1m37s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m39s Details
Test / test_write_no_same (push) Successful in 9s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 34s Details
Test / test_write_xor (push) Successful in 35s Details
Test / test_heal_pg_size_2 (push) Successful in 2m16s Details
Test / test_heal_ec (push) Successful in 2m19s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m13s Details
Test / test_heal_antietcd (push) Successful in 2m18s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m23s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m20s Details
Test / test_heal_csum_32k (push) Successful in 2m22s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m22s Details
Test / test_resize_auto (push) Successful in 10s Details
Test / test_resize (push) Successful in 14s Details
Test / test_snapshot_pool2 (push) Successful in 16s Details
Test / test_osd_tags (push) Successful in 9s Details
Test / test_enospc (push) Successful in 11s Details
Test / test_enospc_imm (push) Successful in 11s Details
Test / test_enospc_xor (push) Successful in 14s Details
Test / test_enospc_imm_xor (push) Successful in 15s Details
Test / test_scrub (push) Successful in 16s Details
Test / test_scrub_zero_osd_2 (push) Successful in 16s Details
Test / test_scrub_xor (push) Successful in 15s Details
Test / test_scrub_pg_size_3 (push) Successful in 18s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 16s Details
Test / test_scrub_ec (push) Successful in 15s Details
Test / test_nfs (push) Successful in 13s Details
Test / test_heal_csum_4k (push) Successful in 2m26s Details
ENOSPC handling was introduced in 1.6.0, but it was incomplete; now it is complete.

P.S. See also client_retry_enospc (true by default).
2024-12-26 01:56:33 +03:00
Vitaliy Filippov cef98052f5 Improve logging of subop failures 2024-12-26 01:54:40 +03:00
2 changed files with 38 additions and 20 deletions

View File

@ -304,11 +304,23 @@ void osd_t::handle_primary_bs_subop(osd_op_t *subop)
(bs_op->opcode != BS_OP_WRITE && bs_op->opcode != BS_OP_WRITE_STABLE ||
bs_op->retval != -ENOSPC))
{
// die on any error except ENOSPC
throw std::runtime_error(
"local blockstore modification failed (opcode = "+std::to_string(bs_op->opcode)+
" retval = "+std::to_string(bs_op->retval)+")"
);
// die on any error except ENOSPC during write
if (bs_op->opcode == BS_OP_WRITE || bs_op->opcode == BS_OP_WRITE_STABLE)
{
printf(
"%s subop to %jx:%jx v%ju failed locally: retval = %d (expected %d)\n",
osd_op_names[bs_op_to_osd_op[bs_op->opcode]],
bs_op->oid.inode, bs_op->oid.stripe, bs_op->version, bs_op->retval, expected
);
}
else
{
printf(
"%s subop failed locally: retval = %d (expected %d)\n",
osd_op_names[bs_op_to_osd_op[bs_op->opcode]], bs_op->retval, expected
);
}
throw std::runtime_error("local blockstore modification failed");
}
bool recovery_related = cur_op->peer_fd == SELF_FD && cur_op->req.hdr.opcode != OSD_OP_SCRUB;
add_bs_subop_stats(subop, recovery_related);
@ -383,9 +395,11 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
{
uint64_t version = subop->reply.sec_rw.version;
#ifdef OSD_DEBUG
uint64_t peer_osd = msgr.clients.find(subop->peer_fd) != msgr.clients.end()
? msgr.clients[subop->peer_fd]->osd_num : osd_num;
printf("subop %s %jx:%jx from osd %ju: version = %ju\n", osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, peer_osd, version);
int64_t peer_osd = subop->peer_fd == SELF_FD ? osd_num :
(msgr.clients.find(subop->peer_fd) != msgr.clients.end()
? msgr.clients[subop->peer_fd]->osd_num : -subop->peer_fd);
printf("subop %s %jx:%jx from osd %jd: version = %ju\n", osd_op_names[opcode],
subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, peer_osd, version);
#endif
if (op_data->fact_ver != UINT64_MAX)
{
@ -403,21 +417,23 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
}
if (retval != expected)
{
int64_t peer_osd = (msgr.clients.find(subop->peer_fd) != msgr.clients.end()
? msgr.clients[subop->peer_fd]->osd_num : -subop->peer_fd);
if (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE)
{
printf(
subop->peer_fd >= 0
? "%1$s subop to %2$jx:%3$jx v%4$ju failed on peer %7$d: retval = %5$d (expected %6$d)\n"
? "%1$s subop to %2$jx:%3$jx v%4$ju failed on osd %7$jd: retval = %5$d (expected %6$d)\n"
: "%1$s subop to %2$jx:%3$jx v%4$ju failed locally: retval = %5$d (expected %6$d)\n",
osd_op_names[opcode], subop->req.sec_rw.oid.inode, subop->req.sec_rw.oid.stripe, subop->req.sec_rw.version,
retval, expected, subop->peer_fd
retval, expected, peer_osd
);
}
else
{
printf(
"%s subop failed on peer %d: retval = %d (expected %d)\n",
osd_op_names[opcode], subop->peer_fd, retval, expected
"%s subop failed on osd %jd: retval = %d (expected %d)\n",
osd_op_names[opcode], peer_osd, retval, expected
);
}
subop->rmw_buf = NULL;
@ -728,7 +744,7 @@ void osd_t::submit_primary_rollback_subops(osd_op_t *cur_op, const uint64_t* osd
for (int role = 0; role < op_data->pg_size; role++)
{
if (osd_set[role] != 0 && !stripes[role].read_error &&
msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end())
(osd_set[role] == this->osd_num || msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end()))
{
n_subops++;
}
@ -745,7 +761,7 @@ void osd_t::submit_primary_rollback_subops(osd_op_t *cur_op, const uint64_t* osd
for (int role = 0; role < op_data->pg_size; role++)
{
if (osd_set[role] != 0 && !stripes[role].read_error &&
msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end())
(osd_set[role] == this->osd_num || msgr.osd_peer_fds.find(osd_set[role]) != msgr.osd_peer_fds.end()))
{
osd_op_t *subop = &op_data->subops[i];
op_data->unstable_writes[i] = (obj_ver_id){

View File

@ -260,11 +260,6 @@ resume_4:
op_data->st = 4;
return;
resume_5:
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
// Remove version override just after the write, but before stabilizing
pg.ver_override.erase(op_data->oid);
}
if (op_data->errors > 0)
{
// Handle ENOSPC/EDOM/ERANGE/EIO. If some subops fail, but others succeed,
@ -276,7 +271,7 @@ resume_5:
{
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
submit_primary_rollback_subops(cur_op, op_data->prev_set);
submit_primary_rollback_subops(cur_op, pg.cur_set.data());
resume_11:
op_data->st = 11;
return;
@ -287,15 +282,22 @@ resume_12:
}
else
{
pg.ver_override.erase(op_data->oid);
mark_partial_write(pg, op_data->oid, op_data->object_state, op_data->stripes, true);
pg_cancel_write_queue(pg, cur_op, op_data->oid, op_data->errcode);
return;
}
}
pg.ver_override.erase(op_data->oid);
deref_object_state(pg, &op_data->object_state, true);
pg_cancel_write_queue(pg, cur_op, op_data->oid, op_data->errcode);
return;
}
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
// Remove version override just after the write, but before stabilizing
pg.ver_override.erase(op_data->oid);
}
if (op_data->object_state)
{
// We must forget the unclean state of the object before deleting it