Fix a rare use-after-free in cluster_client caused by a reenterability issue

epoch-deletions
Vitaliy Filippov 2023-04-07 23:25:15 +03:00
parent cd1e890bd4
commit 888a6975ab
1 changed files with 32 additions and 17 deletions

View File

@ -1118,6 +1118,24 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
if (part->op.reply.hdr.retval != expected) if (part->op.reply.hdr.retval != expected)
{ {
// Operation failed, retry // Operation failed, retry
part->flags |= PART_ERROR;
if (!op->retval || op->retval == -EPIPE)
{
// Don't overwrite other errors with -EPIPE
op->retval = part->op.reply.hdr.retval;
}
int stop_fd = -1;
if (op->retval != -EINTR && op->retval != -EIO)
{
stop_fd = part->op.peer_fd;
fprintf(
stderr, "%s operation failed on OSD %lu: retval=%ld (expected %d), dropping connection\n",
osd_op_names[part->op.req.hdr.opcode], part->osd_num, part->op.reply.hdr.retval, expected
);
}
// All next things like timer, continue_sync/rw and stop_client may affect the operation again
// So do all these things after modifying operation state, otherwise we may hit reenterability bugs
// FIXME postpone such things to set_immediate here to avoid bugs
if (part->op.reply.hdr.retval == -EPIPE) if (part->op.reply.hdr.retval == -EPIPE)
{ {
// Mark op->up_wait = true before stopping the client // Mark op->up_wait = true before stopping the client
@ -1131,20 +1149,17 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
}); });
} }
} }
if (!op->retval || op->retval == -EPIPE) if (op->inflight_count == 0)
{ {
// Don't overwrite other errors with -EPIPE if (op->opcode == OSD_OP_SYNC)
op->retval = part->op.reply.hdr.retval; continue_sync(op);
else
continue_rw(op);
} }
if (op->retval != -EINTR && op->retval != -EIO) if (stop_fd >= 0)
{ {
fprintf( msgr.stop_client(stop_fd);
stderr, "%s operation failed on OSD %lu: retval=%ld (expected %d), dropping connection\n",
osd_op_names[part->op.req.hdr.opcode], part->osd_num, part->op.reply.hdr.retval, expected
);
msgr.stop_client(part->op.peer_fd);
} }
part->flags |= PART_ERROR;
} }
else else
{ {
@ -1158,13 +1173,13 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
copy_part_bitmap(op, part); copy_part_bitmap(op, part);
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0; op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
} }
} if (op->inflight_count == 0)
if (op->inflight_count == 0) {
{ if (op->opcode == OSD_OP_SYNC)
if (op->opcode == OSD_OP_SYNC) continue_sync(op);
continue_sync(op); else
else continue_rw(op);
continue_rw(op); }
} }
} }