Several bug fixes

- Do not block flock() requests
- Fix stop_client(0) attempts leading to std::bad_function_call
- Fix degraded writes crashing due to an unset stripes[i].missing (at least with a missing parity device)
- Fix recovery B/W reporting
trace-sqes
Vitaliy Filippov 2020-05-24 00:55:03 +03:00
parent d1602b50b3
commit e09d0e0678
5 changed files with 26 additions and 19 deletions

View File

@ -291,7 +291,7 @@ void blockstore_impl_t::open_data()
{ {
throw std::runtime_error("data_offset exceeds device size = "+std::to_string(data_size)); throw std::runtime_error("data_offset exceeds device size = "+std::to_string(data_size));
} }
if (!disable_flock && flock(data_fd, LOCK_EX) != 0) if (!disable_flock && flock(data_fd, LOCK_EX|LOCK_NB) != 0)
{ {
throw std::runtime_error(std::string("Failed to lock data device: ") + strerror(errno)); throw std::runtime_error(std::string("Failed to lock data device: ") + strerror(errno));
} }
@ -312,7 +312,7 @@ void blockstore_impl_t::open_meta()
{ {
throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_size)); throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_size));
} }
if (!disable_flock && flock(meta_fd, LOCK_EX) != 0) if (!disable_flock && flock(meta_fd, LOCK_EX|LOCK_NB) != 0)
{ {
throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno)); throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno));
} }
@ -338,7 +338,7 @@ void blockstore_impl_t::open_journal()
throw std::runtime_error("Failed to open journal device"); throw std::runtime_error("Failed to open journal device");
} }
check_size(journal.fd, &journal.device_size, "journal device"); check_size(journal.fd, &journal.device_size, "journal device");
if (!disable_flock && flock(journal.fd, LOCK_EX) != 0) if (!disable_flock && flock(journal.fd, LOCK_EX|LOCK_NB) != 0)
{ {
throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno)); throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno));
} }

View File

@ -293,6 +293,7 @@ restart:
int peer_fd; int peer_fd;
while ((peer_fd = accept(listen_fd, (sockaddr*)&addr, &peer_addr_size)) >= 0) while ((peer_fd = accept(listen_fd, (sockaddr*)&addr, &peer_addr_size)) >= 0)
{ {
assert(peer_fd != 0);
char peer_str[256]; char peer_str[256];
printf("[OSD %lu] new client %d: connection from %s port %d\n", this->osd_num, peer_fd, printf("[OSD %lu] new client %d: connection from %s port %d\n", this->osd_num, peer_fd,
inet_ntop(AF_INET, &addr.sin_addr, peer_str, 256), ntohs(addr.sin_port)); inet_ntop(AF_INET, &addr.sin_addr, peer_str, 256), ntohs(addr.sin_port));
@ -401,6 +402,7 @@ void osd_t::cancel_op(osd_op_t *op)
void osd_t::stop_client(int peer_fd) void osd_t::stop_client(int peer_fd)
{ {
assert(peer_fd != 0);
auto it = clients.find(peer_fd); auto it = clients.find(peer_fd);
if (it == clients.end()) if (it == clients.end())
{ {
@ -437,6 +439,7 @@ void osd_t::stop_client(int peer_fd)
if (cl.read_op) if (cl.read_op)
{ {
delete cl.read_op; delete cl.read_op;
cl.read_op = NULL;
} }
for (auto rit = read_ready_clients.begin(); rit != read_ready_clients.end(); rit++) for (auto rit = read_ready_clients.begin(); rit != read_ready_clients.end(); rit++)
{ {
@ -455,6 +458,7 @@ void osd_t::stop_client(int peer_fd)
} }
} }
free(cl.in_buf); free(cl.in_buf);
assert(peer_fd != 0);
close(peer_fd); close(peer_fd);
if (repeer_osd) if (repeer_osd)
{ {

View File

@ -247,7 +247,7 @@ resume_5:
{ {
int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1; int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1;
recovery_stat_count[0][recovery_type]++; recovery_stat_count[0][recovery_type]++;
if (recovery_stat_count[0][recovery_type]) if (!recovery_stat_count[0][recovery_type])
{ {
recovery_stat_count[0][recovery_type]++; recovery_stat_count[0][recovery_type]++;
recovery_stat_bytes[0][recovery_type] = 0; recovery_stat_bytes[0][recovery_type] = 0;

View File

@ -346,7 +346,7 @@ void osd_t::submit_primary_del_subops(osd_op_t *cur_op, uint64_t *cur_set, pg_os
}; };
subops[i].callback = [cur_op, this](osd_op_t *subop) subops[i].callback = [cur_op, this](osd_op_t *subop)
{ {
int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : 0; int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : -1;
handle_primary_subop(OSD_OP_SECONDARY_DELETE, cur_op, subop->reply.hdr.retval, 0, 0); handle_primary_subop(OSD_OP_SECONDARY_DELETE, cur_op, subop->reply.hdr.retval, 0, 0);
if (fail_fd >= 0) if (fail_fd >= 0)
{ {
@ -399,7 +399,7 @@ void osd_t::submit_primary_sync_subops(osd_op_t *cur_op)
}; };
subops[i].callback = [cur_op, this](osd_op_t *subop) subops[i].callback = [cur_op, this](osd_op_t *subop)
{ {
int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : 0; int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : -1;
handle_primary_subop(OSD_OP_SECONDARY_SYNC, cur_op, subop->reply.hdr.retval, 0, 0); handle_primary_subop(OSD_OP_SECONDARY_SYNC, cur_op, subop->reply.hdr.retval, 0, 0);
if (fail_fd >= 0) if (fail_fd >= 0)
{ {
@ -454,7 +454,7 @@ void osd_t::submit_primary_stab_subops(osd_op_t *cur_op)
subops[i].send_list.push_back(op_data->unstable_writes + stab_osd.start, stab_osd.len * sizeof(obj_ver_id)); subops[i].send_list.push_back(op_data->unstable_writes + stab_osd.start, stab_osd.len * sizeof(obj_ver_id));
subops[i].callback = [cur_op, this](osd_op_t *subop) subops[i].callback = [cur_op, this](osd_op_t *subop)
{ {
int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : 0; int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : -1;
handle_primary_subop(OSD_OP_SECONDARY_STABILIZE, cur_op, subop->reply.hdr.retval, 0, 0); handle_primary_subop(OSD_OP_SECONDARY_STABILIZE, cur_op, subop->reply.hdr.retval, 0, 0);
if (fail_fd >= 0) if (fail_fd >= 0)
{ {

View File

@ -232,23 +232,26 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
// Some stripe(s) are missing, so we need to read parity // Some stripe(s) are missing, so we need to read parity
for (int role = 0; role < pg_size; role++) for (int role = 0; role < pg_size; role++)
{ {
if (read_osd_set[role] == 0 && stripes[role].read_end != 0) if (read_osd_set[role] == 0)
{ {
stripes[role].missing = true; stripes[role].missing = true;
int found = 0; if (stripes[role].read_end != 0)
for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++)
{ {
// Read the non-covered range of <role> from at least <minsize> other stripes to reconstruct it int found = 0;
if (read_osd_set[r2] != 0) for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++)
{ {
extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]); // Read the non-covered range of <role> from at least <minsize> other stripes to reconstruct it
found++; if (read_osd_set[r2] != 0)
{
extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]);
found++;
}
}
if (found < pg_minsize)
{
// FIXME Object is incomplete - refuse partial overwrite
assert(0);
} }
}
if (found < pg_minsize)
{
// FIXME Object is incomplete - refuse partial overwrite
assert(0);
} }
} }
} }