From e09d0e0678ddb4bcc4d0f44c1c71e688cd7749c4 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sun, 24 May 2020 00:55:03 +0300 Subject: [PATCH] Several bug fixes - Do not block flock() requests - Fix stop_client(0) attempts leading to std::bad_function_call - Fix degraded writes crashing due to an unset stripes[i].missing (at least with a missing parity device) - Fix recovery B/W reporting --- blockstore_open.cpp | 6 +++--- osd.cpp | 4 ++++ osd_primary.cpp | 2 +- osd_primary_subops.cpp | 6 +++--- osd_rmw.cpp | 27 +++++++++++++++------------ 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/blockstore_open.cpp b/blockstore_open.cpp index 19befbfa..487f21d6 100644 --- a/blockstore_open.cpp +++ b/blockstore_open.cpp @@ -291,7 +291,7 @@ void blockstore_impl_t::open_data() { throw std::runtime_error("data_offset exceeds device size = "+std::to_string(data_size)); } - if (!disable_flock && flock(data_fd, LOCK_EX) != 0) + if (!disable_flock && flock(data_fd, LOCK_EX|LOCK_NB) != 0) { throw std::runtime_error(std::string("Failed to lock data device: ") + strerror(errno)); } @@ -312,7 +312,7 @@ void blockstore_impl_t::open_meta() { throw std::runtime_error("meta_offset exceeds device size = "+std::to_string(meta_size)); } - if (!disable_flock && flock(meta_fd, LOCK_EX) != 0) + if (!disable_flock && flock(meta_fd, LOCK_EX|LOCK_NB) != 0) { throw std::runtime_error(std::string("Failed to lock metadata device: ") + strerror(errno)); } @@ -338,7 +338,7 @@ void blockstore_impl_t::open_journal() throw std::runtime_error("Failed to open journal device"); } check_size(journal.fd, &journal.device_size, "journal device"); - if (!disable_flock && flock(journal.fd, LOCK_EX) != 0) + if (!disable_flock && flock(journal.fd, LOCK_EX|LOCK_NB) != 0) { throw std::runtime_error(std::string("Failed to lock journal device: ") + strerror(errno)); } diff --git a/osd.cpp b/osd.cpp index 26435554..0c5e84df 100644 --- a/osd.cpp +++ b/osd.cpp @@ -293,6 +293,7 @@ restart: int peer_fd; while ((peer_fd = accept(listen_fd, (sockaddr*)&addr, &peer_addr_size)) >= 0) { + assert(peer_fd != 0); char peer_str[256]; printf("[OSD %lu] new client %d: connection from %s port %d\n", this->osd_num, peer_fd, inet_ntop(AF_INET, &addr.sin_addr, peer_str, 256), ntohs(addr.sin_port)); @@ -401,6 +402,7 @@ void osd_t::cancel_op(osd_op_t *op) void osd_t::stop_client(int peer_fd) { + assert(peer_fd != 0); auto it = clients.find(peer_fd); if (it == clients.end()) { @@ -437,6 +439,7 @@ void osd_t::stop_client(int peer_fd) if (cl.read_op) { delete cl.read_op; + cl.read_op = NULL; } for (auto rit = read_ready_clients.begin(); rit != read_ready_clients.end(); rit++) { @@ -455,6 +458,7 @@ void osd_t::stop_client(int peer_fd) } } free(cl.in_buf); + assert(peer_fd != 0); close(peer_fd); if (repeer_osd) { diff --git a/osd_primary.cpp b/osd_primary.cpp index 62ba2177..6196bac0 100644 --- a/osd_primary.cpp +++ b/osd_primary.cpp @@ -247,7 +247,7 @@ resume_5: { int recovery_type = op_data->object_state->state & (OBJ_DEGRADED|OBJ_INCOMPLETE) ? 0 : 1; recovery_stat_count[0][recovery_type]++; - if (recovery_stat_count[0][recovery_type]) + if (!recovery_stat_count[0][recovery_type]) { recovery_stat_count[0][recovery_type]++; recovery_stat_bytes[0][recovery_type] = 0; diff --git a/osd_primary_subops.cpp b/osd_primary_subops.cpp index 2e0adf82..0e78cc4b 100644 --- a/osd_primary_subops.cpp +++ b/osd_primary_subops.cpp @@ -346,7 +346,7 @@ void osd_t::submit_primary_del_subops(osd_op_t *cur_op, uint64_t *cur_set, pg_os }; subops[i].callback = [cur_op, this](osd_op_t *subop) { - int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : 0; + int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : -1; handle_primary_subop(OSD_OP_SECONDARY_DELETE, cur_op, subop->reply.hdr.retval, 0, 0); if (fail_fd >= 0) { @@ -399,7 +399,7 @@ void osd_t::submit_primary_sync_subops(osd_op_t *cur_op) }; subops[i].callback = [cur_op, this](osd_op_t *subop) { - int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : 0; + int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : -1; handle_primary_subop(OSD_OP_SECONDARY_SYNC, cur_op, subop->reply.hdr.retval, 0, 0); if (fail_fd >= 0) { @@ -454,7 +454,7 @@ void osd_t::submit_primary_stab_subops(osd_op_t *cur_op) subops[i].send_list.push_back(op_data->unstable_writes + stab_osd.start, stab_osd.len * sizeof(obj_ver_id)); subops[i].callback = [cur_op, this](osd_op_t *subop) { - int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : 0; + int fail_fd = subop->reply.hdr.retval != 0 ? subop->peer_fd : -1; handle_primary_subop(OSD_OP_SECONDARY_STABILIZE, cur_op, subop->reply.hdr.retval, 0, 0); if (fail_fd >= 0) { diff --git a/osd_rmw.cpp b/osd_rmw.cpp index c28bc04e..7b5595b7 100644 --- a/osd_rmw.cpp +++ b/osd_rmw.cpp @@ -232,23 +232,26 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_ // Some stripe(s) are missing, so we need to read parity for (int role = 0; role < pg_size; role++) { - if (read_osd_set[role] == 0 && stripes[role].read_end != 0) + if (read_osd_set[role] == 0) { stripes[role].missing = true; - int found = 0; - for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++) + if (stripes[role].read_end != 0) { - // Read the non-covered range of from at least other stripes to reconstruct it - if (read_osd_set[r2] != 0) + int found = 0; + for (int r2 = 0; r2 < pg_size && found < pg_minsize; r2++) { - extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]); - found++; + // Read the non-covered range of from at least other stripes to reconstruct it + if (read_osd_set[r2] != 0) + { + extend_read(stripes[role].read_start, stripes[role].read_end, stripes[r2]); + found++; + } + } + if (found < pg_minsize) + { + // FIXME Object is incomplete - refuse partial overwrite + assert(0); } - } - if (found < pg_minsize) - { - // FIXME Object is incomplete - refuse partial overwrite - assert(0); } } }