From 1bc08174f99e9e029affeb8a73844cefea49d6b2 Mon Sep 17 00:00:00 2001
From: Vitaliy Filippov
Date: Fri, 1 May 2020 12:55:15 +0300
Subject: [PATCH] Sync before listing objects so flushes do not fail thereafter

---
 osd.h           |  1 +
 osd_flush.cpp   |  1 -
 osd_peering.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/osd.h b/osd.h
index 26605e5b..9be33f44 100644
--- a/osd.h
+++ b/osd.h
@@ -343,6 +343,7 @@ class osd_t
     void handle_peers();
     void repeer_pgs(osd_num_t osd_num);
     void start_pg_peering(pg_num_t pg_num);
+    void submit_sync_and_list_subop(osd_num_t role_osd, pg_peering_state_t *ps);
     void submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps);
     void discard_list_subop(osd_op_t *list_op);
     bool stop_pg(pg_num_t pg_num);
diff --git a/osd_flush.cpp b/osd_flush.cpp
index 9b863f8e..5a56eb3b 100644
--- a/osd_flush.cpp
+++ b/osd_flush.cpp
@@ -4,7 +4,6 @@
 
 void osd_t::submit_pg_flush_ops(pg_num_t pg_num)
 {
-    // FIXME: SYNC before flushing
     pg_t & pg = pgs[pg_num];
     pg_flush_batch_t *fb = new pg_flush_batch_t();
     pg.flush_batch = fb;
diff --git a/osd_peering.cpp b/osd_peering.cpp
index 87dd5b32..1a6b946c 100644
--- a/osd_peering.cpp
+++ b/osd_peering.cpp
@@ -386,11 +386,101 @@ void osd_t::start_pg_peering(pg_num_t pg_num)
         {
             continue;
         }
-        submit_list_subop(peer_osd, pg.peering_state);
+        submit_sync_and_list_subop(peer_osd, pg.peering_state);
     }
     ringloop->wakeup();
 }
 
+// Sync the given OSD, then list its objects for PG peering, so that flushes
+// issued after the listing do not fail.
+//
+// In readonly mode the listing is submitted right away. Otherwise a SYNC is
+// issued first - BS_OP_SYNC to the local blockstore when role_osd is this OSD,
+// OSD_OP_SECONDARY_SYNC to the peer otherwise - and submit_list_subop() runs
+// from its completion callback. The pending sync op is kept in
+// ps->list_ops[role_osd] until then.
+//
+// NOTE(review): the op is registered in ps->list_ops only after it has been
+// submitted; presumably callbacks never fire synchronously - confirm.
+void osd_t::submit_sync_and_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
+{
+    // Sync before listing, if not readonly
+    if (readonly)
+    {
+        submit_list_subop(role_osd, ps);
+    }
+    else if (role_osd == this->osd_num)
+    {
+        // Self
+        osd_op_t *op = new osd_op_t();
+        op->op_type = 0;
+        op->peer_fd = 0;
+        op->bs_op = new blockstore_op_t();
+        op->bs_op->opcode = BS_OP_SYNC;
+        op->bs_op->callback = [this, ps, op, role_osd](blockstore_op_t *bs_op)
+        {
+            if (bs_op->retval < 0)
+            {
+                printf("Local OP_SYNC failed: %d (%s)\n", bs_op->retval, strerror(-bs_op->retval));
+                force_stop(1);
+                return;
+            }
+            // Copy the captures to the stack: deleting the op may destroy the
+            // callback object that stores this closure, captures included
+            osd_t *self = this;
+            pg_peering_state_t *peering = ps;
+            osd_num_t synced_osd = role_osd;
+            delete op;
+            peering->list_ops.erase(synced_osd);
+            self->submit_list_subop(synced_osd, peering);
+        };
+        bs->enqueue_op(op->bs_op);
+        ps->list_ops[role_osd] = op;
+    }
+    else
+    {
+        // Peer
+        // NOTE(review): presumably the caller only passes connected OSDs here,
+        // so that both lookups below hit existing entries - confirm
+        auto & cl = clients[osd_peer_fds[role_osd]];
+        osd_op_t *op = new osd_op_t();
+        op->op_type = OSD_OP_OUT;
+        op->send_list.push_back(op->req.buf, OSD_PACKET_SIZE);
+        op->peer_fd = cl.peer_fd;
+        op->req = {
+            .sec_sync = {
+                .header = {
+                    .magic = SECONDARY_OSD_OP_MAGIC,
+                    .id = this->next_subop_id++,
+                    .opcode = OSD_OP_SECONDARY_SYNC,
+                },
+            },
+        };
+        op->callback = [this, ps, role_osd](osd_op_t *op)
+        {
+            // Copy the captures to the stack: this closure is stored in the
+            // op's callback member, so `delete op` destroys it
+            osd_t *self = this;
+            pg_peering_state_t *peering = ps;
+            osd_num_t synced_osd = role_osd;
+            if (op->reply.hdr.retval < 0)
+            {
+                // FIXME: Mark peer as failed and don't reconnect immediately after dropping the connection
+                printf("Failed to sync OSD %lu: %ld (%s), disconnecting peer\n", synced_osd, op->reply.hdr.retval, strerror(-op->reply.hdr.retval));
+                peering->list_ops.erase(synced_osd);
+                self->stop_client(op->peer_fd);
+                delete op;
+                return;
+            }
+            delete op;
+            peering->list_ops.erase(synced_osd);
+            self->submit_list_subop(synced_osd, peering);
+        };
+        outbox_push(cl, op);
+        ps->list_ops[role_osd] = op;
+    }
+}
+
 void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
 {
     if (role_osd == this->osd_num)