Read object lists from peers and own blockstore

blocking-uring-test
Vitaliy Filippov 2020-01-22 02:36:14 +03:00
parent 8c05ee252c
commit a8bc44064d
6 changed files with 125 additions and 44 deletions

View File

@ -62,6 +62,7 @@ bool blockstore_impl_t::is_stalled()
// main event loop - produce requests // main event loop - produce requests
void blockstore_impl_t::loop() void blockstore_impl_t::loop()
{ {
// FIXME: initialized == 10 is ugly
if (initialized != 10) if (initialized != 10)
{ {
// read metadata, then journal // read metadata, then journal
@ -89,6 +90,7 @@ void blockstore_impl_t::loop()
delete journal_init_reader; delete journal_init_reader;
journal_init_reader = NULL; journal_init_reader = NULL;
initialized = 10; initialized = 10;
ringloop->wakeup();
} }
} }
} }
@ -168,6 +170,11 @@ void blockstore_impl_t::loop()
{ {
dequeue_op = dequeue_stable(op); dequeue_op = dequeue_stable(op);
} }
else if ((op->opcode & BS_OP_TYPE_MASK) == BS_OP_LIST)
{
process_list(op);
dequeue_op = true;
}
if (dequeue_op) if (dequeue_op)
{ {
submit_queue.erase(op_ptr); submit_queue.erase(op_ptr);
@ -304,13 +311,6 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op, bool first)
op->callback(op); op->callback(op);
return; return;
} }
else if (type == BS_OP_LIST)
{
// List operation is processed synchronously
process_list(op);
op->callback(op);
return;
}
// Call constructor without allocating memory. We'll call destructor before returning op back // Call constructor without allocating memory. We'll call destructor before returning op back
new ((void*)op->private_data) blockstore_op_private_t; new ((void*)op->private_data) blockstore_op_private_t;
PRIV(op)->wait_for = 0; PRIV(op)->wait_for = 0;
@ -397,4 +397,5 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
} }
} }
} }
FINISH_OP(op);
} }

15
osd.h
View File

@ -52,6 +52,7 @@ struct osd_op_t
}; };
blockstore_op_t bs_op; blockstore_op_t bs_op;
void *buf = NULL; void *buf = NULL;
std::function<void(osd_op_t*)> callback;
~osd_op_t(); ~osd_op_t();
}; };
@ -83,8 +84,8 @@ struct osd_client_t
// Outbound operations sent to this client (which is probably an OSD peer) // Outbound operations sent to this client (which is probably an OSD peer)
std::map<int, osd_op_t*> sent_ops; std::map<int, osd_op_t*> sent_ops;
// Completed operations to send replies back to the client // Outbound messages (replies or requests)
std::deque<osd_op_t*> completions; std::deque<osd_op_t*> outbox;
// Write state // Write state
osd_op_t *write_op = NULL; osd_op_t *write_op = NULL;
@ -141,9 +142,14 @@ namespace std
#define OSD_HALF_STABLE 0x10000 #define OSD_HALF_STABLE 0x10000
#define OSD_NEEDS_ROLLBACK 0x20000 #define OSD_NEEDS_ROLLBACK 0x20000
class osd_t;
struct osd_pg_peering_state_t struct osd_pg_peering_state_t
{ {
osd_t* self;
uint64_t pg_num;
std::unordered_map<uint64_t, osd_op_t*> list_ops; std::unordered_map<uint64_t, osd_op_t*> list_ops;
int list_done = 0;
}; };
struct osd_pg_t struct osd_pg_t
@ -187,7 +193,7 @@ class osd_t
std::map<uint64_t, int> osd_peer_fds; std::map<uint64_t, int> osd_peer_fds;
std::vector<osd_pg_t> pgs; std::vector<osd_pg_t> pgs;
bool needs_peering = false; int peering_state = 0;
unsigned pg_count = 0; unsigned pg_count = 0;
// client & peer I/O // client & peer I/O
@ -218,6 +224,7 @@ class osd_t
void send_replies(); void send_replies();
void make_reply(osd_op_t *op); void make_reply(osd_op_t *op);
void handle_send(ring_data_t *data, int peer_fd); void handle_send(ring_data_t *data, int peer_fd);
void outbox_push(osd_client_t & cl, osd_op_t *op);
// peer handling (primary OSD logic) // peer handling (primary OSD logic)
void connect_peer(unsigned osd_num, const char *peer_host, int peer_port, std::function<void(int)> callback); void connect_peer(unsigned osd_num, const char *peer_host, int peer_port, std::function<void(int)> callback);
@ -226,9 +233,9 @@ class osd_t
osd_peer_def_t parse_peer(std::string peer); osd_peer_def_t parse_peer(std::string peer);
void init_primary(); void init_primary();
void handle_peers(); void handle_peers();
void start_pg_peering(int i);
// op execution // op execution
void handle_reply(osd_op_t *cur_op);
void exec_op(osd_op_t *cur_op); void exec_op(osd_op_t *cur_op);
void exec_sync_stab_all(osd_op_t *cur_op); void exec_sync_stab_all(osd_op_t *cur_op);
void exec_show_config(osd_op_t *cur_op); void exec_show_config(osd_op_t *cur_op);

View File

@ -2,11 +2,6 @@
#include "json11/json11.hpp" #include "json11/json11.hpp"
void osd_t::handle_reply(osd_op_t *cur_op)
{
}
void osd_t::secondary_op_callback(osd_op_t *cur_op) void osd_t::secondary_op_callback(osd_op_t *cur_op)
{ {
inflight_ops--; inflight_ops--;
@ -14,14 +9,8 @@ void osd_t::secondary_op_callback(osd_op_t *cur_op)
if (cl_it != clients.end()) if (cl_it != clients.end())
{ {
auto & cl = cl_it->second; auto & cl = cl_it->second;
if (cl.write_state == 0)
{
cl.write_state = CL_WRITE_READY;
write_ready_clients.push_back(cur_op->peer_fd);
}
make_reply(cur_op); make_reply(cur_op);
cl.completions.push_back(cur_op); outbox_push(cl, cur_op);
ringloop->wakeup();
} }
else else
{ {
@ -80,7 +69,7 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
cl.write_state = CL_WRITE_READY; cl.write_state = CL_WRITE_READY;
write_ready_clients.push_back(cur_op->peer_fd); write_ready_clients.push_back(cur_op->peer_fd);
make_reply(cur_op); make_reply(cur_op);
cl.completions.push_back(cur_op); cl.outbox.push_back(cur_op);
ringloop->wakeup(); ringloop->wakeup();
} }

View File

@ -18,7 +18,7 @@ void osd_t::init_primary()
.object_map = spp::sparse_hash_map<object_id, int>(), .object_map = spp::sparse_hash_map<object_id, int>(),
}); });
pg_count = 1; pg_count = 1;
needs_peering = true; peering_state = 1;
} }
osd_peer_def_t osd_t::parse_peer(std::string peer) osd_peer_def_t osd_t::parse_peer(std::string peer)
@ -120,7 +120,7 @@ void osd_t::handle_connect_result(int peer_fd)
// Ideally: Connect -> Ask & check config -> Start PG peering // Ideally: Connect -> Ask & check config -> Start PG peering
void osd_t::handle_peers() void osd_t::handle_peers()
{ {
if (needs_peering) if (peering_state & 1)
{ {
for (int i = 0; i < peers.size(); i++) for (int i = 0; i < peers.size(); i++)
{ {
@ -131,27 +131,100 @@ void osd_t::handle_peers()
connect_peer(peers[i].osd_num, peers[i].addr.c_str(), peers[i].port, [this](int peer_fd) connect_peer(peers[i].osd_num, peers[i].addr.c_str(), peers[i].port, [this](int peer_fd)
{ {
printf("Connected with peer OSD %lu (fd %d)\n", clients[peer_fd].osd_num, peer_fd); printf("Connected with peer OSD %lu (fd %d)\n", clients[peer_fd].osd_num, peer_fd);
// Restart PG peering int i;
pgs[0].state = PG_PEERING; for (i = 0; i < peers.size(); i++)
pgs[0].acting_set_ids.clear(); {
pgs[0].acting_sets.clear(); auto it = osd_peer_fds.find(peers[i].osd_num);
pgs[0].object_map.clear(); if (it == osd_peer_fds.end() || clients[it->second].peer_state != PEER_CONNECTED)
if (pgs[0].peering_state) {
delete pgs[0].peering_state; break;
ringloop->wakeup(); }
}
if (i >= peers.size())
{
// Start PG peering
pgs[0].state = PG_PEERING;
pgs[0].acting_set_ids.clear();
pgs[0].acting_sets.clear();
pgs[0].object_map.clear();
if (pgs[0].peering_state)
delete pgs[0].peering_state;
peering_state = 2;
ringloop->wakeup();
}
}); });
} }
} }
} }
for (int i = 0; i < pgs.size(); i++) if (peering_state & 2)
{ {
if (pgs[i].state == PG_PEERING) for (int i = 0; i < pgs.size(); i++)
{ {
if (!pgs[i].peering_state) if (pgs[i].state == PG_PEERING)
{ {
pgs[i].peering_state = new osd_pg_peering_state_t(); if (!pgs[i].peering_state)
{
start_pg_peering(i);
}
else if (pgs[i].peering_state->list_done >= 3)
{
// FIXME
peering_state = 0;
}
} }
} }
} }
} }
void osd_t::start_pg_peering(int pg_idx)
{
auto & pg = pgs[pg_idx];
auto ps = pg.peering_state = new osd_pg_peering_state_t();
ps->self = this;
ps->pg_num = pg_idx; // FIXME probably shouldn't be pg_idx
{
osd_op_t *op = new osd_op_t();
op->op_type = 0;
op->peer_fd = 0;
op->bs_op.opcode = BS_OP_LIST;
op->bs_op.callback = [ps, op](blockstore_op_t *bs_op)
{
printf(
"Got object list from OSD %lu (local): %d objects (%lu of them stable)\n",
ps->self->osd_num, bs_op->retval, bs_op->version
);
ps->list_done++;
};
pg.peering_state->list_ops[osd_num] = op;
bs->enqueue_op(&op->bs_op);
}
for (int i = 0; i < peers.size(); i++)
{
auto & cl = clients[osd_peer_fds[peers[i].osd_num]];
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
op->peer_fd = cl.peer_fd;
op->op = {
.sec_list = {
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,
.id = 1,
.opcode = OSD_OP_SECONDARY_LIST,
},
.pgnum = 1,
.pgtotal = 1,
},
};
op->callback = [ps](osd_op_t *op)
{
printf(
"Got object list from OSD %lu: %ld objects (%lu of them stable)\n",
ps->self->clients[op->peer_fd].osd_num, op->reply.hdr.retval,
op->reply.sec_list.stable_count
);
ps->list_done++;
};
pg.peering_state->list_ops[cl.osd_num] = op;
outbox_push(cl, op);
}
}

View File

@ -90,7 +90,7 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
cl.sent_ops.erase(req_it); cl.sent_ops.erase(req_it);
cl.read_reply_id = 0; cl.read_reply_id = 0;
cl.read_state = 0; cl.read_state = 0;
handle_reply(request); request->callback(request);
} }
} }
} }
@ -168,6 +168,6 @@ void osd_t::handle_read_reply(osd_client_t *cl)
{ {
cl->read_state = 0; cl->read_state = 0;
cl->sent_ops.erase(req_it); cl->sent_ops.erase(req_it);
handle_reply(request); request->callback(request);
} }
} }

View File

@ -1,5 +1,16 @@
#include "osd.h" #include "osd.h"
// Appends cur_op to the client's outbox and wakes the ring loop.
//
// If the client's write_state is 0, it is switched to CL_WRITE_READY and
// cur_op->peer_fd is added to write_ready_clients.
void osd_t::outbox_push(osd_client_t & cl, osd_op_t *cur_op)
{
    cl.outbox.push_back(cur_op);
    const bool writer_idle = (cl.write_state == 0);
    if (writer_idle)
    {
        write_ready_clients.push_back(cur_op->peer_fd);
        cl.write_state = CL_WRITE_READY;
    }
    ringloop->wakeup();
}
void osd_t::send_replies() void osd_t::send_replies()
{ {
for (int i = 0; i < write_ready_clients.size(); i++) for (int i = 0; i < write_ready_clients.size(); i++)
@ -16,8 +27,8 @@ void osd_t::send_replies()
if (!cl.write_buf) if (!cl.write_buf)
{ {
// pick next command // pick next command
cl.write_op = cl.completions.front(); cl.write_op = cl.outbox.front();
cl.completions.pop_front(); cl.outbox.pop_front();
if (cl.write_op->op_type == OSD_OP_OUT) if (cl.write_op->op_type == OSD_OP_OUT)
{ {
cl.write_buf = &cl.write_op->op_buf; cl.write_buf = &cl.write_op->op_buf;
@ -135,7 +146,7 @@ void osd_t::handle_send(ring_data_t *data, int peer_fd)
cl.sent_ops[cl.write_op->op.hdr.id] = cl.write_op; cl.sent_ops[cl.write_op->op.hdr.id] = cl.write_op;
} }
cl.write_op = NULL; cl.write_op = NULL;
cl.write_state = cl.completions.size() > 0 ? CL_WRITE_READY : 0; cl.write_state = cl.outbox.size() > 0 ? CL_WRITE_READY : 0;
} }
} }
} }