forked from vitalif/vitastor
Begin reply code
parent
e052959d7b
commit
02a0eb49c2
|
@ -296,5 +296,5 @@ void blockstore::enqueue_op(blockstore_operation *op)
|
|||
{
|
||||
enqueue_write(op);
|
||||
}
|
||||
ringloop->wakeup(ring_consumer);
|
||||
ringloop->wakeup();
|
||||
}
|
||||
|
|
|
@ -101,7 +101,7 @@ void journal_flusher_t::unshift_flush(obj_ver_id ov)
|
|||
void journal_flusher_t::force_start()
|
||||
{
|
||||
start_forced = true;
|
||||
bs->ringloop->wakeup(bs->ring_consumer);
|
||||
bs->ringloop->wakeup();
|
||||
}
|
||||
|
||||
#define await_sqe(label) \
|
||||
|
@ -330,12 +330,12 @@ bool journal_flusher_co::loop()
|
|||
if (meta_new.submitted)
|
||||
{
|
||||
meta_new.it->second.state = 1;
|
||||
bs->ringloop->wakeup(bs->ring_consumer);
|
||||
bs->ringloop->wakeup();
|
||||
}
|
||||
if (meta_old.submitted)
|
||||
{
|
||||
meta_old.it->second.state = 1;
|
||||
bs->ringloop->wakeup(bs->ring_consumer);
|
||||
bs->ringloop->wakeup();
|
||||
}
|
||||
// Reads completed, submit writes
|
||||
for (it = v.begin(); it != v.end(); it++)
|
||||
|
@ -624,7 +624,7 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
|
|||
}
|
||||
// Sync completed. All previous coroutines waiting for it must be resumed
|
||||
cur_sync->state = 2;
|
||||
bs->ringloop->wakeup(bs->ring_consumer);
|
||||
bs->ringloop->wakeup();
|
||||
}
|
||||
// Wait until someone else sends and completes a sync.
|
||||
resume_2:
|
||||
|
|
154
osd.cpp
154
osd.cpp
|
@ -6,15 +6,23 @@
|
|||
#include "osd_ops.h"
|
||||
#include "ringloop.h"
|
||||
|
||||
#define CL_READ_OP 1
|
||||
#define CL_READ_DATA 2
|
||||
|
||||
struct osd_op_t
|
||||
{
|
||||
int peer_fd;
|
||||
union
|
||||
{
|
||||
osd_any_op_t op;
|
||||
uint8_t op_buf[OSD_OP_PACKET_SIZE];
|
||||
uint8_t op_buf[OSD_OP_PACKET_SIZE] = { 0 };
|
||||
};
|
||||
union
|
||||
{
|
||||
osd_any_reply_t reply;
|
||||
uint8_t reply_buf[OSD_REPLY_PACKET_SIZE] = { 0 };
|
||||
};
|
||||
blockstore_operation bs_op;
|
||||
int client_fd;
|
||||
void *buf = NULL;
|
||||
};
|
||||
|
||||
|
@ -23,15 +31,29 @@ struct osd_client_t
|
|||
sockaddr_in peer_addr;
|
||||
socklen_t peer_addr_size;
|
||||
int peer_fd;
|
||||
bool ready = false;
|
||||
bool reading = false;
|
||||
int in_flight_ops = 0;
|
||||
//int in_flight_ops = 0;
|
||||
|
||||
struct osd_op_t *cur_op = NULL;
|
||||
iovec iov;
|
||||
msghdr msg;
|
||||
void *cur_buf = NULL;
|
||||
int cur_done = 0, cur_remaining = 0;
|
||||
// Read state
|
||||
bool read_ready = false;
|
||||
bool reading = false;
|
||||
osd_op_t *read_op = NULL;
|
||||
iovec read_iov;
|
||||
msghdr read_msg;
|
||||
void *read_buf = NULL;
|
||||
int read_remaining = 0;
|
||||
int read_state = 0;
|
||||
|
||||
// Completed operations to send replies back to the client
|
||||
std::deque<osd_op_t*> completions;
|
||||
|
||||
// Write state
|
||||
osd_op_t *write_op = NULL;
|
||||
int write_state = 0;
|
||||
iovec write_iov;
|
||||
msghdr write_msg;
|
||||
void *write_buf = NULL;
|
||||
int write_remaining = 0;
|
||||
int write_state = 0;
|
||||
};
|
||||
|
||||
class osd_t
|
||||
|
@ -54,7 +76,8 @@ class osd_t
|
|||
int bind_port, listen_backlog;
|
||||
|
||||
std::unordered_map<int,osd_client_t> clients;
|
||||
std::deque<int> ready_clients;
|
||||
std::deque<int> read_ready_clients;
|
||||
std::list<int> write_ready_clients;
|
||||
|
||||
void handle_epoll_events();
|
||||
public:
|
||||
|
@ -106,7 +129,7 @@ osd_t::osd_t(blockstore *bs, ring_loop_t *ringloop)
|
|||
throw std::runtime_error(std::string("epoll_create: ") + strerror(errno));
|
||||
}
|
||||
|
||||
struct epoll_event ev;
|
||||
epoll_event ev;
|
||||
ev.data.fd = listen_fd;
|
||||
ev.events = EPOLLIN;
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, listen_fd, &ev) < 0)
|
||||
|
@ -131,13 +154,13 @@ void osd_t::loop()
|
|||
{
|
||||
return;
|
||||
}
|
||||
struct io_uring_sqe *sqe = ringloop->get_sqe();
|
||||
io_uring_sqe *sqe = ringloop->get_sqe();
|
||||
if (!sqe)
|
||||
{
|
||||
wait_state = 0;
|
||||
return;
|
||||
}
|
||||
struct ring_data_t *data = ((ring_data_t*)sqe->user_data);
|
||||
ring_data_t *data = ((ring_data_t*)sqe->user_data);
|
||||
my_uring_prep_poll_add(sqe, epoll_fd, POLLIN);
|
||||
data->callback = [&](ring_data_t *data)
|
||||
{
|
||||
|
@ -168,7 +191,7 @@ int osd_t::handle_epoll_events()
|
|||
if (events[i].data.fd == listen_fd)
|
||||
{
|
||||
// Accept new connections
|
||||
struct sockaddr_in addr;
|
||||
sockaddr_in addr;
|
||||
socklen_t peer_addr_size = sizeof(addr);
|
||||
int peer_fd;
|
||||
while ((peer_fd = accept(listen_fd, &addr, &peer_addr_size)) >= 0)
|
||||
|
@ -178,10 +201,9 @@ int osd_t::handle_epoll_events()
|
|||
.peer_addr = addr,
|
||||
.peer_addr_size = peer_addr_size,
|
||||
.peer_fd = peer_fd,
|
||||
.ready = false,
|
||||
};
|
||||
// Add FD to epoll
|
||||
struct epoll_event ev;
|
||||
epoll_event ev;
|
||||
ev.data.fd = peer_fd;
|
||||
ev.events = EPOLLIN | EPOLLHUP;
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, peer_fd, &ev) < 0)
|
||||
|
@ -204,12 +226,12 @@ int osd_t::handle_epoll_events()
|
|||
// Stop client
|
||||
stop_client(cl.peer_fd);
|
||||
}
|
||||
else if (!cl.ready)
|
||||
else if (!cl.read_ready)
|
||||
{
|
||||
// Mark client as ready (i.e. some data is available)
|
||||
cl.ready = true;
|
||||
cl.read_ready = true;
|
||||
if (!cl.reading)
|
||||
ready_clients.push_back(cl.peer_fd);
|
||||
read_ready_clients.push_back(cl.peer_fd);
|
||||
}
|
||||
}
|
||||
count++;
|
||||
|
@ -220,7 +242,7 @@ int osd_t::handle_epoll_events()
|
|||
|
||||
void osd_t::stop_client(int peer_fd)
|
||||
{
|
||||
struct epoll_event ev;
|
||||
epoll_event ev;
|
||||
ev.data.fd = peer_fd;
|
||||
ev.events = EPOLLIN | EPOLLHUP;
|
||||
if (epoll_ctl(epoll_fd, EPOLL_CTL_DEL, peer_fd, &ev) < 0)
|
||||
|
@ -228,13 +250,13 @@ void osd_t::stop_client(int peer_fd)
|
|||
throw std::runtime_error(std::string("epoll_ctl: ") + strerror(errno));
|
||||
}
|
||||
auto it = clients.find(peer_fd);
|
||||
if (it->ready)
|
||||
if (it->read_ready)
|
||||
{
|
||||
for (auto rit = ready_clients.begin(); rit != ready_clients.end(); rit++)
|
||||
for (auto rit = read_ready_clients.begin(); rit != read_ready_clients.end(); rit++)
|
||||
{
|
||||
if (*rit == peer_fd)
|
||||
{
|
||||
ready_clients.erase(rit);
|
||||
read_ready_clients.erase(rit);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -245,36 +267,37 @@ void osd_t::stop_client(int peer_fd)
|
|||
|
||||
void osd_t::read_commands()
|
||||
{
|
||||
for (int i = 0; i < ready_clients.size(); i++)
|
||||
for (int i = 0; i < read_ready_clients.size(); i++)
|
||||
{
|
||||
int peer_fd = ready_clients[i];
|
||||
int peer_fd = read_ready_clients[i];
|
||||
auto & cl = clients[peer_fd];
|
||||
if (!cl.cur_buf)
|
||||
{
|
||||
// no reads in progress, so this is probably a new command
|
||||
cl.cur_op = new osd_op_t;
|
||||
cl.cur_buf = &cl.cur_op->op_buf;
|
||||
cl.cur_done = 0;
|
||||
cl.cur_remaining = OSD_OP_PACKET_SIZE;
|
||||
}
|
||||
struct io_uring_sqe* sqe = ringloop->get_sqe();
|
||||
io_uring_sqe* sqe = ringloop->get_sqe();
|
||||
if (!sqe)
|
||||
{
|
||||
ready_clients.erase(ready_clients.begin(), ready_clients().begin() + i);
|
||||
read_ready_clients.erase(read_ready_clients.begin(), read_ready_clients().begin() + i);
|
||||
ringloop->submit();
|
||||
return;
|
||||
}
|
||||
struct ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||
cl.iov.iov_base = cl.cur_buf;
|
||||
cl.iov.iov_len = cl.cur_remaining;
|
||||
cl.msg.msg_iov = &cl.iov;
|
||||
cl.msg.msg_iovlen = 1;
|
||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||
if (!cl.read_buf)
|
||||
{
|
||||
// no reads in progress, so this is probably a new command
|
||||
cl.read_op = new osd_op_t;
|
||||
cl.read_buf = &cl.read_op->op_buf;
|
||||
cl.read_remaining = OSD_OP_PACKET_SIZE;
|
||||
cl.read_state = CL_READ_OP;
|
||||
}
|
||||
cl.read_iov.iov_base = cl.read_buf;
|
||||
cl.read_iov.iov_len = cl.read_remaining;
|
||||
cl.read_msg.msg_iov = &cl.read_iov;
|
||||
cl.read_msg.msg_iovlen = 1;
|
||||
data->callback = [this, peer_fd](ring_data_t *data) { handle_read(data, peer_fd); };
|
||||
my_uring_prep_recvmsg(sqe, peer_fd, &cl.msg, 0);
|
||||
ringloop->submit();
|
||||
my_uring_prep_recvmsg(sqe, peer_fd, &cl.read_msg, 0);
|
||||
cl.reading = true;
|
||||
cl.ready = false;
|
||||
cl.read_ready = false;
|
||||
}
|
||||
ready_clients.clear();
|
||||
read_ready_clients.clear();
|
||||
ringloop->submit();
|
||||
}
|
||||
|
||||
void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
||||
|
@ -290,21 +313,20 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
|||
return;
|
||||
}
|
||||
cl->reading = false;
|
||||
if (cl->ready)
|
||||
if (cl->read_ready)
|
||||
{
|
||||
ready_clients.push_back(peer_fd);
|
||||
read_ready_clients.push_back(peer_fd);
|
||||
}
|
||||
if (data->res > 0)
|
||||
{
|
||||
cl->cur_done += data->res;
|
||||
cl->cur_remaining -= data->res;
|
||||
cl->cur_buf += data->res;
|
||||
if (cl->cur_remaining <= 0)
|
||||
cl->read_remaining -= data->res;
|
||||
cl->read_buf += data->res;
|
||||
if (cl->read_remaining <= 0)
|
||||
{
|
||||
cl->cur_buf = NULL;
|
||||
if (cl->read_state == CL_READ_COMMAND)
|
||||
cl->read_buf = NULL;
|
||||
if (cl->read_state == CL_READ_OP)
|
||||
{
|
||||
osd_op_t *cur_op = cl->cur_op;
|
||||
osd_op_t *cur_op = cl->read_op;
|
||||
if (cur_op->op.hdr.opcode == OSD_OP_SECONDARY_READ ||
|
||||
cur_op->op.hdr.opcode == OSD_OP_SECONDARY_WRITE ||
|
||||
cur_op->op.hdr.opcode == OSD_OP_SECONDARY_STABILIZE)
|
||||
|
@ -316,24 +338,25 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
|||
cur_op->op.hdr.opcode == OSD_OP_SECONDARY_STABILIZE)
|
||||
{
|
||||
// Read data
|
||||
cl->cur_buf = cur_op->buf;
|
||||
cl->cur_done = 0;
|
||||
cl->cur_remaining = cur_op->op.sec_rw.len;
|
||||
cl->read_buf = cur_op->buf;
|
||||
cl->read_remaining = cur_op->op.sec_rw.len;
|
||||
cl->read_state = CL_READ_DATA;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Command is ready
|
||||
cur_op->peer_fd = peer_fd;
|
||||
enqueue_op(cur_op);
|
||||
cl->cur_op = NULL;
|
||||
cl->read_op = NULL;
|
||||
cl->read_state = 0;
|
||||
}
|
||||
}
|
||||
else if (cl->read_state == CL_READ_DATA)
|
||||
{
|
||||
// Command is ready
|
||||
cur_op->peer_fd = peer_fd;
|
||||
enqueue_op(cur_op);
|
||||
cl->cur_op = NULL;
|
||||
cl->read_op = NULL;
|
||||
cl->read_state = 0;
|
||||
}
|
||||
}
|
||||
|
@ -341,14 +364,18 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
|||
}
|
||||
}
|
||||
|
||||
void osd_t::enqueue_op(int peer_fd, osd_op_t *cur_op)
|
||||
void osd_t::enqueue_op(osd_op_t *cur_op)
|
||||
{
|
||||
cur_op->bs_op->callback = [this, peer_fd, cur_op](blockstore_operation* bs_op)
|
||||
cur_op->bs_op->callback = [this, cur_op](blockstore_operation* bs_op)
|
||||
{
|
||||
auto cl = clients.find(peer_fd);
|
||||
auto cl = clients.find(cur_op->peer_fd);
|
||||
if (cl != clients.end())
|
||||
{
|
||||
cl->replies.push(cur_op);
|
||||
cur_op->reply.hdr.magic = SECONDARY_OSD_REPLY_MAGIC;
|
||||
cur_op->reply.hdr.id = cur_op->op.hdr.id;
|
||||
cur_op->reply.hdr.retval = bs_op->retval;
|
||||
cl->completions.push(cur_op);
|
||||
ringloop->wakeup();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -367,6 +394,7 @@ void osd_t::enqueue_op(int peer_fd, osd_op_t *cur_op)
|
|||
cur_op->bs_op->callback();
|
||||
return;
|
||||
}
|
||||
// FIXME: list op is not a blockstore op yet
|
||||
cur_op->bs_op->flags = (cur_op->op->hdr.opcode == OSD_OP_SECONDARY_READ ? OP_READ
|
||||
: (cur_op->op->hdr.opcode == OSD_OP_SECONDARY_WRITE ? OP_WRITE
|
||||
: (cur_op->op->hdr.opcode == OSD_OP_SECONDARY_SYNC ? OP_SYNC
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#define SECONDARY_OSD_REPLY_MAGIC 0xd17a57243b580b99baa699b87b434553
|
||||
// Operation request headers and operation reply headers have fixed size after which comes data
|
||||
#define OSD_OP_PACKET_SIZE 0x80
|
||||
#define OSD_REPLY_PACKET_SIZE 0x80
|
||||
#define OSD_REPLY_PACKET_SIZE 0x40
|
||||
// Opcodes
|
||||
#define OSD_OP_MIN 0x01
|
||||
#define OSD_OP_SECONDARY_READ 0x01
|
||||
|
@ -130,6 +130,7 @@ union osd_any_op_t
|
|||
|
||||
union osd_any_reply_t
|
||||
{
|
||||
osd_reply_header_t hdr;
|
||||
osd_reply_secondary_rw_t sec_rw;
|
||||
osd_reply_secondary_del_t sec_del;
|
||||
osd_reply_secondary_sync_t sec_sync;
|
||||
|
|
|
@ -27,7 +27,7 @@ int ring_loop_t::register_consumer(ring_consumer_t & consumer)
|
|||
return consumer.number;
|
||||
}
|
||||
|
||||
void ring_loop_t::wakeup(ring_consumer_t & consumer)
|
||||
void ring_loop_t::wakeup()
|
||||
{
|
||||
loop_again = true;
|
||||
}
|
||||
|
|
|
@ -136,7 +136,7 @@ public:
|
|||
return sqe;
|
||||
}
|
||||
int register_consumer(ring_consumer_t & consumer);
|
||||
void wakeup(ring_consumer_t & consumer);
|
||||
void wakeup();
|
||||
void unregister_consumer(ring_consumer_t & consumer);
|
||||
void loop();
|
||||
inline int submit()
|
||||
|
|
Loading…
Reference in New Issue