forked from vitalif/vitastor
Incoming data pre-buffering
parent
b27ad550cf
commit
8315407558
2
osd.cpp
2
osd.cpp
|
@ -220,6 +220,7 @@ restart:
|
||||||
.peer_port = ntohs(addr.sin_port),
|
.peer_port = ntohs(addr.sin_port),
|
||||||
.peer_fd = peer_fd,
|
.peer_fd = peer_fd,
|
||||||
.peer_state = PEER_CONNECTED,
|
.peer_state = PEER_CONNECTED,
|
||||||
|
.in_buf = malloc(receive_buffer_size),
|
||||||
};
|
};
|
||||||
// Add FD to epoll
|
// Add FD to epoll
|
||||||
epoll_event ev;
|
epoll_event ev;
|
||||||
|
@ -344,6 +345,7 @@ void osd_t::stop_client(int peer_fd)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
free(cl.in_buf);
|
||||||
clients.erase(it);
|
clients.erase(it);
|
||||||
close(peer_fd);
|
close(peer_fd);
|
||||||
}
|
}
|
||||||
|
|
6
osd.h
6
osd.h
|
@ -22,7 +22,7 @@
|
||||||
#define OSD_OP_IN 0
|
#define OSD_OP_IN 0
|
||||||
#define OSD_OP_OUT 1
|
#define OSD_OP_OUT 1
|
||||||
|
|
||||||
#define CL_READ_OP 1
|
#define CL_READ_HDR 1
|
||||||
#define CL_READ_DATA 2
|
#define CL_READ_DATA 2
|
||||||
#define CL_READ_REPLY_DATA 3
|
#define CL_READ_REPLY_DATA 3
|
||||||
#define CL_WRITE_READY 1
|
#define CL_WRITE_READY 1
|
||||||
|
@ -125,6 +125,8 @@ struct osd_client_t
|
||||||
std::function<void(osd_num_t, int)> connect_callback;
|
std::function<void(osd_num_t, int)> connect_callback;
|
||||||
osd_num_t osd_num = 0;
|
osd_num_t osd_num = 0;
|
||||||
|
|
||||||
|
void *in_buf = NULL;
|
||||||
|
|
||||||
// Read state
|
// Read state
|
||||||
int read_ready = 0;
|
int read_ready = 0;
|
||||||
osd_op_t *read_op = NULL;
|
osd_op_t *read_op = NULL;
|
||||||
|
@ -170,6 +172,7 @@ class osd_t
|
||||||
int bind_port, listen_backlog;
|
int bind_port, listen_backlog;
|
||||||
int client_queue_depth = 128;
|
int client_queue_depth = 128;
|
||||||
bool allow_test_ops = true;
|
bool allow_test_ops = true;
|
||||||
|
int receive_buffer_size = 9000;
|
||||||
|
|
||||||
// peer OSDs
|
// peer OSDs
|
||||||
|
|
||||||
|
@ -215,6 +218,7 @@ class osd_t
|
||||||
void handle_epoll_events();
|
void handle_epoll_events();
|
||||||
void read_requests();
|
void read_requests();
|
||||||
void handle_read(ring_data_t *data, int peer_fd);
|
void handle_read(ring_data_t *data, int peer_fd);
|
||||||
|
void handle_finished_read(osd_client_t & cl);
|
||||||
void handle_op_hdr(osd_client_t *cl);
|
void handle_op_hdr(osd_client_t *cl);
|
||||||
void handle_reply_hdr(osd_client_t *cl);
|
void handle_reply_hdr(osd_client_t *cl);
|
||||||
bool try_send(osd_client_t & cl);
|
bool try_send(osd_client_t & cl);
|
||||||
|
|
|
@ -84,6 +84,7 @@ void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port
|
||||||
.peer_state = PEER_CONNECTING,
|
.peer_state = PEER_CONNECTING,
|
||||||
.connect_callback = callback,
|
.connect_callback = callback,
|
||||||
.osd_num = osd_num,
|
.osd_num = osd_num,
|
||||||
|
.in_buf = malloc(receive_buffer_size),
|
||||||
};
|
};
|
||||||
osd_peer_fds[osd_num] = peer_fd;
|
osd_peer_fds[osd_num] = peer_fd;
|
||||||
// Add FD to epoll (EPOLLOUT for tracking connect() result)
|
// Add FD to epoll (EPOLLOUT for tracking connect() result)
|
||||||
|
|
|
@ -13,22 +13,16 @@ void osd_t::read_requests()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||||
if (!cl.read_buf)
|
if (!cl.read_op || cl.read_remaining < receive_buffer_size)
|
||||||
{
|
{
|
||||||
// no reads in progress
|
cl.read_iov.iov_base = cl.in_buf;
|
||||||
// so this is either a new command or a reply to a previously sent command
|
cl.read_iov.iov_len = receive_buffer_size;
|
||||||
if (!cl.read_op)
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
cl.read_op = new osd_op_t;
|
|
||||||
cl.read_op->peer_fd = peer_fd;
|
|
||||||
}
|
|
||||||
cl.read_op->op_type = OSD_OP_IN;
|
|
||||||
cl.read_buf = &cl.read_op->req.buf;
|
|
||||||
cl.read_remaining = OSD_PACKET_SIZE;
|
|
||||||
cl.read_state = CL_READ_OP;
|
|
||||||
}
|
|
||||||
cl.read_iov.iov_base = cl.read_buf;
|
cl.read_iov.iov_base = cl.read_buf;
|
||||||
cl.read_iov.iov_len = cl.read_remaining;
|
cl.read_iov.iov_len = cl.read_remaining;
|
||||||
|
}
|
||||||
cl.read_msg.msg_iov = &cl.read_iov;
|
cl.read_msg.msg_iov = &cl.read_iov;
|
||||||
cl.read_msg.msg_iovlen = 1;
|
cl.read_msg.msg_iovlen = 1;
|
||||||
data->callback = [this, peer_fd](ring_data_t *data) { handle_read(data, peer_fd); };
|
data->callback = [this, peer_fd](ring_data_t *data) { handle_read(data, peer_fd); };
|
||||||
|
@ -60,22 +54,65 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
||||||
read_ready_clients.push_back(peer_fd);
|
read_ready_clients.push_back(peer_fd);
|
||||||
if (data->res > 0)
|
if (data->res > 0)
|
||||||
{
|
{
|
||||||
|
if (cl.read_iov.iov_base == cl.in_buf)
|
||||||
|
{
|
||||||
|
// Compose operation(s) from the buffer
|
||||||
|
int remain = data->res;
|
||||||
|
void *curbuf = cl.in_buf;
|
||||||
|
while (remain > 0)
|
||||||
|
{
|
||||||
|
if (!cl.read_op)
|
||||||
|
{
|
||||||
|
cl.read_op = new osd_op_t;
|
||||||
|
cl.read_op->peer_fd = peer_fd;
|
||||||
|
cl.read_op->op_type = OSD_OP_IN;
|
||||||
|
cl.read_buf = cl.read_op->req.buf;
|
||||||
|
cl.read_remaining = OSD_PACKET_SIZE;
|
||||||
|
cl.read_state = CL_READ_HDR;
|
||||||
|
}
|
||||||
|
if (cl.read_remaining > remain)
|
||||||
|
{
|
||||||
|
memcpy(cl.read_buf, curbuf, remain);
|
||||||
|
cl.read_remaining -= remain;
|
||||||
|
cl.read_buf += remain;
|
||||||
|
remain = 0;
|
||||||
|
if (cl.read_remaining <= 0)
|
||||||
|
handle_finished_read(cl);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
memcpy(cl.read_buf, curbuf, cl.read_remaining);
|
||||||
|
curbuf += cl.read_remaining;
|
||||||
|
remain -= cl.read_remaining;
|
||||||
|
cl.read_remaining = 0;
|
||||||
|
cl.read_buf = NULL;
|
||||||
|
handle_finished_read(cl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Long data
|
||||||
cl.read_remaining -= data->res;
|
cl.read_remaining -= data->res;
|
||||||
cl.read_buf += data->res;
|
cl.read_buf += data->res;
|
||||||
if (cl.read_remaining <= 0)
|
if (cl.read_remaining <= 0)
|
||||||
{
|
{
|
||||||
cl.read_buf = NULL;
|
handle_finished_read(cl);
|
||||||
if (cl.read_state == CL_READ_OP)
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void osd_t::handle_finished_read(osd_client_t & cl)
|
||||||
|
{
|
||||||
|
if (cl.read_state == CL_READ_HDR)
|
||||||
{
|
{
|
||||||
if (cl.read_op->req.hdr.magic == SECONDARY_OSD_REPLY_MAGIC)
|
if (cl.read_op->req.hdr.magic == SECONDARY_OSD_REPLY_MAGIC)
|
||||||
{
|
|
||||||
handle_reply_hdr(&cl);
|
handle_reply_hdr(&cl);
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
|
||||||
handle_op_hdr(&cl);
|
handle_op_hdr(&cl);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else if (cl.read_state == CL_READ_DATA)
|
else if (cl.read_state == CL_READ_DATA)
|
||||||
{
|
{
|
||||||
// Operation is ready
|
// Operation is ready
|
||||||
|
@ -90,6 +127,7 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
||||||
osd_op_t *request = req_it->second;
|
osd_op_t *request = req_it->second;
|
||||||
cl.sent_ops.erase(req_it);
|
cl.sent_ops.erase(req_it);
|
||||||
cl.read_reply_id = 0;
|
cl.read_reply_id = 0;
|
||||||
|
cl.read_op = NULL;
|
||||||
cl.read_state = 0;
|
cl.read_state = 0;
|
||||||
// Measure subop latency
|
// Measure subop latency
|
||||||
timespec tv_end;
|
timespec tv_end;
|
||||||
|
@ -101,9 +139,6 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd)
|
||||||
);
|
);
|
||||||
request->callback(request);
|
request->callback(request);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void osd_t::handle_op_hdr(osd_client_t *cl)
|
void osd_t::handle_op_hdr(osd_client_t *cl)
|
||||||
|
@ -190,6 +225,7 @@ void osd_t::handle_reply_hdr(osd_client_t *cl)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cl->read_state = 0;
|
cl->read_state = 0;
|
||||||
|
cl->read_op = NULL;
|
||||||
cl->sent_ops.erase(req_it);
|
cl->sent_ops.erase(req_it);
|
||||||
// Measure subop latency
|
// Measure subop latency
|
||||||
timespec tv_end;
|
timespec tv_end;
|
||||||
|
|
Loading…
Reference in New Issue