Compare commits
1 Commits
separate-d
...
test-zctr
Author | SHA1 | Date | |
---|---|---|---|
83939f5a22 |
@@ -42,6 +42,7 @@ struct op_buf_t
|
||||
struct sec_data
|
||||
{
|
||||
int connect_fd;
|
||||
int data_fd;
|
||||
/* block_size = 1 << block_order (128KB by default) */
|
||||
uint64_t block_order = 17, block_size = 1 << 17;
|
||||
std::unordered_map<uint64_t, op_buf_t*> queue;
|
||||
@@ -157,6 +158,7 @@ static void sec_cleanup(struct thread_data *td)
|
||||
sec_data *bsd = (sec_data*)td->io_ops_data;
|
||||
if (bsd)
|
||||
{
|
||||
close(bsd->data_fd);
|
||||
close(bsd->connect_fd);
|
||||
delete bsd;
|
||||
}
|
||||
@@ -170,20 +172,20 @@ static int sec_init(struct thread_data *td)
|
||||
bsd->block_order = o->block_order == 0 ? 17 : o->block_order;
|
||||
bsd->block_size = 1 << o->block_order;
|
||||
|
||||
sockaddr addr;
|
||||
if (!string_to_addr(std::string(o->host ? o->host : "127.0.0.1"), false, o->port > 0 ? o->port : 11203, &addr))
|
||||
struct sockaddr_storage addr = { 0 };
|
||||
if (!string_to_addr(o->host ? o->host : "127.0.0.1", false, o->port > 0 ? o->port : 11203, (struct sockaddr*)&addr))
|
||||
{
|
||||
fprintf(stderr, "server address: %s is not valid\n", o->host ? o->host : "127.0.0.1");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bsd->connect_fd = socket(addr.sa_family, SOCK_STREAM, 0);
|
||||
bsd->connect_fd = socket(addr.ss_family, SOCK_STREAM, 0);
|
||||
if (bsd->connect_fd < 0)
|
||||
{
|
||||
perror("socket");
|
||||
return 1;
|
||||
}
|
||||
if (connect(bsd->connect_fd, (sockaddr*)&addr, sizeof(addr)) < 0)
|
||||
if (connect(bsd->connect_fd, (sockaddr*)&addr, addr.ss_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)) < 0)
|
||||
{
|
||||
perror("connect");
|
||||
return 1;
|
||||
@@ -199,6 +201,39 @@ static int sec_init(struct thread_data *td)
|
||||
}
|
||||
}
|
||||
|
||||
if (!string_to_addr(o->host ? o->host : "127.0.0.1", false, 1 + (o->port > 0 ? o->port : 11203), (sockaddr*)&addr))
|
||||
{
|
||||
fprintf(stderr, "server address: %s is not valid\n", o->host ? o->host : "127.0.0.1");
|
||||
return 1;
|
||||
}
|
||||
|
||||
bsd->data_fd = socket(addr.ss_family, SOCK_STREAM, 0);
|
||||
if (bsd->data_fd < 0)
|
||||
{
|
||||
perror("socket");
|
||||
return 1;
|
||||
}
|
||||
/* int mss = 4096;
|
||||
if (setsockopt(bsd->data_fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) < 0)
|
||||
{
|
||||
perror("setsockopt TCP_MAXSEG");
|
||||
return 1;
|
||||
}*/
|
||||
if (connect(bsd->data_fd, (sockaddr*)&addr, addr.ss_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)) < 0)
|
||||
{
|
||||
perror("connect");
|
||||
return 1;
|
||||
}
|
||||
setsockopt(bsd->data_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
|
||||
if (o->zerocopy_send)
|
||||
{
|
||||
if (setsockopt(bsd->data_fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one)) < 0)
|
||||
{
|
||||
perror("setsockopt zerocopy");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: read config (block size) from OSD
|
||||
|
||||
return 0;
|
||||
@@ -298,16 +333,18 @@ static enum fio_q_status sec_queue(struct thread_data *td, struct io_u *io)
|
||||
bsd->op_n++;
|
||||
bsd->queue[n] = op_buf;
|
||||
|
||||
iovec iov[2] = { { .iov_base = op.buf, .iov_len = OSD_PACKET_SIZE } };
|
||||
int iovcnt = 1, wtotal = OSD_PACKET_SIZE;
|
||||
if (io->ddir == DDIR_WRITE)
|
||||
{
|
||||
iov[iovcnt++] = { .iov_base = io->xfer_buf, .iov_len = io->xfer_buflen };
|
||||
wtotal += io->xfer_buflen;
|
||||
// It may make you laugh but ZCTR is only stable if we write data before header :-) O_o
|
||||
if (write_blocking(bsd->data_fd, io->xfer_buf, io->xfer_buflen) != io->xfer_buflen)
|
||||
{
|
||||
perror("write");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (sendv_blocking(bsd->connect_fd, iov, iovcnt, opt->zerocopy_send ? MSG_ZEROCOPY : 0) != wtotal)
|
||||
if (write_blocking(bsd->connect_fd, op.buf, OSD_PACKET_SIZE) != OSD_PACKET_SIZE)
|
||||
{
|
||||
perror("sendmsg");
|
||||
perror("write");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -346,23 +383,21 @@ static int sec_getevents(struct thread_data *td, unsigned int min, unsigned int
|
||||
fprintf(stderr, "Short read: retval = %ld instead of %llu\n", reply.hdr.retval, io->xfer_buflen);
|
||||
exit(1);
|
||||
}
|
||||
// Support bitmap
|
||||
uint64_t bitmap = 0;
|
||||
int iovcnt = 0;
|
||||
iovec iov[2];
|
||||
if (reply.sec_rw.attr_len > 0)
|
||||
{
|
||||
if (reply.sec_rw.attr_len <= 8)
|
||||
iov[iovcnt++] = { .iov_base = &bitmap, .iov_len = reply.sec_rw.attr_len };
|
||||
{
|
||||
uint64_t bitmap = 0;
|
||||
read_blocking(bsd->connect_fd, &bitmap, reply.sec_rw.attr_len);
|
||||
}
|
||||
else
|
||||
iov[iovcnt++] = { .iov_base = (void*)(bitmap = (uint64_t)malloc(reply.sec_rw.attr_len)), .iov_len = reply.sec_rw.attr_len };
|
||||
}
|
||||
iov[iovcnt++] = { .iov_base = io->xfer_buf, .iov_len = io->xfer_buflen };
|
||||
readv_blocking(bsd->connect_fd, iov, iovcnt);
|
||||
if (reply.sec_rw.attr_len > 8)
|
||||
{
|
||||
free((void*)bitmap);
|
||||
{
|
||||
void *bitmap = malloc(reply.sec_rw.attr_len);
|
||||
read_blocking(bsd->connect_fd, bitmap, reply.sec_rw.attr_len);
|
||||
free(bitmap);
|
||||
}
|
||||
}
|
||||
read_blocking(bsd->data_fd, io->xfer_buf, io->xfer_buflen);
|
||||
}
|
||||
else if (io->ddir == DDIR_WRITE)
|
||||
{
|
||||
|
@@ -4,12 +4,10 @@
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/epoll.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "base64.h"
|
||||
#include "addr_util.h"
|
||||
#include "messenger.h"
|
||||
|
||||
@@ -196,7 +194,7 @@ void osd_messenger_t::connect_peer(uint64_t peer_osd, json11::Json peer_state)
|
||||
try_connect_peer(peer_osd);
|
||||
}
|
||||
|
||||
void osd_messenger_t::try_connect_peer(osd_num_t peer_osd)
|
||||
void osd_messenger_t::try_connect_peer(uint64_t peer_osd)
|
||||
{
|
||||
auto wp_it = wanted_peers.find(peer_osd);
|
||||
if (wp_it == wanted_peers.end() || wp_it->second.connecting ||
|
||||
@@ -217,75 +215,40 @@ void osd_messenger_t::try_connect_peer(osd_num_t peer_osd)
|
||||
wp.cur_addr = wp.address_list[wp.address_index].string_value();
|
||||
wp.cur_port = wp.port;
|
||||
wp.connecting = true;
|
||||
try_connect_peer_addr(peer_osd, wp.cur_addr.c_str(), wp.cur_port, NULL, [this](osd_num_t peer_osd, int peer_fd)
|
||||
{
|
||||
if (peer_fd >= 0)
|
||||
osd_peer_fds[peer_osd] = peer_fd;
|
||||
on_connect_peer(peer_osd, peer_fd);
|
||||
});
|
||||
try_connect_peer_addr(peer_osd, wp.cur_addr.c_str(), wp.cur_port);
|
||||
}
|
||||
|
||||
static std::string urandom_str(int bytes)
|
||||
{
|
||||
std::string str;
|
||||
str.resize(bytes);
|
||||
char *buf = (char*)str.data();
|
||||
while (bytes > 0)
|
||||
{
|
||||
int r = getrandom(buf, bytes, 0);
|
||||
if (r < 0)
|
||||
throw std::runtime_error(std::string("getrandom: ") + strerror(errno));
|
||||
buf += r;
|
||||
bytes -= r;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
void osd_messenger_t::try_connect_peer_addr(osd_num_t peer_osd, const char *peer_host, int peer_port,
|
||||
osd_client_t *meta_cl, std::function<void(osd_num_t, int)> connect_callback)
|
||||
void osd_messenger_t::try_connect_peer_addr(osd_num_t peer_osd, const char *peer_host, int peer_port)
|
||||
{
|
||||
assert(peer_osd != this->osd_num);
|
||||
struct sockaddr addr;
|
||||
if (!meta_cl)
|
||||
if (!string_to_addr(peer_host, 0, peer_port, &addr))
|
||||
{
|
||||
if (!string_to_addr(peer_host, 0, peer_port, &addr))
|
||||
{
|
||||
connect_callback(peer_osd, -EINVAL);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
addr = meta_cl->peer_addr;
|
||||
on_connect_peer(peer_osd, -EINVAL);
|
||||
return;
|
||||
}
|
||||
int peer_fd = socket(addr.sa_family, SOCK_STREAM, 0);
|
||||
if (peer_fd >= 0)
|
||||
{
|
||||
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||
int r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
||||
if (r < 0 && errno != EINPROGRESS)
|
||||
{
|
||||
close(peer_fd);
|
||||
peer_fd = -1;
|
||||
}
|
||||
}
|
||||
if (peer_fd < 0)
|
||||
{
|
||||
connect_callback(peer_osd, -errno);
|
||||
on_connect_peer(peer_osd, -errno);
|
||||
return;
|
||||
}
|
||||
fcntl(peer_fd, F_SETFL, fcntl(peer_fd, F_GETFL, 0) | O_NONBLOCK);
|
||||
int r = connect(peer_fd, (sockaddr*)&addr, sizeof(addr));
|
||||
if (r < 0 && errno != EINPROGRESS)
|
||||
{
|
||||
close(peer_fd);
|
||||
on_connect_peer(peer_osd, -errno);
|
||||
return;
|
||||
}
|
||||
clients[peer_fd] = new osd_client_t();
|
||||
clients[peer_fd]->peer_addr = addr;
|
||||
clients[peer_fd]->peer_port = ((struct sockaddr_in*)&addr)->sin_port;
|
||||
clients[peer_fd]->peer_port = peer_port;
|
||||
clients[peer_fd]->peer_fd = peer_fd;
|
||||
clients[peer_fd]->peer_state = PEER_CONNECTING;
|
||||
clients[peer_fd]->connect_timeout_id = -1;
|
||||
clients[peer_fd]->connect_callback = connect_callback;
|
||||
clients[peer_fd]->osd_num = peer_osd;
|
||||
clients[peer_fd]->in_buf = malloc_or_die(receive_buffer_size);
|
||||
clients[peer_fd]->data_for = meta_cl ? addr_to_string(meta_cl->peer_addr) : "";
|
||||
clients[peer_fd]->data_connection_cookie = meta_cl
|
||||
? meta_cl->data_connection_cookie : base64_encode(urandom_str(12));
|
||||
tfd->set_fd_handler(peer_fd, true, [this](int peer_fd, int epoll_events)
|
||||
{
|
||||
// Either OUT (connected) or HUP
|
||||
@@ -295,12 +258,10 @@ void osd_messenger_t::try_connect_peer_addr(osd_num_t peer_osd, const char *peer
|
||||
{
|
||||
clients[peer_fd]->connect_timeout_id = tfd->set_timer(1000*peer_connect_timeout, false, [this, peer_fd](int timer_id)
|
||||
{
|
||||
auto cl = clients.at(peer_fd);
|
||||
auto connect_callback = cl->connect_callback;
|
||||
cl->connect_callback = NULL;
|
||||
osd_num_t peer_osd = cl->osd_num;
|
||||
osd_num_t peer_osd = clients.at(peer_fd)->osd_num;
|
||||
stop_client(peer_fd, true);
|
||||
connect_callback(peer_osd, -EPIPE);
|
||||
on_connect_peer(peer_osd, -EPIPE);
|
||||
return;
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -322,10 +283,8 @@ void osd_messenger_t::handle_connect_epoll(int peer_fd)
|
||||
}
|
||||
if (result != 0)
|
||||
{
|
||||
auto connect_callback = cl->connect_callback;
|
||||
cl->connect_callback = NULL;
|
||||
stop_client(peer_fd, true);
|
||||
connect_callback(peer_osd, -result);
|
||||
on_connect_peer(peer_osd, -result);
|
||||
return;
|
||||
}
|
||||
int one = 1;
|
||||
@@ -405,11 +364,6 @@ void osd_messenger_t::on_connect_peer(osd_num_t peer_osd, int peer_fd)
|
||||
|
||||
void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
||||
{
|
||||
json11::Json::object payload;
|
||||
if (cl->data_connection_cookie != "")
|
||||
{
|
||||
payload["data_cookie"] = cl->data_connection_cookie;
|
||||
}
|
||||
osd_op_t *op = new osd_op_t();
|
||||
op->op_type = OSD_OP_OUT;
|
||||
op->peer_fd = cl->peer_fd;
|
||||
@@ -422,33 +376,24 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
||||
},
|
||||
},
|
||||
};
|
||||
if (cl->data_for == "")
|
||||
{
|
||||
#ifdef WITH_RDMA
|
||||
if (rdma_context)
|
||||
if (rdma_context)
|
||||
{
|
||||
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, rdma_max_msg);
|
||||
if (cl->rdma_conn)
|
||||
{
|
||||
cl->rdma_conn = msgr_rdma_connection_t::create(rdma_context, rdma_max_send, rdma_max_recv, rdma_max_sge, rdma_max_msg);
|
||||
if (cl->rdma_conn)
|
||||
{
|
||||
payload["connect_rdma"] = cl->rdma_conn->addr.to_string();
|
||||
payload["rdma_max_msg"] = cl->rdma_conn->max_msg;
|
||||
}
|
||||
json11::Json payload = json11::Json::object {
|
||||
{ "connect_rdma", cl->rdma_conn->addr.to_string() },
|
||||
{ "rdma_max_msg", cl->rdma_conn->max_msg },
|
||||
};
|
||||
std::string payload_str = payload.dump();
|
||||
op->req.show_conf.json_len = payload_str.size();
|
||||
op->buf = malloc_or_die(payload_str.size());
|
||||
op->iov.push_back(op->buf, payload_str.size());
|
||||
memcpy(op->buf, payload_str.c_str(), payload_str.size());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
// Mark it as a data connection
|
||||
payload["data_for"] = cl->data_for;
|
||||
}
|
||||
if (payload.size())
|
||||
{
|
||||
std::string payload_str = json11::Json(payload).dump();
|
||||
op->req.show_conf.json_len = payload_str.size();
|
||||
op->buf = malloc_or_die(payload_str.size());
|
||||
op->iov.push_back(op->buf, payload_str.size());
|
||||
memcpy(op->buf, payload_str.c_str(), payload_str.size());
|
||||
}
|
||||
op->callback = [this, cl](osd_op_t *op)
|
||||
{
|
||||
std::string json_err;
|
||||
@@ -481,30 +426,18 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
||||
cl->osd_num, config["protocol_version"].uint64_value(), OSD_PROTOCOL_VERSION
|
||||
);
|
||||
}
|
||||
else if (cl->data_for != "" && config["data_for"] != cl->data_for)
|
||||
{
|
||||
err = true;
|
||||
fprintf(
|
||||
stderr, "OSD %lu does not support separate data connections."
|
||||
" Proceeding with a single connection\n", cl->osd_num
|
||||
);
|
||||
}
|
||||
}
|
||||
if (err)
|
||||
{
|
||||
osd_num_t peer_osd = cl->osd_num;
|
||||
auto connect_callback = cl->connect_callback;
|
||||
cl->connect_callback = NULL;
|
||||
stop_client(op->peer_fd);
|
||||
connect_callback(peer_osd, -EINVAL);
|
||||
on_connect_peer(peer_osd, -1);
|
||||
delete op;
|
||||
return;
|
||||
}
|
||||
#ifdef WITH_RDMA
|
||||
if (rdma_context && cl->rdma_conn && config["rdma_address"].is_string())
|
||||
if (config["rdma_address"].is_string())
|
||||
{
|
||||
// Prevent creating data connection - we are trying RDMA
|
||||
cl->data_connection_cookie = "";
|
||||
msgr_rdma_address_t addr;
|
||||
if (!msgr_rdma_address_t::from_string(config["rdma_address"].string_value().c_str(), &addr) ||
|
||||
cl->rdma_conn->connect(&addr) != 0)
|
||||
@@ -517,10 +450,8 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
||||
cl->rdma_conn = NULL;
|
||||
// FIXME: Keep TCP connection in this case
|
||||
osd_num_t peer_osd = cl->osd_num;
|
||||
auto connect_callback = cl->connect_callback;
|
||||
cl->connect_callback = NULL;
|
||||
stop_client(cl->peer_fd);
|
||||
connect_callback(peer_osd, -EPIPE);
|
||||
on_connect_peer(peer_osd, -1);
|
||||
delete op;
|
||||
return;
|
||||
}
|
||||
@@ -542,37 +473,8 @@ void osd_messenger_t::check_peer_config(osd_client_t *cl)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (cl->data_connection_cookie != "")
|
||||
{
|
||||
// Try to open second connection to the same address
|
||||
try_connect_peer_addr(cl->osd_num, NULL, 0, cl, [this, peer_fd = cl->peer_fd](osd_num_t data_peer, int data_peer_fd)
|
||||
{
|
||||
auto cl_it = clients.find(peer_fd);
|
||||
if (cl_it != clients.end())
|
||||
{
|
||||
// Proceed with or without the data connection
|
||||
auto cl = cl_it->second;
|
||||
if (data_peer_fd >= 0)
|
||||
{
|
||||
cl->data_connection_fd = data_peer_fd;
|
||||
auto data_cl = clients.at(data_peer_fd);
|
||||
data_cl->meta_connection_fd = cl->peer_fd;
|
||||
}
|
||||
osd_peer_fds[cl->osd_num] = cl->peer_fd;
|
||||
on_connect_peer(cl->osd_num, cl->peer_fd);
|
||||
}
|
||||
else if (data_peer_fd >= 0)
|
||||
{
|
||||
stop_client(data_peer_fd);
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
auto connect_callback = cl->connect_callback;
|
||||
cl->connect_callback = NULL;
|
||||
connect_callback(cl->osd_num, cl->peer_fd);
|
||||
}
|
||||
osd_peer_fds[cl->osd_num] = cl->peer_fd;
|
||||
on_connect_peer(cl->osd_num, cl->peer_fd);
|
||||
delete op;
|
||||
};
|
||||
outbox_push(op);
|
||||
@@ -598,7 +500,6 @@ void osd_messenger_t::accept_connections(int listen_fd)
|
||||
clients[peer_fd]->peer_fd = peer_fd;
|
||||
clients[peer_fd]->peer_state = PEER_CONNECTED;
|
||||
clients[peer_fd]->in_buf = malloc_or_die(receive_buffer_size);
|
||||
clients_by_addr[addr_to_string(addr)] = peer_fd;
|
||||
// Add FD to epoll
|
||||
tfd->set_fd_handler(peer_fd, false, [this](int peer_fd, int epoll_events)
|
||||
{
|
||||
|
@@ -57,10 +57,6 @@ struct osd_client_t
|
||||
int ping_time_remaining = 0;
|
||||
int idle_time_remaining = 0;
|
||||
osd_num_t osd_num = 0;
|
||||
std::function<void(osd_num_t, int)> connect_callback;
|
||||
|
||||
int data_connection_fd = -1, meta_connection_fd = -1;
|
||||
std::string data_connection_cookie, data_for;
|
||||
|
||||
void *in_buf = NULL;
|
||||
|
||||
@@ -152,7 +148,6 @@ public:
|
||||
osd_num_t osd_num;
|
||||
uint64_t next_subop_id = 1;
|
||||
std::map<int, osd_client_t*> clients;
|
||||
std::map<std::string, int> clients_by_addr;
|
||||
std::map<osd_num_t, osd_wanted_peer_t> wanted_peers;
|
||||
std::map<uint64_t, int> osd_peer_fds;
|
||||
// op statistics
|
||||
@@ -162,7 +157,6 @@ public:
|
||||
void parse_config(const json11::Json & config);
|
||||
void connect_peer(uint64_t osd_num, json11::Json peer_state);
|
||||
void stop_client(int peer_fd, bool force = false, bool force_delete = false);
|
||||
void break_data_client_pair(osd_client_t *cl);
|
||||
void outbox_push(osd_op_t *cur_op);
|
||||
std::function<void(osd_op_t*)> exec_op;
|
||||
std::function<void(osd_num_t)> repeer_pgs;
|
||||
@@ -180,8 +174,7 @@ public:
|
||||
|
||||
protected:
|
||||
void try_connect_peer(uint64_t osd_num);
|
||||
void try_connect_peer_addr(osd_num_t peer_osd, const char *peer_host, int peer_port,
|
||||
osd_client_t *meta_cl, std::function<void(osd_num_t, int)> connect_callback);
|
||||
void try_connect_peer_addr(osd_num_t peer_osd, const char *peer_host, int peer_port);
|
||||
void handle_peer_epoll(int peer_fd, int epoll_events);
|
||||
void handle_connect_epoll(int peer_fd);
|
||||
void on_connect_peer(osd_num_t peer_osd, int peer_fd);
|
||||
|
@@ -4,7 +4,6 @@
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "addr_util.h"
|
||||
#include "messenger.h"
|
||||
|
||||
void osd_messenger_t::cancel_osd_ops(osd_client_t *cl)
|
||||
@@ -59,8 +58,7 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
{
|
||||
if (cl->osd_num)
|
||||
{
|
||||
fprintf(stderr, "[OSD %lu] Stopping client %d (OSD %speer %lu)\n",
|
||||
osd_num, peer_fd, cl->meta_connection_fd >= 0 ? " data" : "", cl->osd_num);
|
||||
fprintf(stderr, "[OSD %lu] Stopping client %d (OSD peer %lu)\n", osd_num, peer_fd, cl->osd_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -70,7 +68,7 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
// First set state to STOPPED so another stop_client() call doesn't try to free it again
|
||||
cl->refs++;
|
||||
cl->peer_state = PEER_STOPPED;
|
||||
if (cl->osd_num && cl->meta_connection_fd < 0)
|
||||
if (cl->osd_num)
|
||||
{
|
||||
// ...and forget OSD peer
|
||||
osd_peer_fds.erase(cl->osd_num);
|
||||
@@ -102,17 +100,9 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
#endif
|
||||
if (cl->osd_num)
|
||||
{
|
||||
if (cl->meta_connection_fd < 0)
|
||||
{
|
||||
// Then repeer PGs because cancel_op() callbacks can try to perform
|
||||
// some actions and we need correct PG states to not do something silly
|
||||
repeer_pgs(cl->osd_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
// FIXME Try to re-establish data connection
|
||||
// Only when the connection is outbound, but here it's always outbound
|
||||
}
|
||||
// Then repeer PGs because cancel_op() callbacks can try to perform
|
||||
// some actions and we need correct PG states to not do something silly
|
||||
repeer_pgs(cl->osd_num);
|
||||
}
|
||||
// Then cancel all operations
|
||||
if (cl->read_op)
|
||||
@@ -138,7 +128,6 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
delete cl->rdma_conn;
|
||||
}
|
||||
#endif
|
||||
clients_by_addr.erase(addr_to_string(cl->peer_addr));
|
||||
#endif
|
||||
// Find the item again because it can be invalidated at this point
|
||||
it = clients.find(peer_fd);
|
||||
@@ -146,40 +135,9 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
|
||||
{
|
||||
clients.erase(it);
|
||||
}
|
||||
// Break metadata/data connection pair
|
||||
if (cl->data_connection_fd >= 0)
|
||||
{
|
||||
// No sense to keep data connection when metadata connection is stopped
|
||||
auto dc_it = clients.find(cl->data_connection_fd);
|
||||
cl->data_connection_fd = -1;
|
||||
if (dc_it != clients.end() && dc_it->second->meta_connection_fd == cl->peer_fd)
|
||||
{
|
||||
stop_client(dc_it->second->peer_fd);
|
||||
}
|
||||
}
|
||||
break_data_client_pair(cl);
|
||||
// Refcount and delete
|
||||
cl->refs--;
|
||||
if (cl->refs <= 0 || force_delete)
|
||||
{
|
||||
delete cl;
|
||||
}
|
||||
}
|
||||
|
||||
void osd_messenger_t::break_data_client_pair(osd_client_t *cl)
|
||||
{
|
||||
if (cl->meta_connection_fd >= 0)
|
||||
{
|
||||
auto dc_it = clients.find(cl->meta_connection_fd);
|
||||
if (dc_it != clients.end() && dc_it->second->data_connection_fd == cl->peer_fd)
|
||||
dc_it->second->data_connection_fd = -1;
|
||||
cl->meta_connection_fd = -1;
|
||||
}
|
||||
if (cl->data_connection_fd >= 0)
|
||||
{
|
||||
auto dc_it = clients.find(cl->data_connection_fd);
|
||||
if (dc_it != clients.end() && dc_it->second->meta_connection_fd == cl->peer_fd)
|
||||
dc_it->second->meta_connection_fd = -1;
|
||||
cl->data_connection_fd = -1;
|
||||
}
|
||||
}
|
||||
|
@@ -178,37 +178,6 @@ void osd_t::exec_show_config(osd_op_t *cur_op)
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
#endif
|
||||
if (req_json["data_for"].is_string())
|
||||
{
|
||||
auto cli = msgr.clients.at(cur_op->peer_fd);
|
||||
auto md_it = msgr.clients_by_addr.find(req_json["data_for"].string_value());
|
||||
if (md_it != msgr.clients_by_addr.end())
|
||||
{
|
||||
int md_peer_fd = md_it->second;
|
||||
auto md_it = msgr.clients.find(md_peer_fd);
|
||||
if (md_it != msgr.clients.end() && md_it->second->data_connection_cookie != "" &&
|
||||
req_json["data_cookie"].string_value() == md_it->second->data_connection_cookie)
|
||||
{
|
||||
// Break previous metadata/data connections for both FDs, if present
|
||||
msgr.break_data_client_pair(cli);
|
||||
msgr.break_data_client_pair(md_it->second);
|
||||
// And setup the new pair
|
||||
cli->meta_connection_fd = md_it->second->peer_fd;
|
||||
md_it->second->data_connection_fd = cli->peer_fd;
|
||||
wire_config["data_for"] = req_json["data_for"];
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (req_json["data_cookie"].is_string())
|
||||
{
|
||||
auto cli = msgr.clients.at(cur_op->peer_fd);
|
||||
cli->data_connection_cookie = req_json["data_cookie"].string_value();
|
||||
}
|
||||
#ifdef WITH_RDMA
|
||||
}
|
||||
#endif
|
||||
if (cur_op->buf)
|
||||
free(cur_op->buf);
|
||||
|
@@ -24,7 +24,9 @@
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/socket.h>
|
||||
#include <poll.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/tcp.h>
|
||||
#include <arpa/inet.h>
|
||||
@@ -43,19 +45,32 @@
|
||||
|
||||
int bind_stub(std::string bind_address, int bind_port);
|
||||
|
||||
void run_stub(int peer_fd);
|
||||
void run_stub(int peer_fd, int peer_data_fd);
|
||||
|
||||
int main(int narg, char *args[])
|
||||
{
|
||||
int listen_fd = bind_stub("0.0.0.0", 11203);
|
||||
int listen_data_fd = bind_stub("0.0.0.0", 11204);
|
||||
/* int mss = 8192;
|
||||
if (setsockopt(listen_data_fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) < 0)
|
||||
{
|
||||
throw std::runtime_error(std::string("setsockopt TCP_MAXSEG: ") + strerror(errno));
|
||||
}
|
||||
int rcvlowat = 4096;
|
||||
if (setsockopt(listen_data_fd, SOL_SOCKET, SO_RCVLOWAT, &rcvlowat, sizeof(rcvlowat)) < 0)
|
||||
{
|
||||
throw std::runtime_error(std::string("setsockopt SO_RCVLOWAT: ") + strerror(errno));
|
||||
}*/
|
||||
// Accept new connections
|
||||
sockaddr addr;
|
||||
socklen_t peer_addr_size = sizeof(addr);
|
||||
int peer_fd;
|
||||
socklen_t peer_addr_size;
|
||||
int peer_fd, peer_data_fd;
|
||||
const int one = 1;
|
||||
while (1)
|
||||
{
|
||||
printf("stub_osd: waiting for 1 client\n");
|
||||
peer_fd = accept(listen_fd, &addr, &peer_addr_size);
|
||||
peer_addr_size = sizeof(addr);
|
||||
peer_fd = accept(listen_fd, (sockaddr*)&addr, &peer_addr_size);
|
||||
if (peer_fd == -1)
|
||||
{
|
||||
if (errno == EAGAIN)
|
||||
@@ -63,15 +78,27 @@ int main(int narg, char *args[])
|
||||
else
|
||||
throw std::runtime_error(std::string("accept: ") + strerror(errno));
|
||||
}
|
||||
printf("stub_osd: new client %d: connection from %s\n", peer_fd,
|
||||
addr_to_string(addr).c_str());
|
||||
int one = 1;
|
||||
setsockopt(peer_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
|
||||
run_stub(peer_fd);
|
||||
close(peer_fd);
|
||||
printf("stub_osd: client %d disconnected\n", peer_fd);
|
||||
// Try to accept next connection
|
||||
printf("stub_osd: new client %d: connection from %s\n", peer_fd,
|
||||
addr_to_string(*((sockaddr*)&addr)).c_str());
|
||||
printf("stub_osd: waiting for 1 data connection\n");
|
||||
peer_addr_size = sizeof(addr);
|
||||
peer_data_fd = accept(listen_data_fd, (sockaddr*)&addr, &peer_addr_size);
|
||||
if (peer_data_fd == -1)
|
||||
{
|
||||
if (errno == EAGAIN)
|
||||
continue;
|
||||
else
|
||||
throw std::runtime_error(std::string("accept: ") + strerror(errno));
|
||||
}
|
||||
setsockopt(peer_data_fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
|
||||
printf("stub_osd: new client %d: data connection from %s\n", peer_data_fd,
|
||||
addr_to_string(*((sockaddr*)&addr)).c_str());
|
||||
run_stub(peer_fd, peer_data_fd);
|
||||
close(peer_data_fd);
|
||||
close(peer_fd);
|
||||
printf("stub_osd: client %d / data %d disconnected\n", peer_fd, peer_data_fd);
|
||||
// Try to accept next connection
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@@ -80,13 +107,13 @@ int bind_stub(std::string bind_address, int bind_port)
|
||||
{
|
||||
int listen_backlog = 128;
|
||||
|
||||
sockaddr addr;
|
||||
if (!string_to_addr(bind_address, 0, bind_port, &addr))
|
||||
sockaddr_storage addr = { 0 };
|
||||
if (!string_to_addr(bind_address, 0, bind_port, (sockaddr*)&addr))
|
||||
{
|
||||
throw std::runtime_error("bind address "+bind_address+" is not valid");
|
||||
}
|
||||
|
||||
int listen_fd = socket(addr.sa_family, SOCK_STREAM, 0);
|
||||
int listen_fd = socket(addr.ss_family, SOCK_STREAM, 0);
|
||||
if (listen_fd < 0)
|
||||
{
|
||||
throw std::runtime_error(std::string("socket: ") + strerror(errno));
|
||||
@@ -94,7 +121,7 @@ int bind_stub(std::string bind_address, int bind_port)
|
||||
int enable = 1;
|
||||
setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable));
|
||||
|
||||
if (bind(listen_fd, &addr, sizeof(addr)) < 0)
|
||||
if (bind(listen_fd, (sockaddr*)&addr, addr.ss_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6)) < 0)
|
||||
{
|
||||
close(listen_fd);
|
||||
throw std::runtime_error(std::string("bind: ") + strerror(errno));
|
||||
@@ -109,11 +136,16 @@ int bind_stub(std::string bind_address, int bind_port)
|
||||
return listen_fd;
|
||||
}
|
||||
|
||||
void run_stub(int peer_fd)
|
||||
void run_stub(int peer_fd, int peer_data_fd)
|
||||
{
|
||||
osd_any_op_t op;
|
||||
osd_any_reply_t reply = { 0 };
|
||||
void *buf = NULL;
|
||||
unsigned bufsize = 4*1024*1024;
|
||||
void *buf = mmap(NULL, bufsize, PROT_READ, MAP_SHARED, peer_data_fd, 0);
|
||||
if (buf == MAP_FAILED)
|
||||
{
|
||||
throw std::runtime_error(std::string("mmap: ") + strerror(errno));
|
||||
}
|
||||
while (1)
|
||||
{
|
||||
int r = read_blocking(peer_fd, op.buf, OSD_PACKET_SIZE);
|
||||
@@ -132,19 +164,36 @@ void run_stub(int peer_fd)
|
||||
if (op.hdr.opcode == OSD_OP_SEC_READ)
|
||||
{
|
||||
reply.hdr.retval = op.sec_rw.len;
|
||||
buf = malloc(op.sec_rw.len);
|
||||
void *buf = malloc(op.sec_rw.len);
|
||||
r = write_blocking(peer_fd, reply.buf, OSD_PACKET_SIZE);
|
||||
if (r == OSD_PACKET_SIZE)
|
||||
r = write_blocking(peer_fd, buf, op.sec_rw.len);
|
||||
r = write_blocking(peer_data_fd, buf, op.sec_rw.len);
|
||||
free(buf);
|
||||
if (r < op.sec_rw.len)
|
||||
break;
|
||||
}
|
||||
else if (op.hdr.opcode == OSD_OP_SEC_WRITE || op.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
||||
{
|
||||
buf = malloc(op.sec_rw.len);
|
||||
r = read_blocking(peer_fd, buf, op.sec_rw.len);
|
||||
free(buf);
|
||||
struct pollfd pfd = { .fd = peer_data_fd, .events = POLLIN };
|
||||
poll(&pfd, 1, 10000);
|
||||
struct tcp_zerocopy_receive zc = { .address = (uint64_t)buf, .length = op.sec_rw.len };
|
||||
socklen_t zc_len = sizeof(zc);
|
||||
r = getsockopt(peer_data_fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
|
||||
r = r == -1 ? 0 : zc.length;
|
||||
if (r > 0)
|
||||
{
|
||||
uint64_t hash = 0;
|
||||
for (int k = 0; k < r/8; k++)
|
||||
hash ^= ((uint64_t*)buf)[k];
|
||||
printf("ZCTR: op=%lx r=%d len=%d skip=%d hash=%lx\n", op.hdr.id, r, zc.length, zc.recv_skip_hint, hash);
|
||||
}
|
||||
if (r < op.sec_rw.len)
|
||||
{
|
||||
int rest = op.sec_rw.len - r;
|
||||
void *buf = malloc(rest);
|
||||
r += read_blocking(peer_data_fd, buf, rest);
|
||||
free(buf);
|
||||
}
|
||||
reply.hdr.retval = op.sec_rw.len;
|
||||
if (r == op.sec_rw.len)
|
||||
r = write_blocking(peer_fd, reply.buf, OSD_PACKET_SIZE);
|
||||
@@ -166,5 +215,4 @@ void run_stub(int peer_fd)
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(buf);
|
||||
}
|
||||
|
Reference in New Issue
Block a user