From fd05e13bc4e7c400e020e44fe774e602f1b35677 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sat, 29 Feb 2020 01:46:12 +0300 Subject: [PATCH] Use EPOLLET Its latency is slightly better, too --- osd.cpp | 63 ++++++++++++++++++++++++------------------------- osd.h | 5 ++-- osd_peering.cpp | 4 ++-- osd_receive.cpp | 17 ++++++------- 4 files changed, 44 insertions(+), 45 deletions(-) diff --git a/osd.cpp b/osd.cpp index f2169ac67..d8a0b5f43 100644 --- a/osd.cpp +++ b/osd.cpp @@ -113,8 +113,7 @@ osd_t::osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringlo epoll_event ev; ev.data.fd = listen_fd; - // FIXME: Use EPOLLET - ev.events = EPOLLIN; + ev.events = EPOLLIN | EPOLLET; if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, listen_fd, &ev) < 0) { close(listen_fd); @@ -168,40 +167,23 @@ bool osd_t::shutdown() void osd_t::loop() { - if (wait_state == 0) - { - io_uring_sqe *sqe = ringloop->get_sqe(); - if (!sqe) - { - wait_state = 0; - return; - } - ring_data_t *data = ((ring_data_t*)sqe->user_data); - my_uring_prep_poll_add(sqe, epoll_fd, POLLIN); - data->callback = [&](ring_data_t *data) - { - if (data->res < 0) - { - throw std::runtime_error(std::string("epoll failed: ") + strerror(-data->res)); - } - handle_epoll_events(); - }; - wait_state = 1; - } - else if (wait_state == 2) + if (!wait_state) { handle_epoll_events(); + wait_state = 1; } handle_peers(); - send_replies(); read_requests(); + send_replies(); ringloop->submit(); } -int osd_t::handle_epoll_events() +void osd_t::handle_epoll_events() { + int nfds; epoll_event events[MAX_EPOLL_EVENTS]; - int nfds = epoll_wait(epoll_fd, events, MAX_EPOLL_EVENTS, 0); +restart: + nfds = epoll_wait(epoll_fd, events, MAX_EPOLL_EVENTS, 0); for (int i = 0; i < nfds; i++) { if (events[i].data.fd == listen_fd) @@ -226,7 +208,7 @@ int osd_t::handle_epoll_events() // Add FD to epoll epoll_event ev; ev.data.fd = peer_fd; - ev.events = EPOLLIN | EPOLLRDHUP; + ev.events = EPOLLIN | EPOLLET | EPOLLRDHUP; if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, peer_fd, &ev) < 0) { throw std::runtime_error(std::string("epoll_ctl: ") + strerror(errno)); @@ -253,11 +235,11 @@ int osd_t::handle_epoll_events() printf("osd: client %d disconnected\n", cl.peer_fd); stop_client(cl.peer_fd); } - else if (!cl.read_ready) + else { // Mark client as ready (i.e. some data is available) - cl.read_ready = true; - if (!cl.reading) + cl.read_ready++; + if (cl.read_ready == 1) { read_ready_clients.push_back(cl.peer_fd); ringloop->wakeup(); @@ -265,8 +247,25 @@ int osd_t::handle_epoll_events() } } } - wait_state = nfds == MAX_EPOLL_EVENTS ? 2 : 0; - return nfds; + if (nfds == MAX_EPOLL_EVENTS) + { + goto restart; + } + io_uring_sqe *sqe = ringloop->get_sqe(); + if (!sqe) + { + throw std::runtime_error("can't get SQE, will fall out of sync with EPOLLET"); + } + ring_data_t *data = ((ring_data_t*)sqe->user_data); + my_uring_prep_poll_add(sqe, epoll_fd, POLLIN); + data->callback = [this](ring_data_t *data) + { + if (data->res < 0) + { + throw std::runtime_error(std::string("epoll failed: ") + strerror(-data->res)); + } + handle_epoll_events(); + }; } void osd_t::cancel_osd_ops(osd_client_t & cl) diff --git a/osd.h b/osd.h index bc81fc5f1..858b1d1e0 100644 --- a/osd.h +++ b/osd.h @@ -131,8 +131,7 @@ struct osd_client_t osd_num_t osd_num = 0; // Read state - bool read_ready = false; - bool reading = false; + int read_ready = 0; osd_op_t *read_op = NULL; int read_reply_id = 0; iovec read_iov; @@ -221,7 +220,7 @@ class osd_t // event loop, socket read/write void loop(); - int handle_epoll_events(); + void handle_epoll_events(); void read_requests(); void handle_read(ring_data_t *data, int peer_fd); void handle_op_hdr(osd_client_t *cl); diff --git a/osd_peering.cpp b/osd_peering.cpp index 537d02bc4..57b5aee7f 100644 --- a/osd_peering.cpp +++ b/osd_peering.cpp @@ -83,7 +83,7 @@ void osd_t::connect_peer(osd_num_t osd_num, const char *peer_host, int peer_port // Add FD to epoll (EPOLLOUT for tracking connect() result) epoll_event ev; ev.data.fd = peer_fd; - ev.events = EPOLLOUT | EPOLLIN | EPOLLRDHUP; + ev.events = EPOLLOUT | EPOLLIN | EPOLLRDHUP | EPOLLET; if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, peer_fd, &ev) < 0) { throw std::runtime_error(std::string("epoll_ctl: ") + strerror(errno)); @@ -114,7 +114,7 @@ void osd_t::handle_connect_result(int peer_fd) cl.peer_state = PEER_CONNECTED; epoll_event ev; ev.data.fd = peer_fd; - ev.events = EPOLLIN | EPOLLRDHUP; + ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, peer_fd, &ev) < 0) { throw std::runtime_error(std::string("epoll_ctl: ") + strerror(errno)); diff --git a/osd_receive.cpp b/osd_receive.cpp index 4aac634b8..9d5157a9a 100644 --- a/osd_receive.cpp +++ b/osd_receive.cpp @@ -33,8 +33,6 @@ void osd_t::read_requests() cl.read_msg.msg_iovlen = 1; data->callback = [this, peer_fd](ring_data_t *data) { handle_read(data, peer_fd); }; my_uring_prep_recvmsg(sqe, peer_fd, &cl.read_msg, 0); - cl.reading = true; - cl.read_ready = false; } read_ready_clients.clear(); } @@ -45,18 +43,21 @@ void osd_t::handle_read(ring_data_t *data, int peer_fd) if (cl_it != clients.end()) { auto & cl = cl_it->second; - if (data->res < 0 && data->res != -EAGAIN) + if (data->res == -EAGAIN) + { + cl.read_ready--; + if (cl.read_ready > 0) + read_ready_clients.push_back(peer_fd); + return; + } + else if (data->res < 0) { // this is a client socket, so don't panic. just disconnect it printf("Client %d socket read error: %d (%s). Disconnecting client\n", peer_fd, -data->res, strerror(-data->res)); stop_client(peer_fd); return; } - cl.reading = false; - if (cl.read_ready) - { - read_ready_clients.push_back(peer_fd); - } + read_ready_clients.push_back(peer_fd); if (data->res > 0) { cl.read_remaining -= data->res;