Fix the "can't get SQE, will fall out of sync with EPOLLET" crash when the io_uring ring overflows
OSDs should no longer crash or hang when loaded with very long iodepths
parent
2ccb75974b
commit
23ea409081
|
@ -357,9 +357,6 @@ and calculate disk offsets almost by hand. This will be fixed in near future.
|
||||||
|
|
||||||
## Known Problems
|
## Known Problems
|
||||||
|
|
||||||
- OSDs may currently crash with "can't get SQE, will fall out of sync with EPOLLET"
|
|
||||||
if you try to load them with very long iodepths, because the io_uring queue (ring) has a limited size
|
|
||||||
and OSDs don't check if it fills up.
|
|
||||||
- Object deletion requests may currently lead to 'incomplete' objects if your OSDs crash during
|
- Object deletion requests may currently lead to 'incomplete' objects if your OSDs crash during
|
||||||
deletion because proper handling of object cleanup in a cluster should be "three-phase"
|
deletion because proper handling of object cleanup in a cluster should be "three-phase"
|
||||||
and it's currently not implemented. Inode removal tool currently can't handle unclean
|
and it's currently not implemented. Inode removal tool currently can't handle unclean
|
||||||
|
|
|
@ -122,9 +122,6 @@ bool osd_messenger_t::try_send(osd_client_t *cl)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
cl->write_msg.msg_iov = cl->send_list.data();
|
|
||||||
cl->write_msg.msg_iovlen = cl->send_list.size();
|
|
||||||
cl->refs++;
|
|
||||||
if (ringloop && !use_sync_send_recv)
|
if (ringloop && !use_sync_send_recv)
|
||||||
{
|
{
|
||||||
io_uring_sqe* sqe = ringloop->get_sqe();
|
io_uring_sqe* sqe = ringloop->get_sqe();
|
||||||
|
@ -132,12 +129,18 @@ bool osd_messenger_t::try_send(osd_client_t *cl)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
cl->write_msg.msg_iov = cl->send_list.data();
|
||||||
|
cl->write_msg.msg_iovlen = cl->send_list.size();
|
||||||
|
cl->refs++;
|
||||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||||
data->callback = [this, cl](ring_data_t *data) { handle_send(data->res, cl); };
|
data->callback = [this, cl](ring_data_t *data) { handle_send(data->res, cl); };
|
||||||
my_uring_prep_sendmsg(sqe, peer_fd, &cl->write_msg, 0);
|
my_uring_prep_sendmsg(sqe, peer_fd, &cl->write_msg, 0);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
cl->write_msg.msg_iov = cl->send_list.data();
|
||||||
|
cl->write_msg.msg_iovlen = cl->send_list.size();
|
||||||
|
cl->refs++;
|
||||||
int result = sendmsg(peer_fd, &cl->write_msg, MSG_NOSIGNAL);
|
int result = sendmsg(peer_fd, &cl->write_msg, MSG_NOSIGNAL);
|
||||||
if (result < 0)
|
if (result < 0)
|
||||||
{
|
{
|
||||||
|
|
12
ringloop.cpp
12
ringloop.cpp
|
@ -66,9 +66,17 @@ void ring_loop_t::loop()
|
||||||
struct ring_data_t *d = (struct ring_data_t*)cqe->user_data;
|
struct ring_data_t *d = (struct ring_data_t*)cqe->user_data;
|
||||||
if (d->callback)
|
if (d->callback)
|
||||||
{
|
{
|
||||||
d->res = cqe->res;
|
// First free ring_data item, then call the callback
|
||||||
d->callback(d);
|
// so it has at least 1 free slot for the next event
|
||||||
|
// which is required for EPOLLET to function properly
|
||||||
|
struct ring_data_t dl;
|
||||||
|
dl.iov = d->iov;
|
||||||
|
dl.res = cqe->res;
|
||||||
|
dl.callback.swap(d->callback);
|
||||||
|
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
||||||
|
dl.callback(&dl);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
||||||
io_uring_cqe_seen(&ring, cqe);
|
io_uring_cqe_seen(&ring, cqe);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue