forked from vitalif/vitastor
Fix "can't get SQE, will fall out of sync with EPOLLET" when overflowing the ring
OSDs shouldn't crash or hang with long iodepths anymore
parent
2ccb75974b
commit
23ea409081
|
@ -357,9 +357,6 @@ and calculate disk offsets almost by hand. This will be fixed in near future.
|
|||
|
||||
## Known Problems
|
||||
|
||||
- OSDs may currently crash with "can't get SQE, will fall out of sync with EPOLLET"
  if you try to load them with very long iodepths, because the io_uring queue (ring) is limited
  and OSDs don't check whether it fills up.
|
||||
- Object deletion requests may currently lead to 'incomplete' objects if your OSDs crash during
  deletion, because proper handling of object cleanup in a cluster should be "three-phase"
  and it's currently not implemented. The inode removal tool currently can't handle unclean
|
||||
|
|
|
@ -122,9 +122,6 @@ bool osd_messenger_t::try_send(osd_client_t *cl)
|
|||
{
|
||||
return true;
|
||||
}
|
||||
cl->write_msg.msg_iov = cl->send_list.data();
|
||||
cl->write_msg.msg_iovlen = cl->send_list.size();
|
||||
cl->refs++;
|
||||
if (ringloop && !use_sync_send_recv)
|
||||
{
|
||||
io_uring_sqe* sqe = ringloop->get_sqe();
|
||||
|
@ -132,12 +129,18 @@ bool osd_messenger_t::try_send(osd_client_t *cl)
|
|||
{
|
||||
return false;
|
||||
}
|
||||
cl->write_msg.msg_iov = cl->send_list.data();
|
||||
cl->write_msg.msg_iovlen = cl->send_list.size();
|
||||
cl->refs++;
|
||||
ring_data_t* data = ((ring_data_t*)sqe->user_data);
|
||||
data->callback = [this, cl](ring_data_t *data) { handle_send(data->res, cl); };
|
||||
my_uring_prep_sendmsg(sqe, peer_fd, &cl->write_msg, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
cl->write_msg.msg_iov = cl->send_list.data();
|
||||
cl->write_msg.msg_iovlen = cl->send_list.size();
|
||||
cl->refs++;
|
||||
int result = sendmsg(peer_fd, &cl->write_msg, MSG_NOSIGNAL);
|
||||
if (result < 0)
|
||||
{
|
||||
|
|
14
ringloop.cpp
14
ringloop.cpp
|
@ -66,10 +66,18 @@ void ring_loop_t::loop()
|
|||
struct ring_data_t *d = (struct ring_data_t*)cqe->user_data;
|
||||
if (d->callback)
|
||||
{
|
||||
d->res = cqe->res;
|
||||
d->callback(d);
|
||||
// First free ring_data item, then call the callback
|
||||
// so it has at least 1 free slot for the next event
|
||||
// which is required for EPOLLET to function properly
|
||||
struct ring_data_t dl;
|
||||
dl.iov = d->iov;
|
||||
dl.res = cqe->res;
|
||||
dl.callback.swap(d->callback);
|
||||
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
||||
dl.callback(&dl);
|
||||
}
|
||||
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
||||
else
|
||||
free_ring_data[free_ring_data_ptr++] = d - ring_datas;
|
||||
io_uring_cqe_seen(&ring, cqe);
|
||||
}
|
||||
while (get_sqe_queue.size() > 0)
|
||||
|
|
Loading…
Reference in New Issue