From 88671cf745e631ff303cafd3d5c6da44b01ed920 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sat, 13 Mar 2021 17:19:20 +0300 Subject: [PATCH] Fix a bug causing all flushers to wait for an fsync without actually trying to do it This happened because flusher_count became dynamic and fsync_batch() was comparing the number of flushers currently ready to do an fsync with the maximum number of flushers. Also the number wasn't rechecked on every loop which was also incorrect. Now the interrupted_rebalance test passes even without IMMEDIATE_COMMIT=1. --- src/blockstore_flush.cpp | 43 +++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/blockstore_flush.cpp b/src/blockstore_flush.cpp index d4ef69fd..0e2169c3 100644 --- a/src/blockstore_flush.cpp +++ b/src/blockstore_flush.cpp @@ -823,31 +823,34 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base) sync_found: cur_sync->ready_count++; flusher->syncing_flushers++; - if (flusher->syncing_flushers >= flusher->flusher_count || !flusher->flush_queue.size()) + resume_1: + if (!cur_sync->state) { - // Sync batch is ready. Do it. - await_sqe(0); - data->iov = { 0 }; - data->callback = simple_callback_w; - my_uring_prep_fsync(sqe, fsync_meta ? bs->meta_fd : bs->data_fd, IORING_FSYNC_DATASYNC); - cur_sync->state = 1; - wait_count++; - resume_1: - if (wait_count > 0) + if (flusher->syncing_flushers >= flusher->cur_flusher_count || !flusher->flush_queue.size()) { + // Sync batch is ready. Do it. + await_sqe(0); + data->iov = { 0 }; + data->callback = simple_callback_w; + my_uring_prep_fsync(sqe, fsync_meta ? bs->meta_fd : bs->data_fd, IORING_FSYNC_DATASYNC); + cur_sync->state = 1; + wait_count++; + resume_2: + if (wait_count > 0) + { + wait_state = 2; + return false; + } + // Sync completed. All previous coroutines waiting for it must be resumed + cur_sync->state = 2; + bs->ringloop->wakeup(); + } + else + { + // Wait until someone else sends and completes a sync. wait_state = 1; return false; } - // Sync completed. All previous coroutines waiting for it must be resumed - cur_sync->state = 2; - bs->ringloop->wakeup(); - } - // Wait until someone else sends and completes a sync. - resume_2: - if (!cur_sync->state) - { - wait_state = 2; - return false; } flusher->syncing_flushers--; cur_sync->ready_count--;