From de6919b02bb7cb70ccd3842910b4474798b2f76c Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Sat, 12 Sep 2020 19:14:51 +0300 Subject: [PATCH] Add option to disable multiple overwrites of the same journal sector This makes sense for some SSDs like Intel D3-4510 because they don't like overwrites of the same sector: $ fio -direct=1 -rw=write -bs=4k -size=4k -loops=100000 -iodepth=1 write: IOPS=3142, BW=12.3MiB/s (12.9MB/s)(97.9MiB/7977msec) $ fio -direct=1 -rw=write -bs=4k -size=128k -loops=100000 -iodepth=1 write: IOPS=20.8k, BW=81.4MiB/s (85.3MB/s)(543MiB/6675msec) --- blockstore_journal.cpp | 9 +++++++-- blockstore_journal.h | 2 ++ blockstore_open.cpp | 2 ++ lp/mon.js | 1 + 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/blockstore_journal.cpp b/blockstore_journal.cpp index 41d1c423..525ea7ba 100644 --- a/blockstore_journal.cpp +++ b/blockstore_journal.cpp @@ -17,7 +17,9 @@ int blockstore_journal_check_t::check_available(blockstore_op_t *op, int entries int required = entries_required; while (1) { - int fits = (bs->journal.block_size - next_in_pos) / size; + int fits = bs->journal.no_same_sector_overwrites && bs->journal.sector_info[next_sector].written + ? 0 + : (bs->journal.block_size - next_in_pos) / size; if (fits > 0) { if (first_sector == -1) @@ -110,10 +112,12 @@ int blockstore_journal_check_t::check_available(blockstore_op_t *op, int entries journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, uint32_t size) { - if (journal.block_size - journal.in_sector_pos < size) + if (journal.block_size - journal.in_sector_pos < size || + journal.no_same_sector_overwrites && journal.sector_info[journal.cur_sector].written) { assert(!journal.sector_info[journal.cur_sector].dirty); // Move to the next journal sector + journal.sector_info[journal.cur_sector].written = false; if (journal.sector_info[journal.cur_sector].usage_count > 0) { // Also select next sector buffer in memory @@ -148,6 +152,7 @@ journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, void prepare_journal_sector_write(journal_t & journal, int cur_sector, io_uring_sqe *sqe, std::function cb) { journal.sector_info[cur_sector].dirty = false; + journal.sector_info[cur_sector].written = true; journal.sector_info[cur_sector].usage_count++; ring_data_t *data = ((ring_data_t*)sqe->user_data); data->iov = (struct iovec){ diff --git a/blockstore_journal.h b/blockstore_journal.h index b7ddf430..a0d2e5eb 100644 --- a/blockstore_journal.h +++ b/blockstore_journal.h @@ -129,6 +129,7 @@ struct journal_sector_info_t { uint64_t offset; uint64_t usage_count; + bool written; bool dirty; }; @@ -153,6 +154,7 @@ struct journal_t void *sector_buf = NULL; journal_sector_info_t *sector_info = NULL; uint64_t sector_count; + bool no_same_sector_overwrites = false; int cur_sector = 0; int in_sector_pos = 0; diff --git a/blockstore_open.cpp b/blockstore_open.cpp index 487f21d6..76c2d815 100644 --- a/blockstore_open.cpp +++ b/blockstore_open.cpp @@ -59,6 +59,8 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config) journal_device = config["journal_device"]; journal.offset = strtoull(config["journal_offset"].c_str(), NULL, 10); journal.sector_count = strtoull(config["journal_sector_buffer_count"].c_str(), NULL, 10); + journal.no_same_sector_overwrites = config["journal_no_same_sector_overwrites"] == "true" || + config["journal_no_same_sector_overwrites"] == "1" || config["journal_no_same_sector_overwrites"] == "yes"; journal.inmemory = config["inmemory_journal"] != "false"; disk_alignment = strtoull(config["disk_alignment"].c_str(), NULL, 10); journal_block_size = strtoull(config["journal_block_size"].c_str(), NULL, 10); diff --git a/lp/mon.js b/lp/mon.js index 52804c72..2b2a708c 100644 --- a/lp/mon.js +++ b/lp/mon.js @@ -81,6 +81,7 @@ class Mon inmemory_metadata, inmemory_journal, journal_sector_buffer_count, + journal_no_same_sector_overwrites, }, */ global: {}, /* node_placement: {