forked from vitalif/vitastor
Make fsync flags separate for data, metadata and journal
parent
d5386aa958
commit
d0ab2a20b2
|
@ -635,15 +635,17 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
|
||||||
goto resume_1;
|
goto resume_1;
|
||||||
else if (wait_state == wait_base+2)
|
else if (wait_state == wait_base+2)
|
||||||
goto resume_2;
|
goto resume_2;
|
||||||
if (!bs->disable_fsync)
|
if (!(fsync_meta ? bs->disable_meta_fsync : bs->disable_journal_fsync))
|
||||||
{
|
{
|
||||||
cur_sync = flusher->syncs.end();
|
cur_sync = flusher->syncs.end();
|
||||||
while (cur_sync != flusher->syncs.begin())
|
while (cur_sync != flusher->syncs.begin())
|
||||||
{
|
{
|
||||||
cur_sync--;
|
cur_sync--;
|
||||||
if (cur_sync->fsync_meta == fsync_meta && cur_sync->state == 0)
|
if (cur_sync->fsync_meta == fsync_meta && cur_sync->state == 0)
|
||||||
|
{
|
||||||
goto sync_found;
|
goto sync_found;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
cur_sync = flusher->syncs.emplace(flusher->syncs.end(), (flusher_sync_t){
|
cur_sync = flusher->syncs.emplace(flusher->syncs.end(), (flusher_sync_t){
|
||||||
.fsync_meta = fsync_meta,
|
.fsync_meta = fsync_meta,
|
||||||
.ready_count = 0,
|
.ready_count = 0,
|
||||||
|
|
|
@ -194,9 +194,8 @@ class blockstore_impl_t
|
||||||
// Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
|
// Sparse write tracking granularity. 4 KB is a good choice. Must be a multiple of disk_alignment
|
||||||
uint64_t bitmap_granularity = 4096;
|
uint64_t bitmap_granularity = 4096;
|
||||||
bool readonly = false;
|
bool readonly = false;
|
||||||
// FIXME: separate flags for data, metadata and journal
|
|
||||||
// It is safe to disable fsync() if drive write cache is writethrough
|
// It is safe to disable fsync() if drive write cache is writethrough
|
||||||
bool disable_fsync = false;
|
bool disable_data_fsync = false, disable_meta_fsync = false, disable_journal_fsync = false;
|
||||||
bool inmemory_meta = false;
|
bool inmemory_meta = false;
|
||||||
int flusher_count;
|
int flusher_count;
|
||||||
/******* END OF OPTIONS *******/
|
/******* END OF OPTIONS *******/
|
||||||
|
|
|
@ -251,7 +251,7 @@ resume_1:
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset);
|
||||||
wait_count++;
|
wait_count++;
|
||||||
if (!bs->disable_fsync)
|
if (!bs->disable_journal_fsync)
|
||||||
{
|
{
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
|
||||||
|
@ -331,7 +331,7 @@ resume_1:
|
||||||
data->callback = simple_callback;
|
data->callback = simple_callback;
|
||||||
wait_count++;
|
wait_count++;
|
||||||
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
my_uring_prep_writev(sqe, bs->journal.fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||||
if (!bs->disable_fsync)
|
if (!bs->disable_journal_fsync)
|
||||||
{
|
{
|
||||||
GET_SQE();
|
GET_SQE();
|
||||||
data->iov = { 0 };
|
data->iov = { 0 };
|
||||||
|
|
|
@ -22,9 +22,17 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
{
|
{
|
||||||
readonly = true;
|
readonly = true;
|
||||||
}
|
}
|
||||||
if (config["disable_fsync"] == "true" || config["disable_fsync"] == "1" || config["disable_fsync"] == "yes")
|
if (config["disable_data_fsync"] == "true" || config["disable_data_fsync"] == "1" || config["disable_data_fsync"] == "yes")
|
||||||
{
|
{
|
||||||
disable_fsync = true;
|
disable_data_fsync = true;
|
||||||
|
}
|
||||||
|
if (config["disable_meta_fsync"] == "true" || config["disable_meta_fsync"] == "1" || config["disable_meta_fsync"] == "yes")
|
||||||
|
{
|
||||||
|
disable_meta_fsync = true;
|
||||||
|
}
|
||||||
|
if (config["disable_journal_fsync"] == "true" || config["disable_journal_fsync"] == "1" || config["disable_journal_fsync"] == "yes")
|
||||||
|
{
|
||||||
|
disable_journal_fsync = true;
|
||||||
}
|
}
|
||||||
metadata_buf_size = strtoull(config["meta_buf_size"].c_str(), NULL, 10);
|
metadata_buf_size = strtoull(config["meta_buf_size"].c_str(), NULL, 10);
|
||||||
cfg_journal_size = strtoull(config["journal_size"].c_str(), NULL, 10);
|
cfg_journal_size = strtoull(config["journal_size"].c_str(), NULL, 10);
|
||||||
|
@ -265,6 +273,7 @@ void blockstore_impl_t::open_meta()
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
meta_fd = data_fd;
|
meta_fd = data_fd;
|
||||||
|
disable_meta_fsync = disable_data_fsync;
|
||||||
meta_size = 0;
|
meta_size = 0;
|
||||||
if (meta_offset >= data_size)
|
if (meta_offset >= data_size)
|
||||||
{
|
{
|
||||||
|
@ -287,6 +296,7 @@ void blockstore_impl_t::open_journal()
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
journal.fd = meta_fd;
|
journal.fd = meta_fd;
|
||||||
|
disable_journal_fsync = disable_meta_fsync;
|
||||||
journal.device_size = 0;
|
journal.device_size = 0;
|
||||||
if (journal.offset >= data_size)
|
if (journal.offset >= data_size)
|
||||||
{
|
{
|
||||||
|
|
|
@ -39,7 +39,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
if (PRIV(op)->sync_state == SYNC_HAS_SMALL)
|
if (PRIV(op)->sync_state == SYNC_HAS_SMALL)
|
||||||
{
|
{
|
||||||
// No big writes, just fsync the journal
|
// No big writes, just fsync the journal
|
||||||
int n_sqes = disable_fsync ? 0 : 1;
|
int n_sqes = disable_journal_fsync ? 0 : 1;
|
||||||
if (journal.sector_info[journal.cur_sector].dirty)
|
if (journal.sector_info[journal.cur_sector].dirty)
|
||||||
{
|
{
|
||||||
n_sqes++;
|
n_sqes++;
|
||||||
|
@ -61,7 +61,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
PRIV(op)->min_used_journal_sector = PRIV(op)->max_used_journal_sector = 0;
|
PRIV(op)->min_used_journal_sector = PRIV(op)->max_used_journal_sector = 0;
|
||||||
}
|
}
|
||||||
if (!disable_fsync)
|
if (!disable_journal_fsync)
|
||||||
{
|
{
|
||||||
ring_data_t *data = ((ring_data_t*)sqes[s]->user_data);
|
ring_data_t *data = ((ring_data_t*)sqes[s]->user_data);
|
||||||
my_uring_prep_fsync(sqes[s++], journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqes[s++], journal.fd, IORING_FSYNC_DATASYNC);
|
||||||
|
@ -79,7 +79,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
else if (PRIV(op)->sync_state == SYNC_HAS_BIG)
|
else if (PRIV(op)->sync_state == SYNC_HAS_BIG)
|
||||||
{
|
{
|
||||||
// 1st step: fsync data
|
// 1st step: fsync data
|
||||||
if (!disable_fsync)
|
if (!disable_data_fsync)
|
||||||
{
|
{
|
||||||
BS_SUBMIT_GET_SQE(sqe, data);
|
BS_SUBMIT_GET_SQE(sqe, data);
|
||||||
my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
|
||||||
|
@ -104,8 +104,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// Get SQEs. Don't bother about merging, submit each journal sector as a separate request
|
// Get SQEs. Don't bother about merging, submit each journal sector as a separate request
|
||||||
struct io_uring_sqe *sqe[space_check.sectors_required + (disable_fsync ? 0 : 1)];
|
struct io_uring_sqe *sqe[space_check.sectors_required + (disable_journal_fsync ? 0 : 1)];
|
||||||
for (int i = 0; i < space_check.sectors_required + (disable_fsync ? 0 : 1); i++)
|
for (int i = 0; i < space_check.sectors_required + (disable_journal_fsync ? 0 : 1); i++)
|
||||||
{
|
{
|
||||||
BS_SUBMIT_GET_SQE_DECL(sqe[i]);
|
BS_SUBMIT_GET_SQE_DECL(sqe[i]);
|
||||||
}
|
}
|
||||||
|
@ -148,7 +148,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
}
|
}
|
||||||
PRIV(op)->max_used_journal_sector = 1 + journal.cur_sector;
|
PRIV(op)->max_used_journal_sector = 1 + journal.cur_sector;
|
||||||
// ... And a journal fsync
|
// ... And a journal fsync
|
||||||
if (!disable_fsync)
|
if (!disable_journal_fsync)
|
||||||
{
|
{
|
||||||
my_uring_prep_fsync(sqe[s], journal.fd, IORING_FSYNC_DATASYNC);
|
my_uring_prep_fsync(sqe[s], journal.fd, IORING_FSYNC_DATASYNC);
|
||||||
struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data);
|
struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data);
|
||||||
|
@ -157,7 +157,9 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
PRIV(op)->pending_ops = 1 + s;
|
PRIV(op)->pending_ops = 1 + s;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
PRIV(op)->pending_ops = s;
|
PRIV(op)->pending_ops = s;
|
||||||
|
}
|
||||||
PRIV(op)->sync_state = SYNC_JOURNAL_SYNC_SENT;
|
PRIV(op)->sync_state = SYNC_JOURNAL_SYNC_SENT;
|
||||||
ringloop->submit();
|
ringloop->submit();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue