Do not block reads by previous unfinished writes

trace-sqes
Vitaliy Filippov 2020-03-13 21:28:42 +03:00
parent eba053febe
commit 6982fe1255
5 changed files with 6 additions and 21 deletions

View File

@ -50,6 +50,7 @@ Input:
- version == 0: read the last stable version,
- version == UINT64_MAX: read the last version,
- otherwise: read the newest version that is <= the specified version
- reads aren't guaranteed to return data from previous unfinished writes
For writes:
- if version == 0, a new version is assigned automatically
- if version != 0, it is assigned for the new write if possible, otherwise -EINVAL is returned

View File

@ -258,19 +258,6 @@ void blockstore_impl_t::check_wait(blockstore_op_t *op)
}
PRIV(op)->wait_for = 0;
}
else if (PRIV(op)->wait_for == WAIT_IN_FLIGHT)
{
auto dirty_it = dirty_db.find((obj_ver_id){
.oid = op->oid,
.version = PRIV(op)->wait_detail,
});
if (dirty_it != dirty_db.end() && IS_IN_FLIGHT(dirty_it->second.state))
{
// do not submit
return;
}
PRIV(op)->wait_for = 0;
}
else if (PRIV(op)->wait_for == WAIT_JOURNAL)
{
if (journal.used_start == PRIV(op)->wait_detail)

View File

@ -128,8 +128,6 @@ struct __attribute__((__packed__)) dirty_entry
// Suspend operation until there are more free SQEs
#define WAIT_SQE 1
// Suspend operation until version <wait_detail> of object <oid> is written
#define WAIT_IN_FLIGHT 2
// Suspend operation until there are <wait_detail> bytes of free space in the journal on disk
#define WAIT_JOURNAL 3
// Suspend operation until the next journal sector buffer is free

View File

@ -8,12 +8,10 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
// Zero-length version - skip
return 1;
}
if (IS_IN_FLIGHT(item_state))
else if (IS_IN_FLIGHT(item_state))
{
// Pause until it's written somewhere
PRIV(op)->wait_for = WAIT_IN_FLIGHT;
PRIV(op)->wait_detail = item_version;
return 0;
// Write not finished yet - skip
return 1;
}
else if (IS_DELETE(item_state))
{

View File

@ -63,13 +63,14 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
};
enqueue_op(sync_op);
}
// Immediately add the operation into dirty_db, so subsequent reads could see it
#ifdef BLOCKSTORE_DEBUG
if (is_del)
printf("Delete %lu:%lu v%lu\n", op->oid.inode, op->oid.stripe, op->version);
else
printf("Write %lu:%lu v%lu offset=%u len=%u\n", op->oid.inode, op->oid.stripe, op->version, op->offset, op->len);
#endif
// No strict need to add it into dirty_db here, it's just left
// from the previous implementation where reads waited for writes
dirty_db.emplace((obj_ver_id){
.oid = op->oid,
.version = op->version,