forked from vitalif/vitastor
Do not block reads by previous unfinished writes
parent
eba053febe
commit
6982fe1255
|
@ -50,6 +50,7 @@ Input:
|
||||||
- version == 0: read the last stable version,
|
- version == 0: read the last stable version,
|
||||||
- version == UINT64_MAX: read the last version,
|
- version == UINT64_MAX: read the last version,
|
||||||
- otherwise: read the newest version that is <= the specified version
|
- otherwise: read the newest version that is <= the specified version
|
||||||
|
- reads aren't guaranteed to return data from previous unfinished writes
|
||||||
For writes:
|
For writes:
|
||||||
- if version == 0, a new version is assigned automatically
|
- if version == 0, a new version is assigned automatically
|
||||||
- if version != 0, it is assigned for the new write if possible, otherwise -EINVAL is returned
|
- if version != 0, it is assigned for the new write if possible, otherwise -EINVAL is returned
|
||||||
|
|
|
@ -258,19 +258,6 @@ void blockstore_impl_t::check_wait(blockstore_op_t *op)
|
||||||
}
|
}
|
||||||
PRIV(op)->wait_for = 0;
|
PRIV(op)->wait_for = 0;
|
||||||
}
|
}
|
||||||
else if (PRIV(op)->wait_for == WAIT_IN_FLIGHT)
|
|
||||||
{
|
|
||||||
auto dirty_it = dirty_db.find((obj_ver_id){
|
|
||||||
.oid = op->oid,
|
|
||||||
.version = PRIV(op)->wait_detail,
|
|
||||||
});
|
|
||||||
if (dirty_it != dirty_db.end() && IS_IN_FLIGHT(dirty_it->second.state))
|
|
||||||
{
|
|
||||||
// do not submit
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
PRIV(op)->wait_for = 0;
|
|
||||||
}
|
|
||||||
else if (PRIV(op)->wait_for == WAIT_JOURNAL)
|
else if (PRIV(op)->wait_for == WAIT_JOURNAL)
|
||||||
{
|
{
|
||||||
if (journal.used_start == PRIV(op)->wait_detail)
|
if (journal.used_start == PRIV(op)->wait_detail)
|
||||||
|
|
|
@ -128,8 +128,6 @@ struct __attribute__((__packed__)) dirty_entry
|
||||||
|
|
||||||
// Suspend operation until there are more free SQEs
|
// Suspend operation until there are more free SQEs
|
||||||
#define WAIT_SQE 1
|
#define WAIT_SQE 1
|
||||||
// Suspend operation until version <wait_detail> of object <oid> is written
|
|
||||||
#define WAIT_IN_FLIGHT 2
|
|
||||||
// Suspend operation until there are <wait_detail> bytes of free space in the journal on disk
|
// Suspend operation until there are <wait_detail> bytes of free space in the journal on disk
|
||||||
#define WAIT_JOURNAL 3
|
#define WAIT_JOURNAL 3
|
||||||
// Suspend operation until the next journal sector buffer is free
|
// Suspend operation until the next journal sector buffer is free
|
||||||
|
|
|
@ -8,12 +8,10 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
|
||||||
// Zero-length version - skip
|
// Zero-length version - skip
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (IS_IN_FLIGHT(item_state))
|
else if (IS_IN_FLIGHT(item_state))
|
||||||
{
|
{
|
||||||
// Pause until it's written somewhere
|
// Write not finished yet - skip
|
||||||
PRIV(op)->wait_for = WAIT_IN_FLIGHT;
|
return 1;
|
||||||
PRIV(op)->wait_detail = item_version;
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
else if (IS_DELETE(item_state))
|
else if (IS_DELETE(item_state))
|
||||||
{
|
{
|
||||||
|
|
|
@ -63,13 +63,14 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
};
|
};
|
||||||
enqueue_op(sync_op);
|
enqueue_op(sync_op);
|
||||||
}
|
}
|
||||||
// Immediately add the operation into dirty_db, so subsequent reads could see it
|
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
if (is_del)
|
if (is_del)
|
||||||
printf("Delete %lu:%lu v%lu\n", op->oid.inode, op->oid.stripe, op->version);
|
printf("Delete %lu:%lu v%lu\n", op->oid.inode, op->oid.stripe, op->version);
|
||||||
else
|
else
|
||||||
printf("Write %lu:%lu v%lu offset=%u len=%u\n", op->oid.inode, op->oid.stripe, op->version, op->offset, op->len);
|
printf("Write %lu:%lu v%lu offset=%u len=%u\n", op->oid.inode, op->oid.stripe, op->version, op->offset, op->len);
|
||||||
#endif
|
#endif
|
||||||
|
// Adding it into dirty_db here is not strictly necessary; it is left over
|
||||||
|
// from the previous implementation, where reads waited for writes
|
||||||
dirty_db.emplace((obj_ver_id){
|
dirty_db.emplace((obj_ver_id){
|
||||||
.oid = op->oid,
|
.oid = op->oid,
|
||||||
.version = op->version,
|
.version = op->version,
|
||||||
|
|
Loading…
Reference in New Issue