forked from vitalif/vitastor
Do not break subsequent small writes badly when a big write is canceled
parent
29bac892ad
commit
314b20437b
|
@ -271,6 +271,7 @@ class blockstore_impl_t
|
||||||
|
|
||||||
// Write
|
// Write
|
||||||
bool enqueue_write(blockstore_op_t *op);
|
bool enqueue_write(blockstore_op_t *op);
|
||||||
|
void cancel_all_writes(blockstore_op_t *op, blockstore_dirty_db_t::iterator dirty_it, int retval);
|
||||||
int dequeue_write(blockstore_op_t *op);
|
int dequeue_write(blockstore_op_t *op);
|
||||||
int dequeue_del(blockstore_op_t *op);
|
int dequeue_del(blockstore_op_t *op);
|
||||||
int continue_write(blockstore_op_t *op);
|
int continue_write(blockstore_op_t *op);
|
||||||
|
|
|
@ -124,6 +124,29 @@ bool blockstore_impl_t::enqueue_write(blockstore_op_t *op)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_db_t::iterator dirty_it, int retval)
|
||||||
|
{
|
||||||
|
while (dirty_it != dirty_db.end() && dirty_it->first.oid == op->oid)
|
||||||
|
{
|
||||||
|
dirty_db.erase(dirty_it++);
|
||||||
|
}
|
||||||
|
bool found = false;
|
||||||
|
for (auto other_op: submit_queue)
|
||||||
|
{
|
||||||
|
if (!found && other_op == op)
|
||||||
|
found = true;
|
||||||
|
else if (found && other_op->oid == op->oid &&
|
||||||
|
(other_op->opcode == BS_OP_WRITE || other_op->opcode == BS_OP_WRITE_STABLE))
|
||||||
|
{
|
||||||
|
// Mark operations to cancel them
|
||||||
|
PRIV(other_op)->real_version = UINT64_MAX;
|
||||||
|
other_op->retval = retval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
op->retval = retval;
|
||||||
|
FINISH_OP(op);
|
||||||
|
}
|
||||||
|
|
||||||
// First step of the write algorithm: dequeue operation and submit initial write(s)
|
// First step of the write algorithm: dequeue operation and submit initial write(s)
|
||||||
int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
{
|
{
|
||||||
|
@ -143,6 +166,12 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
}
|
}
|
||||||
if (PRIV(op)->real_version != 0)
|
if (PRIV(op)->real_version != 0)
|
||||||
{
|
{
|
||||||
|
if (PRIV(op)->real_version == UINT64_MAX)
|
||||||
|
{
|
||||||
|
// This is the flag value used to cancel operations
|
||||||
|
FINISH_OP(op);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
// Restore original low version number for unblocked operations
|
// Restore original low version number for unblocked operations
|
||||||
#ifdef BLOCKSTORE_DEBUG
|
#ifdef BLOCKSTORE_DEBUG
|
||||||
printf("Restoring %lx:%lx version: v%lu -> v%lu\n", op->oid.inode, op->oid.stripe, op->version, PRIV(op)->real_version);
|
printf("Restoring %lx:%lx version: v%lu -> v%lu\n", op->oid.inode, op->oid.stripe, op->version, PRIV(op)->real_version);
|
||||||
|
@ -152,10 +181,8 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
if (prev_it->first.oid == op->oid && prev_it->first.version >= PRIV(op)->real_version)
|
if (prev_it->first.oid == op->oid && prev_it->first.version >= PRIV(op)->real_version)
|
||||||
{
|
{
|
||||||
// Original version is still invalid
|
// Original version is still invalid
|
||||||
// FIXME Oops. Successive small writes will currently break in an unexpected way. Fix it
|
// All subsequent writes to the same object must be canceled too
|
||||||
dirty_db.erase(dirty_it);
|
cancel_all_writes(op, dirty_it, -EEXIST);
|
||||||
op->retval = -EEXIST;
|
|
||||||
FINISH_OP(op);
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
op->version = PRIV(op)->real_version;
|
op->version = PRIV(op)->real_version;
|
||||||
|
@ -189,10 +216,7 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||||
PRIV(op)->wait_for = WAIT_FREE;
|
PRIV(op)->wait_for = WAIT_FREE;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// FIXME Oops. Successive small writes will currently break in an unexpected way. Fix it
|
cancel_all_writes(op, dirty_it, -ENOSPC);
|
||||||
dirty_db.erase(dirty_it);
|
|
||||||
op->retval = -ENOSPC;
|
|
||||||
FINISH_OP(op);
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
write_iodepth++;
|
write_iodepth++;
|
||||||
|
|
Loading…
Reference in New Issue