forked from vitalif/vitastor
Make deletions instantly stable
The "2-phase" (write->stabilize) process is pointless for deletions because it doesn't protect us from incomplete objects. This happens because the version information is removed from metadata after stabilization. Deletions require a "3-phase" process with a potentially very long 3rd phase. So, deletions will be allowed to generate degraded and incomplete objects, and, so that this does not affect users' ability to delete something, the cluster will allow deleting whole inodes while storing a list of them in etcd. Proper TRIM will be impossible until the aforementioned "3-phase" process is implemented, though. By the way, this change also fixes a possible write stall after rebalancing, which was caused by the lack of "stabilize delete" operations.
sync-io-test
parent
985c309d7f
commit
571be0f380
|
@ -624,6 +624,9 @@ int blockstore_init_journal::handle_journal_part(void *buf, uint64_t done_pos, u
|
|||
.journal_sector = proc_pos,
|
||||
});
|
||||
bs->journal.used_sectors[proc_pos]++;
|
||||
// Deletions are treated as immediately stable, because
|
||||
// "2-phase commit" (write->stabilize) isn't sufficient for them anyway
|
||||
bs->mark_stable(ov);
|
||||
}
|
||||
}
|
||||
started = true;
|
||||
|
|
|
@ -275,7 +275,16 @@ void blockstore_impl_t::ack_one_sync(blockstore_op_t *op)
|
|||
#endif
|
||||
auto & unstab = unstable_writes[it->oid];
|
||||
unstab = unstab < it->version ? it->version : unstab;
|
||||
dirty_db[*it].state = dirty_db[*it].state == ST_DEL_WRITTEN ? ST_DEL_SYNCED : ST_J_SYNCED;
|
||||
if (dirty_db[*it].state == ST_DEL_WRITTEN)
|
||||
{
|
||||
dirty_db[*it].state = ST_DEL_SYNCED;
|
||||
// Deletions are treated as immediately stable
|
||||
mark_stable(*it);
|
||||
}
|
||||
else /* == ST_J_WRITTEN */
|
||||
{
|
||||
dirty_db[*it].state = ST_J_SYNCED;
|
||||
}
|
||||
}
|
||||
in_progress_syncs.erase(PRIV(op)->in_progress_ptr);
|
||||
op->retval = 0;
|
||||
|
|
|
@ -355,6 +355,11 @@ resume_4:
|
|||
else if (dirty_it->second.state == ST_DEL_SUBMITTED)
|
||||
{
|
||||
dirty_it->second.state = imm ? ST_DEL_SYNCED : ST_DEL_WRITTEN;
|
||||
if (imm)
|
||||
{
|
||||
// Deletions are treated as immediately stable
|
||||
mark_stable(dirty_it->first);
|
||||
}
|
||||
}
|
||||
if (immediate_commit == IMMEDIATE_ALL)
|
||||
{
|
||||
|
|
2
osd.h
2
osd.h
|
@ -193,7 +193,7 @@ class osd_t
|
|||
void continue_primary_del(osd_op_t *cur_op);
|
||||
bool check_write_queue(osd_op_t *cur_op, pg_t & pg);
|
||||
void remove_object_from_state(object_id & oid, pg_osd_set_state_t *object_state, pg_t &pg);
|
||||
bool finalize_primary_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state);
|
||||
bool remember_unstable_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state);
|
||||
void handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op);
|
||||
void handle_primary_bs_subop(osd_op_t *subop);
|
||||
void add_bs_subop_stats(osd_op_t *subop);
|
||||
|
|
|
@ -284,7 +284,7 @@ resume_9:
|
|||
// FIXME: Check for immediate_commit == IMMEDIATE_SMALL
|
||||
resume_6:
|
||||
resume_7:
|
||||
if (!finalize_primary_write(cur_op, pg, pg.cur_loc_set, 6))
|
||||
if (!remember_unstable_write(cur_op, pg, pg.cur_loc_set, 6))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -305,7 +305,7 @@ resume_7:
|
|||
}
|
||||
}
|
||||
|
||||
bool osd_t::finalize_primary_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state)
|
||||
bool osd_t::remember_unstable_write(osd_op_t *cur_op, pg_t & pg, pg_osd_set_t & loc_set, int base_state)
|
||||
{
|
||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||
if (op_data->st == base_state)
|
||||
|
@ -598,8 +598,6 @@ void osd_t::continue_primary_del(osd_op_t *cur_op)
|
|||
else if (op_data->st == 3) goto resume_3;
|
||||
else if (op_data->st == 4) goto resume_4;
|
||||
else if (op_data->st == 5) goto resume_5;
|
||||
else if (op_data->st == 6) goto resume_6;
|
||||
else if (op_data->st == 7) goto resume_7;
|
||||
assert(op_data->st == 0);
|
||||
// Delete is forbidden even in active PGs if they're also degraded or have previous dead OSDs
|
||||
if (pg.state & (PG_DEGRADED | PG_LEFT_ON_DEAD))
|
||||
|
@ -641,12 +639,6 @@ resume_5:
|
|||
}
|
||||
// Remove version override
|
||||
pg.ver_override.erase(op_data->oid);
|
||||
resume_6:
|
||||
resume_7:
|
||||
if (!finalize_primary_write(cur_op, pg, op_data->object_state ? op_data->object_state->osd_set : pg.cur_loc_set, 6))
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Adjust PG stats after "instant stabilize", because we need object_state above
|
||||
if (!op_data->object_state)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue