forked from vitalif/vitastor
Supposed fix for "unexpected state during flush: 0x51" with EC
parent
3d16cde23c
commit
3eb389b321
|
@ -307,35 +307,49 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
|
||||||
return STAB_SPLIT_DONE;
|
return STAB_SPLIT_DONE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (IS_IN_FLIGHT(dirty_it->second.state))
|
|
||||||
{
|
|
||||||
// Object write is still in progress. Wait until the write request completes
|
|
||||||
return STAB_SPLIT_WAIT;
|
|
||||||
}
|
|
||||||
else if (!IS_SYNCED(dirty_it->second.state))
|
|
||||||
{
|
|
||||||
// Object not synced yet - sync it
|
|
||||||
// In previous versions we returned EBUSY here and required
|
|
||||||
// the caller (OSD) to issue a global sync first. But a global sync
|
|
||||||
// waits for all writes in the queue including inflight writes. And
|
|
||||||
// inflight writes may themselves be blocked by unstable writes being
|
|
||||||
// still present in the journal and not flushed away from it.
|
|
||||||
// So we must sync specific objects here.
|
|
||||||
//
|
|
||||||
// Even more, we have to process "stabilize" request in parts. That is,
|
|
||||||
// we must stabilize all objects which are already synced. Otherwise
|
|
||||||
// they may block objects which are NOT synced yet.
|
|
||||||
return STAB_SPLIT_SYNC;
|
|
||||||
}
|
|
||||||
else if (IS_STABLE(dirty_it->second.state))
|
else if (IS_STABLE(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
// Already stable
|
// Already stable
|
||||||
return STAB_SPLIT_DONE;
|
return STAB_SPLIT_DONE;
|
||||||
}
|
}
|
||||||
else
|
while (true)
|
||||||
{
|
{
|
||||||
return STAB_SPLIT_TODO;
|
if (IS_IN_FLIGHT(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Object write is still in progress. Wait until the write request completes
|
||||||
|
return STAB_SPLIT_WAIT;
|
||||||
|
}
|
||||||
|
else if (!IS_SYNCED(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Object not synced yet - sync it
|
||||||
|
// In previous versions we returned EBUSY here and required
|
||||||
|
// the caller (OSD) to issue a global sync first. But a global sync
|
||||||
|
// waits for all writes in the queue including inflight writes. And
|
||||||
|
// inflight writes may themselves be blocked by unstable writes being
|
||||||
|
// still present in the journal and not flushed away from it.
|
||||||
|
// So we must sync specific objects here.
|
||||||
|
//
|
||||||
|
// Even more, we have to process "stabilize" request in parts. That is,
|
||||||
|
// we must stabilize all objects which are already synced. Otherwise
|
||||||
|
// they may block objects which are NOT synced yet.
|
||||||
|
return STAB_SPLIT_SYNC;
|
||||||
|
}
|
||||||
|
else if (IS_STABLE(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Check previous versions too
|
||||||
|
if (dirty_it == dirty_db.begin())
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
dirty_it--;
|
||||||
|
if (dirty_it->first.oid != ov.oid)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return STAB_SPLIT_TODO;
|
||||||
});
|
});
|
||||||
if (r != 1)
|
if (r != 1)
|
||||||
{
|
{
|
||||||
|
|
|
@ -33,7 +33,7 @@ kill_osds()
|
||||||
for kill_osd in $(seq 2 $OSD_COUNT); do
|
for kill_osd in $(seq 2 $OSD_COUNT); do
|
||||||
sleep 15
|
sleep 15
|
||||||
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
|
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
|
||||||
wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
|
wait_condition 600 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
|
||||||
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
|
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
|
||||||
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
|
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
|
||||||
p=OSD${kill_osd}_PID
|
p=OSD${kill_osd}_PID
|
||||||
|
|
Loading…
Reference in New Issue