Compare commits

...

3 Commits

Author SHA1 Message Date
Vitaliy Filippov b3c15db331 32M journal by default in simple-offsets
Test / test_snapshot_ec (push) Successful in 30s Details
Test / test_rm (push) Successful in 18s Details
Test / test_move_reappear (push) Successful in 24s Details
Test / test_snapshot_down (push) Successful in 26s Details
Test / test_snapshot_down_ec (push) Successful in 30s Details
Test / test_splitbrain (push) Successful in 23s Details
Test / test_snapshot_chain (push) Successful in 2m17s Details
Test / test_snapshot_chain_ec (push) Successful in 2m55s Details
Test / test_rebalance_verify_imm (push) Successful in 2m46s Details
Test / test_rebalance_verify (push) Successful in 3m9s Details
Test / test_switch_primary (push) Successful in 39s Details
Test / test_write (push) Successful in 43s Details
Test / test_write_no_same (push) Successful in 19s Details
Test / test_write_xor (push) Successful in 55s Details
Test / test_rebalance_verify_ec (push) Successful in 3m35s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m37s Details
Test / test_heal_pg_size_2 (push) Successful in 3m36s Details
Test / test_heal_ec (push) Successful in 5m47s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m21s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m16s Details
Test / test_heal_csum_32k (push) Successful in 6m45s Details
Test / test_scrub (push) Successful in 1m56s Details
Test / test_heal_csum_4k_dj (push) Successful in 6m39s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m42s Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m16s Details
Test / test_scrub_xor (push) Successful in 47s Details
Test / test_scrub_pg_size_3 (push) Successful in 1m26s Details
Test / test_heal_csum_4k (push) Successful in 6m32s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 48s Details
Test / test_scrub_ec (push) Successful in 49s Details
2024-02-21 15:25:02 +03:00
Vitaliy Filippov 685bcd6ef9 Do not reserve extra space for big_writes during sync - sync itself is needed to commit and clear them 2024-02-21 13:00:14 +03:00
Vitaliy Filippov 3eb389b321 Supposed fix for "unexpected state during flush: 0x51" with EC
Test / test_move_reappear (push) Successful in 22s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 1m32s Details
Test / test_rm (push) Successful in 16s Details
Test / test_snapshot_down (push) Successful in 31s Details
Test / test_snapshot_down_ec (push) Successful in 32s Details
Test / test_splitbrain (push) Successful in 25s Details
Test / test_snapshot_chain (push) Successful in 2m4s Details
Test / test_snapshot_chain_ec (push) Successful in 2m51s Details
Test / test_rebalance_verify_imm (push) Successful in 2m47s Details
Test / test_rebalance_verify (push) Successful in 3m30s Details
Test / test_switch_primary (push) Successful in 38s Details
Test / test_write (push) Successful in 51s Details
Test / test_write_no_same (push) Successful in 16s Details
Test / test_write_xor (push) Successful in 52s Details
Test / test_rebalance_verify_ec (push) Successful in 3m32s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 3m7s Details
Test / test_scrub_zero_osd_2 (push) Successful in 59s Details
Test / test_scrub (push) Successful in 1m2s Details
Test / test_scrub_xor (push) Successful in 36s Details
Test / test_scrub_ec (push) Successful in 38s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 40s Details
Test / test_scrub_pg_size_3 (push) Successful in 49s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m12s Details
Test / test_heal_csum_32k_dj (push) Successful in 5m8s Details
Test / test_heal_csum_32k (push) Successful in 4m55s Details
Test / test_heal_ec (push) Failing after 10m14s Details
Test / test_heal_csum_4k_dmj (push) Successful in 4m59s Details
Test / test_heal_csum_4k_dj (push) Successful in 5m5s Details
Test / test_heal_pg_size_2 (push) Successful in 3m54s Details
Test / test_heal_csum_4k (push) Successful in 3m49s Details
2024-02-21 01:32:06 +03:00
7 changed files with 43 additions and 30 deletions

View File

@ -261,7 +261,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
```
--object_size 128k Set blockstore block size
--bitmap_granularity 4k Set bitmap granularity
--journal_size 16M Set journal size
--journal_size 32M Set journal size
--data_csum_type none Set data checksum type (crc32c or none)
--csum_block_size 4k Set data checksum block size
--device_block_size 4k Set device block size

View File

@ -267,7 +267,7 @@ OSD отключены fsync-и.
```
--object_size 128k Размер блока хранилища
--bitmap_granularity 4k Гранулярность битовых карт
--journal_size 16M Размер журнала
--journal_size 32M Размер журнала
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
--device_block_size 4k Размер блока устройства

View File

@ -307,35 +307,49 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
return STAB_SPLIT_DONE;
}
}
else if (IS_IN_FLIGHT(dirty_it->second.state))
{
// Object write is still in progress. Wait until the write request completes
return STAB_SPLIT_WAIT;
}
else if (!IS_SYNCED(dirty_it->second.state))
{
// Object not synced yet - sync it
// In previous versions we returned EBUSY here and required
// the caller (OSD) to issue a global sync first. But a global sync
// waits for all writes in the queue including inflight writes. And
// inflight writes may themselves be blocked by unstable writes being
// still present in the journal and not flushed away from it.
// So we must sync specific objects here.
//
// Moreover, we have to process the "stabilize" request in parts. That is,
// we must stabilize all objects which are already synced. Otherwise
// they may block objects which are NOT synced yet.
return STAB_SPLIT_SYNC;
}
else if (IS_STABLE(dirty_it->second.state))
{
// Already stable
return STAB_SPLIT_DONE;
}
else
while (true)
{
return STAB_SPLIT_TODO;
if (IS_IN_FLIGHT(dirty_it->second.state))
{
// Object write is still in progress. Wait until the write request completes
return STAB_SPLIT_WAIT;
}
else if (!IS_SYNCED(dirty_it->second.state))
{
// Object not synced yet - sync it
// In previous versions we returned EBUSY here and required
// the caller (OSD) to issue a global sync first. But a global sync
// waits for all writes in the queue including inflight writes. And
// inflight writes may themselves be blocked by unstable writes being
// still present in the journal and not flushed away from it.
// So we must sync specific objects here.
//
// Moreover, we have to process the "stabilize" request in parts. That is,
// we must stabilize all objects which are already synced. Otherwise
// they may block objects which are NOT synced yet.
return STAB_SPLIT_SYNC;
}
else if (IS_STABLE(dirty_it->second.state))
{
break;
}
// Check previous versions too
if (dirty_it == dirty_db.begin())
{
break;
}
dirty_it--;
if (dirty_it->first.oid != ov.oid)
{
break;
}
}
return STAB_SPLIT_TODO;
});
if (r != 1)
{

View File

@ -76,7 +76,6 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
// 2nd step: Data device is synced, prepare & write journal entries
// Check space in the journal and journal memory buffers
blockstore_journal_check_t space_check(this);
auto reservation = (unstable_writes.size()+unstable_unsynced+PRIV(op)->sync_big_writes.size())*journal.block_size;
if (dsk.csum_block_size)
{
// More complex check because all journal entries have different lengths
@ -86,14 +85,14 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
left--;
auto & dirty_entry = dirty_db.at(sbw);
uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, left ? 0 : reservation))
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, 0))
{
return 0;
}
}
}
else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, reservation))
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0))
{
return 0;
}

View File

@ -47,7 +47,7 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
if (!bitmap_granularity)
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
if (!journal_size)
journal_size = 16*1024*1024;
journal_size = 32*1024*1024;
if (!device_block_size)
device_block_size = 4096;
if (!data_csum_type)

View File

@ -167,7 +167,7 @@ static const char *help_text =
" Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n"
" --object_size 128k Set blockstore block size\n"
" --bitmap_granularity 4k Set bitmap granularity\n"
" --journal_size 16M Set journal size\n"
" --journal_size 32M Set journal size\n"
" --data_csum_type none Set data checksum type (crc32c or none)\n"
" --csum_block_size 4k Set data checksum block size\n"
" --device_block_size 4k Set device block size\n"

View File

@ -33,7 +33,7 @@ kill_osds()
for kill_osd in $(seq 2 $OSD_COUNT); do
sleep 15
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
wait_condition 600 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
p=OSD${kill_osd}_PID