Compare commits
No commits in common. "b3c15db3318ce7c6b41a8e12c435229cec2049df" and "3d16cde23c605ea8f5fd4ae866b46db838144152" have entirely different histories.
b3c15db331
...
3d16cde23c
|
@ -261,7 +261,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
|
||||||
```
|
```
|
||||||
--object_size 128k Set blockstore block size
|
--object_size 128k Set blockstore block size
|
||||||
--bitmap_granularity 4k Set bitmap granularity
|
--bitmap_granularity 4k Set bitmap granularity
|
||||||
--journal_size 32M Set journal size
|
--journal_size 16M Set journal size
|
||||||
--data_csum_type none Set data checksum type (crc32c or none)
|
--data_csum_type none Set data checksum type (crc32c or none)
|
||||||
--csum_block_size 4k Set data checksum block size
|
--csum_block_size 4k Set data checksum block size
|
||||||
--device_block_size 4k Set device block size
|
--device_block_size 4k Set device block size
|
||||||
|
|
|
@ -267,7 +267,7 @@ OSD отключены fsync-и.
|
||||||
```
|
```
|
||||||
--object_size 128k Размер блока хранилища
|
--object_size 128k Размер блока хранилища
|
||||||
--bitmap_granularity 4k Гранулярность битовых карт
|
--bitmap_granularity 4k Гранулярность битовых карт
|
||||||
--journal_size 32M Размер журнала
|
--journal_size 16M Размер журнала
|
||||||
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
|
--data_csum_type none Задать тип контрольных сумм (crc32c или none)
|
||||||
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
|
--csum_block_size 4k Задать размер блока расчёта контрольных сумм
|
||||||
--device_block_size 4k Размер блока устройства
|
--device_block_size 4k Размер блока устройства
|
||||||
|
|
|
@ -307,49 +307,35 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
|
||||||
return STAB_SPLIT_DONE;
|
return STAB_SPLIT_DONE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (IS_IN_FLIGHT(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Object write is still in progress. Wait until the write request completes
|
||||||
|
return STAB_SPLIT_WAIT;
|
||||||
|
}
|
||||||
|
else if (!IS_SYNCED(dirty_it->second.state))
|
||||||
|
{
|
||||||
|
// Object not synced yet - sync it
|
||||||
|
// In previous versions we returned EBUSY here and required
|
||||||
|
// the caller (OSD) to issue a global sync first. But a global sync
|
||||||
|
// waits for all writes in the queue including inflight writes. And
|
||||||
|
// inflight writes may themselves be blocked by unstable writes being
|
||||||
|
// still present in the journal and not flushed away from it.
|
||||||
|
// So we must sync specific objects here.
|
||||||
|
//
|
||||||
|
// Even more, we have to process "stabilize" request in parts. That is,
|
||||||
|
// we must stabilize all objects which are already synced. Otherwise
|
||||||
|
// they may block objects which are NOT synced yet.
|
||||||
|
return STAB_SPLIT_SYNC;
|
||||||
|
}
|
||||||
else if (IS_STABLE(dirty_it->second.state))
|
else if (IS_STABLE(dirty_it->second.state))
|
||||||
{
|
{
|
||||||
// Already stable
|
// Already stable
|
||||||
return STAB_SPLIT_DONE;
|
return STAB_SPLIT_DONE;
|
||||||
}
|
}
|
||||||
while (true)
|
else
|
||||||
{
|
{
|
||||||
if (IS_IN_FLIGHT(dirty_it->second.state))
|
return STAB_SPLIT_TODO;
|
||||||
{
|
|
||||||
// Object write is still in progress. Wait until the write request completes
|
|
||||||
return STAB_SPLIT_WAIT;
|
|
||||||
}
|
|
||||||
else if (!IS_SYNCED(dirty_it->second.state))
|
|
||||||
{
|
|
||||||
// Object not synced yet - sync it
|
|
||||||
// In previous versions we returned EBUSY here and required
|
|
||||||
// the caller (OSD) to issue a global sync first. But a global sync
|
|
||||||
// waits for all writes in the queue including inflight writes. And
|
|
||||||
// inflight writes may themselves be blocked by unstable writes being
|
|
||||||
// still present in the journal and not flushed away from it.
|
|
||||||
// So we must sync specific objects here.
|
|
||||||
//
|
|
||||||
// Even more, we have to process "stabilize" request in parts. That is,
|
|
||||||
// we must stabilize all objects which are already synced. Otherwise
|
|
||||||
// they may block objects which are NOT synced yet.
|
|
||||||
return STAB_SPLIT_SYNC;
|
|
||||||
}
|
|
||||||
else if (IS_STABLE(dirty_it->second.state))
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Check previous versions too
|
|
||||||
if (dirty_it == dirty_db.begin())
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
dirty_it--;
|
|
||||||
if (dirty_it->first.oid != ov.oid)
|
|
||||||
{
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return STAB_SPLIT_TODO;
|
|
||||||
});
|
});
|
||||||
if (r != 1)
|
if (r != 1)
|
||||||
{
|
{
|
||||||
|
|
|
@ -76,6 +76,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
// 2nd step: Data device is synced, prepare & write journal entries
|
// 2nd step: Data device is synced, prepare & write journal entries
|
||||||
// Check space in the journal and journal memory buffers
|
// Check space in the journal and journal memory buffers
|
||||||
blockstore_journal_check_t space_check(this);
|
blockstore_journal_check_t space_check(this);
|
||||||
|
auto reservation = (unstable_writes.size()+unstable_unsynced+PRIV(op)->sync_big_writes.size())*journal.block_size;
|
||||||
if (dsk.csum_block_size)
|
if (dsk.csum_block_size)
|
||||||
{
|
{
|
||||||
// More complex check because all journal entries have different lengths
|
// More complex check because all journal entries have different lengths
|
||||||
|
@ -85,14 +86,14 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||||
left--;
|
left--;
|
||||||
auto & dirty_entry = dirty_db.at(sbw);
|
auto & dirty_entry = dirty_db.at(sbw);
|
||||||
uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
|
uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
|
||||||
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, 0))
|
if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, left ? 0 : reservation))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
|
else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
|
||||||
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0))
|
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, reservation))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
|
||||||
if (!bitmap_granularity)
|
if (!bitmap_granularity)
|
||||||
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||||
if (!journal_size)
|
if (!journal_size)
|
||||||
journal_size = 32*1024*1024;
|
journal_size = 16*1024*1024;
|
||||||
if (!device_block_size)
|
if (!device_block_size)
|
||||||
device_block_size = 4096;
|
device_block_size = 4096;
|
||||||
if (!data_csum_type)
|
if (!data_csum_type)
|
||||||
|
|
|
@ -167,7 +167,7 @@ static const char *help_text =
|
||||||
" Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n"
|
" Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n"
|
||||||
" --object_size 128k Set blockstore block size\n"
|
" --object_size 128k Set blockstore block size\n"
|
||||||
" --bitmap_granularity 4k Set bitmap granularity\n"
|
" --bitmap_granularity 4k Set bitmap granularity\n"
|
||||||
" --journal_size 32M Set journal size\n"
|
" --journal_size 16M Set journal size\n"
|
||||||
" --data_csum_type none Set data checksum type (crc32c or none)\n"
|
" --data_csum_type none Set data checksum type (crc32c or none)\n"
|
||||||
" --csum_block_size 4k Set data checksum block size\n"
|
" --csum_block_size 4k Set data checksum block size\n"
|
||||||
" --device_block_size 4k Set device block size\n"
|
" --device_block_size 4k Set device block size\n"
|
||||||
|
|
|
@ -33,7 +33,7 @@ kill_osds()
|
||||||
for kill_osd in $(seq 2 $OSD_COUNT); do
|
for kill_osd in $(seq 2 $OSD_COUNT); do
|
||||||
sleep 15
|
sleep 15
|
||||||
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
|
# Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
|
||||||
wait_condition 600 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
|
wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
|
||||||
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
|
jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
|
||||||
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
|
echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
|
||||||
p=OSD${kill_osd}_PID
|
p=OSD${kill_osd}_PID
|
||||||
|
|
Loading…
Reference in New Issue