7 changed files with 30 additions and 43 deletions
--- a/docs/usage/disk.en.md
+++ b/docs/usage/disk.en.md
@@ -261,7 +261,7 @@ Options (see also [Cluster-Wide Disk Layout Parameters](../config/layout-cluster
 ```
 --object_size 128k       Set blockstore block size
 --bitmap_granularity 4k  Set bitmap granularity
---journal_size 32M       Set journal size
+--journal_size 16M       Set journal size
 --data_csum_type none    Set data checksum type (crc32c or none)
 --csum_block_size 4k     Set data checksum block size
 --device_block_size 4k   Set device block size
--- a/docs/usage/disk.ru.md
+++ b/docs/usage/disk.ru.md
@@ -267,7 +267,7 @@ OSD отключены fsync-и.
 ```
 --object_size 128k       Размер блока хранилища
 --bitmap_granularity 4k  Гранулярность битовых карт
---journal_size 32M       Размер журнала
+--journal_size 16M       Размер журнала
 --data_csum_type none    Задать тип контрольных сумм (crc32c или none)
 --csum_block_size 4k     Задать размер блока расчёта контрольных сумм
 --device_block_size 4k   Размер блока устройства
--- a/src/blockstore_stable.cpp
+++ b/src/blockstore_stable.cpp
@@ -307,49 +307,35 @@ int blockstore_impl_t::dequeue_stable(blockstore_op_t *op)
                return STAB_SPLIT_DONE;
            }
        }
+        else if (IS_IN_FLIGHT(dirty_it->second.state))
+        {
+            // Object write is still in progress. Wait until the write request completes
+            return STAB_SPLIT_WAIT;
+        }
+        else if (!IS_SYNCED(dirty_it->second.state))
+        {
+            // Object not synced yet - sync it
+            // In previous versions we returned EBUSY here and required
+            // the caller (OSD) to issue a global sync first. But a global sync
+            // waits for all writes in the queue including inflight writes. And
+            // inflight writes may themselves be blocked by unstable writes being
+            // still present in the journal and not flushed away from it.
+            // So we must sync specific objects here.
+            //
+            // Even more, we have to process "stabilize" request in parts. That is,
+            // we must stabilize all objects which are already synced. Otherwise
+            // they may block objects which are NOT synced yet.
+            return STAB_SPLIT_SYNC;
+        }
        else if (IS_STABLE(dirty_it->second.state))
        {
            // Already stable
            return STAB_SPLIT_DONE;
        }
-        while (true)
+        else
        {
-            if (IS_IN_FLIGHT(dirty_it->second.state))
+            return STAB_SPLIT_TODO;
-            {
-                // Object write is still in progress. Wait until the write request completes
-                return STAB_SPLIT_WAIT;
-            }
-            else if (!IS_SYNCED(dirty_it->second.state))
-            {
-                // Object not synced yet - sync it
-                // In previous versions we returned EBUSY here and required
-                // the caller (OSD) to issue a global sync first. But a global sync
-                // waits for all writes in the queue including inflight writes. And
-                // inflight writes may themselves be blocked by unstable writes being
-                // still present in the journal and not flushed away from it.
-                // So we must sync specific objects here.
-                //
-                // Even more, we have to process "stabilize" request in parts. That is,
-                // we must stabilize all objects which are already synced. Otherwise
-                // they may block objects which are NOT synced yet.
-                return STAB_SPLIT_SYNC;
-            }
-            else if (IS_STABLE(dirty_it->second.state))
-            {
-                break;
-            }
-            // Check previous versions too
-            if (dirty_it == dirty_db.begin())
-            {
-                break;
-            }
-            dirty_it--;
-            if (dirty_it->first.oid != ov.oid)
-            {
-                break;
-            }
        }
-        return STAB_SPLIT_TODO;
    });
    if (r != 1)
    {
--- a/src/blockstore_sync.cpp
+++ b/src/blockstore_sync.cpp
@@ -76,6 +76,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
        // 2nd step: Data device is synced, prepare & write journal entries
        // Check space in the journal and journal memory buffers
        blockstore_journal_check_t space_check(this);
+        auto reservation = (unstable_writes.size()+unstable_unsynced+PRIV(op)->sync_big_writes.size())*journal.block_size;
        if (dsk.csum_block_size)
        {
            // More complex check because all journal entries have different lengths
@@ -85,14 +86,14 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
                left--;
                auto & dirty_entry = dirty_db.at(sbw);
                uint64_t dyn_size = dsk.dirty_dyn_size(dirty_entry.offset, dirty_entry.len);
-                if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, 0))
+                if (!space_check.check_available(op, 1, sizeof(journal_entry_big_write) + dyn_size, left ? 0 : reservation))
                {
                    return 0;
                }
            }
        }
        else if (!space_check.check_available(op, PRIV(op)->sync_big_writes.size(),
-            sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, 0))
+            sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, reservation))
        {
            return 0;
        }
--- a/src/disk_simple_offsets.cpp
+++ b/src/disk_simple_offsets.cpp
@@ -47,7 +47,7 @@ void disk_tool_simple_offsets(json11::Json cfg, bool json_output)
    if (!bitmap_granularity)
        bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
    if (!journal_size)
-        journal_size = 32*1024*1024;
+        journal_size = 16*1024*1024;
    if (!device_block_size)
        device_block_size = 4096;
    if (!data_csum_type)
--- a/src/disk_tool.cpp
+++ b/src/disk_tool.cpp
@@ -167,7 +167,7 @@ static const char *help_text =
    "  Calculate offsets for old simple&stupid (no superblock) OSD deployment. Options:\n"
    "    --object_size 128k       Set blockstore block size\n"
    "    --bitmap_granularity 4k  Set bitmap granularity\n"
-    "    --journal_size 32M       Set journal size\n"
+    "    --journal_size 16M       Set journal size\n"
    "    --data_csum_type none    Set data checksum type (crc32c or none)\n"
    "    --csum_block_size 4k     Set data checksum block size\n"
    "    --device_block_size 4k   Set device block size\n"
--- a/tests/test_heal.sh
+++ b/tests/test_heal.sh
@@ -33,7 +33,7 @@ kill_osds()
    for kill_osd in $(seq 2 $OSD_COUNT); do
        sleep 15
        # Wait for all PGs to clear has_degraded - all data will be at least in 2 copies
-        wait_condition 600 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
+        wait_condition 60 "$ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only |\
            jq -s -e '[ .[] | select(.state | contains(["'"'"active"'"'"])) | select(.state | contains(["'"'"has_degraded"'"'"]) | not) ] | length == '$PG_COUNT"
        echo Killing OSD $kill_osd and starting OSD $((kill_osd-1))
        p=OSD${kill_osd}_PID