Fix another possible reenterability issue in cluster_client
Test / buildenv (push) Successful in 12s Details
Test / build (push) Successful in 2m57s Details
Test / test_cas (push) Successful in 20s Details
Test / make_test (push) Successful in 46s Details
Test / test_change_pg_size (push) Successful in 10s Details
Test / test_create_nomaxid (push) Successful in 10s Details
Test / test_change_pg_count (push) Successful in 50s Details
Test / test_change_pg_count_ec (push) Successful in 1m4s Details
Test / test_etcd_fail (push) Successful in 1m31s Details
Test / test_add_osd (push) Successful in 2m41s Details
Test / test_interrupted_rebalance (push) Successful in 1m33s Details
Test / test_failure_domain (push) Successful in 51s Details
Test / test_interrupted_rebalance_imm (push) Successful in 2m21s Details
Test / test_snapshot (push) Successful in 50s Details
Test / test_interrupted_rebalance_ec (push) Successful in 2m1s Details
Test / test_minsize_1 (push) Successful in 16s Details
Test / test_interrupted_rebalance_ec_imm (push) Successful in 2m9s Details
Test / test_snapshot_ec (push) Successful in 42s Details
Test / test_rm (push) Successful in 15s Details
Test / test_snapshot_down (push) Failing after 27s Details
Test / test_move_reappear (push) Failing after 50s Details
Test / test_snapshot_down_ec (push) Failing after 26s Details
Test / test_splitbrain (push) Successful in 23s Details
Test / test_snapshot_chain (push) Successful in 2m26s Details
Test / test_snapshot_chain_ec (push) Successful in 2m58s Details
Test / test_rebalance_verify (push) Successful in 3m26s Details
Test / test_rebalance_verify_imm (push) Successful in 3m27s Details
Test / test_write (push) Successful in 42s Details
Test / test_write_xor (push) Successful in 51s Details
Test / test_write_no_same (push) Successful in 16s Details
Test / test_rebalance_verify_ec (push) Successful in 4m56s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 5m11s Details
Test / test_heal_pg_size_2 (push) Successful in 4m18s Details
Test / test_heal_ec (push) Successful in 5m5s Details
Test / test_heal_csum_32k_dmj (push) Successful in 5m7s Details
Test / test_heal_csum_32k_dj (push) Successful in 6m14s Details
Test / test_heal_csum_32k (push) Successful in 6m54s Details
Test / test_heal_csum_4k_dmj (push) Successful in 6m55s Details
Test / test_scrub (push) Successful in 1m23s Details
Test / test_scrub_zero_osd_2 (push) Successful in 1m8s Details
Test / test_scrub_xor (push) Successful in 1m0s Details
Test / test_heal_csum_4k_dj (push) Successful in 7m15s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 1m7s Details
Test / test_scrub_pg_size_3 (push) Successful in 1m40s Details
Test / test_scrub_ec (push) Successful in 42s Details
Test / test_heal_csum_4k (push) Successful in 6m17s Details

Non-reproducible in QEMU/FIO, only caught during K/V DB debugging
hotfix-1.1.0
Vitaliy Filippov 2023-10-08 01:31:58 +03:00
parent befca06f18
commit f245b56176
1 changed files with 33 additions and 31 deletions

View File

@ -169,46 +169,48 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *next, int inc)
{
if (opcode == OSD_OP_WRITE)
if (opcode != OSD_OP_WRITE && opcode != OSD_OP_SYNC)
{
while (next)
{
auto n2 = next->next;
if (next->opcode == OSD_OP_SYNC && (!(flags & OP_IMMEDIATE_COMMIT) || enable_writeback) ||
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER))
{
next->prev_wait += inc;
assert(next->prev_wait >= 0);
if (!next->prev_wait)
{
if (next->opcode == OSD_OP_SYNC)
continue_sync(next);
else
continue_rw(next);
}
}
next = n2;
}
return;
}
else if (opcode == OSD_OP_SYNC)
cluster_op_t *bh_ops_local[32], **bh_ops = bh_ops_local;
int bh_op_count = 0, bh_op_max = 32;
while (next)
{
while (next)
auto n2 = next->next;
if (opcode == OSD_OP_WRITE
? (next->opcode == OSD_OP_SYNC && (!(flags & OP_IMMEDIATE_COMMIT) || enable_writeback) ||
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER))
: (next->opcode == OSD_OP_SYNC || next->opcode == OSD_OP_WRITE))
{
auto n2 = next->next;
if (next->opcode == OSD_OP_SYNC || next->opcode == OSD_OP_WRITE)
next->prev_wait += inc;
assert(next->prev_wait >= 0);
if (!next->prev_wait)
{
next->prev_wait += inc;
assert(next->prev_wait >= 0);
if (!next->prev_wait)
// Kind of std::vector with local "small vector optimisation"
if (bh_op_count >= bh_op_max)
{
if (next->opcode == OSD_OP_SYNC)
continue_sync(next);
else
continue_rw(next);
bh_op_max *= 2;
cluster_op_t **n = (cluster_op_t**)malloc_or_die(sizeof(cluster_op_t*) * bh_op_max);
memcpy(n, bh_ops, sizeof(cluster_op_t*) * bh_op_count);
bh_ops = n;
}
bh_ops[bh_op_count++] = next;
}
next = n2;
}
next = n2;
}
for (int i = 0; i < bh_op_count; i++)
{
cluster_op_t *next = bh_ops[i];
if (next->opcode == OSD_OP_SYNC)
continue_sync(next);
else
continue_rw(next);
}
if (bh_ops != bh_ops_local)
{
free(bh_ops);
}
}