Compare commits

..

2 Commits

Author SHA1 Message Date
Vitaliy Filippov 7b37ba921d Pause pool rebalance when monitor detects that it can lead to any OSD becoming full
Test / test_root_node (push) Successful in 10s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m39s Details
Test / test_write_no_same (push) Successful in 9s Details
Test / test_switch_primary (push) Successful in 32s Details
Test / test_write (push) Successful in 32s Details
Test / test_write_xor (push) Successful in 37s Details
Test / test_heal_pg_size_2 (push) Successful in 2m17s Details
Test / test_heal_ec (push) Successful in 2m18s Details
Test / test_heal_antietcd (push) Successful in 2m17s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m20s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m27s Details
Test / test_heal_csum_32k (push) Successful in 2m17s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m19s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m19s Details
Test / test_resize_auto (push) Successful in 9s Details
Test / test_resize (push) Successful in 14s Details
Test / test_osd_tags (push) Successful in 8s Details
Test / test_snapshot_pool2 (push) Failing after 11s Details
Test / test_enospc (push) Successful in 11s Details
Test / test_enospc_imm (push) Successful in 11s Details
Test / test_enospc_xor (push) Successful in 13s Details
Test / test_enospc_imm_xor (push) Successful in 14s Details
Test / test_scrub (push) Successful in 14s Details
Test / test_scrub_zero_osd_2 (push) Successful in 15s Details
Test / test_scrub_xor (push) Successful in 15s Details
Test / test_scrub_pg_size_3 (push) Successful in 16s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 16s Details
Test / test_scrub_ec (push) Successful in 14s Details
Test / test_nfs (push) Successful in 12s Details
Test / test_heal_csum_4k (push) Successful in 2m19s Details
2024-11-22 01:01:07 +03:00
Vitaliy Filippov 262c581400 Fix create-pool for the case of hosts split into sub-nodes
Test / test_rebalance_verify_ec (push) Successful in 1m38s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m40s Details
Test / test_switch_primary (push) Successful in 23s Details
Test / test_write_no_same (push) Successful in 9s Details
Test / test_write (push) Successful in 31s Details
Test / test_write_xor (push) Successful in 35s Details
Test / test_heal_pg_size_2 (push) Successful in 2m18s Details
Test / test_heal_ec (push) Successful in 2m16s Details
Test / test_heal_antietcd (push) Successful in 2m17s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m19s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m19s Details
Test / test_heal_csum_32k (push) Successful in 2m20s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m19s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m17s Details
Test / test_resize_auto (push) Successful in 9s Details
Test / test_resize (push) Successful in 16s Details
Test / test_snapshot_pool2 (push) Failing after 11s Details
Test / test_osd_tags (push) Successful in 7s Details
Test / test_enospc (push) Successful in 12s Details
Test / test_enospc_imm (push) Successful in 11s Details
Test / test_enospc_xor (push) Successful in 14s Details
Test / test_enospc_imm_xor (push) Successful in 13s Details
Test / test_scrub (push) Successful in 13s Details
Test / test_scrub_zero_osd_2 (push) Successful in 13s Details
Test / test_scrub_xor (push) Successful in 14s Details
Test / test_scrub_pg_size_3 (push) Successful in 17s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 17s Details
Test / test_scrub_ec (push) Successful in 14s Details
Test / test_nfs (push) Successful in 11s Details
Test / test_heal_csum_4k (push) Successful in 2m14s Details
2024-11-22 01:01:07 +03:00
4 changed files with 103 additions and 40 deletions

View File

@ -288,6 +288,24 @@ jobs:
echo ""
done
# CI job for tests/test_create_halfhost.sh: runs after the `build` job,
# inside the test image tagged with the current commit SHA.
test_create_halfhost:
runs-on: ubuntu-latest
needs: build
container: ${{env.TEST_IMAGE}}:${{github.sha}}
steps:
- name: Run test
id: test
# The test step is limited to 3 minutes
timeout-minutes: 3
run: /root/vitastor/tests/test_create_halfhost.sh
# Only when the test step failed: dump every *.log and *.txt file from testdata
- name: Print logs
if: always() && steps.test.outcome == 'failure'
run: |
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
echo "-------- $i --------"
cat $i
echo ""
done
test_failure_domain:
runs-on: ubuntu-latest
needs: build

View File

@ -90,13 +90,19 @@ resume_1:
// If not forced, check that we have enough osds for pg_size
if (!force)
{
// Get node_placement configuration from etcd
// Get node_placement configuration from etcd and OSD stats
parent->etcd_txn(json11::Json::object {
{ "success", json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/config/node_placement") },
} }
} },
},
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/osd/stats/") },
{ "range_end", base64_encode(parent->cli->st_cli.etcd_prefix+"/osd/stats0") },
} },
},
} },
});
@ -112,10 +118,21 @@ resume_2:
return;
}
// Get state_node_tree based on node_placement and osd peer states
// Get state_node_tree based on node_placement and osd stats
{
auto kv = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][0]["response_range"]["kvs"][0]);
state_node_tree = get_state_node_tree(kv.value.object_items());
auto node_placement_kv = parent->cli->st_cli.parse_etcd_kv(parent->etcd_result["responses"][0]["response_range"]["kvs"][0]);
std::map<osd_num_t, json11::Json> osd_stats;
timespec tv_now;
clock_gettime(CLOCK_REALTIME, &tv_now);
uint64_t osd_out_time = parent->cli->config["osd_out_time"].uint64_value();
if (!osd_out_time)
osd_out_time = 600;
parent->iterate_kvs_1(parent->etcd_result["responses"][1]["response_range"]["kvs"], "/osd/stats/", [&](uint64_t cur_osd, json11::Json value)
{
if (value["time"].uint64_value()+osd_out_time >= tv_now.tv_sec)
osd_stats[cur_osd] = value;
});
state_node_tree = get_state_node_tree(node_placement_kv.value.object_items(), osd_stats);
}
// Skip tag checks, if pool has none
@ -358,56 +375,52 @@ resume_8:
// Returns a JSON object of form {"nodes": {...}, "osds": [...]} that
// contains: all nodes (osds, hosts, ...) based on node_placement config
// and current peer state, and a list of active peer osds.
json11::Json get_state_node_tree(json11::Json::object node_placement)
// and current osd stats.
json11::Json get_state_node_tree(json11::Json::object node_placement, std::map<osd_num_t, json11::Json> & osd_stats)
{
// Erase non-peer osd nodes from node_placement
// Erase non-existing osd nodes from node_placement
for (auto np_it = node_placement.begin(); np_it != node_placement.end();)
{
// Numeric nodes are osds
osd_num_t osd_num = stoull_full(np_it->first);
// If node is osd and it is not in peer states, erase it
if (osd_num > 0 &&
parent->cli->st_cli.peer_states.find(osd_num) == parent->cli->st_cli.peer_states.end())
{
// If node is osd and its stats do not exist, erase it
if (osd_num > 0 && osd_stats.find(osd_num) == osd_stats.end())
node_placement.erase(np_it++);
}
else
np_it++;
}
// List of peer osds
std::vector<std::string> peer_osds;
// List of osds
std::vector<std::string> existing_osds;
// Record peer osds and add missing osds/hosts to np
for (auto & ps: parent->cli->st_cli.peer_states)
// Record osds and add missing osds/hosts to np
for (auto & ps: osd_stats)
{
std::string osd_num = std::to_string(ps.first);
// Record peer osd
peer_osds.push_back(osd_num);
// Record osd
existing_osds.push_back(osd_num);
// Add osd, if necessary
// Add host if necessary
std::string osd_host = ps.second["host"].as_string();
if (node_placement.find(osd_host) == node_placement.end())
{
node_placement[osd_host] = json11::Json::object {
{ "level", "host" }
};
}
// Add osd if necessary
if (node_placement.find(osd_num) == node_placement.end())
{
std::string osd_host = ps.second["host"].as_string();
// Add host, if necessary
if (node_placement.find(osd_host) == node_placement.end())
{
node_placement[osd_host] = json11::Json::object {
{ "level", "host" }
};
}
node_placement[osd_num] = json11::Json::object {
{ "parent", osd_host }
};
}
}
return json11::Json::object { { "osds", peer_osds }, { "nodes", node_placement } };
return json11::Json::object { { "osds", existing_osds }, { "nodes", node_placement } };
}
// Returns new state_node_tree based on given state_node_tree with osds
@ -592,13 +605,10 @@ resume_8:
// If parent node given, ...
else if (parent_node != "")
{
// ... look for children nodes of this parent
// ... look for child nodes of this parent
for (auto & sn: node_tree)
{
auto & props = sn.second.object_items();
auto parent_prop = props.find("parent");
if (parent_prop != props.end() && (parent_prop->second.as_string() == parent_node))
if (sn.second["parent"] == parent_node)
{
nodes.push_back(sn.first);
@ -615,10 +625,7 @@ resume_8:
// ... look for all level nodes
for (auto & sn: node_tree)
{
auto & props = sn.second.object_items();
auto level_prop = props.find("level");
if (level_prop != props.end() && (level_prop->second.as_string() == level))
if (sn.second["level"] == level)
{
nodes.push_back(sn.first);
}

View File

@ -19,9 +19,12 @@ ANTIETCD=1 ./test_etcd_fail.sh
./test_interrupted_rebalance.sh
IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
SCHEME=ec ./test_interrupted_rebalance.sh
SCHEME=ec IMMEDIATE_COMMIT=1 ./test_interrupted_rebalance.sh
./test_create_halfhost.sh
./test_failure_domain.sh
./test_snapshot.sh

35
tests/test_create_halfhost.sh Executable file
View File

@ -0,0 +1,35 @@
#!/bin/bash -ex
# Regression test for create-pool when hosts are split into sub-nodes:
# each host holds two intermediate "half" nodes and every OSD hangs off its own half.
# The pool creation at the end must succeed; because of -e any failing command aborts the test.
# NOTE(review): ETCDCTL, ETCD_URL, MON_PARAMS, wait_etcd and format_green are not defined
# in this script - presumably they come from common.sh; confirm there.
. `dirname $0`/common.sh
# Start the monitor in the background; its stdout/stderr are appended to testdata/mon.log
node mon/mon-main.js $MON_PARAMS --etcd_address $ETCD_URL --etcd_prefix "/vitastor" >>./testdata/mon.log 2>&1 &
MON_PID=$!
wait_etcd
# Current UNIX timestamp, stored as the "time" field of every fake OSD stats record below
TIME=$(date '+%s')
# Declare a custom "half" placement level numbered 105, i.e. between "host" (100) and "osd" (110)
$ETCDCTL put /vitastor/config/global '{"placement_levels":{"dc":10,"host":100,"half":105,"osd":110}}'
# Placement tree: two "half" nodes under each of host1..host3 and one OSD (1..6) under each half.
# host1..host3 themselves are not declared here; they only appear as parents and in the OSD stats.
$ETCDCTL put /vitastor/config/node_placement '{
"h11":{"level":"half","parent":"host1"},
"h12":{"level":"half","parent":"host1"},
"h21":{"level":"half","parent":"host2"},
"h22":{"level":"half","parent":"host2"},
"h31":{"level":"half","parent":"host3"},
"h32":{"level":"half","parent":"host3"},
"1":{"parent":"h11"},
"2":{"parent":"h12"},
"3":{"parent":"h21"},
"4":{"parent":"h22"},
"5":{"parent":"h31"},
"6":{"parent":"h32"}
}'
# Fake stats records for OSDs 1..6: size 1073741824 (2^30; presumably bytes) each, two OSDs per host
$ETCDCTL put /vitastor/osd/stats/1 '{"host":"host1","size":1073741824,"time":"'$TIME'"}'
$ETCDCTL put /vitastor/osd/stats/2 '{"host":"host1","size":1073741824,"time":"'$TIME'"}'
$ETCDCTL put /vitastor/osd/stats/3 '{"host":"host2","size":1073741824,"time":"'$TIME'"}'
$ETCDCTL put /vitastor/osd/stats/4 '{"host":"host2","size":1073741824,"time":"'$TIME'"}'
$ETCDCTL put /vitastor/osd/stats/5 '{"host":"host3","size":1073741824,"time":"'$TIME'"}'
$ETCDCTL put /vitastor/osd/stats/6 '{"host":"host3","size":1073741824,"time":"'$TIME'"}'
# Print the resulting OSD tree
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL osd-tree
# Check that creating an EC 2+1 pool named "testpool" on this layout doesn't fail
build/src/cmd/vitastor-cli --etcd_address $ETCD_URL create-pool testpool --ec 2+1 -n 32
format_green OK