Preload own PG history updates to fix PG state loop possibly applying the old metadata version

epoch-deletions
Vitaliy Filippov 2023-04-23 01:50:30 +03:00
parent 024c8658f6
commit 46462da45e
2 changed files with 9 additions and 6 deletions

View File

@@ -483,6 +483,10 @@ void osd_t::report_pg_state(pg_t & pg)
pg.all_peers = pg.target_set; pg.all_peers = pg.target_set;
std::sort(pg.all_peers.begin(), pg.all_peers.end()); std::sort(pg.all_peers.begin(), pg.all_peers.end());
pg.cur_peers = pg.target_set; pg.cur_peers = pg.target_set;
// Change pg_config at the same time, otherwise our PG reconciling loop may try to apply the old metadata
auto & pg_cfg = st_cli.pool_config[pg.pool_id].pg_config[pg.pg_num];
pg_cfg.target_history = pg.target_history;
pg_cfg.all_peers = pg.all_peers;
} }
else if (pg.state == (PG_ACTIVE|PG_LEFT_ON_DEAD)) else if (pg.state == (PG_ACTIVE|PG_LEFT_ON_DEAD))
{ {
@@ -522,6 +526,9 @@ void osd_t::report_pg_state(pg_t & pg)
pg.cur_peers.push_back(pg_osd); pg.cur_peers.push_back(pg_osd);
} }
} }
auto & pg_cfg = st_cli.pool_config[pg.pool_id].pg_config[pg.pg_num];
pg_cfg.target_history = pg.target_history;
pg_cfg.all_peers = pg.all_peers;
} }
if (pg.state == PG_OFFLINE && !this->pg_config_applied) if (pg.state == PG_OFFLINE && !this->pg_config_applied)
{ {

View File

@@ -28,9 +28,7 @@ if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION" format_error "FAILED: OSD NOT ADDED INTO DISTRIBUTION"
fi fi
-if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"]) ] | length) == '$PG_COUNT''); then
-format_error "FAILED: $PG_COUNT PGS NOT ACTIVE"
-fi
+wait_finish_rebalance 10
sleep 1 sleep 1
kill -9 $OSD4_PID kill -9 $OSD4_PID
@@ -52,8 +50,6 @@ if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only |\
format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION" format_error "FAILED: OSD NOT REMOVED FROM DISTRIBUTION"
fi fi
-if ! ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | jq -s -e '([ .[] | select(.state == ["active"] or .state == ["active", "left_on_dead"]) ] | length) == '$PG_COUNT''); then
-format_error "FAILED: $PG_COUNT PGS NOT ACTIVE"
-fi
+wait_finish_rebalance 10
format_green OK format_green OK