forked from vitalif/vitastor
Skip offline PGs during state reporting when the state is already deleted or taken over by another OSD
This fixes OSDs being unable to report PG states in rare conditions.
hier-failure-domains
parent
6f4dc16c59
commit
6442010f93
|
@ -812,11 +812,21 @@ void osd_t::report_pg_states()
|
|||
pg_it->second.cur_state != 0)
|
||||
{
|
||||
pg_state_exists = true;
|
||||
if (pg.state == PG_OFFLINE && pg_it->second.cur_primary != this->osd_num)
|
||||
{
|
||||
// Nothing to check or report, PG is already taken over by another OSD
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!pg_state_exists)
|
||||
{
|
||||
if (pg.state == PG_OFFLINE)
|
||||
{
|
||||
// Nothing to check or report, PG is already stopped
|
||||
continue;
|
||||
}
|
||||
// Check that the PG key does not exist
|
||||
// Failed check indicates an unsuccessful PG lock attempt in this case
|
||||
checks.push_back(json11::Json::object {
|
||||
|
@ -901,6 +911,15 @@ void osd_t::report_pg_states()
|
|||
{
|
||||
etcd_reporting_pg_state = false;
|
||||
if (!data["succeeded"].bool_value())
|
||||
{
|
||||
std::string rpgnames = "";
|
||||
for (auto pp: reporting_pgs)
|
||||
{
|
||||
rpgnames += (rpgnames.size() ? ", " : "")+std::to_string(pp.pool_pg_num.pool_id)+"/"+std::to_string(pp.pool_pg_num.pg_num);
|
||||
}
|
||||
printf("Error reporting PG %s states, will repeat the attempt: %s\n", rpgnames.c_str(), err.c_str());
|
||||
}
|
||||
if (!data["succeeded"].bool_value())
|
||||
{
|
||||
// One of PG state updates failed, put dirty flags back
|
||||
for (auto pp: reporting_pgs)
|
||||
|
|
Loading…
Reference in New Issue