Fix possible "assertion failed: pg.inflight >= 0" error during PG stop

Vitaliy Filippov 2021-03-06 14:42:25 +03:00
parent ab21a1908b
commit 21e7686037
3 changed files with 14 additions and 12 deletions

View File

@ -473,7 +473,7 @@ bool osd_t::stop_pg(pg_t & pg)
delete pg.peering_state;
pg.peering_state = NULL;
}
if (pg.state & PG_STOPPING)
if (pg.state & (PG_STOPPING | PG_OFFLINE))
{
return false;
}

View File

@ -391,19 +391,19 @@ continue_others:
// Remove version override
pg.ver_override.erase(op_data->oid);
object_id oid = op_data->oid;
// Remove the operation from queue before calling finish_op so it doesn't see the completed operation in queue
auto next_it = pg.write_queue.find(oid);
if (next_it != pg.write_queue.end() && next_it->second == cur_op)
{
pg.write_queue.erase(next_it++);
}
// finish_op would invalidate next_it if it cleared pg.write_queue, but it doesn't do that :)
finish_op(cur_op, cur_op->reply.hdr.retval);
// Continue other write operations to the same object
auto next_it = pg.write_queue.find(oid);
auto this_it = next_it;
if (this_it != pg.write_queue.end() && this_it->second == cur_op)
if (next_it != pg.write_queue.end() && next_it->first == oid)
{
next_it++;
pg.write_queue.erase(this_it);
if (next_it != pg.write_queue.end() && next_it->first == oid)
{
osd_op_t *next_op = next_it->second;
continue_primary_write(next_op);
}
osd_op_t *next_op = next_it->second;
continue_primary_write(next_op);
}
}

View File

@ -43,7 +43,9 @@ void osd_t::finish_op(osd_op_t *cur_op, int retval)
auto & pg = pgs.at({ .pool_id = INODE_POOL(cur_op->op_data->oid.inode), .pg_num = cur_op->op_data->pg_num });
pg.inflight--;
assert(pg.inflight >= 0);
if ((pg.state & PG_STOPPING) && pg.inflight == 0 && !pg.flush_batch)
if ((pg.state & PG_STOPPING) && pg.inflight == 0 && !pg.flush_batch &&
// We must either forget all PG's unstable writes or wait for it to become clean
dirty_pgs.find({ .pool_id = pg.pool_id, .pg_num = pg.pg_num }) == dirty_pgs.end())
{
finish_stop_pg(pg);
}