Allow scrub to fix corrupted object states
parent
4bfd994341
commit
6ca20aa194
|
@ -73,7 +73,10 @@ Input:
|
||||||
write request is copied into the metadata area bitwise and stored there.
|
write request is copied into the metadata area bitwise and stored there.
|
||||||
|
|
||||||
Output:
|
Output:
|
||||||
- retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC)
|
- retval = number of bytes actually read/written or negative error number
|
||||||
|
-EINVAL = invalid input parameters
|
||||||
|
-ENOENT = requested object/version does not exist for reads
|
||||||
|
-ENOSPC = no space left in the store for writes
|
||||||
- version = the version actually read or written
|
- version = the version actually read or written
|
||||||
|
|
||||||
## BS_OP_DELETE
|
## BS_OP_DELETE
|
||||||
|
|
|
@ -124,10 +124,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
bool dirty_found = (dirty_it != dirty_db.end() && dirty_it->first.oid == read_op->oid);
|
bool dirty_found = (dirty_it != dirty_db.end() && dirty_it->first.oid == read_op->oid);
|
||||||
if (!clean_found && !dirty_found)
|
if (!clean_found && !dirty_found)
|
||||||
{
|
{
|
||||||
// region is not allocated - return zeroes
|
|
||||||
memset(read_op->buf, 0, read_op->len);
|
|
||||||
read_op->version = 0;
|
read_op->version = 0;
|
||||||
read_op->retval = read_op->len;
|
read_op->retval = -ENOENT;
|
||||||
FINISH_OP(read_op);
|
FINISH_OP(read_op);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
@ -149,8 +147,10 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
if (IS_DELETE(dirty.state))
|
if (IS_DELETE(dirty.state))
|
||||||
{
|
{
|
||||||
assert(!result_version);
|
assert(!result_version);
|
||||||
clean_found = false;
|
read_op->version = 0;
|
||||||
break;
|
read_op->retval = -ENOENT;
|
||||||
|
FINISH_OP(read_op);
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
if (!result_version)
|
if (!result_version)
|
||||||
{
|
{
|
||||||
|
@ -238,12 +238,19 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (fulfilled < read_op->len)
|
if (!result_version)
|
||||||
{
|
{
|
||||||
// fill remaining parts with zeroes
|
// May happen if there are entries in dirty_db but all of them are !version_ok
|
||||||
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
|
read_op->version = 0;
|
||||||
|
read_op->retval = -ENOENT;
|
||||||
|
FINISH_OP(read_op);
|
||||||
|
return 2;
|
||||||
}
|
}
|
||||||
|
if (fulfilled < read_op->len)
|
||||||
|
{
|
||||||
|
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
|
||||||
assert(fulfilled == read_op->len);
|
assert(fulfilled == read_op->len);
|
||||||
|
}
|
||||||
read_op->version = result_version;
|
read_op->version = result_version;
|
||||||
if (!PRIV(read_op)->pending_ops)
|
if (!PRIV(read_op)->pending_ops)
|
||||||
{
|
{
|
||||||
|
|
|
@ -329,21 +329,40 @@ pg_osd_set_state_t *osd_t::mark_object_corrupted(pg_t & pg, object_id oid, pg_os
|
||||||
}
|
}
|
||||||
// Mark object chunk(s) as corrupted
|
// Mark object chunk(s) as corrupted
|
||||||
int changes = 0;
|
int changes = 0;
|
||||||
for (auto & chunk: corrupted_set)
|
for (auto chunk_it = corrupted_set.begin(); chunk_it != corrupted_set.end(); )
|
||||||
{
|
{
|
||||||
bool corrupted = stripes[chunk.role].osd_num == chunk.osd_num && stripes[chunk.role].read_error;
|
auto & chunk = *chunk_it;
|
||||||
if (corrupted)
|
if (stripes[chunk.role].osd_num == chunk.osd_num)
|
||||||
|
{
|
||||||
|
if (stripes[chunk.role].not_exists)
|
||||||
{
|
{
|
||||||
if (!(chunk.loc_bad & LOC_CORRUPTED))
|
|
||||||
changes++;
|
changes++;
|
||||||
chunk.loc_bad |= LOC_CORRUPTED;
|
corrupted_set.erase(chunk_it, chunk_it+1);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
else if (inconsistent && !(chunk.loc_bad & LOC_OUTDATED))
|
if (stripes[chunk.role].read_error && chunk.loc_bad != LOC_CORRUPTED)
|
||||||
|
{
|
||||||
|
changes++;
|
||||||
|
chunk.loc_bad = LOC_CORRUPTED;
|
||||||
|
}
|
||||||
|
else if (stripes[chunk.role].read_end > 0 && !stripes[chunk.role].missing &&
|
||||||
|
(chunk.loc_bad & LOC_CORRUPTED))
|
||||||
|
{
|
||||||
|
changes++;
|
||||||
|
chunk.loc_bad &= ~LOC_CORRUPTED;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (inconsistent && !chunk.loc_bad)
|
||||||
{
|
{
|
||||||
if (!(chunk.loc_bad & LOC_INCONSISTENT))
|
|
||||||
changes++;
|
changes++;
|
||||||
chunk.loc_bad |= LOC_INCONSISTENT;
|
chunk.loc_bad |= LOC_INCONSISTENT;
|
||||||
}
|
}
|
||||||
|
else if (!inconsistent && (chunk.loc_bad & LOC_INCONSISTENT))
|
||||||
|
{
|
||||||
|
changes++;
|
||||||
|
chunk.loc_bad &= ~LOC_INCONSISTENT;
|
||||||
|
}
|
||||||
|
chunk_it++;
|
||||||
}
|
}
|
||||||
if (!changes)
|
if (!changes)
|
||||||
{
|
{
|
||||||
|
|
|
@ -344,6 +344,13 @@ void osd_t::handle_primary_subop(osd_op_t *subop, osd_op_t *cur_op)
|
||||||
else
|
else
|
||||||
expected = 0;
|
expected = 0;
|
||||||
osd_primary_op_data_t *op_data = cur_op->op_data;
|
osd_primary_op_data_t *op_data = cur_op->op_data;
|
||||||
|
if (retval == -ENOENT && opcode == OSD_OP_SEC_READ)
|
||||||
|
{
|
||||||
|
// ENOENT is not an error for almost all reads, except scrub
|
||||||
|
retval = expected;
|
||||||
|
memset(((osd_rmw_stripe_t*)subop->rmw_buf)->read_buf, 0, expected);
|
||||||
|
((osd_rmw_stripe_t*)subop->rmw_buf)->not_exists = true;
|
||||||
|
}
|
||||||
if (retval == expected && (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE))
|
if (retval == expected && (opcode == OSD_OP_SEC_READ || opcode == OSD_OP_SEC_WRITE || opcode == OSD_OP_SEC_WRITE_STABLE))
|
||||||
{
|
{
|
||||||
uint64_t version = subop->reply.sec_rw.version;
|
uint64_t version = subop->reply.sec_rw.version;
|
||||||
|
|
|
@ -1131,12 +1131,14 @@ std::vector<int> ec_find_good(osd_rmw_stripe_t *stripes, int pg_size, int pg_min
|
||||||
uint32_t chunk_size, uint32_t bitmap_size, int max_bruteforce)
|
uint32_t chunk_size, uint32_t bitmap_size, int max_bruteforce)
|
||||||
{
|
{
|
||||||
std::vector<int> found_valid;
|
std::vector<int> found_valid;
|
||||||
int cur_live[pg_size], live_count = 0;
|
int cur_live[pg_size], live_count = 0, exists_count = 0;
|
||||||
osd_num_t fake_osd_set[pg_size];
|
osd_num_t fake_osd_set[pg_size];
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < pg_size; role++)
|
||||||
{
|
{
|
||||||
if (!stripes[role].missing)
|
if (!stripes[role].missing)
|
||||||
{
|
{
|
||||||
|
if (!stripes[role].not_exists)
|
||||||
|
exists_count++;
|
||||||
cur_live[live_count++] = role;
|
cur_live[live_count++] = role;
|
||||||
fake_osd_set[role] = role+1;
|
fake_osd_set[role] = role+1;
|
||||||
}
|
}
|
||||||
|
@ -1145,6 +1147,14 @@ std::vector<int> ec_find_good(osd_rmw_stripe_t *stripes, int pg_size, int pg_min
|
||||||
{
|
{
|
||||||
return std::vector<int>();
|
return std::vector<int>();
|
||||||
}
|
}
|
||||||
|
if (exists_count <= pg_minsize)
|
||||||
|
{
|
||||||
|
// Special case: user manually deleted some chunks
|
||||||
|
for (int role = 0; role < pg_size; role++)
|
||||||
|
if (!stripes[role].missing && !stripes[role].not_exists)
|
||||||
|
found_valid.push_back(role);
|
||||||
|
return found_valid;
|
||||||
|
}
|
||||||
// Try to locate errors using brute force if there aren't too many combinations
|
// Try to locate errors using brute force if there aren't too many combinations
|
||||||
osd_rmw_stripe_t brute_stripes[pg_size];
|
osd_rmw_stripe_t brute_stripes[pg_size];
|
||||||
int out_count = live_count-pg_minsize;
|
int out_count = live_count-pg_minsize;
|
||||||
|
|
|
@ -30,6 +30,7 @@ struct osd_rmw_stripe_t
|
||||||
osd_num_t osd_num;
|
osd_num_t osd_num;
|
||||||
bool missing: 1;
|
bool missing: 1;
|
||||||
bool read_error: 1;
|
bool read_error: 1;
|
||||||
|
bool not_exists: 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Here pg_minsize is the number of data chunks, not the minimum number of alive OSDs for the PG to operate
|
// Here pg_minsize is the number of data chunks, not the minimum number of alive OSDs for the PG to operate
|
||||||
|
|
|
@ -455,7 +455,8 @@ resume_2:
|
||||||
for (int role = 0; role < op_data->pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
{
|
{
|
||||||
eq_to[role] = -1;
|
eq_to[role] = -1;
|
||||||
if (op_data->stripes[role].read_end != 0 && !op_data->stripes[role].missing)
|
if (op_data->stripes[role].read_end != 0 && !op_data->stripes[role].missing &&
|
||||||
|
!op_data->stripes[role].not_exists)
|
||||||
{
|
{
|
||||||
total++;
|
total++;
|
||||||
eq_to[role] = role;
|
eq_to[role] = role;
|
||||||
|
@ -560,7 +561,9 @@ resume_2:
|
||||||
}
|
}
|
||||||
for (int role = 0; role < op_data->pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
{
|
{
|
||||||
if (op_data->stripes[role].osd_num != 0 && op_data->stripes[role].read_error || inconsistent)
|
if (op_data->stripes[role].osd_num != 0 &&
|
||||||
|
(op_data->stripes[role].read_error || op_data->stripes[role].not_exists) ||
|
||||||
|
inconsistent)
|
||||||
{
|
{
|
||||||
// Got at least 1 read error or mismatch, mark the object as corrupted
|
// Got at least 1 read error or mismatch, mark the object as corrupted
|
||||||
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
|
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
|
||||||
|
|
Loading…
Reference in New Issue