Fix EC just-bitmap reads (len=0) (fixes SCHEME=ec test_snapshot.sh)

mon-self-restart
Vitaliy Filippov 2023-05-07 14:00:02 +03:00
parent 44f86f1999
commit 5da1d8e1b5
5 changed files with 69 additions and 35 deletions

View File

@ -191,7 +191,7 @@ struct __attribute__((__packed__)) osd_op_rw_t
uint64_t inode; uint64_t inode;
// offset // offset
uint64_t offset; uint64_t offset;
// length // length. 0 means to read all bitmaps of the specified range, but no data.
uint32_t len; uint32_t len;
// flags (for future) // flags (for future)
uint32_t flags; uint32_t flags;

View File

@ -186,10 +186,22 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
cur_op->reply.rw.bitmap_len = 0; cur_op->reply.rw.bitmap_len = 0;
{ {
auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num }); auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num });
for (int role = 0; role < op_data->pg_data_size; role++) if (cur_op->req.rw.len == 0)
{ {
op_data->stripes[role].read_start = op_data->stripes[role].req_start; // len=0 => bitmap read
op_data->stripes[role].read_end = op_data->stripes[role].req_end; for (int role = 0; role < op_data->pg_data_size; role++)
{
op_data->stripes[role].read_start = 0;
op_data->stripes[role].read_end = UINT32_MAX;
}
}
else
{
for (int role = 0; role < op_data->pg_data_size; role++)
{
op_data->stripes[role].read_start = op_data->stripes[role].req_start;
op_data->stripes[role].read_end = op_data->stripes[role].req_end;
}
} }
// Determine version // Determine version
auto vo_it = pg.ver_override.find(op_data->oid); auto vo_it = pg.ver_override.find(op_data->oid);

View File

@ -151,6 +151,13 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
{ {
int stripe_num = rep ? 0 : role; int stripe_num = rep ? 0 : role;
osd_op_t *subop = op_data->subops + i; osd_op_t *subop = op_data->subops + i;
uint32_t subop_len = wr
? stripes[stripe_num].write_end - stripes[stripe_num].write_start
: stripes[stripe_num].read_end - stripes[stripe_num].read_start;
if (!wr && stripes[stripe_num].read_end == UINT32_MAX)
{
subop_len = 0;
}
if (role_osd_num == this->osd_num) if (role_osd_num == this->osd_num)
{ {
clock_gettime(CLOCK_REALTIME, &subop->tv_begin); clock_gettime(CLOCK_REALTIME, &subop->tv_begin);
@ -169,7 +176,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
}, },
.version = op_version, .version = op_version,
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start, .offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start, .len = subop_len,
.buf = wr ? stripes[stripe_num].write_buf : stripes[stripe_num].read_buf, .buf = wr ? stripes[stripe_num].write_buf : stripes[stripe_num].read_buf,
.bitmap = stripes[stripe_num].bmp_buf, .bitmap = stripes[stripe_num].bmp_buf,
}); });
@ -199,7 +206,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
}, },
.version = op_version, .version = op_version,
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start, .offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start, .len = subop_len,
.attr_len = wr ? clean_entry_bitmap_size : 0, .attr_len = wr ? clean_entry_bitmap_size : 0,
}; };
#ifdef OSD_DEBUG #ifdef OSD_DEBUG
@ -218,9 +225,9 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
} }
else else
{ {
if (stripes[stripe_num].read_end > stripes[stripe_num].read_start) if (subop_len > 0)
{ {
subop->iov.push_back(stripes[stripe_num].read_buf, stripes[stripe_num].read_end - stripes[stripe_num].read_start); subop->iov.push_back(stripes[stripe_num].read_buf, subop_len);
} }
} }
subop->callback = [cur_op, this](osd_op_t *subop) subop->callback = [cur_op, this](osd_op_t *subop)

View File

@ -28,7 +28,9 @@ static inline void extend_read(uint32_t start, uint32_t end, osd_rmw_stripe_t &
} }
else else
{ {
if (stripe.read_end < end) if (stripe.read_end < end && end != UINT32_MAX ||
// UINT32_MAX means that the stripe only needs the bitmap; end != 0 => it also needs data
stripe.read_end == UINT32_MAX && end != 0)
stripe.read_end = end; stripe.read_end = end;
if (stripe.read_start > start) if (stripe.read_start > start)
stripe.read_start = start; stripe.read_start = start;
@ -105,24 +107,30 @@ void reconstruct_stripes_xor(osd_rmw_stripe_t *stripes, int pg_size, uint32_t bi
} }
else if (prev >= 0) else if (prev >= 0)
{ {
assert(stripes[role].read_start >= stripes[prev].read_start && if (stripes[role].read_end != UINT32_MAX)
stripes[role].read_start >= stripes[other].read_start); {
memxor( assert(stripes[role].read_start >= stripes[prev].read_start &&
(uint8_t*)stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start), stripes[role].read_start >= stripes[other].read_start);
(uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), memxor(
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start (uint8_t*)stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start),
); (uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start),
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
);
}
memxor(stripes[prev].bmp_buf, stripes[other].bmp_buf, stripes[role].bmp_buf, bitmap_size); memxor(stripes[prev].bmp_buf, stripes[other].bmp_buf, stripes[role].bmp_buf, bitmap_size);
prev = -1; prev = -1;
} }
else else
{ {
assert(stripes[role].read_start >= stripes[other].read_start); if (stripes[role].read_end != UINT32_MAX)
memxor( {
stripes[role].read_buf, assert(stripes[role].read_start >= stripes[other].read_start);
(uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), memxor(
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start stripes[role].read_buf,
); (uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start),
stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start
);
}
memxor(stripes[role].bmp_buf, stripes[other].bmp_buf, stripes[role].bmp_buf, bitmap_size); memxor(stripes[role].bmp_buf, stripes[other].bmp_buf, stripes[role].bmp_buf, bitmap_size);
} }
} }
@ -356,20 +364,23 @@ void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsi
uint64_t read_start = 0, read_end = 0; uint64_t read_start = 0, read_end = 0;
auto recover_seq = [&]() auto recover_seq = [&]()
{ {
int orig = 0; if (read_end != UINT32_MAX)
for (int other = 0; other < pg_size && orig < pg_minsize; other++)
{ {
if (stripes[other].read_end != 0 && !stripes[other].missing) int orig = 0;
for (int other = 0; other < pg_size && orig < pg_minsize; other++)
{ {
assert(stripes[other].read_start <= read_start); if (stripes[other].read_end != 0 && !stripes[other].missing)
assert(stripes[other].read_end >= read_end); {
data_ptrs[orig++] = (uint8_t*)stripes[other].read_buf + (read_start - stripes[other].read_start); assert(stripes[other].read_start <= read_start);
assert(stripes[other].read_end >= read_end);
data_ptrs[orig++] = (uint8_t*)stripes[other].read_buf + (read_start - stripes[other].read_start);
}
} }
ec_encode_data(
read_end-read_start, pg_minsize, wanted, dectable + wanted_base*32*pg_minsize,
data_ptrs, data_ptrs + pg_minsize
);
} }
ec_encode_data(
read_end-read_start, pg_minsize, wanted, dectable + wanted_base*32*pg_minsize,
data_ptrs, data_ptrs + pg_minsize
);
wanted_base += wanted; wanted_base += wanted;
wanted = 0; wanted = 0;
}; };
@ -438,7 +449,8 @@ void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsi
if (stripes[role].read_end != 0 && stripes[role].missing) if (stripes[role].read_end != 0 && stripes[role].missing)
{ {
recovered = true; recovered = true;
if (stripes[role].read_end > stripes[role].read_start) if (stripes[role].read_end > stripes[role].read_start &&
stripes[role].read_end != UINT32_MAX)
{ {
for (int other = 0; other < pg_size; other++) for (int other = 0; other < pg_size; other++)
{ {
@ -557,7 +569,8 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad
uint64_t buf_size = add_size; uint64_t buf_size = add_size;
for (int role = 0; role < read_pg_size; role++) for (int role = 0; role < read_pg_size; role++)
{ {
if (stripes[role].read_end != 0) if (stripes[role].read_end != 0 &&
stripes[role].read_end != UINT32_MAX)
{ {
buf_size += stripes[role].read_end - stripes[role].read_start; buf_size += stripes[role].read_end - stripes[role].read_start;
} }
@ -567,7 +580,8 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad
uint64_t buf_pos = add_size; uint64_t buf_pos = add_size;
for (int role = 0; role < read_pg_size; role++) for (int role = 0; role < read_pg_size; role++)
{ {
if (stripes[role].read_end != 0) if (stripes[role].read_end != 0 &&
stripes[role].read_end != UINT32_MAX)
{ {
stripes[role].read_buf = (uint8_t*)buf + buf_pos; stripes[role].read_buf = (uint8_t*)buf + buf_pos;
buf_pos += stripes[role].read_end - stripes[role].read_start; buf_pos += stripes[role].read_end - stripes[role].read_start;

View File

@ -23,6 +23,7 @@ struct osd_rmw_stripe_t
void *read_buf, *write_buf; void *read_buf, *write_buf;
void *bmp_buf; void *bmp_buf;
uint32_t req_start, req_end; uint32_t req_start, req_end;
// read_end == UINT32_MAX means that only the bitmap is read, not the data
uint32_t read_start, read_end; uint32_t read_start, read_end;
uint32_t write_start, write_end; uint32_t write_start, write_end;
bool missing; bool missing;