diff --git a/src/osd_ops.h b/src/osd_ops.h index 9f232c75..8055bbfc 100644 --- a/src/osd_ops.h +++ b/src/osd_ops.h @@ -191,7 +191,7 @@ struct __attribute__((__packed__)) osd_op_rw_t uint64_t inode; // offset uint64_t offset; - // length + // length. 0 means to read all bitmaps of the specified range, but no data. uint32_t len; // flags (for future) uint32_t flags; diff --git a/src/osd_primary.cpp b/src/osd_primary.cpp index 95d947c3..2ceeacee 100644 --- a/src/osd_primary.cpp +++ b/src/osd_primary.cpp @@ -186,10 +186,22 @@ void osd_t::continue_primary_read(osd_op_t *cur_op) cur_op->reply.rw.bitmap_len = 0; { auto & pg = pgs.at({ .pool_id = INODE_POOL(op_data->oid.inode), .pg_num = op_data->pg_num }); - for (int role = 0; role < op_data->pg_data_size; role++) + if (cur_op->req.rw.len == 0) { - op_data->stripes[role].read_start = op_data->stripes[role].req_start; - op_data->stripes[role].read_end = op_data->stripes[role].req_end; + // len=0 => bitmap read + for (int role = 0; role < op_data->pg_data_size; role++) + { + op_data->stripes[role].read_start = 0; + op_data->stripes[role].read_end = UINT32_MAX; + } + } + else + { + for (int role = 0; role < op_data->pg_data_size; role++) + { + op_data->stripes[role].read_start = op_data->stripes[role].req_start; + op_data->stripes[role].read_end = op_data->stripes[role].req_end; + } } // Determine version auto vo_it = pg.ver_override.find(op_data->oid); diff --git a/src/osd_primary_subops.cpp b/src/osd_primary_subops.cpp index 5b6ab48d..da8520a0 100644 --- a/src/osd_primary_subops.cpp +++ b/src/osd_primary_subops.cpp @@ -151,6 +151,13 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o { int stripe_num = rep ? 0 : role; osd_op_t *subop = op_data->subops + i; + uint32_t subop_len = wr + ? stripes[stripe_num].write_end - stripes[stripe_num].write_start + : stripes[stripe_num].read_end - stripes[stripe_num].read_start; + if (!wr && stripes[stripe_num].read_end == UINT32_MAX) + { + subop_len = 0; + } if (role_osd_num == this->osd_num) { clock_gettime(CLOCK_REALTIME, &subop->tv_begin); @@ -169,7 +176,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o }, .version = op_version, .offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start, - .len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start, + .len = subop_len, .buf = wr ? stripes[stripe_num].write_buf : stripes[stripe_num].read_buf, .bitmap = stripes[stripe_num].bmp_buf, }); @@ -199,7 +206,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o }, .version = op_version, .offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start, - .len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start, + .len = subop_len, .attr_len = wr ? clean_entry_bitmap_size : 0, }; #ifdef OSD_DEBUG @@ -218,9 +225,9 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o } else { - if (stripes[stripe_num].read_end > stripes[stripe_num].read_start) + if (subop_len > 0) { - subop->iov.push_back(stripes[stripe_num].read_buf, stripes[stripe_num].read_end - stripes[stripe_num].read_start); + subop->iov.push_back(stripes[stripe_num].read_buf, subop_len); } } subop->callback = [cur_op, this](osd_op_t *subop) diff --git a/src/osd_rmw.cpp b/src/osd_rmw.cpp index 593ff95c..aedc3a6b 100644 --- a/src/osd_rmw.cpp +++ b/src/osd_rmw.cpp @@ -28,7 +28,9 @@ static inline void extend_read(uint32_t start, uint32_t end, osd_rmw_stripe_t & } else { - if (stripe.read_end < end) + if (stripe.read_end < end && end != UINT32_MAX || + // UINT32_MAX means that stripe only needs bitmap, end != 0 => needs also data + stripe.read_end == UINT32_MAX && end != 0) stripe.read_end = end; if (stripe.read_start > start) stripe.read_start = start; @@ -105,24 +107,30 @@ void reconstruct_stripes_xor(osd_rmw_stripe_t *stripes, int pg_size, uint32_t bi } else if (prev >= 0) { - assert(stripes[role].read_start >= stripes[prev].read_start && - stripes[role].read_start >= stripes[other].read_start); - memxor( - (uint8_t*)stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start), - (uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), - stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start - ); + if (stripes[role].read_end != UINT32_MAX) + { + assert(stripes[role].read_start >= stripes[prev].read_start && + stripes[role].read_start >= stripes[other].read_start); + memxor( + (uint8_t*)stripes[prev].read_buf + (stripes[role].read_start - stripes[prev].read_start), + (uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), + stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start + ); + } memxor(stripes[prev].bmp_buf, stripes[other].bmp_buf, stripes[role].bmp_buf, bitmap_size); prev = -1; } else { - assert(stripes[role].read_start >= stripes[other].read_start); - memxor( - stripes[role].read_buf, - (uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), - stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start - ); + if (stripes[role].read_end != UINT32_MAX) + { + assert(stripes[role].read_start >= stripes[other].read_start); + memxor( + stripes[role].read_buf, + (uint8_t*)stripes[other].read_buf + (stripes[role].read_start - stripes[other].read_start), + stripes[role].read_buf, stripes[role].read_end - stripes[role].read_start + ); + } memxor(stripes[role].bmp_buf, stripes[other].bmp_buf, stripes[role].bmp_buf, bitmap_size); } } @@ -356,20 +364,23 @@ void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsi uint64_t read_start = 0, read_end = 0; auto recover_seq = [&]() { - int orig = 0; - for (int other = 0; other < pg_size && orig < pg_minsize; other++) + if (read_end != UINT32_MAX) { - if (stripes[other].read_end != 0 && !stripes[other].missing) + int orig = 0; + for (int other = 0; other < pg_size && orig < pg_minsize; other++) { - assert(stripes[other].read_start <= read_start); - assert(stripes[other].read_end >= read_end); - data_ptrs[orig++] = (uint8_t*)stripes[other].read_buf + (read_start - stripes[other].read_start); + if (stripes[other].read_end != 0 && !stripes[other].missing) + { + assert(stripes[other].read_start <= read_start); + assert(stripes[other].read_end >= read_end); + data_ptrs[orig++] = (uint8_t*)stripes[other].read_buf + (read_start - stripes[other].read_start); + } } + ec_encode_data( + read_end-read_start, pg_minsize, wanted, dectable + wanted_base*32*pg_minsize, + data_ptrs, data_ptrs + pg_minsize + ); } - ec_encode_data( - read_end-read_start, pg_minsize, wanted, dectable + wanted_base*32*pg_minsize, - data_ptrs, data_ptrs + pg_minsize - ); wanted_base += wanted; wanted = 0; }; @@ -438,7 +449,8 @@ void reconstruct_stripes_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsi if (stripes[role].read_end != 0 && stripes[role].missing) { recovered = true; - if (stripes[role].read_end > stripes[role].read_start) + if (stripes[role].read_end > stripes[role].read_start && + stripes[role].read_end != UINT32_MAX) { for (int other = 0; other < pg_size; other++) { @@ -557,7 +569,8 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad uint64_t buf_size = add_size; for (int role = 0; role < read_pg_size; role++) { - if (stripes[role].read_end != 0) + if (stripes[role].read_end != 0 && + stripes[role].read_end != UINT32_MAX) { buf_size += stripes[role].read_end - stripes[role].read_start; } @@ -567,7 +580,8 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad uint64_t buf_pos = add_size; for (int role = 0; role < read_pg_size; role++) { - if (stripes[role].read_end != 0) + if (stripes[role].read_end != 0 && + stripes[role].read_end != UINT32_MAX) { stripes[role].read_buf = (uint8_t*)buf + buf_pos; buf_pos += stripes[role].read_end - stripes[role].read_start; diff --git a/src/osd_rmw.h b/src/osd_rmw.h index e185113b..672584c8 100644 --- a/src/osd_rmw.h +++ b/src/osd_rmw.h @@ -23,6 +23,7 @@ struct osd_rmw_stripe_t void *read_buf, *write_buf; void *bmp_buf; uint32_t req_start, req_end; + // read_end=UINT32_MAX means to only read bitmap, but not data uint32_t read_start, read_end; uint32_t write_start, write_end; bool missing;