From 92c6e16eba2f48f985274697407c02b982687ea3 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Thu, 9 Feb 2023 01:38:45 +0300 Subject: [PATCH] Fix checksum verification in big_write journal reads --- src/blockstore_flush.cpp | 10 +++---- src/blockstore_impl.h | 6 ++-- src/blockstore_read.cpp | 59 +++++++++++++++++++++++++++++++--------- 3 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/blockstore_flush.cpp b/src/blockstore_flush.cpp index 8fcf3987..736ce37b 100644 --- a/src/blockstore_flush.cpp +++ b/src/blockstore_flush.cpp @@ -648,13 +648,13 @@ void journal_flusher_co::update_metadata_entry() // Copy initial (big_write) data checksums if (bs->dsk.csum_block_size && clean_init_bitmap) { - uint8_t *new_clean_data_csum = new_clean_bitmap + 2*bs->dsk.clean_entry_bitmap_size + - clean_bitmap_offset / bs->dsk.csum_block_size * (bs->dsk.data_csum_type & 0xFF); + uint8_t *new_clean_data_csum = new_clean_bitmap + 2*bs->dsk.clean_entry_bitmap_size; // big_write partial checksums are calculated from a padded csum_block_size, we can just copy them memset(new_clean_data_csum, 0, bs->dsk.data_block_size / bs->dsk.csum_block_size * (bs->dsk.data_csum_type & 0xFF)); uint64_t dyn_size = bs->dsk.dirty_dyn_size(clean_bitmap_offset, clean_bitmap_len); uint32_t *csums = (uint32_t*)(clean_init_dyn_ptr + bs->dsk.clean_entry_bitmap_size); - memcpy(new_clean_data_csum, csums, dyn_size - bs->dsk.clean_entry_bitmap_size); + memcpy(new_clean_data_csum + clean_bitmap_offset / bs->dsk.csum_block_size * (bs->dsk.data_csum_type & 0xFF), + csums, dyn_size - bs->dsk.clean_entry_bitmap_size); } // Calculate or copy small_write checksums uint32_t *new_data_csums = (uint32_t*)(new_clean_bitmap + 2*bs->dsk.clean_entry_bitmap_size); @@ -758,7 +758,7 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base) { assert(!(v[i].offset % bs->dsk.csum_block_size)); assert(!(v[i].len % bs->dsk.csum_block_size)); - bs->verify_padded_checksums(new_clean_bitmap, v[i].offset, &iov, 1, [&](uint32_t bad_block, uint32_t calc_csum, uint32_t stored_csum) + bs->verify_padded_checksums(new_clean_bitmap, false, v[i].offset, &iov, 1, [&](uint32_t bad_block, uint32_t calc_csum, uint32_t stored_csum) { printf("Checksum mismatch in object %lx:%lx v%lu in data area at offset 0x%lx+0x%x: got %08x, expected %08x\n", cur.oid.inode, cur.oid.stripe, old_clean_ver, old_clean_loc, bad_block, calc_csum, stored_csum); @@ -1028,7 +1028,7 @@ void journal_flusher_co::scan_dirty() uint8_t *bmp_ptr = bs->get_clean_entry_bitmap(old_clean_loc, 0); uint64_t fulfilled = 0; read_to_fill_incomplete = bs->fill_partial_checksum_blocks( - v, fulfilled, bmp_ptr, NULL, v[0].offset/bs->dsk.csum_block_size * bs->dsk.csum_block_size, + v, fulfilled, bmp_ptr, false, NULL, v[0].offset/bs->dsk.csum_block_size * bs->dsk.csum_block_size, ((v[v.size()-1].offset+v[v.size()-1].len-1) / bs->dsk.csum_block_size + 1) * bs->dsk.csum_block_size ); } diff --git a/src/blockstore_impl.h b/src/blockstore_impl.h index df223cce..ab549075 100644 --- a/src/blockstore_impl.h +++ b/src/blockstore_impl.h @@ -336,18 +336,18 @@ class blockstore_impl_t bool fulfill_clean_read(blockstore_op_t *read_op, uint64_t & fulfilled, uint8_t *clean_entry_bitmap, uint64_t clean_loc, uint64_t clean_ver); int fill_partial_checksum_blocks(std::vector & rv, uint64_t & fulfilled, - uint8_t *clean_entry_bitmap, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end); + uint8_t *clean_entry_bitmap, bool from_journal, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end); int pad_journal_read(std::vector & rv, copy_buffer_t & cp, uint64_t dirty_offset, uint64_t dirty_end, uint64_t dirty_loc, uint8_t *csum_ptr, uint64_t offset, uint64_t submit_len, uint64_t & blk_begin, uint64_t & blk_end, uint8_t* & blk_buf); bool read_range_fulfilled(std::vector & rv, uint64_t & fulfilled, uint8_t *read_buf, uint8_t *clean_entry_bitmap, uint32_t item_start, uint32_t item_end); bool read_checksum_block(blockstore_op_t *op, int rv_pos, uint64_t &fulfilled, uint64_t clean_loc); - bool verify_padded_checksums(uint8_t *clean_entry_bitmap, uint32_t offset, + bool verify_padded_checksums(uint8_t *clean_entry_bitmap, bool is_journal, uint32_t offset, iovec *iov, int n_iov, std::function bad_block_cb); bool verify_journal_checksums(uint8_t *csums, uint32_t offset, iovec *iov, int n_iov, std::function bad_block_cb); - bool verify_read_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, iovec *iov, int n_iov); + bool verify_clean_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, iovec *iov, int n_iov); int fulfill_read_push(blockstore_op_t *op, void *buf, uint64_t offset, uint64_t len, uint32_t item_state, uint64_t item_version); void handle_read_event(ring_data_t *data, blockstore_op_t *op); diff --git a/src/blockstore_read.cpp b/src/blockstore_read.cpp index d4367dd6..ef833f56 100644 --- a/src/blockstore_read.cpp +++ b/src/blockstore_read.cpp @@ -160,7 +160,7 @@ uint8_t* blockstore_impl_t::get_clean_entry_bitmap(uint64_t block_loc, int offse } int blockstore_impl_t::fill_partial_checksum_blocks(std::vector & rv, uint64_t & fulfilled, - uint8_t *clean_entry_bitmap, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end) + uint8_t *clean_entry_bitmap, bool from_journal, uint8_t *read_buf, uint64_t read_offset, uint64_t read_end) { if (read_end == read_offset) return 0; @@ -195,6 +195,8 @@ int blockstore_impl_t::fill_partial_checksum_blocks(std::vector & .copy_flags = COPY_BUF_CSUM_FILL, .offset = start_block*dsk.csum_block_size, .len = (end_block-start_block)*dsk.csum_block_size, + // save clean_entry_bitmap if we're reading clean data from the journal + .csum_buf = from_journal ? clean_entry_bitmap : NULL, }); start_block = end_block; required++; @@ -423,8 +425,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op) } if (fulfilled < read_op->len) { - uint8_t *clean_entry_bitmap = get_clean_entry_bitmap(clean_it->second.location, 0); - if (!fulfill_clean_read(read_op, fulfilled, clean_entry_bitmap, clean_it->second.location, clean_it->second.version)) + if (!fulfill_clean_read(read_op, fulfilled, NULL, clean_it->second.location, clean_it->second.version)) { goto undo_read; } @@ -527,10 +528,17 @@ int blockstore_impl_t::pad_journal_read(std::vector & rv, copy_bu bool blockstore_impl_t::fulfill_clean_read(blockstore_op_t *read_op, uint64_t & fulfilled, uint8_t *clean_entry_bitmap, uint64_t clean_loc, uint64_t clean_ver) { + bool from_journal = clean_entry_bitmap != NULL; + if (!clean_entry_bitmap) + { + // NULL clean_entry_bitmap means we're reading from data, not from the journal, + // and the bitmap location is obvious + clean_entry_bitmap = get_clean_entry_bitmap(clean_loc, 0); + } if (dsk.csum_block_size > dsk.bitmap_granularity) { auto & rv = PRIV(read_op)->read_vec; - int req = fill_partial_checksum_blocks(rv, fulfilled, clean_entry_bitmap, + int req = fill_partial_checksum_blocks(rv, fulfilled, clean_entry_bitmap, from_journal, (uint8_t*)read_op->buf, read_op->offset, read_op->offset+read_op->len); for (int i = req; i > 0; i--) { @@ -586,11 +594,11 @@ bool blockstore_impl_t::fulfill_clean_read(blockstore_op_t *read_op, uint64_t & return true; } -bool blockstore_impl_t::verify_padded_checksums(uint8_t *clean_entry_bitmap, uint32_t offset, +bool blockstore_impl_t::verify_padded_checksums(uint8_t *clean_entry_bitmap, bool is_journal, uint32_t offset, iovec *iov, int n_iov, std::function bad_block_cb) { assert(!(offset % dsk.csum_block_size)); - uint32_t *csums = (uint32_t*)(clean_entry_bitmap + 2*dsk.clean_entry_bitmap_size); + uint32_t *csums = (uint32_t*)(clean_entry_bitmap + (is_journal ? 1 : 2)*dsk.clean_entry_bitmap_size); uint32_t block_csum = 0; uint32_t block_done = 0; uint32_t block_num = clean_entry_bitmap ? offset/dsk.csum_block_size : 0; @@ -678,18 +686,18 @@ bool blockstore_impl_t::verify_journal_checksums(uint8_t *csums, uint32_t offset return true; } -bool blockstore_impl_t::verify_read_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, iovec *iov, int n_iov) +bool blockstore_impl_t::verify_clean_padded_checksums(blockstore_op_t *op, uint64_t clean_loc, iovec *iov, int n_iov) { uint32_t offset = clean_loc % dsk.data_block_size; clean_loc = (clean_loc >> dsk.block_order) << dsk.block_order; // First verify against the newest checksum version uint8_t *clean_entry_bitmap = get_clean_entry_bitmap(clean_loc, 0); - if (verify_padded_checksums(clean_entry_bitmap, offset, iov, n_iov, NULL)) + if (verify_padded_checksums(clean_entry_bitmap, false, offset, iov, n_iov, NULL)) return true; // Check through all relevant "metadata backups" possibly added by flushers auto mb_it = used_clean_objects.lower_bound((obj_ver_id){ .oid = op->oid, .version = PRIV(op)->clean_version_used }); for (; mb_it != used_clean_objects.end() && mb_it->first.oid == op->oid; mb_it++) - if (mb_it->second.meta != NULL && verify_padded_checksums(mb_it->second.meta, offset, iov, n_iov, NULL)) + if (mb_it->second.meta != NULL && verify_padded_checksums(mb_it->second.meta, false, offset, iov, n_iov, NULL)) return true; return false; } @@ -711,14 +719,39 @@ void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op auto & rv = PRIV(op)->read_vec; if (dsk.csum_block_size > dsk.bitmap_granularity) { + bool ok; for (int i = rv.size()-1; i >= 0 && (rv[i].copy_flags & COPY_BUF_CSUM_FILL); i--) { - struct iovec *iov = (struct iovec*)(rv[i].buf + (rv[i].len & 0xFFFFFFFF)); + struct iovec *iov = (struct iovec*)((uint8_t*)rv[i].buf + (rv[i].len & 0xFFFFFFFF)); int n_iov = rv[i].len >> 32; - if (!((rv[i].copy_flags & COPY_BUF_JOURNAL) - ? verify_journal_checksums(rv[i].csum_buf, rv[i].disk_offset % dsk.data_block_size, iov, n_iov, NULL) - : verify_read_padded_checksums(op, rv[i].disk_offset, iov, n_iov))) + if (rv[i].copy_flags & COPY_BUF_JOURNAL) + { + // SMALL_WRITE from journal + ok = verify_journal_checksums(rv[i].csum_buf, rv[i].disk_offset % dsk.data_block_size, iov, n_iov, NULL); + } + else if (rv[i].csum_buf) + { + // BIG_WRITE from journal + ok = verify_padded_checksums(rv[i].csum_buf, true, rv[i].disk_offset % dsk.data_block_size, iov, n_iov, NULL); + } + else + { + // Clean data + ok = verify_clean_padded_checksums(op, rv[i].disk_offset, iov, n_iov); + } + if (!ok) + { + uint64_t read_len = 0; + for (int i = 0; i < n_iov; i++) + read_len += iov[i].iov_len; + printf( + "Checksum mismatch in object %lx:%lx v%lu in %s area at offset 0x%lx (read length 0x%lx)\n", + op->oid.inode, op->oid.stripe, op->version, + (rv[i].copy_flags & COPY_BUF_JOURNAL) ? "journal" : "data", + rv[i].disk_offset, read_len + ); op->retval = -EDOM; + } free(rv[i].buf); rv[i].buf = NULL; }