From b7ccd63104ff16f131eb409c906cdfa0b1cfc2eb Mon Sep 17 00:00:00 2001
From: Vitaliy Filippov
Date: Wed, 12 Feb 2020 12:30:50 +0300
Subject: [PATCH] Return version number from reads

---
 blockstore.cpp         |  2 +-
 blockstore.h           | 13 ++++++++++---
 blockstore_impl.h      |  2 +-
 blockstore_read.cpp    | 11 +++++++++++
 osd_exec_secondary.cpp |  3 ++-
 osd_ops.h              |  2 +-
 6 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/blockstore.cpp b/blockstore.cpp
index 891ff23d0..05c4c4139 100644
--- a/blockstore.cpp
+++ b/blockstore.cpp
@@ -40,7 +40,7 @@ void blockstore_t::enqueue_op_first(blockstore_op_t *op)
     impl->enqueue_op(op, true);
 }
 
-std::map & blockstore_t::get_unstable_writes()
+std::unordered_map & blockstore_t::get_unstable_writes()
 {
     return impl->unstable_writes;
 }
diff --git a/blockstore.h b/blockstore.h
index 2872ab45f..824422bdb 100644
--- a/blockstore.h
+++ b/blockstore.h
@@ -55,8 +55,15 @@ struct blockstore_op_t
     std::function callback;
     // For reads, writes & deletes: oid is the requested object
     object_id oid;
-    // For reads: version=0 -> last stable, version=UINT64_MAX -> last unstable, version=X -> specific version
-    // For writes & deletes: a new version is assigned automatically
+    // For reads:
+    //   version == 0 -> read the last stable version,
+    //   version == UINT64_MAX -> read the last version,
+    //   otherwise -> read the newest version that is <= the specified version
+    //   after execution, version is equal to the version that was read from the blockstore
+    // For writes & deletes:
+    //   if version == 0, a new version is assigned automatically
+    //   if version != 0, it is assigned for the new write if possible, otherwise -EINVAL is returned
+    //   after execution, version is equal to the version that was written to the blockstore
     uint64_t version;
     // For reads & writes: offset & len are the requested part of the object, buf is the buffer
     uint32_t offset;
@@ -102,7 +109,7 @@ public:
     void enqueue_op_first(blockstore_op_t *op);
 
     // Unstable writes are added here (map of object_id -> version)
-    std::map & get_unstable_writes();
+    std::unordered_map & get_unstable_writes();
 
     // FIXME rename to object_size
     uint32_t get_block_size();
diff --git a/blockstore_impl.h b/blockstore_impl.h
index 718100622..1ea4f366e 100644
--- a/blockstore_impl.h
+++ b/blockstore_impl.h
@@ -309,7 +309,7 @@ public:
     void enqueue_op(blockstore_op_t *op, bool first = false);
 
     // Unstable writes are added here (map of object_id -> version)
-    std::map unstable_writes;
+    std::unordered_map unstable_writes;
 
     inline uint32_t get_block_size() { return block_size; }
     inline uint64_t get_block_count() { return block_count; }
diff --git a/blockstore_read.cpp b/blockstore_read.cpp
index 2c5faa277..f02ef50f2 100644
--- a/blockstore_read.cpp
+++ b/blockstore_read.cpp
@@ -92,12 +92,14 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
     {
         // region is not allocated - return zeroes
         memset(read_op->buf, 0, read_op->len);
+        read_op->version = 0;
         read_op->retval = read_op->len;
         read_op->callback(read_op);
         return 1;
     }
     uint64_t fulfilled = 0;
     PRIV(read_op)->pending_ops = 0;
+    uint64_t result_version = 0;
     if (dirty_found)
     {
         while (dirty_it->first.oid == read_op->oid)
@@ -112,6 +114,10 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
             }
             if (version_ok)
             {
+                if (!result_version)
+                {
+                    result_version = dirty_it->first.version;
+                }
                 if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
                     dirty.state, dirty_it->first.version, dirty.location + (IS_JOURNAL(dirty.state) ? 0 : dirty.offset)))
                 {
@@ -129,6 +135,10 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
     }
     if (clean_it != clean_db.end() && fulfilled < read_op->len)
     {
+        if (!result_version)
+        {
+            result_version = clean_it->second.version;
+        }
         if (!clean_entry_bitmap_size)
         {
             if (!fulfill_read(read_op, fulfilled, 0, block_size, ST_CURRENT, 0, clean_it->second.location))
@@ -190,6 +200,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
         fulfill_read(read_op, fulfilled, 0, block_size, ST_DEL_STABLE, 0, 0);
     }
     assert(fulfilled == read_op->len);
+    read_op->version = result_version;
     if (!PRIV(read_op)->pending_ops)
     {
         // everything is fulfilled from memory
diff --git a/osd_exec_secondary.cpp b/osd_exec_secondary.cpp
index 8e197b5b2..75f2ccb63 100644
--- a/osd_exec_secondary.cpp
+++ b/osd_exec_secondary.cpp
@@ -146,7 +146,8 @@ void osd_t::make_reply(osd_op_t *op)
     op->reply.hdr.retval = op->bs_op.retval;
     if (op->op.hdr.opcode == OSD_OP_SECONDARY_LIST)
         op->reply.sec_list.stable_count = op->bs_op.version;
-    else if (op->op.hdr.opcode == OSD_OP_SECONDARY_WRITE)
+    else if (op->op.hdr.opcode == OSD_OP_SECONDARY_READ ||
+        op->op.hdr.opcode == OSD_OP_SECONDARY_WRITE)
         op->reply.sec_rw.version = op->bs_op.version;
     else if (op->op.hdr.opcode == OSD_OP_SECONDARY_DELETE)
         op->reply.sec_del.version = op->bs_op.version;
diff --git a/osd_ops.h b/osd_ops.h
index ab0aa2bb8..0feb105fa 100644
--- a/osd_ops.h
+++ b/osd_ops.h
@@ -69,7 +69,7 @@ struct __attribute__((__packed__)) osd_op_secondary_rw_t
 struct __attribute__((__packed__)) osd_reply_secondary_rw_t
 {
     osd_reply_header_t header;
-    // for writes: assigned version number
+    // for reads and writes: assigned or read version number
     uint64_t version;
 };
 