// Copyright (c) Vitaliy Filippov, 2019+ // License: VNPL-1.1 (see README.md for details) #pragma once #ifndef _LARGEFILE64_SOURCE #define _LARGEFILE64_SOURCE #endif #include #include #include #include #include #include "object_id.h" #include "ringloop.h" // Memory alignment for direct I/O (usually 512 bytes) // All other alignments must be a multiple of this one #ifndef MEM_ALIGNMENT #define MEM_ALIGNMENT 512 #endif // Default block size is 128 KB, current allowed range is 4K - 128M #define DEFAULT_ORDER 17 #define MIN_BLOCK_SIZE 4*1024 #define MAX_BLOCK_SIZE 128*1024*1024 #define BS_OP_MIN 1 #define BS_OP_READ 1 #define BS_OP_WRITE 2 #define BS_OP_WRITE_STABLE 3 #define BS_OP_SYNC 4 #define BS_OP_STABLE 5 #define BS_OP_DELETE 6 #define BS_OP_LIST 7 #define BS_OP_ROLLBACK 8 #define BS_OP_SYNC_STAB_ALL 9 #define BS_OP_MAX 9 #define BS_OP_PRIVATE_DATA_SIZE 256 /* Blockstore opcode documentation: ## BS_OP_READ / BS_OP_WRITE / BS_OP_WRITE_STABLE Read or write object data. WRITE_STABLE writes a version that doesn't require marking as stable. Input: - oid = requested object - version = requested version. For reads: - version == 0: read the last stable version, - version == UINT64_MAX: read the last version, - otherwise: read the newest version that is <= the specified version - reads aren't guaranteed to return data from previous unfinished writes For writes: - if version == 0, a new version is assigned automatically - if version != 0, it is assigned for the new write if possible, otherwise -EINVAL is returned - offset, len = offset and length within object. length may be zero, in that case read operation only returns the version / write operation only bumps the version - buf = pre-allocated buffer for data (read) / with data (write). may be NULL if len == 0. - bitmap = bytes long arbitrary data stored for each object in the metadata area. when fits into pointer size, it should be passed as this field's value. when it doesn't fit, this field should be a pointer to that piece of data. named "bitmap" because it's used for the "external bitmap" in Vitastor. Output: - retval = number of bytes actually read/written or negative error number (-EINVAL or -ENOSPC) - version = the version actually read or written ## BS_OP_DELETE Delete an object. Input: - oid = requested object - version = requested version. Treated the same as with BS_OP_WRITE Output: - retval = 0 or negative error number (-EINVAL) - version = the version actually written (delete is initially written as an object version) ## BS_OP_SYNC Make sure all previously issued modifications reach physical media. Input: Nothing except opcode Output: - retval = 0 or negative error number (-EINVAL) ## BS_OP_STABLE / BS_OP_ROLLBACK Mark objects as stable / rollback previous unstable writes. Input: - len = count of obj_ver_id's to stabilize or rollback - stabilize: all object versions up to the requested version of each object are marked as stable - rollback: all objects are rolled back to the requested stable versions - buf = pre-allocated obj_ver_id array units long Output: - retval = 0 or negative error number (-EINVAL, -ENOENT if no such version or -EBUSY if not synced) ## BS_OP_SYNC_STAB_ALL ONLY FOR TESTS! Sync and mark all unstable object versions as stable, at once. Input: Nothing except opcode Output: - retval = 0 or negative error number (-EINVAL) ## BS_OP_LIST Get a list of all objects in this Blockstore. Input: - oid.stripe = PG alignment - len = PG count or 0 to list all objects - offset = PG number - oid.inode = min inode number or 0 to list all inodes - version = max inode number or 0 to list all inodes Output: - retval = total obj_ver_id count - version = stable obj_ver_id count - buf = obj_ver_id array allocated by the blockstore. Stable versions come first. You must free it yourself after usage with free(). Output includes all objects for which (((inode + stripe / ) % ) == ). */ struct blockstore_op_t { // operation uint64_t opcode; // finish callback std::function callback; object_id oid; uint64_t version; uint32_t offset; uint32_t len; void *buf; void *bitmap; int retval; uint8_t private_data[BS_OP_PRIVATE_DATA_SIZE]; }; typedef std::unordered_map blockstore_config_t; class blockstore_impl_t; class blockstore_t { blockstore_impl_t *impl; public: blockstore_t(blockstore_config_t & config, ring_loop_t *ringloop); ~blockstore_t(); // Event loop void loop(); // Returns true when blockstore is ready to process operations // (Although you're free to enqueue them before that) bool is_started(); // Returns true when blockstore is stalled bool is_stalled(); // Returns true when it's safe to destroy the instance. If destroying the instance // requires to purge some queues, starts that process. Should be called in the event // loop until it returns true. bool is_safe_to_stop(); // Submission void enqueue_op(blockstore_op_t *op); // Unstable writes are added here (map of object_id -> version) std::unordered_map & get_unstable_writes(); // FIXME rename to object_size uint32_t get_block_size(); uint64_t get_block_count(); uint64_t get_free_block_count(); uint32_t get_bitmap_granularity(); };