Add "external" bitmap support to the secondary OSD protocol
parent
6107a4d07b
commit
860ac24762
|
@ -27,6 +27,7 @@
|
||||||
#define DEFAULT_ORDER 17
|
#define DEFAULT_ORDER 17
|
||||||
#define MIN_BLOCK_SIZE 4*1024
|
#define MIN_BLOCK_SIZE 4*1024
|
||||||
#define MAX_BLOCK_SIZE 128*1024*1024
|
#define MAX_BLOCK_SIZE 128*1024*1024
|
||||||
|
#define DEFAULT_BITMAP_GRANULARITY 4096
|
||||||
|
|
||||||
#define BS_OP_MIN 1
|
#define BS_OP_MIN 1
|
||||||
#define BS_OP_READ 1
|
#define BS_OP_READ 1
|
||||||
|
|
|
@ -131,7 +131,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||||
}
|
}
|
||||||
if (!bitmap_granularity)
|
if (!bitmap_granularity)
|
||||||
{
|
{
|
||||||
bitmap_granularity = 4096;
|
bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||||
}
|
}
|
||||||
else if (bitmap_granularity % disk_alignment)
|
else if (bitmap_granularity % disk_alignment)
|
||||||
{
|
{
|
||||||
|
|
|
@ -8,8 +8,6 @@
|
||||||
|
|
||||||
#define MIN_BLOCK_SIZE 4*1024
|
#define MIN_BLOCK_SIZE 4*1024
|
||||||
#define MAX_BLOCK_SIZE 128*1024*1024
|
#define MAX_BLOCK_SIZE 128*1024*1024
|
||||||
#define DEFAULT_DISK_ALIGNMENT 4096
|
|
||||||
#define DEFAULT_BITMAP_GRANULARITY 4096
|
|
||||||
#define DEFAULT_CLIENT_MAX_DIRTY_BYTES 32*1024*1024
|
#define DEFAULT_CLIENT_MAX_DIRTY_BYTES 32*1024*1024
|
||||||
#define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024
|
#define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#define DEFAULT_PEER_CONNECT_INTERVAL 5
|
#define DEFAULT_PEER_CONNECT_INTERVAL 5
|
||||||
#define DEFAULT_PEER_CONNECT_TIMEOUT 5
|
#define DEFAULT_PEER_CONNECT_TIMEOUT 5
|
||||||
#define DEFAULT_OSD_PING_TIMEOUT 5
|
#define DEFAULT_OSD_PING_TIMEOUT 5
|
||||||
|
#define DEFAULT_BITMAP_GRANULARITY 4096
|
||||||
|
|
||||||
struct osd_client_t
|
struct osd_client_t
|
||||||
{
|
{
|
||||||
|
|
|
@ -161,6 +161,7 @@ struct osd_op_t
|
||||||
osd_any_reply_t reply;
|
osd_any_reply_t reply;
|
||||||
blockstore_op_t *bs_op = NULL;
|
blockstore_op_t *bs_op = NULL;
|
||||||
void *buf = NULL;
|
void *buf = NULL;
|
||||||
|
void *bitmap = NULL;
|
||||||
void *rmw_buf = NULL;
|
void *rmw_buf = NULL;
|
||||||
osd_primary_op_data_t* op_data = NULL;
|
osd_primary_op_data_t* op_data = NULL;
|
||||||
std::function<void(osd_op_t*)> callback;
|
std::function<void(osd_op_t*)> callback;
|
||||||
|
|
|
@ -202,22 +202,36 @@ void osd_messenger_t::handle_op_hdr(osd_client_t *cl)
|
||||||
osd_op_t *cur_op = cl->read_op;
|
osd_op_t *cur_op = cl->read_op;
|
||||||
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
{
|
{
|
||||||
if (cur_op->req.sec_rw.len > 0)
|
|
||||||
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
|
|
||||||
cl->read_remaining = 0;
|
cl->read_remaining = 0;
|
||||||
}
|
}
|
||||||
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
|
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
||||||
{
|
{
|
||||||
|
if (cur_op->req.sec_rw.attr_len > 0)
|
||||||
|
{
|
||||||
|
if (cur_op->req.sec_rw.attr_len > sizeof(void*))
|
||||||
|
{
|
||||||
|
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.attr_len);
|
||||||
|
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.attr_len);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cl->recv_list.push_back(&cur_op->bitmap, cur_op->req.sec_rw.attr_len);
|
||||||
|
}
|
||||||
if (cur_op->req.sec_rw.len > 0)
|
if (cur_op->req.sec_rw.len > 0)
|
||||||
|
{
|
||||||
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
|
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
|
||||||
cl->read_remaining = cur_op->req.sec_rw.len;
|
cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len);
|
||||||
|
}
|
||||||
|
cl->read_remaining = cur_op->req.sec_rw.len + cur_op->req.sec_rw.attr_len;
|
||||||
}
|
}
|
||||||
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
|
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
|
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
|
||||||
{
|
{
|
||||||
if (cur_op->req.sec_stab.len > 0)
|
if (cur_op->req.sec_stab.len > 0)
|
||||||
|
{
|
||||||
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_stab.len);
|
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_stab.len);
|
||||||
|
cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_stab.len);
|
||||||
|
}
|
||||||
cl->read_remaining = cur_op->req.sec_stab.len;
|
cl->read_remaining = cur_op->req.sec_stab.len;
|
||||||
}
|
}
|
||||||
else if (cur_op->req.hdr.opcode == OSD_OP_READ)
|
else if (cur_op->req.hdr.opcode == OSD_OP_READ)
|
||||||
|
@ -227,13 +241,15 @@ void osd_messenger_t::handle_op_hdr(osd_client_t *cl)
|
||||||
else if (cur_op->req.hdr.opcode == OSD_OP_WRITE)
|
else if (cur_op->req.hdr.opcode == OSD_OP_WRITE)
|
||||||
{
|
{
|
||||||
if (cur_op->req.rw.len > 0)
|
if (cur_op->req.rw.len > 0)
|
||||||
|
{
|
||||||
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.rw.len);
|
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.rw.len);
|
||||||
|
cl->recv_list.push_back(cur_op->buf, cur_op->req.rw.len);
|
||||||
|
}
|
||||||
cl->read_remaining = cur_op->req.rw.len;
|
cl->read_remaining = cur_op->req.rw.len;
|
||||||
}
|
}
|
||||||
if (cl->read_remaining > 0)
|
if (cl->read_remaining > 0)
|
||||||
{
|
{
|
||||||
// Read data
|
// Read data
|
||||||
cl->recv_list.push_back(cur_op->buf, cl->read_remaining);
|
|
||||||
cl->read_state = CL_READ_DATA;
|
cl->read_state = CL_READ_DATA;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
|
|
@ -59,6 +59,18 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
|
cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)) && cur_op->iov.count > 0)
|
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)) && cur_op->iov.count > 0)
|
||||||
{
|
{
|
||||||
|
to_outbox.push_back(NULL);
|
||||||
|
// Bitmap
|
||||||
|
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ && cur_op->reply.sec_rw.attr_len > 0 ||
|
||||||
|
(cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) &&
|
||||||
|
cur_op->req.sec_rw.attr_len > 0)
|
||||||
|
{
|
||||||
|
to_send_list.push_back((iovec){
|
||||||
|
.iov_base = (cur_op->reply.sec_rw.attr_len > sizeof(void*) ? cur_op->bitmap : &cur_op->bitmap),
|
||||||
|
.iov_len = cur_op->reply.sec_rw.attr_len,
|
||||||
|
});
|
||||||
|
to_outbox.push_back(NULL);
|
||||||
|
}
|
||||||
for (int i = 0; i < cur_op->iov.count; i++)
|
for (int i = 0; i < cur_op->iov.count; i++)
|
||||||
{
|
{
|
||||||
assert(cur_op->iov.buf[i].iov_base);
|
assert(cur_op->iov.buf[i].iov_base);
|
||||||
|
|
14
src/osd.cpp
14
src/osd.cpp
|
@ -12,7 +12,16 @@
|
||||||
|
|
||||||
osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
|
osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
|
||||||
{
|
{
|
||||||
config["entry_attr_size"] = "0";
|
bs_block_size = strtoull(config["block_size"].c_str(), NULL, 10);
|
||||||
|
bs_bitmap_granularity = strtoull(config["bitmap_granularity"].c_str(), NULL, 10);
|
||||||
|
if (!bs_block_size)
|
||||||
|
bs_block_size = DEFAULT_BLOCK_SIZE;
|
||||||
|
if (!bs_bitmap_granularity)
|
||||||
|
bs_bitmap_granularity = DEFAULT_BITMAP_GRANULARITY;
|
||||||
|
|
||||||
|
// Force external bitmap size
|
||||||
|
entry_attr_size = bs_block_size / bs_bitmap_granularity / 8;
|
||||||
|
config["entry_attr_size"] = entry_attr_size;
|
||||||
|
|
||||||
this->config = config;
|
this->config = config;
|
||||||
this->ringloop = ringloop;
|
this->ringloop = ringloop;
|
||||||
|
@ -20,9 +29,6 @@ osd_t::osd_t(blockstore_config_t & config, ring_loop_t *ringloop)
|
||||||
// FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config
|
// FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config
|
||||||
this->bs = new blockstore_t(config, ringloop);
|
this->bs = new blockstore_t(config, ringloop);
|
||||||
|
|
||||||
this->bs_block_size = bs->get_block_size();
|
|
||||||
this->bs_bitmap_granularity = bs->get_bitmap_granularity();
|
|
||||||
|
|
||||||
parse_config(config);
|
parse_config(config);
|
||||||
|
|
||||||
epmgr = new epoll_manager_t(ringloop);
|
epmgr = new epoll_manager_t(ringloop);
|
||||||
|
|
|
@ -115,7 +115,7 @@ class osd_t
|
||||||
bool stopping = false;
|
bool stopping = false;
|
||||||
int inflight_ops = 0;
|
int inflight_ops = 0;
|
||||||
blockstore_t *bs;
|
blockstore_t *bs;
|
||||||
uint32_t bs_block_size, bs_bitmap_granularity;
|
uint32_t bs_block_size, bs_bitmap_granularity, entry_attr_size;
|
||||||
ring_loop_t *ringloop;
|
ring_loop_t *ringloop;
|
||||||
timerfd_manager_t *tfd = NULL;
|
timerfd_manager_t *tfd = NULL;
|
||||||
epoll_manager_t *epmgr = NULL;
|
epoll_manager_t *epmgr = NULL;
|
||||||
|
|
|
@ -71,6 +71,8 @@ struct __attribute__((__packed__)) osd_op_secondary_rw_t
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
// length
|
// length
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
|
// bitmap/attribute length - bitmap comes after header, but before data
|
||||||
|
uint32_t attr_len;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __attribute__((__packed__)) osd_reply_secondary_rw_t
|
struct __attribute__((__packed__)) osd_reply_secondary_rw_t
|
||||||
|
@ -78,6 +80,8 @@ struct __attribute__((__packed__)) osd_reply_secondary_rw_t
|
||||||
osd_reply_header_t header;
|
osd_reply_header_t header;
|
||||||
// for reads and writes: assigned or read version number
|
// for reads and writes: assigned or read version number
|
||||||
uint64_t version;
|
uint64_t version;
|
||||||
|
// for reads: bitmap/attribute length (just to double-check)
|
||||||
|
uint32_t attr_len;
|
||||||
};
|
};
|
||||||
|
|
||||||
// delete object on the secondary OSD
|
// delete object on the secondary OSD
|
||||||
|
|
|
@ -17,10 +17,17 @@ void osd_t::secondary_op_callback(osd_op_t *op)
|
||||||
{
|
{
|
||||||
op->reply.sec_del.version = op->bs_op->version;
|
op->reply.sec_del.version = op->bs_op->version;
|
||||||
}
|
}
|
||||||
if (op->req.hdr.opcode == OSD_OP_SEC_READ &&
|
if (op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
op->bs_op->retval > 0)
|
|
||||||
{
|
{
|
||||||
op->iov.push_back(op->buf, op->bs_op->retval);
|
if (entry_attr_size > 0)
|
||||||
|
{
|
||||||
|
op->reply.sec_rw.attr_len = entry_attr_size;
|
||||||
|
op->iov.push_back((entry_attr_size > sizeof(void*) ? op->bitmap : &op->bs_op->bitmap), entry_attr_size);
|
||||||
|
}
|
||||||
|
if (op->bs_op->retval > 0)
|
||||||
|
{
|
||||||
|
op->iov.push_back(op->buf, op->bs_op->retval);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (op->req.hdr.opcode == OSD_OP_SEC_LIST)
|
else if (op->req.hdr.opcode == OSD_OP_SEC_LIST)
|
||||||
{
|
{
|
||||||
|
@ -55,11 +62,20 @@ void osd_t::exec_secondary(osd_op_t *cur_op)
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
|
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
||||||
{
|
{
|
||||||
|
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
|
{
|
||||||
|
// Allocate memory for the read operation
|
||||||
|
if (entry_attr_size > sizeof(void*))
|
||||||
|
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(entry_attr_size);
|
||||||
|
if (cur_op->req.sec_rw.len > 0)
|
||||||
|
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
|
||||||
|
}
|
||||||
cur_op->bs_op->oid = cur_op->req.sec_rw.oid;
|
cur_op->bs_op->oid = cur_op->req.sec_rw.oid;
|
||||||
cur_op->bs_op->version = cur_op->req.sec_rw.version;
|
cur_op->bs_op->version = cur_op->req.sec_rw.version;
|
||||||
cur_op->bs_op->offset = cur_op->req.sec_rw.offset;
|
cur_op->bs_op->offset = cur_op->req.sec_rw.offset;
|
||||||
cur_op->bs_op->len = cur_op->req.sec_rw.len;
|
cur_op->bs_op->len = cur_op->req.sec_rw.len;
|
||||||
cur_op->bs_op->buf = cur_op->buf;
|
cur_op->bs_op->buf = cur_op->buf;
|
||||||
|
cur_op->bs_op->bitmap = cur_op->bitmap;
|
||||||
#ifdef OSD_STUB
|
#ifdef OSD_STUB
|
||||||
cur_op->bs_op->retval = cur_op->bs_op->len;
|
cur_op->bs_op->retval = cur_op->bs_op->len;
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue