forked from vitalif/vitastor
Inline bitmaps
Handy for zero-copy RDMA tests (removes 4-byte scatter/gather (s/g) entries)
Branch: rdma-zerocopy
parent
ce777319c3
commit
8faf8f7b58
|
@ -207,20 +207,26 @@ void osd_messenger_t::handle_op_hdr(osd_client_t *cl)
|
||||||
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
|
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE)
|
||||||
{
|
{
|
||||||
if (cur_op->req.sec_rw.attr_len > 0)
|
if (cur_op->req.sec_rw.bitmap_len > 0)
|
||||||
{
|
{
|
||||||
if (cur_op->req.sec_rw.attr_len > sizeof(unsigned))
|
if (cur_op->req.sec_rw.bitmap_len > sizeof(void*))
|
||||||
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.attr_len);
|
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(cur_op->req.sec_rw.bitmap_len);
|
||||||
else
|
else
|
||||||
cur_op->bitmap = &cur_op->bmp_data;
|
cur_op->bitmap = &cur_op->bmp_data;
|
||||||
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.attr_len);
|
if (cur_op->req.sec_rw.bitmap_len <= 8)
|
||||||
|
memcpy(cur_op->bitmap, &cur_op->req.sec_rw.bitmap, cur_op->req.sec_rw.bitmap_len);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cl->recv_list.push_back(cur_op->bitmap, cur_op->req.sec_rw.bitmap_len);
|
||||||
|
cl->read_remaining += cur_op->req.sec_rw.bitmap_len;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (cur_op->req.sec_rw.len > 0)
|
if (cur_op->req.sec_rw.len > 0)
|
||||||
{
|
{
|
||||||
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
|
cur_op->buf = memalign_or_die(MEM_ALIGNMENT, cur_op->req.sec_rw.len);
|
||||||
cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len);
|
cl->recv_list.push_back(cur_op->buf, cur_op->req.sec_rw.len);
|
||||||
|
cl->read_remaining += cur_op->req.sec_rw.len;
|
||||||
}
|
}
|
||||||
cl->read_remaining = cur_op->req.sec_rw.len + cur_op->req.sec_rw.attr_len;
|
|
||||||
}
|
}
|
||||||
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
|
else if (cur_op->req.hdr.opcode == OSD_OP_SEC_STABILIZE ||
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
|
cur_op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK)
|
||||||
|
@ -295,7 +301,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
||||||
if (op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ)
|
if (op->reply.hdr.opcode == OSD_OP_SEC_READ || op->reply.hdr.opcode == OSD_OP_READ)
|
||||||
{
|
{
|
||||||
// Read data. In this case we assume that the buffer is preallocated by the caller (!)
|
// Read data. In this case we assume that the buffer is preallocated by the caller (!)
|
||||||
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.attr_len : op->reply.rw.bitmap_len);
|
unsigned bmp_len = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->reply.sec_rw.bitmap_len : op->reply.rw.bitmap_len);
|
||||||
unsigned expected_size = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len);
|
unsigned expected_size = (op->reply.hdr.opcode == OSD_OP_SEC_READ ? op->req.sec_rw.len : op->req.rw.len);
|
||||||
if (op->reply.hdr.retval >= 0 && (op->reply.hdr.retval != expected_size || bmp_len > op->bitmap_len))
|
if (op->reply.hdr.retval >= 0 && (op->reply.hdr.retval != expected_size || bmp_len > op->bitmap_len))
|
||||||
{
|
{
|
||||||
|
@ -309,14 +315,24 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl)
|
||||||
if (op->reply.hdr.retval >= 0 && bmp_len > 0)
|
if (op->reply.hdr.retval >= 0 && bmp_len > 0)
|
||||||
{
|
{
|
||||||
assert(op->bitmap);
|
assert(op->bitmap);
|
||||||
cl->recv_list.push_back(op->bitmap, bmp_len);
|
if (bmp_len <= 8)
|
||||||
|
{
|
||||||
|
memcpy(op->bitmap, (op->reply.hdr.opcode == OSD_OP_SEC_READ
|
||||||
|
? &op->reply.sec_rw.bitmap
|
||||||
|
: &op->reply.rw.bitmap), bmp_len);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cl->recv_list.push_back(op->bitmap, bmp_len);
|
||||||
|
cl->read_remaining += bmp_len;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (op->reply.hdr.retval > 0)
|
if (op->reply.hdr.retval > 0)
|
||||||
{
|
{
|
||||||
assert(op->iov.count > 0);
|
assert(op->iov.count > 0);
|
||||||
cl->recv_list.append(op->iov);
|
cl->recv_list.append(op->iov);
|
||||||
|
cl->read_remaining += op->reply.hdr.retval;
|
||||||
}
|
}
|
||||||
cl->read_remaining = op->reply.hdr.retval + bmp_len;
|
|
||||||
if (cl->read_remaining == 0)
|
if (cl->read_remaining == 0)
|
||||||
{
|
{
|
||||||
goto reuse;
|
goto reuse;
|
||||||
|
|
|
@ -50,23 +50,37 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op)
|
||||||
// Bitmap
|
// Bitmap
|
||||||
if (cur_op->op_type == OSD_OP_IN &&
|
if (cur_op->op_type == OSD_OP_IN &&
|
||||||
cur_op->req.hdr.opcode == OSD_OP_SEC_READ &&
|
cur_op->req.hdr.opcode == OSD_OP_SEC_READ &&
|
||||||
cur_op->reply.sec_rw.attr_len > 0)
|
cur_op->reply.sec_rw.bitmap_len > 0)
|
||||||
{
|
{
|
||||||
to_send_list.push_back((iovec){
|
if (cur_op->reply.sec_rw.bitmap_len <= 8)
|
||||||
.iov_base = cur_op->bitmap,
|
{
|
||||||
.iov_len = cur_op->reply.sec_rw.attr_len,
|
memcpy(&cur_op->reply.sec_rw.bitmap, cur_op->bitmap, cur_op->reply.sec_rw.bitmap_len);
|
||||||
});
|
}
|
||||||
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
else
|
||||||
|
{
|
||||||
|
to_send_list.push_back((iovec){
|
||||||
|
.iov_base = cur_op->bitmap,
|
||||||
|
.iov_len = cur_op->reply.sec_rw.bitmap_len,
|
||||||
|
});
|
||||||
|
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (cur_op->op_type == OSD_OP_OUT &&
|
else if (cur_op->op_type == OSD_OP_OUT &&
|
||||||
(cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) &&
|
(cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE || cur_op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE) &&
|
||||||
cur_op->req.sec_rw.attr_len > 0)
|
cur_op->req.sec_rw.bitmap_len > 0)
|
||||||
{
|
{
|
||||||
to_send_list.push_back((iovec){
|
if (cur_op->req.sec_rw.bitmap_len <= 8)
|
||||||
.iov_base = cur_op->bitmap,
|
{
|
||||||
.iov_len = cur_op->req.sec_rw.attr_len,
|
memcpy(&cur_op->req.sec_rw.bitmap, cur_op->bitmap, cur_op->req.sec_rw.bitmap_len);
|
||||||
});
|
}
|
||||||
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
else
|
||||||
|
{
|
||||||
|
to_send_list.push_back((iovec){
|
||||||
|
.iov_base = cur_op->bitmap,
|
||||||
|
.iov_len = cur_op->req.sec_rw.attr_len,
|
||||||
|
});
|
||||||
|
to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Operation data
|
// Operation data
|
||||||
if ((cur_op->op_type == OSD_OP_IN
|
if ((cur_op->op_type == OSD_OP_IN
|
||||||
|
|
|
@ -35,7 +35,7 @@
|
||||||
#define MEM_ALIGNMENT 512
|
#define MEM_ALIGNMENT 512
|
||||||
#endif
|
#endif
|
||||||
#define OSD_RW_MAX 64*1024*1024
|
#define OSD_RW_MAX 64*1024*1024
|
||||||
#define OSD_PROTOCOL_VERSION 1
|
#define OSD_PROTOCOL_VERSION 2
|
||||||
|
|
||||||
// common request and reply headers
|
// common request and reply headers
|
||||||
struct __attribute__((__packed__)) osd_op_header_t
|
struct __attribute__((__packed__)) osd_op_header_t
|
||||||
|
@ -74,8 +74,10 @@ struct __attribute__((__packed__)) osd_op_sec_rw_t
|
||||||
// length
|
// length
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
// bitmap/attribute length - bitmap comes after header, but before data
|
// bitmap/attribute length - bitmap comes after header, but before data
|
||||||
uint32_t attr_len;
|
uint32_t bitmap_len;
|
||||||
uint32_t pad0;
|
uint32_t pad0;
|
||||||
|
// inline bitmap (when it's no longer than 8 bytes)
|
||||||
|
uint64_t bitmap;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct __attribute__((__packed__)) osd_reply_sec_rw_t
|
struct __attribute__((__packed__)) osd_reply_sec_rw_t
|
||||||
|
@ -84,8 +86,10 @@ struct __attribute__((__packed__)) osd_reply_sec_rw_t
|
||||||
// for reads and writes: assigned or read version number
|
// for reads and writes: assigned or read version number
|
||||||
uint64_t version;
|
uint64_t version;
|
||||||
// for reads: bitmap/attribute length (just to double-check)
|
// for reads: bitmap/attribute length (just to double-check)
|
||||||
uint32_t attr_len;
|
uint32_t bitmap_len;
|
||||||
uint32_t pad0;
|
uint32_t pad0;
|
||||||
|
// inline bitmap (when it's no longer than 8 bytes)
|
||||||
|
uint64_t bitmap;
|
||||||
};
|
};
|
||||||
|
|
||||||
// delete object on the secondary OSD
|
// delete object on the secondary OSD
|
||||||
|
@ -199,6 +203,8 @@ struct __attribute__((__packed__)) osd_reply_rw_t
|
||||||
// for reads: bitmap length
|
// for reads: bitmap length
|
||||||
uint32_t bitmap_len;
|
uint32_t bitmap_len;
|
||||||
uint32_t pad0;
|
uint32_t pad0;
|
||||||
|
// inline bitmap (when it's no longer than 8 bytes)
|
||||||
|
uint64_t bitmap;
|
||||||
};
|
};
|
||||||
|
|
||||||
// sync to the primary OSD
|
// sync to the primary OSD
|
||||||
|
|
|
@ -235,7 +235,10 @@ resume_2:
|
||||||
{
|
{
|
||||||
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
reconstruct_stripes_jerasure(stripes, op_data->pg_size, op_data->pg_data_size, clean_entry_bitmap_size);
|
||||||
}
|
}
|
||||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
if (cur_op->reply.rw.bitmap_len <= 8)
|
||||||
|
memcpy(&cur_op->reply.rw.bitmap, op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
|
else
|
||||||
|
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
for (int role = 0; role < op_data->pg_size; role++)
|
for (int role = 0; role < op_data->pg_size; role++)
|
||||||
{
|
{
|
||||||
if (stripes[role].req_end != 0)
|
if (stripes[role].req_end != 0)
|
||||||
|
@ -250,7 +253,10 @@ resume_2:
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
if (cur_op->reply.rw.bitmap_len <= 8)
|
||||||
|
memcpy(&cur_op->reply.rw.bitmap, op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
|
else
|
||||||
|
cur_op->iov.push_back(op_data->stripes[0].bmp_buf, cur_op->reply.rw.bitmap_len);
|
||||||
cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len);
|
cur_op->iov.push_back(cur_op->buf, cur_op->req.rw.len);
|
||||||
}
|
}
|
||||||
finish_op(cur_op, cur_op->req.rw.len);
|
finish_op(cur_op, cur_op->req.rw.len);
|
||||||
|
|
|
@ -200,7 +200,7 @@ int osd_t::submit_primary_subop_batch(int submit_type, inode_t inode, uint64_t o
|
||||||
.version = op_version,
|
.version = op_version,
|
||||||
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
|
.offset = wr ? stripes[stripe_num].write_start : stripes[stripe_num].read_start,
|
||||||
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
|
.len = wr ? stripes[stripe_num].write_end - stripes[stripe_num].write_start : stripes[stripe_num].read_end - stripes[stripe_num].read_start,
|
||||||
.attr_len = wr ? clean_entry_bitmap_size : 0,
|
.bitmap_len = wr ? clean_entry_bitmap_size : 0,
|
||||||
};
|
};
|
||||||
#ifdef OSD_DEBUG
|
#ifdef OSD_DEBUG
|
||||||
printf(
|
printf(
|
||||||
|
|
|
@ -20,9 +20,9 @@ void osd_t::secondary_op_callback(osd_op_t *op)
|
||||||
if (op->req.hdr.opcode == OSD_OP_SEC_READ)
|
if (op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
{
|
{
|
||||||
if (op->bs_op->retval >= 0)
|
if (op->bs_op->retval >= 0)
|
||||||
op->reply.sec_rw.attr_len = clean_entry_bitmap_size;
|
op->reply.sec_rw.bitmap_len = clean_entry_bitmap_size;
|
||||||
else
|
else
|
||||||
op->reply.sec_rw.attr_len = 0;
|
op->reply.sec_rw.bitmap_len = 0;
|
||||||
if (op->bs_op->retval > 0)
|
if (op->bs_op->retval > 0)
|
||||||
op->iov.push_back(op->buf, op->bs_op->retval);
|
op->iov.push_back(op->buf, op->bs_op->retval);
|
||||||
}
|
}
|
||||||
|
@ -81,7 +81,7 @@ void osd_t::exec_secondary(osd_op_t *cur_op)
|
||||||
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ)
|
||||||
{
|
{
|
||||||
// Allocate memory for the read operation
|
// Allocate memory for the read operation
|
||||||
if (clean_entry_bitmap_size > sizeof(unsigned))
|
if (clean_entry_bitmap_size > sizeof(void*))
|
||||||
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size);
|
cur_op->bitmap = cur_op->rmw_buf = malloc_or_die(clean_entry_bitmap_size);
|
||||||
else
|
else
|
||||||
cur_op->bitmap = &cur_op->bmp_data;
|
cur_op->bitmap = &cur_op->bmp_data;
|
||||||
|
|
Loading…
Reference in New Issue