forked from vitalif/vitastor
Allow to overwrite incomplete objects or parts of objects to recover them
parent
660c2412fb
commit
e8ac08be14
3
Makefile
3
Makefile
|
@ -23,6 +23,9 @@ osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h $(OSD_OBJS)
|
||||||
stub_osd: stub_osd.o rw_blocking.o
|
stub_osd: stub_osd.o rw_blocking.o
|
||||||
g++ $(CXXFLAGS) -o $@ stub_osd.o rw_blocking.o -ltcmalloc_minimal
|
g++ $(CXXFLAGS) -o $@ stub_osd.o rw_blocking.o -ltcmalloc_minimal
|
||||||
|
|
||||||
|
osd_rmw_test: osd_rmw_test.o
|
||||||
|
g++ $(CXXFLAGS) -o $@ osd_rmw_test.o
|
||||||
|
|
||||||
STUB_URING_OSD_OBJS := stub_uring_osd.o epoll_manager.o messenger.o msgr_send.o msgr_receive.o ringloop.o timerfd_manager.o json11.o
|
STUB_URING_OSD_OBJS := stub_uring_osd.o epoll_manager.o messenger.o msgr_send.o msgr_receive.o ringloop.o timerfd_manager.o json11.o
|
||||||
stub_uring_osd: $(STUB_URING_OSD_OBJS)
|
stub_uring_osd: $(STUB_URING_OSD_OBJS)
|
||||||
g++ $(CXXFLAGS) -o $@ -ltcmalloc_minimal $(STUB_URING_OSD_OBJS) -luring
|
g++ $(CXXFLAGS) -o $@ -ltcmalloc_minimal $(STUB_URING_OSD_OBJS) -luring
|
||||||
|
|
|
@ -239,6 +239,12 @@ resume_1:
|
||||||
{
|
{
|
||||||
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set,
|
||||||
pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size);
|
pg.pg_size, pg.pg_minsize, pg.pg_cursize, pg.cur_set.data(), bs_block_size);
|
||||||
|
if (!cur_op->rmw_buf)
|
||||||
|
{
|
||||||
|
// Refuse partial overwrite of an incomplete object
|
||||||
|
cur_op->reply.hdr.retval = -EINVAL;
|
||||||
|
goto continue_others;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Read required blocks
|
// Read required blocks
|
||||||
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, pg.pg_size, op_data->prev_set, cur_op);
|
submit_primary_subops(SUBMIT_RMW_READ, UINT64_MAX, pg.pg_size, op_data->prev_set, cur_op);
|
||||||
|
@ -361,10 +367,12 @@ resume_9:
|
||||||
remove_object_from_state(op_data->oid, op_data->object_state, pg);
|
remove_object_from_state(op_data->oid, op_data->object_state, pg);
|
||||||
pg.clean_count++;
|
pg.clean_count++;
|
||||||
}
|
}
|
||||||
|
cur_op->reply.hdr.retval = cur_op->req.rw.len;
|
||||||
|
continue_others:
|
||||||
// Remove version override
|
// Remove version override
|
||||||
pg.ver_override.erase(op_data->oid);
|
pg.ver_override.erase(op_data->oid);
|
||||||
object_id oid = op_data->oid;
|
object_id oid = op_data->oid;
|
||||||
finish_op(cur_op, cur_op->req.rw.len);
|
finish_op(cur_op, cur_op->reply.hdr.retval);
|
||||||
// Continue other write operations to the same object
|
// Continue other write operations to the same object
|
||||||
auto next_it = pg.write_queue.find(oid);
|
auto next_it = pg.write_queue.find(oid);
|
||||||
auto this_it = next_it;
|
auto this_it = next_it;
|
||||||
|
|
20
osd_rmw.cpp
20
osd_rmw.cpp
|
@ -208,7 +208,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
{
|
{
|
||||||
pg_cursize = 0;
|
pg_cursize = 0;
|
||||||
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
{
|
{
|
||||||
if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0)
|
if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0)
|
||||||
{
|
{
|
||||||
|
@ -228,6 +228,20 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
pg_cursize++;
|
pg_cursize++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (int role = pg_minsize; role < pg_size; role++)
|
||||||
|
{
|
||||||
|
if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0)
|
||||||
|
{
|
||||||
|
for (int r2 = 0; r2 < pg_minsize; r2++)
|
||||||
|
{
|
||||||
|
cover_read(0, chunk_size, stripes[r2]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (read_osd_set[role] != 0)
|
||||||
|
{
|
||||||
|
pg_cursize++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (pg_cursize < pg_size)
|
if (pg_cursize < pg_size)
|
||||||
{
|
{
|
||||||
|
@ -251,8 +265,8 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
}
|
}
|
||||||
if (found < pg_minsize)
|
if (found < pg_minsize)
|
||||||
{
|
{
|
||||||
// FIXME Object is incomplete - refuse partial overwrite
|
// Object is incomplete - refuse partial overwrite
|
||||||
assert(0);
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
110
osd_rmw_test.cpp
110
osd_rmw_test.cpp
|
@ -13,6 +13,8 @@ void test6();
|
||||||
void test7();
|
void test7();
|
||||||
void test8();
|
void test8();
|
||||||
void test9();
|
void test9();
|
||||||
|
void test10();
|
||||||
|
void test11();
|
||||||
|
|
||||||
/***
|
/***
|
||||||
|
|
||||||
|
@ -91,6 +93,28 @@ Cases:
|
||||||
}
|
}
|
||||||
+ check write0 buffer
|
+ check write0 buffer
|
||||||
|
|
||||||
|
10. full overwrite/recovery case:
|
||||||
|
calc_rmw(offset=0, len=256K, read_osd_set=[1,0,0], write_osd_set=[1,2,3])
|
||||||
|
= {
|
||||||
|
read: [ [ 0, 0 ], [ 0, 0 ], [ 0, 0 ] ],
|
||||||
|
write: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ],
|
||||||
|
input buffer: [ write0, write1 ],
|
||||||
|
rmw buffer: [ write2 ],
|
||||||
|
}
|
||||||
|
then, after calc_rmw_parity_xor(): all the same
|
||||||
|
+ check write2 buffer
|
||||||
|
|
||||||
|
10. partial recovery case:
|
||||||
|
calc_rmw(offset=128K, len=128K, read_osd_set=[1,0,0], write_osd_set=[1,2,3])
|
||||||
|
= {
|
||||||
|
read: [ [ 0, 128K ], [ 0, 0 ], [ 0, 0 ] ],
|
||||||
|
write: [ [ 0, 0 ], [ 0, 128K ], [ 0, 128K ] ],
|
||||||
|
input buffer: [ write1 ],
|
||||||
|
rmw buffer: [ write2, read0 ],
|
||||||
|
}
|
||||||
|
then, after calc_rmw_parity_xor(): all the same
|
||||||
|
+ check write2 buffer
|
||||||
|
|
||||||
***/
|
***/
|
||||||
|
|
||||||
int main(int narg, char *args[])
|
int main(int narg, char *args[])
|
||||||
|
@ -109,6 +133,10 @@ int main(int narg, char *args[])
|
||||||
test8();
|
test8();
|
||||||
// Test 9
|
// Test 9
|
||||||
test9();
|
test9();
|
||||||
|
// Test 10
|
||||||
|
test10();
|
||||||
|
// Test 11
|
||||||
|
test11();
|
||||||
// End
|
// End
|
||||||
printf("all ok\n");
|
printf("all ok\n");
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -361,3 +389,85 @@ void test9()
|
||||||
check_pattern(stripes[0].write_buf, 128*1024, PATTERN1);
|
check_pattern(stripes[0].write_buf, 128*1024, PATTERN1);
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test10()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 1, 0, 0 };
|
||||||
|
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
|
// Test 10.0
|
||||||
|
split_stripes(2, 128*1024, 0, 256*1024, stripes);
|
||||||
|
assert(stripes[0].req_start == 0 && stripes[0].req_end == 128*1024);
|
||||||
|
assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024);
|
||||||
|
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||||
|
// Test 10.1
|
||||||
|
void *write_buf = malloc(256*1024);
|
||||||
|
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024);
|
||||||
|
assert(rmw_buf);
|
||||||
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 0);
|
||||||
|
assert(stripes[1].read_start == 0 && stripes[1].read_end == 0);
|
||||||
|
assert(stripes[2].read_start == 0 && stripes[2].read_end == 0);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
assert(stripes[0].read_buf == NULL);
|
||||||
|
assert(stripes[1].read_buf == NULL);
|
||||||
|
assert(stripes[2].read_buf == NULL);
|
||||||
|
assert(stripes[0].write_buf == write_buf);
|
||||||
|
assert(stripes[1].write_buf == write_buf+128*1024);
|
||||||
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
|
// Test 10.2
|
||||||
|
set_pattern(stripes[0].write_buf, 128*1024, PATTERN1);
|
||||||
|
set_pattern(stripes[1].write_buf, 128*1024, PATTERN2);
|
||||||
|
calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
assert(stripes[0].write_buf == write_buf);
|
||||||
|
assert(stripes[1].write_buf == write_buf+128*1024);
|
||||||
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
|
check_pattern(stripes[2].write_buf, 128*1024, PATTERN1^PATTERN2);
|
||||||
|
free(rmw_buf);
|
||||||
|
free(write_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test11()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 1, 0, 0 };
|
||||||
|
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
|
// Test 11.0
|
||||||
|
split_stripes(2, 128*1024, 128*1024, 256*1024, stripes);
|
||||||
|
assert(stripes[0].req_start == 0 && stripes[0].req_end == 0);
|
||||||
|
assert(stripes[1].req_start == 0 && stripes[1].req_end == 128*1024);
|
||||||
|
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||||
|
// Test 11.1
|
||||||
|
void *write_buf = malloc(256*1024);
|
||||||
|
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024);
|
||||||
|
assert(rmw_buf);
|
||||||
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||||
|
assert(stripes[1].read_start == 0 && stripes[1].read_end == 0);
|
||||||
|
assert(stripes[2].read_start == 0 && stripes[2].read_end == 0);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
assert(stripes[0].read_buf == rmw_buf+128*1024);
|
||||||
|
assert(stripes[1].read_buf == NULL);
|
||||||
|
assert(stripes[2].read_buf == NULL);
|
||||||
|
assert(stripes[0].write_buf == NULL);
|
||||||
|
assert(stripes[1].write_buf == write_buf);
|
||||||
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
|
// Test 11.2
|
||||||
|
set_pattern(stripes[0].read_buf, 128*1024, PATTERN1);
|
||||||
|
set_pattern(stripes[1].write_buf, 128*1024, PATTERN2);
|
||||||
|
calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024);
|
||||||
|
assert(stripes[0].write_buf == NULL);
|
||||||
|
assert(stripes[1].write_buf == write_buf);
|
||||||
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
|
check_pattern(stripes[2].write_buf, 128*1024, PATTERN1^PATTERN2);
|
||||||
|
free(rmw_buf);
|
||||||
|
free(write_buf);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue