forked from vitalif/vitastor
Fix osd_rmw object recovery case (len==0)
parent
e56909fb45
commit
f036eecf1c
20
osd_rmw.cpp
20
osd_rmw.cpp
|
@ -56,6 +56,11 @@ static inline void cover_read(uint32_t start, uint32_t end, osd_rmw_stripe_t & s
|
||||||
|
|
||||||
void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t end, osd_rmw_stripe_t *stripes)
|
void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t end, osd_rmw_stripe_t *stripes)
|
||||||
{
|
{
|
||||||
|
if (end == 0)
|
||||||
|
{
|
||||||
|
// Zero length request - offset doesn't matter
|
||||||
|
return;
|
||||||
|
}
|
||||||
end = start+end;
|
end = start+end;
|
||||||
for (int role = 0; role < pg_minsize; role++)
|
for (int role = 0; role < pg_minsize; role++)
|
||||||
{
|
{
|
||||||
|
@ -198,11 +203,13 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
}
|
}
|
||||||
if (write_osd_set != read_osd_set)
|
if (write_osd_set != read_osd_set)
|
||||||
{
|
{
|
||||||
|
pg_cursize = 0;
|
||||||
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
// Object is degraded/misplaced and will be moved to <write_osd_set>
|
||||||
for (int role = 0; role < pg_size; role++)
|
for (int role = 0; role < pg_size; role++)
|
||||||
{
|
{
|
||||||
if (write_osd_set[role] != read_osd_set[role])
|
if (write_osd_set[role] != read_osd_set[role])
|
||||||
{
|
{
|
||||||
|
// FIXME: For EC more than 2+1: handle case when write_osd_set == 0 and read_osd_set != 0
|
||||||
// We need to get data for any moved / recovered chunk
|
// We need to get data for any moved / recovered chunk
|
||||||
// And we need a continuous write buffer so we'll only optimize
|
// And we need a continuous write buffer so we'll only optimize
|
||||||
// for the case when the whole chunk is ovewritten in the request
|
// for the case when the whole chunk is ovewritten in the request
|
||||||
|
@ -211,8 +218,13 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
{
|
{
|
||||||
stripes[role].read_start = 0;
|
stripes[role].read_start = 0;
|
||||||
stripes[role].read_end = chunk_size;
|
stripes[role].read_end = chunk_size;
|
||||||
|
// Warning: We don't modify write_start/write_end here, we do it in calc_rmw_parity()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (read_osd_set[role] != 0)
|
||||||
|
{
|
||||||
|
pg_cursize++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (pg_cursize < pg_size)
|
if (pg_cursize < pg_size)
|
||||||
|
@ -235,7 +247,8 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
}
|
}
|
||||||
if (found < pg_minsize)
|
if (found < pg_minsize)
|
||||||
{
|
{
|
||||||
// Incomplete object (FIXME)
|
// FIXME Object is incomplete - refuse partial overwrite
|
||||||
|
assert(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -251,7 +264,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||||
stripes[role].write_buf = request_buf + in_pos;
|
stripes[role].write_buf = request_buf + in_pos;
|
||||||
in_pos += stripes[role].req_end - stripes[role].req_start;
|
in_pos += stripes[role].req_end - stripes[role].req_start;
|
||||||
}
|
}
|
||||||
else if (role >= pg_minsize && read_osd_set[role] != 0)
|
else if (role >= pg_minsize && write_osd_set[role] != 0 && end != 0)
|
||||||
{
|
{
|
||||||
stripes[role].write_buf = rmw_buf + buf_pos;
|
stripes[role].write_buf = rmw_buf + buf_pos;
|
||||||
buf_pos += end - start;
|
buf_pos += end - start;
|
||||||
|
@ -372,6 +385,7 @@ void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_
|
||||||
if (write_osd_set[role] != read_osd_set[role] &&
|
if (write_osd_set[role] != read_osd_set[role] &&
|
||||||
(stripes[role].req_start != 0 || stripes[role].req_end != chunk_size))
|
(stripes[role].req_start != 0 || stripes[role].req_end != chunk_size))
|
||||||
{
|
{
|
||||||
|
// FIXME again, handle case when write_osd_set[role] is 0
|
||||||
// Copy modified chunk into the read buffer to write it back
|
// Copy modified chunk into the read buffer to write it back
|
||||||
memcpy(
|
memcpy(
|
||||||
stripes[role].read_buf + stripes[role].req_start,
|
stripes[role].read_buf + stripes[role].req_start,
|
||||||
|
@ -384,7 +398,7 @@ void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!stripes[pg_minsize].missing)
|
if (!stripes[pg_minsize].missing && end != 0)
|
||||||
{
|
{
|
||||||
// Calculate new parity (EC k+1)
|
// Calculate new parity (EC k+1)
|
||||||
int parity = pg_minsize, prev = -2;
|
int parity = pg_minsize, prev = -2;
|
||||||
|
|
153
osd_rmw_test.cpp
153
osd_rmw_test.cpp
|
@ -2,6 +2,15 @@
|
||||||
#include "osd_rmw.cpp"
|
#include "osd_rmw.cpp"
|
||||||
#include "test_pattern.h"
|
#include "test_pattern.h"
|
||||||
|
|
||||||
|
void dump_stripes(osd_rmw_stripe_t *stripes, int pg_size);
|
||||||
|
void test1();
|
||||||
|
void test4();
|
||||||
|
void test5();
|
||||||
|
void test6();
|
||||||
|
void test7();
|
||||||
|
void test8();
|
||||||
|
void test9();
|
||||||
|
|
||||||
/***
|
/***
|
||||||
|
|
||||||
Cases:
|
Cases:
|
||||||
|
@ -65,21 +74,75 @@ Cases:
|
||||||
}
|
}
|
||||||
+ check write2 buffer
|
+ check write2 buffer
|
||||||
|
|
||||||
***/
|
9. object recovery case:
|
||||||
|
calc_rmw(offset=0, len=0, read_osd_set=[0,2,3], write_osd_set=[1,2,3])
|
||||||
|
= {
|
||||||
|
read: [ [ 0, 128K ], [ 0, 128K ], [ 0, 128K ] ],
|
||||||
|
write: [ [ 0, 0 ], [ 0, 0 ], [ 0, 0 ] ],
|
||||||
|
input buffer: NULL,
|
||||||
|
rmw buffer: [ read0, read1, read2 ],
|
||||||
|
}
|
||||||
|
then, after calc_rmw_parity(): {
|
||||||
|
write: [ [ 0, 128K ], [ 0, 0 ], [ 0, 0 ] ],
|
||||||
|
write0==read0,
|
||||||
|
}
|
||||||
|
+ check write0 buffer
|
||||||
|
|
||||||
void test7();
|
***/
|
||||||
void test8();
|
|
||||||
|
|
||||||
int main(int narg, char *args[])
|
int main(int narg, char *args[])
|
||||||
|
{
|
||||||
|
// Test 1
|
||||||
|
test1();
|
||||||
|
// Test 4
|
||||||
|
test4();
|
||||||
|
// Test 5
|
||||||
|
test5();
|
||||||
|
// Test 6
|
||||||
|
test6();
|
||||||
|
// Test 7
|
||||||
|
test7();
|
||||||
|
// Test 8
|
||||||
|
test8();
|
||||||
|
// Test 9
|
||||||
|
test9();
|
||||||
|
// End
|
||||||
|
printf("all ok\n");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void dump_stripes(osd_rmw_stripe_t *stripes, int pg_size)
|
||||||
|
{
|
||||||
|
printf("request");
|
||||||
|
for (int i = 0; i < pg_size; i++)
|
||||||
|
{
|
||||||
|
printf(" {%uK-%uK}", stripes[i].req_start/1024, stripes[i].req_end/1024);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
printf("read");
|
||||||
|
for (int i = 0; i < pg_size; i++)
|
||||||
|
{
|
||||||
|
printf(" {%uK-%uK}", stripes[i].read_start/1024, stripes[i].read_end/1024);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
printf("write");
|
||||||
|
for (int i = 0; i < pg_size; i++)
|
||||||
|
{
|
||||||
|
printf(" {%uK-%uK}", stripes[i].write_start/1024, stripes[i].write_end/1024);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
void test1()
|
||||||
{
|
{
|
||||||
osd_num_t osd_set[3] = { 1, 0, 3 };
|
osd_num_t osd_set[3] = { 1, 0, 3 };
|
||||||
osd_rmw_stripe_t stripes[3] = { 0 };
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
// Test 1
|
// Test 1.1
|
||||||
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
||||||
assert(stripes[0].req_start == 128*1024-4096 && stripes[0].req_end == 128*1024);
|
assert(stripes[0].req_start == 128*1024-4096 && stripes[0].req_end == 128*1024);
|
||||||
assert(stripes[1].req_start == 0 && stripes[1].req_end == 4096);
|
assert(stripes[1].req_start == 0 && stripes[1].req_end == 4096);
|
||||||
assert(stripes[2].req_end == 0);
|
assert(stripes[2].req_end == 0);
|
||||||
// Test 2
|
// Test 1.2
|
||||||
for (int i = 0; i < 3; i++)
|
for (int i = 0; i < 3; i++)
|
||||||
{
|
{
|
||||||
stripes[i].read_start = stripes[i].req_start;
|
stripes[i].read_start = stripes[i].req_start;
|
||||||
|
@ -88,12 +151,17 @@ int main(int narg, char *args[])
|
||||||
assert(extend_missing_stripes(stripes, osd_set, 2, 3) == 0);
|
assert(extend_missing_stripes(stripes, osd_set, 2, 3) == 0);
|
||||||
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||||
assert(stripes[2].read_start == 0 && stripes[2].read_end == 4096);
|
assert(stripes[2].read_start == 0 && stripes[2].read_end == 4096);
|
||||||
// Test 3
|
// Test 1.3
|
||||||
stripes[0] = { .req_start = 128*1024-4096, .req_end = 128*1024 };
|
stripes[0] = { .req_start = 128*1024-4096, .req_end = 128*1024 };
|
||||||
cover_read(0, 128*1024, stripes[0]);
|
cover_read(0, 128*1024, stripes[0]);
|
||||||
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024-4096);
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024-4096);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test4()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 1, 0, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
// Test 4.1
|
// Test 4.1
|
||||||
memset(stripes, 0, sizeof(stripes));
|
|
||||||
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
||||||
void* write_buf = malloc(8192);
|
void* write_buf = malloc(8192);
|
||||||
void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
|
void* rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
|
||||||
|
@ -120,15 +188,20 @@ int main(int narg, char *args[])
|
||||||
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
|
check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test5()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 1, 0, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
// Test 5.1
|
// Test 5.1
|
||||||
memset(stripes, 0, sizeof(stripes));
|
|
||||||
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
|
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
|
||||||
assert(stripes[0].req_start == 0 && stripes[0].req_end == 128*1024);
|
assert(stripes[0].req_start == 0 && stripes[0].req_end == 128*1024);
|
||||||
assert(stripes[1].req_start == 0 && stripes[1].req_end == 64*1024);
|
assert(stripes[1].req_start == 0 && stripes[1].req_end == 64*1024);
|
||||||
assert(stripes[2].req_end == 0);
|
assert(stripes[2].req_end == 0);
|
||||||
// Test 5.2
|
// Test 5.2
|
||||||
write_buf = malloc(64*1024*3);
|
void *write_buf = malloc(64*1024*3);
|
||||||
rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
|
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 2, osd_set, 128*1024);
|
||||||
assert(stripes[0].read_start == 64*1024 && stripes[0].read_end == 128*1024);
|
assert(stripes[0].read_start == 64*1024 && stripes[0].read_end == 128*1024);
|
||||||
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
||||||
assert(stripes[2].read_start == 64*1024 && stripes[2].read_end == 128*1024);
|
assert(stripes[2].read_start == 64*1024 && stripes[2].read_end == 128*1024);
|
||||||
|
@ -143,12 +216,16 @@ int main(int narg, char *args[])
|
||||||
assert(stripes[2].write_buf == rmw_buf);
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test6()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 1, 2, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
// Test 6.1
|
// Test 6.1
|
||||||
memset(stripes, 0, sizeof(stripes));
|
|
||||||
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
|
split_stripes(2, 128*1024, 0, 64*1024*3, stripes);
|
||||||
osd_set[1] = 2;
|
void *write_buf = malloc(64*1024*3);
|
||||||
write_buf = malloc(64*1024*3);
|
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, osd_set, 128*1024);
|
||||||
rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, osd_set, 128*1024);
|
|
||||||
assert(stripes[0].read_end == 0);
|
assert(stripes[0].read_end == 0);
|
||||||
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
assert(stripes[1].read_start == 64*1024 && stripes[1].read_end == 128*1024);
|
||||||
assert(stripes[2].read_end == 0);
|
assert(stripes[2].read_end == 0);
|
||||||
|
@ -163,14 +240,6 @@ int main(int narg, char *args[])
|
||||||
assert(stripes[2].write_buf == rmw_buf);
|
assert(stripes[2].write_buf == rmw_buf);
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
osd_set[1] = 0;
|
|
||||||
// Test 7
|
|
||||||
test7();
|
|
||||||
// Test 8
|
|
||||||
test8();
|
|
||||||
// End
|
|
||||||
printf("all ok\n");
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void test7()
|
void test7()
|
||||||
|
@ -249,3 +318,43 @@ void test8()
|
||||||
free(rmw_buf);
|
free(rmw_buf);
|
||||||
free(write_buf);
|
free(write_buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void test9()
|
||||||
|
{
|
||||||
|
osd_num_t osd_set[3] = { 0, 2, 3 };
|
||||||
|
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||||
|
osd_rmw_stripe_t stripes[3] = { 0 };
|
||||||
|
// Test 9.0
|
||||||
|
split_stripes(2, 128*1024, 64*1024, 0, stripes);
|
||||||
|
assert(stripes[0].req_start == 0 && stripes[0].req_end == 0);
|
||||||
|
assert(stripes[1].req_start == 0 && stripes[1].req_end == 0);
|
||||||
|
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||||
|
// Test 9.1
|
||||||
|
void *write_buf = NULL;
|
||||||
|
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024);
|
||||||
|
assert(stripes[0].read_start == 0 && stripes[0].read_end == 128*1024);
|
||||||
|
assert(stripes[1].read_start == 0 && stripes[1].read_end == 128*1024);
|
||||||
|
assert(stripes[2].read_start == 0 && stripes[2].read_end == 128*1024);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 0);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 0);
|
||||||
|
assert(stripes[0].read_buf == rmw_buf);
|
||||||
|
assert(stripes[1].read_buf == rmw_buf+128*1024);
|
||||||
|
assert(stripes[2].read_buf == rmw_buf+128*1024*2);
|
||||||
|
assert(stripes[0].write_buf == NULL);
|
||||||
|
assert(stripes[1].write_buf == NULL);
|
||||||
|
assert(stripes[2].write_buf == NULL);
|
||||||
|
// Test 8.2
|
||||||
|
set_pattern(stripes[1].read_buf, 128*1024, 0);
|
||||||
|
set_pattern(stripes[2].read_buf, 128*1024, PATTERN1);
|
||||||
|
calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024);
|
||||||
|
assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024);
|
||||||
|
assert(stripes[1].write_start == 0 && stripes[1].write_end == 0);
|
||||||
|
assert(stripes[2].write_start == 0 && stripes[2].write_end == 0);
|
||||||
|
assert(stripes[0].write_buf == rmw_buf);
|
||||||
|
assert(stripes[1].write_buf == NULL);
|
||||||
|
assert(stripes[2].write_buf == NULL);
|
||||||
|
check_pattern(stripes[0].read_buf, 128*1024, PATTERN1);
|
||||||
|
check_pattern(stripes[0].write_buf, 128*1024, PATTERN1);
|
||||||
|
free(rmw_buf);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue