diff --git a/osd_primary.cpp b/osd_primary.cpp index c83294d9..1a61666b 100644 --- a/osd_primary.cpp +++ b/osd_primary.cpp @@ -137,7 +137,7 @@ resume_2: { if (stripes[role].read_end != 0 && stripes[role].missing) { - reconstruct_stripe(stripes, op_data->pg_size, role); + reconstruct_stripe_xor(stripes, op_data->pg_size, role); } if (stripes[role].req_end != 0) { @@ -226,7 +226,7 @@ resume_3: // Save version override for parallel reads pg.ver_override[op_data->oid] = op_data->fact_ver; // Recover missing stripes, calculate parity - calc_rmw_parity(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size); + calc_rmw_parity_xor(op_data->stripes, pg.pg_size, op_data->prev_set, pg.cur_set.data(), bs_block_size); // Send writes submit_primary_subops(SUBMIT_WRITE, pg.pg_size, pg.cur_set.data(), cur_op); resume_4: diff --git a/osd_rmw.cpp b/osd_rmw.cpp index 7b5595b7..bfba4de4 100644 --- a/osd_rmw.cpp +++ b/osd_rmw.cpp @@ -72,7 +72,7 @@ void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, } } -void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role) +void reconstruct_stripe_xor(osd_rmw_stripe_t *stripes, int pg_size, int role) { int prev = -2; for (int other = 0; other < pg_size; other++) @@ -207,9 +207,8 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_ // Object is degraded/misplaced and will be moved to for (int role = 0; role < pg_size; role++) { - if (write_osd_set[role] != read_osd_set[role]) + if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0) { - // FIXME: For EC more than 2+1: handle case when write_osd_set == 0 and read_osd_set != 0 // We need to get data for any moved / recovered chunk // And we need a continuous write buffer so we'll only optimize // for the case when the whole chunk is ovewritten in the request @@ -357,21 +356,22 @@ static void xor_multiple_buffers(buf_len_t *xor1, int n1, buf_len_t *xor2, int n } } -void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size) +void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size) { int pg_minsize = pg_size-1; for (int role = 0; role < pg_size; role++) { if (stripes[role].read_end != 0 && stripes[role].missing) { - // Reconstruct missing stripe (EC k+1) - reconstruct_stripe(stripes, pg_size, role); + // Reconstruct missing stripe (XOR k+1) + reconstruct_stripe_xor(stripes, pg_size, role); break; } } uint32_t start = 0, end = 0; - if (!stripes[pg_minsize].missing || write_osd_set != read_osd_set) + if (write_osd_set[pg_minsize] != 0 || write_osd_set != read_osd_set) { + // Required for the next two if()s for (int role = 0; role < pg_minsize; role++) { if (stripes[role].req_end != 0) @@ -385,10 +385,9 @@ void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_ { for (int role = 0; role < pg_minsize; role++) { - if (write_osd_set[role] != read_osd_set[role] && + if (write_osd_set[role] != read_osd_set[role] && write_osd_set[role] != 0 && (stripes[role].req_start != 0 || stripes[role].req_end != chunk_size)) { - // FIXME again, handle case when write_osd_set[role] is 0 // Copy modified chunk into the read buffer to write it back memcpy( stripes[role].read_buf + stripes[role].req_start, @@ -401,9 +400,9 @@ void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_ } } } - if (!stripes[pg_minsize].missing && end != 0) + if (write_osd_set[pg_minsize] != 0 && end != 0) { - // Calculate new parity (EC k+1) + // Calculate new parity (XOR k+1) int parity = pg_minsize, prev = -2; for (int other = 0; other < pg_minsize; other++) { diff --git a/osd_rmw.h b/osd_rmw.h index b5c1b4a4..88207b73 100644 --- a/osd_rmw.h +++ b/osd_rmw.h @@ -25,7 +25,7 @@ struct osd_rmw_stripe_t void split_stripes(uint64_t pg_minsize, uint32_t bs_block_size, uint32_t start, uint32_t len, osd_rmw_stripe_t *stripes); -void reconstruct_stripe(osd_rmw_stripe_t *stripes, int pg_size, int role); +void reconstruct_stripe_xor(osd_rmw_stripe_t *stripes, int pg_size, int role); int extend_missing_stripes(osd_rmw_stripe_t *stripes, osd_num_t *osd_set, int minsize, int size); @@ -34,4 +34,4 @@ void* alloc_read_buffer(osd_rmw_stripe_t *stripes, int read_pg_size, uint64_t ad void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_set, uint64_t pg_size, uint64_t pg_minsize, uint64_t pg_cursize, uint64_t *write_osd_set, uint64_t chunk_size); -void calc_rmw_parity(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size); +void calc_rmw_parity_xor(osd_rmw_stripe_t *stripes, int pg_size, uint64_t *read_osd_set, uint64_t *write_osd_set, uint32_t chunk_size); diff --git a/osd_rmw_test.cpp b/osd_rmw_test.cpp index 40379889..961146ea 100644 --- a/osd_rmw_test.cpp +++ b/osd_rmw_test.cpp @@ -58,7 +58,7 @@ Cases: input buffer: [ write0, write1 ], rmw buffer: [ write2, read0, read1, read2 ], } - then, after calc_rmw_parity(): { + then, after calc_rmw_parity_xor(): { write: [ [ 128K-4K, 128K ], [ 0, 128K ], [ 0, 128K ] ], write1==read1, } @@ -82,7 +82,7 @@ Cases: input buffer: NULL, rmw buffer: [ read0, read1, read2 ], } - then, after calc_rmw_parity(): { + then, after calc_rmw_parity_xor(): { write: [ [ 0, 128K ], [ 0, 0 ], [ 0, 0 ] ], write0==read0, } @@ -182,7 +182,7 @@ void test4() set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data set_pattern(stripes[1].read_buf, 128*1024-4096, UINT64_MAX); // didn't read it, it's missing set_pattern(stripes[2].read_buf, 128*1024-4096, 0); // old parity = 0 - calc_rmw_parity(stripes, 3, osd_set, osd_set, 128*1024); + calc_rmw_parity_xor(stripes, 3, osd_set, osd_set, 128*1024); check_pattern(stripes[2].write_buf, 4096, PATTERN0^PATTERN1); // new parity check_pattern(stripes[2].write_buf+4096, 128*1024-4096*2, 0); // new parity check_pattern(stripes[2].write_buf+128*1024-4096, 4096, PATTERN0^PATTERN1); // new parity @@ -268,7 +268,7 @@ void test7() set_pattern(stripes[0].read_buf, 128*1024, PATTERN1); // old data set_pattern(stripes[1].read_buf, 128*1024, UINT64_MAX); // didn't read it, it's missing set_pattern(stripes[2].read_buf, 128*1024, 0); // old parity = 0 - calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024); + calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024); assert(stripes[0].write_start == 128*1024-4096 && stripes[0].write_end == 128*1024); assert(stripes[1].write_start == 0 && stripes[1].write_end == 128*1024); assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); @@ -306,7 +306,7 @@ void test8() // Test 8.2 set_pattern(write_buf, 128*1024+4096, PATTERN0); set_pattern(stripes[1].read_buf, 128*1024-4096, PATTERN1); - calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024); + calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024); assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024); // recheck again assert(stripes[1].write_start == 0 && stripes[1].write_end == 4096); // recheck again assert(stripes[2].write_start == 0 && stripes[2].write_end == 128*1024); // recheck again @@ -344,10 +344,10 @@ void test9() assert(stripes[0].write_buf == NULL); assert(stripes[1].write_buf == NULL); assert(stripes[2].write_buf == NULL); - // Test 8.2 + // Test 9.2 set_pattern(stripes[1].read_buf, 128*1024, 0); set_pattern(stripes[2].read_buf, 128*1024, PATTERN1); - calc_rmw_parity(stripes, 3, osd_set, write_osd_set, 128*1024); + calc_rmw_parity_xor(stripes, 3, osd_set, write_osd_set, 128*1024); assert(stripes[0].write_start == 0 && stripes[0].write_end == 128*1024); assert(stripes[1].write_start == 0 && stripes[1].write_end == 0); assert(stripes[2].write_start == 0 && stripes[2].write_end == 0);