From c8d61568b5380230bf0ae46f97630e2965af03a1 Mon Sep 17 00:00:00 2001 From: Vitaliy Filippov Date: Fri, 30 Jun 2023 02:49:11 +0300 Subject: [PATCH] Fix primary_read bitmap buffers being freed too early (use-after-free) --- .gitea/workflows/test.yml | 36 ++++++++++++++++++++++++++++++++++++ src/msgr_op.cpp | 4 ++++ src/msgr_op.h | 1 + src/msgr_receive.cpp | 2 +- src/msgr_send.cpp | 9 ++++++--- src/osd_ops.h | 2 +- src/osd_primary.cpp | 10 +++++----- src/osd_primary_write.cpp | 2 ++ tests/run_tests.sh | 3 +++ tests/test_snapshot_down.sh | 37 +++++++++++++++++++++++++++++++++++++ 10 files changed, 96 insertions(+), 10 deletions(-) create mode 100755 tests/test_snapshot_down.sh diff --git a/.gitea/workflows/test.yml b/.gitea/workflows/test.yml index 74474915..90f66a47 100644 --- a/.gitea/workflows/test.yml +++ b/.gitea/workflows/test.yml @@ -406,6 +406,42 @@ jobs: echo "" done + test_snapshot_down: + runs-on: ubuntu-latest + needs: build + container: ${{env.TEST_IMAGE}}:${{github.sha}} + steps: + - name: Run test + id: test + timeout-minutes: 3 + run: /root/vitastor/tests/test_snapshot_down.sh + - name: Print logs + if: always() && steps.test.outcome == 'failure' + run: | + for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do + echo "-------- $i --------" + cat $i + echo "" + done + + test_snapshot_down_ec: + runs-on: ubuntu-latest + needs: build + container: ${{env.TEST_IMAGE}}:${{github.sha}} + steps: + - name: Run test + id: test + timeout-minutes: 3 + run: SCHEME=ec /root/vitastor/tests/test_snapshot_down.sh + - name: Print logs + if: always() && steps.test.outcome == 'failure' + run: | + for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do + echo "-------- $i --------" + cat $i + echo "" + done + test_splitbrain: runs-on: ubuntu-latest needs: build diff --git a/src/msgr_op.cpp b/src/msgr_op.cpp index 40e8a083..30dce6c1 100644 --- a/src/msgr_op.cpp +++ b/src/msgr_op.cpp @@ -9,6 +9,10 @@ osd_op_t::~osd_op_t() { assert(!bs_op); assert(!op_data); + if (bitmap_buf) + { + free(bitmap_buf); + } if (rmw_buf) { free(rmw_buf); diff --git a/src/msgr_op.h b/src/msgr_op.h index cb97ec6d..f06522ce 100644 --- a/src/msgr_op.h +++ b/src/msgr_op.h @@ -165,6 +165,7 @@ struct osd_op_t void *bitmap = NULL; unsigned bitmap_len = 0; unsigned bmp_data = 0; + void *bitmap_buf = NULL; void *rmw_buf = NULL; osd_primary_op_data_t* op_data = NULL; std::function callback; diff --git a/src/msgr_receive.cpp b/src/msgr_receive.cpp index 9c7e6d7a..6ff2f31c 100644 --- a/src/msgr_receive.cpp +++ b/src/msgr_receive.cpp @@ -369,7 +369,7 @@ bool osd_messenger_t::handle_reply_hdr(osd_client_t *cl) op->buf = malloc_or_die(op->reply.hdr.retval); cl->recv_list.push_back(op->buf, op->reply.hdr.retval); } - else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.hdr.retval > 0) + else if (op->reply.hdr.opcode == OSD_OP_DESCRIBE && op->reply.describe.result_bytes > 0) { delete cl->read_op; cl->read_op = op; diff --git a/src/msgr_send.cpp b/src/msgr_send.cpp index 1e68b380..c721d86c 100644 --- a/src/msgr_send.cpp +++ b/src/msgr_send.cpp @@ -84,9 +84,12 @@ void osd_messenger_t::outbox_push(osd_op_t *cur_op) { for (int i = 0; i < cur_op->iov.count; i++) { - assert(cur_op->iov.buf[i].iov_base); - to_send_list.push_back(cur_op->iov.buf[i]); - to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 }); + if (cur_op->iov.buf[i].iov_len > 0) + { + assert(cur_op->iov.buf[i].iov_base); + to_send_list.push_back(cur_op->iov.buf[i]); + to_outbox.push_back((msgr_sendp_t){ .op = cur_op, .flags = 0 }); + } } } if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP) diff --git a/src/osd_ops.h b/src/osd_ops.h index 7522e0bf..217cbb0b 100644 --- a/src/osd_ops.h +++ b/src/osd_ops.h @@ -220,7 +220,7 @@ struct __attribute__((__packed__)) osd_reply_rw_t // for reads: bitmap length uint32_t bitmap_len; uint32_t pad0; - // for reads: object version + // for reads and writes: object version uint64_t version; }; diff --git a/src/osd_primary.cpp b/src/osd_primary.cpp index fc1b00d5..e7a76b0f 100644 --- a/src/osd_primary.cpp +++ b/src/osd_primary.cpp @@ -87,8 +87,7 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op) // - op_data 1, sizeof(osd_primary_op_data_t) + // - stripes - // - resulting bitmap buffers - stripe_count * (clean_entry_bitmap_size + sizeof(osd_rmw_stripe_t)) + + stripe_count * sizeof(osd_rmw_stripe_t) + chain_size * ( // - copy of the chain sizeof(inode_t) + @@ -110,11 +109,12 @@ bool osd_t::prepare_primary_rw(osd_op_t *cur_op) op_data->pg_size = pg_it->second.pg_size; cur_op->op_data = op_data; split_stripes(pg_data_size, bs_block_size, (uint32_t)(cur_op->req.rw.offset - oid.stripe), cur_op->req.rw.len, op_data->stripes); - // Allocate bitmaps along with stripes to avoid extra allocations and fragmentation + // Resulting bitmaps have to survive op_data and be freed with the op itself + assert(!cur_op->bitmap_buf); + cur_op->bitmap_buf = calloc_or_die(1, clean_entry_bitmap_size * stripe_count); for (int i = 0; i < stripe_count; i++) { - op_data->stripes[i].bmp_buf = data_buf; - data_buf = (uint8_t*)data_buf + clean_entry_bitmap_size; + op_data->stripes[i].bmp_buf = (uint8_t*)cur_op->bitmap_buf + clean_entry_bitmap_size * i; } op_data->chain_size = chain_size; if (chain_size > 0) diff --git a/src/osd_primary_write.cpp b/src/osd_primary_write.cpp index 2ac41fe3..7b8ce74e 100644 --- a/src/osd_primary_write.cpp +++ b/src/osd_primary_write.cpp @@ -83,11 +83,13 @@ retry_1: // Object is degraded/misplaced and will be moved to op_data->stripes[0].read_start = 0; op_data->stripes[0].read_end = bs_block_size; + assert(!cur_op->rmw_buf); cur_op->rmw_buf = op_data->stripes[0].read_buf = memalign_or_die(MEM_ALIGNMENT, bs_block_size); } } else { + assert(!cur_op->rmw_buf); cur_op->rmw_buf = calc_rmw(cur_op->buf, op_data->stripes, op_data->prev_set, pg.pg_size, op_data->pg_data_size, pg.pg_cursize, pg.cur_set.data(), bs_block_size, clean_entry_bitmap_size); if (!cur_op->rmw_buf) diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 6e1ed89f..3b32a342 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -35,6 +35,9 @@ SCHEME=ec ./test_snapshot.sh ./test_snapshot_chain.sh SCHEME=ec ./test_snapshot_chain.sh +./test_snapshot_down.sh +SCHEME=ec ./test_snapshot_down.sh + ./test_splitbrain.sh ./test_rebalance_verify.sh diff --git a/tests/test_snapshot_down.sh b/tests/test_snapshot_down.sh new file mode 100755 index 00000000..80422959 --- /dev/null +++ b/tests/test_snapshot_down.sh @@ -0,0 +1,37 @@ +#!/bin/bash -ex + +. `dirname $0`/run_3osds.sh +check_qemu + +# Test merge to child (without "inverse rename" optimisation) + +build/src/vitastor-cli --etcd_address $ETCD_URL create -s 128M testchain + +LD_PRELOAD="build/src/libfio_vitastor.so" \ + fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \ + -etcd=$ETCD_URL -image=testchain -mirror_file=./testdata/mirror.bin + +# Create a snapshot +build/src/vitastor-cli --etcd_address $ETCD_URL snap-create testchain@0 + +# Write something to it +LD_PRELOAD="build/src/libfio_vitastor.so" \ +fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=1M -direct=1 -iodepth=4 -rw=randwrite \ + -randrepeat=0 -etcd=$ETCD_URL -image=testchain -number_ios=8 -mirror_file=./testdata/mirror.bin + +# Check the new content +qemu-img convert -p \ + -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \ + -O raw ./testdata/layer1.bin +cmp ./testdata/layer1.bin ./testdata/mirror.bin + +# Merge +build/src/vitastor-cli --etcd_address $ETCD_URL rm testchain@0 + +# Check the final image +qemu-img convert -p \ + -f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testchain" \ + -O raw ./testdata/layer1.bin +cmp ./testdata/layer1.bin ./testdata/mirror.bin + +format_green OK