Add (empty) osd_primary.cpp, rename osd_read to osd_receive, add FIXMEs for fsync

blocking-uring-test
Vitaliy Filippov 2020-01-28 22:40:50 +03:00
parent 1447c44b68
commit 47663bd1dc
8 changed files with 40 additions and 14 deletions

View File

@ -24,10 +24,10 @@ libblockstore.so: $(BLOCKSTORE_OBJS)
libfio_blockstore.so: ./libblockstore.so fio_engine.cpp json11.o
g++ $(CXXFLAGS) -shared -o libfio_blockstore.so fio_engine.cpp json11.o ./libblockstore.so -ltcmalloc_minimal -luring
OSD_OBJS := osd.o osd_exec_secondary.o osd_read.o osd_send.o osd_peering.o osd_peering_pg.o json11.o
OSD_OBJS := osd.o osd_exec_secondary.o osd_receive.o osd_send.o osd_peering.o osd_peering_pg.o osd_primary.o json11.o
osd_exec_secondary.o: osd_exec_secondary.cpp osd.h osd_ops.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_read.o: osd_read.cpp osd.h osd_ops.h
osd_receive.o: osd_receive.cpp osd.h osd_ops.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_send.o: osd_send.cpp osd.h osd_ops.h
g++ $(CXXFLAGS) -c -o $@ $<
@ -35,6 +35,8 @@ osd_peering.o: osd_peering.cpp osd.h osd_ops.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_peering_pg.o: osd_peering_pg.cpp object_id.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd_primary.o: osd_primary.cpp osd.h osd_ops.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd.o: osd.cpp osd.h osd_ops.h osd_peering_pg.h
g++ $(CXXFLAGS) -c -o $@ $<
osd: ./libblockstore.so osd_main.cpp osd.h osd_ops.h $(OSD_OBJS)

View File

@ -110,7 +110,7 @@ void blockstore_impl_t::loop()
auto op_ptr = cur;
auto op = *(cur++);
// FIXME: This needs some simplification
// Writes should not block reads if the ring is not full and if reads don't depend on them
// Writes should not block reads if the ring is not full and reads don't depend on them
// In all other cases we should stop submission
if (PRIV(op)->wait_for)
{

View File

@ -254,6 +254,7 @@ resume_1:
if (!bs->disable_journal_fsync)
{
GET_SQE();
// FIXME Wait for completion of writes before issuing an fsync
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
data->callback = simple_callback;
@ -337,6 +338,7 @@ resume_1:
data->iov = { 0 };
data->callback = simple_callback;
wait_count++;
// FIXME Wait for completion of writes before issuing an fsync
my_uring_prep_fsync(sqe, bs->journal.fd, IORING_FSYNC_DATASYNC);
}
bs->ringloop->submit();

View File

@ -63,6 +63,8 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
}
if (!disable_journal_fsync)
{
// FIXME: Wait for completion of writes before issuing an fsync
// Fsync and write requests posted at the same time can be reordered
ring_data_t *data = ((ring_data_t*)sqes[s]->user_data);
my_uring_prep_fsync(sqes[s++], journal.fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
@ -81,6 +83,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
// 1st step: fsync data
if (!disable_data_fsync)
{
// FIXME Wait for completion of writes before issuing an fsync
BS_SUBMIT_GET_SQE(sqe, data);
my_uring_prep_fsync(sqe, data_fd, IORING_FSYNC_DATASYNC);
data->iov = { 0 };
@ -150,6 +153,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
// ... And a journal fsync
if (!disable_journal_fsync)
{
// FIXME Wait for completion of writes before issuing an fsync
my_uring_prep_fsync(sqe[s], journal.fd, IORING_FSYNC_DATASYNC);
struct ring_data_t *data = ((ring_data_t*)sqe[s]->user_data);
data->iov = { 0 };

13
osd.cpp
View File

@ -278,18 +278,9 @@ void osd_t::exec_op(osd_op_t *cur_op)
{
exec_show_config(cur_op);
}
else if (cur_op->op.hdr.opcode == OSD_OP_READ)
else if (cur_op->op.hdr.opcode == OSD_OP_READ || cur_op->op.hdr.opcode == OSD_OP_WRITE)
{
// Primary OSD also works with individual stripes, but they're twice the size of the blockstore's stripe
// - convert offset & len to stripe number
// - fail operation if offset & len span multiple stripes
// - calc stripe hash and determine PG
// - check if this is our PG
// - redirect or fail operation if not
// - determine whether we need to read A and B or just A or just B or A + parity or B + parity
// and determine read ranges for both objects
// - send read requests
// - reconstruct result
exec_primary(cur_op);
}
else
{

6
osd.h
View File

@ -164,10 +164,16 @@ class osd_t
// op execution
void exec_op(osd_op_t *cur_op);
// secondary ops
void exec_sync_stab_all(osd_op_t *cur_op);
void exec_show_config(osd_op_t *cur_op);
void exec_secondary(osd_op_t *cur_op);
void secondary_op_callback(osd_op_t *cur_op);
// primary ops
void exec_primary(osd_op_t *cur_op);
void make_primary_reply(osd_op_t *op);
public:
osd_t(blockstore_config_t & config, blockstore_t *bs, ring_loop_t *ringloop);
~osd_t();

21
osd_primary.cpp Normal file
View File

@ -0,0 +1,21 @@
#include "osd.h"
void osd_t::exec_primary(osd_op_t *cur_op)
{
// read: read directly or read paired stripe(s), reconstruct, return
// write: read paired stripe(s), modify, write
// nuance: take care to read the same version from paired stripes!
// if there are no write requests in progress we're good (stripes must be in sync)
// and... remember the last readable version during a write request
// and... postpone other write requests to the same stripe until the completion of previous ones
//
// sync: sync peers, get unstable versions from somewhere, stabilize them
}
void osd_t::make_primary_reply(osd_op_t *op)
{
op->reply.hdr.magic = SECONDARY_OSD_REPLY_MAGIC;
op->reply.hdr.id = op->op.hdr.id;
op->reply.hdr.opcode = op->op.hdr.opcode;
}