Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 6909807068 | | |
| | ec90fe6ec1 | | |
| | 18c72f4835 | | |
| | 59fbcef734 | | |
| | 40b7c21fb1 | | |
| | efb3678606 | | |
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (0.5.11-1) unstable; urgency=medium
|
||||
vitastor (0.5.13-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
12
debian/vitastor.Dockerfile
vendored
12
debian/vitastor.Dockerfile
vendored
@@ -40,10 +40,10 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-0.5.11; \
|
||||
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.5.11/qemu; \
|
||||
ln -s /root/fio-build/fio-*/ vitastor-0.5.11/fio; \
|
||||
cd vitastor-0.5.11; \
|
||||
cp -r /root/vitastor vitastor-0.5.13; \
|
||||
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.5.13/qemu; \
|
||||
ln -s /root/fio-build/fio-*/ vitastor-0.5.13/fio; \
|
||||
cd vitastor-0.5.13; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
QEMU=$(head -n1 qemu/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
sh copy-qemu-includes.sh; \
|
||||
@@ -59,8 +59,8 @@ RUN set -e -x; \
|
||||
echo "dep:fio=$FIO" > debian/substvars; \
|
||||
echo "dep:qemu=$QEMU" >> debian/substvars; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.5.11.orig.tar.xz vitastor-0.5.11; \
|
||||
cd vitastor-0.5.11; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.5.13.orig.tar.xz vitastor-0.5.13; \
|
||||
cd vitastor-0.5.13; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
|
@@ -48,4 +48,4 @@ FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Ve
|
||||
QEMU=`rpm -qi qemu qemu-kvm | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
perl -i -pe 's/(Requires:\s*qemu(?:-kvm)?)([^\n]+)?/$1 = '$QEMU'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-0.5.11/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.5.11$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform 's#^#vitastor-0.5.13/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.5.13$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -37,7 +37,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.5.11.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.5.13.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.5.11
|
||||
Version: 0.5.13
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.5.11.el7.tar.gz
|
||||
Source0: vitastor-0.5.13.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.5.11.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.5.13.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.5.11
|
||||
Version: 0.5.13
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.5.11.el8.tar.gz
|
||||
Source0: vitastor-0.5.13.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -17,8 +17,8 @@ journal_flusher_t::journal_flusher_t(blockstore_impl_t *bs)
|
||||
// FIXME: allow to configure flusher_start_threshold and journal_trim_interval
|
||||
flusher_start_threshold = bs->journal_block_size / sizeof(journal_entry_stable);
|
||||
journal_trim_interval = 512;
|
||||
journal_trim_counter = 0;
|
||||
trim_wanted = 0;
|
||||
journal_trim_counter = bs->journal.flush_journal ? 1 : 0;
|
||||
trim_wanted = bs->journal.flush_journal ? 1 : 0;
|
||||
journal_superblock = bs->journal.inmemory ? bs->journal.buffer : memalign_or_die(MEM_ALIGNMENT, bs->journal_block_size);
|
||||
co = new journal_flusher_co[max_flusher_count];
|
||||
for (int i = 0; i < max_flusher_count; i++)
|
||||
@@ -626,6 +626,12 @@ resume_1:
|
||||
#endif
|
||||
flusher->trimming = false;
|
||||
}
|
||||
if (bs->journal.flush_journal && !flusher->flush_queue.size())
|
||||
{
|
||||
assert(bs->journal.used_start == bs->journal.next_free);
|
||||
printf("Journal flushed\n");
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
// All done
|
||||
flusher->active_flushers--;
|
||||
|
@@ -92,10 +92,23 @@ void blockstore_impl_t::loop()
|
||||
{
|
||||
delete journal_init_reader;
|
||||
journal_init_reader = NULL;
|
||||
initialized = 10;
|
||||
if (journal.flush_journal)
|
||||
initialized = 3;
|
||||
else
|
||||
initialized = 10;
|
||||
ringloop->wakeup();
|
||||
}
|
||||
}
|
||||
if (initialized == 3)
|
||||
{
|
||||
if (readonly)
|
||||
{
|
||||
printf("Can't flush the journal in readonly mode\n");
|
||||
exit(1);
|
||||
}
|
||||
flusher->loop();
|
||||
ringloop->submit();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -143,6 +143,7 @@ struct journal_t
|
||||
int fd;
|
||||
uint64_t device_size;
|
||||
bool inmemory = false;
|
||||
bool flush_journal = false;
|
||||
void *buffer = NULL;
|
||||
|
||||
uint64_t block_size;
|
||||
|
@@ -42,6 +42,11 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||
{
|
||||
disable_flock = true;
|
||||
}
|
||||
if (config["flush_journal"] == "true" || config["flush_journal"] == "1" || config["flush_journal"] == "yes")
|
||||
{
|
||||
// Only flush journal and exit
|
||||
journal.flush_journal = true;
|
||||
}
|
||||
if (config["immediate_commit"] == "all")
|
||||
{
|
||||
immediate_commit = IMMEDIATE_ALL;
|
||||
@@ -87,7 +92,7 @@ void blockstore_impl_t::parse_config(blockstore_config_t & config)
|
||||
{
|
||||
max_flusher_count = 256;
|
||||
}
|
||||
if (!min_flusher_count)
|
||||
if (!min_flusher_count || journal.flush_journal)
|
||||
{
|
||||
min_flusher_count = 1;
|
||||
}
|
||||
|
@@ -168,6 +168,9 @@ resume_5:
|
||||
for (i = 0, v = (obj_ver_id*)op->buf; i < op->len; i++, v++)
|
||||
{
|
||||
// Mark all dirty_db entries up to op->version as stable
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("Stabilize %lx:%lx v%lu\n", v->oid.inode, v->oid.stripe, v->version);
|
||||
#endif
|
||||
mark_stable(*v);
|
||||
}
|
||||
// Acknowledge op
|
||||
@@ -183,6 +186,7 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
bool was_stable = IS_STABLE(dirty_it->second.state);
|
||||
if ((dirty_it->second.state & BS_ST_WORKFLOW_MASK) == BS_ST_SYNCED)
|
||||
{
|
||||
dirty_it->second.state = (dirty_it->second.state & ~BS_ST_WORKFLOW_MASK) | BS_ST_STABLE;
|
||||
@@ -207,11 +211,7 @@ void blockstore_impl_t::mark_stable(const obj_ver_id & v, bool forget_dirty)
|
||||
erase_dirty(dirty_it, erase_end, clean_loc);
|
||||
break;
|
||||
}
|
||||
if (IS_STABLE(dirty_it->second.state))
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (dirty_it == dirty_db.begin())
|
||||
if (was_stable || dirty_it == dirty_db.begin())
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
@@ -10,7 +10,7 @@
|
||||
#define PART_ERROR 4
|
||||
#define CACHE_DIRTY 1
|
||||
#define CACHE_FLUSHING 2
|
||||
#define CACHE_REPEATING 4
|
||||
#define CACHE_REPEATING 3
|
||||
#define OP_FLUSH_BUFFER 2
|
||||
|
||||
cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config)
|
||||
@@ -36,10 +36,10 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
|
||||
for (auto & wr: dirty_buffers)
|
||||
{
|
||||
if (affects_osd(wr.first.inode, wr.first.stripe, wr.second.len, peer_osd) &&
|
||||
!(wr.second.state & CACHE_REPEATING))
|
||||
wr.second.state != CACHE_REPEATING)
|
||||
{
|
||||
// FIXME: Flush in larger parts
|
||||
flush_buffer(wr.first, wr.second);
|
||||
flush_buffer(wr.first, &wr.second);
|
||||
}
|
||||
}
|
||||
continue_ops();
|
||||
@@ -95,21 +95,30 @@ void cluster_client_t::continue_ops(bool up_retry)
|
||||
// We're offline
|
||||
return;
|
||||
}
|
||||
bool has_flushes = false, has_writes = false;
|
||||
int j = 0;
|
||||
for (int i = 0; i < op_queue.size(); i++)
|
||||
if (continuing_ops)
|
||||
{
|
||||
bool rm = false, is_flush = op_queue[i]->flags & OP_FLUSH_BUFFER;
|
||||
auto opcode = op_queue[i]->opcode;
|
||||
if (!op_queue[i]->up_wait || up_retry)
|
||||
// Attempt to reenter the function
|
||||
continuing_ops = 2;
|
||||
return;
|
||||
}
|
||||
restart:
|
||||
continuing_ops = 1;
|
||||
op_queue_pos = 0;
|
||||
bool has_flushes = false, has_writes = false;
|
||||
while (op_queue_pos < op_queue.size())
|
||||
{
|
||||
auto op = op_queue[op_queue_pos];
|
||||
bool rm = false, is_flush = op->flags & OP_FLUSH_BUFFER;
|
||||
auto opcode = op->opcode;
|
||||
if (!op->up_wait || up_retry)
|
||||
{
|
||||
op_queue[i]->up_wait = false;
|
||||
op->up_wait = false;
|
||||
if (opcode == OSD_OP_READ || opcode == OSD_OP_WRITE)
|
||||
{
|
||||
if (is_flush || !has_flushes)
|
||||
{
|
||||
// Regular writes can't proceed before buffer flushes
|
||||
rm = continue_rw(op_queue[i]);
|
||||
rm = continue_rw(op);
|
||||
}
|
||||
}
|
||||
else if (opcode == OSD_OP_SYNC)
|
||||
@@ -117,7 +126,7 @@ void cluster_client_t::continue_ops(bool up_retry)
|
||||
if (!has_writes)
|
||||
{
|
||||
// SYNC can't proceed before previous writes
|
||||
rm = continue_sync(op_queue[i]);
|
||||
rm = continue_sync(op);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -135,12 +144,20 @@ void cluster_client_t::continue_ops(bool up_retry)
|
||||
// ...so dirty_writes can't contain anything newer than SYNC
|
||||
has_flushes = has_writes || !rm;
|
||||
}
|
||||
if (!rm)
|
||||
if (rm)
|
||||
{
|
||||
op_queue[j++] = op_queue[i];
|
||||
op_queue.erase(op_queue.begin()+op_queue_pos, op_queue.begin()+op_queue_pos+1);
|
||||
}
|
||||
else
|
||||
{
|
||||
op_queue_pos++;
|
||||
}
|
||||
if (continuing_ops == 2)
|
||||
{
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
op_queue.resize(j);
|
||||
continuing_ops = 0;
|
||||
}
|
||||
|
||||
static uint32_t is_power_of_two(uint64_t value)
|
||||
@@ -417,21 +434,30 @@ void cluster_client_t::copy_write(cluster_op_t *op, std::map<object_id, cluster_
|
||||
}
|
||||
}
|
||||
|
||||
void cluster_client_t::flush_buffer(const object_id & oid, cluster_buffer_t & wr)
|
||||
void cluster_client_t::flush_buffer(const object_id & oid, cluster_buffer_t *wr)
|
||||
{
|
||||
wr.state = CACHE_DIRTY | CACHE_REPEATING;
|
||||
wr->state = CACHE_REPEATING;
|
||||
cluster_op_t *op = new cluster_op_t;
|
||||
op->flags = OP_FLUSH_BUFFER;
|
||||
op->opcode = OSD_OP_WRITE;
|
||||
op->inode = oid.inode;
|
||||
op->offset = oid.stripe;
|
||||
op->len = wr.len;
|
||||
op->iov.push_back(wr.buf, wr.len);
|
||||
op->callback = [](cluster_op_t* op)
|
||||
op->len = wr->len;
|
||||
op->iov.push_back(wr->buf, wr->len);
|
||||
op->callback = [wr](cluster_op_t* op)
|
||||
{
|
||||
if (wr->state == CACHE_REPEATING)
|
||||
{
|
||||
wr->state = CACHE_DIRTY;
|
||||
}
|
||||
delete op;
|
||||
};
|
||||
op_queue.push_front(op);
|
||||
op_queue.insert(op_queue.begin(), op);
|
||||
if (continuing_ops)
|
||||
{
|
||||
continuing_ops = 2;
|
||||
op_queue_pos++;
|
||||
}
|
||||
}
|
||||
|
||||
int cluster_client_t::continue_rw(cluster_op_t *op)
|
||||
|
@@ -74,7 +74,7 @@ class cluster_client_t
|
||||
int retry_timeout_id = 0;
|
||||
uint64_t op_id = 1;
|
||||
std::vector<cluster_op_t*> offline_ops;
|
||||
std::deque<cluster_op_t*> op_queue;
|
||||
std::vector<cluster_op_t*> op_queue;
|
||||
std::map<object_id, cluster_buffer_t> dirty_buffers;
|
||||
std::set<osd_num_t> dirty_osds;
|
||||
uint64_t dirty_bytes = 0, dirty_ops = 0;
|
||||
@@ -82,6 +82,8 @@ class cluster_client_t
|
||||
bool pgs_loaded = false;
|
||||
ring_consumer_t consumer;
|
||||
std::vector<std::function<void(void)>> on_ready_hooks;
|
||||
int continuing_ops = 0;
|
||||
int op_queue_pos = 0;
|
||||
|
||||
public:
|
||||
etcd_state_client_t st_cli;
|
||||
@@ -98,7 +100,7 @@ public:
|
||||
void continue_ops(bool up_retry = false);
|
||||
protected:
|
||||
bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
|
||||
void flush_buffer(const object_id & oid, cluster_buffer_t & wr);
|
||||
void flush_buffer(const object_id & oid, cluster_buffer_t *wr);
|
||||
void on_load_config_hook(json11::Json::object & config);
|
||||
void on_load_pgs_hook(bool success);
|
||||
void on_change_hook(json11::Json::object & changes);
|
||||
|
@@ -47,7 +47,7 @@ void configure_single_pg_pool(cluster_client_t *cli)
|
||||
cli->st_cli.on_change_hook(changes);
|
||||
}
|
||||
|
||||
int *test_write(cluster_client_t *cli, uint64_t offset, uint64_t len, uint8_t c)
|
||||
int *test_write(cluster_client_t *cli, uint64_t offset, uint64_t len, uint8_t c, std::function<void()> cb = NULL)
|
||||
{
|
||||
printf("Post write %lx+%lx\n", offset, len);
|
||||
int *r = new int;
|
||||
@@ -59,7 +59,7 @@ int *test_write(cluster_client_t *cli, uint64_t offset, uint64_t len, uint8_t c)
|
||||
op->len = len;
|
||||
op->iov.push_back(malloc_or_die(len), len);
|
||||
memset(op->iov.buf[0].iov_base, c, len);
|
||||
op->callback = [r](cluster_op_t *op)
|
||||
op->callback = [r, cb](cluster_op_t *op)
|
||||
{
|
||||
if (*r == -1)
|
||||
printf("Error: Not allowed to complete yet\n");
|
||||
@@ -68,6 +68,8 @@ int *test_write(cluster_client_t *cli, uint64_t offset, uint64_t len, uint8_t c)
|
||||
free(op->iov.buf[0].iov_base);
|
||||
printf("Done write %lx+%lx r=%d\n", op->offset, op->len, op->retval);
|
||||
delete op;
|
||||
if (cb != NULL)
|
||||
cb();
|
||||
};
|
||||
cli->execute(op);
|
||||
return r;
|
||||
@@ -310,6 +312,28 @@ void test1()
|
||||
pretend_op_completed(cli, find_op(cli, 1, OSD_OP_WRITE, 0, 0x1000), 0);
|
||||
check_completed(r1);
|
||||
|
||||
// Check disconnect inside operation callback (reenterability)
|
||||
// Probably doesn't happen too often, but possible in theory
|
||||
r1 = test_write(cli, 0, 0x1000, 0x60, [cli]()
|
||||
{
|
||||
pretend_disconnected(cli, 1);
|
||||
});
|
||||
r2 = test_write(cli, 0x1000, 0x1000, 0x61);
|
||||
check_op_count(cli, 1, 2);
|
||||
can_complete(r1);
|
||||
pretend_op_completed(cli, find_op(cli, 1, OSD_OP_WRITE, 0, 0x1000), 0);
|
||||
check_completed(r1);
|
||||
check_disconnected(cli, 1);
|
||||
pretend_connected(cli, 1);
|
||||
cli->continue_ops(true);
|
||||
check_op_count(cli, 1, 2);
|
||||
pretend_op_completed(cli, find_op(cli, 1, OSD_OP_WRITE, 0, 0x1000), 0);
|
||||
pretend_op_completed(cli, find_op(cli, 1, OSD_OP_WRITE, 0x1000, 0x1000), 0);
|
||||
check_op_count(cli, 1, 1);
|
||||
can_complete(r2);
|
||||
pretend_op_completed(cli, find_op(cli, 1, OSD_OP_WRITE, 0x1000, 0x1000), 0);
|
||||
check_completed(r2);
|
||||
|
||||
// Free client
|
||||
delete cli;
|
||||
delete tfd;
|
||||
|
@@ -35,6 +35,18 @@ fi
|
||||
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
|
||||
|
||||
LD_PRELOAD=libasan.so.5 \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=1G -cluster_log_level=10
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -cluster_log_level=10
|
||||
|
||||
LD_PRELOAD=libasan.so.5 \
|
||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface \
|
||||
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -number_ios=1024
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))" \
|
||||
-O raw ./testdata/read.bin
|
||||
|
||||
qemu-img convert -S 4096 -p \
|
||||
-f raw ./testdata/read.bin \
|
||||
-O raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=1:size=$((128*1024*1024))"
|
||||
|
||||
format_green OK
|
||||
|
Reference in New Issue
Block a user