Compare commits
13 Commits
Author | SHA1 | Date | |
---|---|---|---|
8fdf30b21f | |||
238037ae31 | |||
09a8864686 | |||
6e6f6ecbb0 | |||
9491f81419 | |||
44c2b30167 | |||
bf8a0581cd | |||
5953942042 | |||
a276a1f737 | |||
cc24e5796e | |||
6e26732e6a | |||
b4edc79449 | |||
5f26887d32 |
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VERSION "0.8.0")
|
||||
set(VERSION "0.8.1")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -18,15 +18,19 @@ ENV CSI_ENDPOINT=""
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget && \
|
||||
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg && \
|
||||
(echo deb http://vitastor.io/debian buster main > /etc/apt/sources.list.d/vitastor.list) && \
|
||||
(echo deb http://deb.debian.org/debian buster-backports main > /etc/apt/sources.list.d/backports.list) && \
|
||||
(echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||
apt-get update && \
|
||||
apt-get install -y e2fsprogs xfsprogs vitastor kmod && \
|
||||
apt-get install -y e2fsprogs xfsprogs kmod && \
|
||||
apt-get clean && \
|
||||
(echo options nbd nbds_max=128 > /etc/modprobe.d/nbd.conf)
|
||||
|
||||
COPY --from=build /app/vitastor-csi /bin/
|
||||
|
||||
RUN (echo deb http://vitastor.io/debian buster main > /etc/apt/sources.list.d/vitastor.list) && \
|
||||
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg && \
|
||||
apt-get update && \
|
||||
apt-get install -y vitastor-client && \
|
||||
apt-get clean
|
||||
|
||||
ENTRYPOINT ["/bin/vitastor-csi"]
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VERSION ?= v0.8.0
|
||||
VERSION ?= v0.8.1
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v0.8.0
|
||||
image: vitalif/vitastor-csi:v0.8.1
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
@@ -102,7 +102,7 @@ spec:
|
||||
- "--health-port=9898"
|
||||
env:
|
||||
- name: CSI_ENDPOINT
|
||||
value: unix://csi/csi.sock
|
||||
value: unix:///csi/csi.sock
|
||||
volumeMounts:
|
||||
- mountPath: /csi
|
||||
name: socket-dir
|
||||
|
@@ -116,7 +116,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v0.8.0
|
||||
image: vitalif/vitastor-csi:v0.8.1
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "0.8.0"
|
||||
vitastorCSIDriverVersion = "0.8.1"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@@ -1,10 +1,10 @@
|
||||
vitastor (0.8.0-1) unstable; urgency=medium
|
||||
vitastor (0.8.1-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||
|
||||
vitastor (0.8.0-1) unstable; urgency=medium
|
||||
vitastor (0.8.1-1) unstable; urgency=medium
|
||||
|
||||
* Implement NFS proxy
|
||||
* Add documentation
|
||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -34,8 +34,8 @@ RUN set -e -x; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
cp -r /root/vitastor vitastor-0.8.0; \
|
||||
cd vitastor-0.8.0; \
|
||||
cp -r /root/vitastor vitastor-0.8.1; \
|
||||
cd vitastor-0.8.1; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
@@ -48,8 +48,8 @@ RUN set -e -x; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.0.orig.tar.xz vitastor-0.8.0; \
|
||||
cd vitastor-0.8.0; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.1.orig.tar.xz vitastor-0.8.1; \
|
||||
cd vitastor-0.8.1; \
|
||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
|
@@ -82,7 +82,7 @@ Parent node reference is required for intermediate tree nodes.
|
||||
Separate OSD settings are set in etcd keys `/vitastor/config/osd/<number>`
|
||||
in JSON format `{"<key>":<value>}`.
|
||||
|
||||
As of now, there is only one setting:
|
||||
As of now, two settings are supported:
|
||||
|
||||
## reweight
|
||||
|
||||
@@ -96,6 +96,15 @@ This means an OSD configured with reweight lower than 1 receives less PGs than
|
||||
it normally would. An OSD with reweight = 0 won't store any data. You can set
|
||||
reweight to 0 to trigger rebalance and remove all data from an OSD.
|
||||
|
||||
## tags
|
||||
|
||||
- Type: string or array of strings
|
||||
|
||||
Sets a tag or multiple tags for this OSD. Tags can be used to group OSDs into
|
||||
subsets and then use a specific subset for a pool instead of all OSDs.
|
||||
For example, you can mark SSD OSDs with tag "ssd" and HDD OSDs with "hdd", and
|
||||
such tags will work as device classes.
|
||||
|
||||
# Pool parameters
|
||||
|
||||
## name
|
||||
|
@@ -81,7 +81,10 @@
|
||||
Настройки отдельных OSD задаются в ключах etcd `/vitastor/config/osd/<number>`
|
||||
в JSON-формате `{"<key>":<value>}`.
|
||||
|
||||
На данный момент поддерживается одна настройка:
|
||||
На данный момент поддерживаются две настройки:
|
||||
|
||||
- [reweight](#reweight)
|
||||
- [tags](#tags)
|
||||
|
||||
## reweight
|
||||
|
||||
@@ -96,6 +99,15 @@
|
||||
хранении данных вообще. Вы можете установить reweight в 0, чтобы убрать
|
||||
все данные с OSD.
|
||||
|
||||
## tags
|
||||
|
||||
- Тип: строка или массив строк
|
||||
|
||||
Задаёт тег или набор тегов для данного OSD. Теги можно использовать, чтобы
|
||||
делить OSD на множества и потом размещать пул только на части OSD, а не на
|
||||
всех. Можно, например, пометить SSD OSD тегом "ssd", а HDD тегом "hdd", в
|
||||
этом смысле теги работают аналогично классам устройств.
|
||||
|
||||
# Параметры
|
||||
|
||||
## name
|
||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VERSION = '0.8.0'
|
||||
VERSION = '0.8.1'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
@@ -25,4 +25,4 @@ rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform 's#^#vitastor-0.8.0/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.0$(rpm --eval '%dist').tar.gz *
|
||||
tar --transform 's#^#vitastor-0.8.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.1$(rpm --eval '%dist').tar.gz *
|
||||
|
@@ -58,7 +58,7 @@
|
||||
+BuildRequires: gperftools-devel
|
||||
+BuildRequires: libusbx-devel >= 1.0.21
|
||||
%if %{have_usbredir}
|
||||
BuildRequires: usbredir-devel >= 0.8.0
|
||||
BuildRequires: usbredir-devel >= 0.8.1
|
||||
%endif
|
||||
@@ -856,12 +861,13 @@ BuildRequires: virglrenderer-devel
|
||||
# For smartcard NSS support
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.8.0.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.8.1.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.8.0
|
||||
Version: 0.8.1
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.8.0.el7.tar.gz
|
||||
Source0: vitastor-0.8.1.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
cp /root/vitastor-0.8.0.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp /root/vitastor-0.8.1.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
|
@@ -1,11 +1,11 @@
|
||||
Name: vitastor
|
||||
Version: 0.8.0
|
||||
Version: 0.8.1
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-0.8.0.el8.tar.gz
|
||||
Source0: vitastor-0.8.1.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
|
@@ -15,7 +15,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="0.8.0")
|
||||
add_definitions(-DVERSION="0.8.1")
|
||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||
if (${WITH_ASAN})
|
||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||
@@ -263,6 +263,14 @@ target_link_libraries(test_cas
|
||||
vitastor_client
|
||||
)
|
||||
|
||||
# test_crc32
|
||||
add_executable(test_crc32
|
||||
test_crc32.cpp
|
||||
)
|
||||
target_link_libraries(test_crc32
|
||||
vitastor_blk
|
||||
)
|
||||
|
||||
# test_cluster_client
|
||||
add_executable(test_cluster_client
|
||||
test_cluster_client.cpp
|
||||
|
@@ -615,7 +615,11 @@ resume_1:
|
||||
}
|
||||
for (it = v.begin(); it != v.end(); it++)
|
||||
{
|
||||
free(it->buf);
|
||||
// Free it if it's not taken from the journal
|
||||
if (it->buf && (!bs->journal.inmemory || it->buf < bs->journal.buffer || it->buf >= bs->journal.buffer + bs->journal.len))
|
||||
{
|
||||
free(it->buf);
|
||||
}
|
||||
}
|
||||
v.clear();
|
||||
// And sync metadata (in batches - not per each operation!)
|
||||
@@ -760,16 +764,17 @@ bool journal_flusher_co::scan_dirty(int wait_base)
|
||||
{
|
||||
submit_offset = dirty_it->second.location + offset - dirty_it->second.offset;
|
||||
submit_len = it == v.end() || it->offset >= end_offset ? end_offset-offset : it->offset-offset;
|
||||
it = v.insert(it, (copy_buffer_t){ .offset = offset, .len = submit_len, .buf = memalign_or_die(MEM_ALIGNMENT, submit_len) });
|
||||
it = v.insert(it, (copy_buffer_t){ .offset = offset, .len = submit_len });
|
||||
copy_count++;
|
||||
if (bs->journal.inmemory)
|
||||
{
|
||||
// Take it from memory
|
||||
memcpy(it->buf, (uint8_t*)bs->journal.buffer + submit_offset, submit_len);
|
||||
// Take it from memory, don't copy it
|
||||
it->buf = (uint8_t*)bs->journal.buffer + submit_offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Read it from disk
|
||||
it->buf = memalign_or_die(MEM_ALIGNMENT, submit_len);
|
||||
await_sqe(0);
|
||||
data->iov = (struct iovec){ it->buf, (size_t)submit_len };
|
||||
data->callback = simple_callback_r;
|
||||
|
@@ -166,6 +166,7 @@ struct __attribute__((__packed__)) dirty_entry
|
||||
struct fulfill_read_t
|
||||
{
|
||||
uint64_t offset, len;
|
||||
uint64_t journal_sector; // sector+1 if used and !journal.inmemory, otherwise 0
|
||||
};
|
||||
|
||||
#define PRIV(op) ((blockstore_op_private_t*)(op)->private_data)
|
||||
@@ -305,7 +306,7 @@ class blockstore_impl_t
|
||||
// Read
|
||||
int dequeue_read(blockstore_op_t *read_op);
|
||||
int fulfill_read(blockstore_op_t *read_op, uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
|
||||
uint32_t item_state, uint64_t item_version, uint64_t item_location);
|
||||
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint64_t journal_sector);
|
||||
int fulfill_read_push(blockstore_op_t *op, void *buf, uint64_t offset, uint64_t len,
|
||||
uint32_t item_state, uint64_t item_version);
|
||||
void handle_read_event(ring_data_t *data, blockstore_op_t *op);
|
||||
|
@@ -42,7 +42,7 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
|
||||
|
||||
// FIXME I've seen a bug here so I want some tests
|
||||
int blockstore_impl_t::fulfill_read(blockstore_op_t *read_op, uint64_t &fulfilled, uint32_t item_start, uint32_t item_end,
|
||||
uint32_t item_state, uint64_t item_version, uint64_t item_location)
|
||||
uint32_t item_state, uint64_t item_version, uint64_t item_location, uint64_t journal_sector)
|
||||
{
|
||||
uint32_t cur_start = item_start;
|
||||
if (cur_start < read_op->offset + read_op->len && item_end > read_op->offset)
|
||||
@@ -72,6 +72,7 @@ int blockstore_impl_t::fulfill_read(blockstore_op_t *read_op, uint64_t &fulfille
|
||||
fulfill_read_t el = {
|
||||
.offset = cur_start,
|
||||
.len = it == PRIV(read_op)->read_vec.end() || it->offset >= item_end ? item_end-cur_start : it->offset-cur_start,
|
||||
.journal_sector = journal_sector,
|
||||
};
|
||||
it = PRIV(read_op)->read_vec.insert(it, el);
|
||||
if (!fulfill_read_push(read_op,
|
||||
@@ -156,8 +157,10 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||
memcpy(read_op->bitmap, bmp_ptr, dsk.clean_entry_bitmap_size);
|
||||
}
|
||||
}
|
||||
// If inmemory_journal is false, journal trim will have to wait until the read is completed
|
||||
if (!fulfill_read(read_op, fulfilled, dirty.offset, dirty.offset + dirty.len,
|
||||
dirty.state, dirty_it->first.version, dirty.location + (IS_JOURNAL(dirty.state) ? 0 : dirty.offset)))
|
||||
dirty.state, dirty_it->first.version, dirty.location + (IS_JOURNAL(dirty.state) ? 0 : dirty.offset),
|
||||
(IS_JOURNAL(dirty.state) ? dirty.journal_sector+1 : 0)))
|
||||
{
|
||||
// need to wait. undo added requests, don't dequeue op
|
||||
PRIV(read_op)->read_vec.clear();
|
||||
@@ -186,7 +189,8 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||
{
|
||||
if (!dsk.clean_entry_bitmap_size)
|
||||
{
|
||||
if (!fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0, clean_it->second.location))
|
||||
if (!fulfill_read(read_op, fulfilled, 0, dsk.data_block_size,
|
||||
(BS_ST_BIG_WRITE | BS_ST_STABLE), 0, clean_it->second.location, 0))
|
||||
{
|
||||
// need to wait. undo added requests, don't dequeue op
|
||||
PRIV(read_op)->read_vec.clear();
|
||||
@@ -207,7 +211,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||
{
|
||||
// fill with zeroes
|
||||
assert(fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
|
||||
bmp_end * dsk.bitmap_granularity, (BS_ST_DELETE | BS_ST_STABLE), 0, 0));
|
||||
bmp_end * dsk.bitmap_granularity, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
|
||||
}
|
||||
bmp_start = bmp_end;
|
||||
while (clean_entry_bitmap[bmp_end >> 3] & (1 << (bmp_end & 0x7)) && bmp_end < bmp_size)
|
||||
@@ -218,7 +222,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||
{
|
||||
if (!fulfill_read(read_op, fulfilled, bmp_start * dsk.bitmap_granularity,
|
||||
bmp_end * dsk.bitmap_granularity, (BS_ST_BIG_WRITE | BS_ST_STABLE), 0,
|
||||
clean_it->second.location + bmp_start * dsk.bitmap_granularity))
|
||||
clean_it->second.location + bmp_start * dsk.bitmap_granularity, 0))
|
||||
{
|
||||
// need to wait. undo added requests, don't dequeue op
|
||||
PRIV(read_op)->read_vec.clear();
|
||||
@@ -233,7 +237,7 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||
else if (fulfilled < read_op->len)
|
||||
{
|
||||
// fill remaining parts with zeroes
|
||||
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0));
|
||||
assert(fulfill_read(read_op, fulfilled, 0, dsk.data_block_size, (BS_ST_DELETE | BS_ST_STABLE), 0, 0, 0));
|
||||
}
|
||||
assert(fulfilled == read_op->len);
|
||||
read_op->version = result_version;
|
||||
@@ -249,6 +253,15 @@ int blockstore_impl_t::dequeue_read(blockstore_op_t *read_op)
|
||||
FINISH_OP(read_op);
|
||||
return 2;
|
||||
}
|
||||
if (!journal.inmemory)
|
||||
{
|
||||
// Journal trim has to wait until the read is completed - record journal sector usage
|
||||
for (auto & rv: PRIV(read_op)->read_vec)
|
||||
{
|
||||
if (rv.journal_sector)
|
||||
journal.used_sectors[rv.journal_sector-1]++;
|
||||
}
|
||||
}
|
||||
read_op->retval = 0;
|
||||
return 2;
|
||||
}
|
||||
@@ -264,6 +277,19 @@ void blockstore_impl_t::handle_read_event(ring_data_t *data, blockstore_op_t *op
|
||||
}
|
||||
if (PRIV(op)->pending_ops == 0)
|
||||
{
|
||||
if (!journal.inmemory)
|
||||
{
|
||||
// Release journal sector usage
|
||||
for (auto & rv: PRIV(op)->read_vec)
|
||||
{
|
||||
if (rv.journal_sector)
|
||||
{
|
||||
auto used = --journal.used_sectors[rv.journal_sector-1];
|
||||
if (used == 0)
|
||||
journal.used_sectors.erase(rv.journal_sector-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (op->retval == 0)
|
||||
op->retval = op->len;
|
||||
FINISH_OP(op);
|
||||
|
@@ -222,7 +222,7 @@ void blockstore_impl_t::erase_dirty(blockstore_dirty_db_t::iterator dirty_start,
|
||||
#endif
|
||||
data_alloc->set(dirty_it->second.location >> dsk.block_order, false);
|
||||
}
|
||||
int used = --journal.used_sectors[dirty_it->second.journal_sector];
|
||||
auto used = --journal.used_sectors[dirty_it->second.journal_sector];
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf(
|
||||
"remove usage of journal offset %08lx by %lx:%lx v%lu (%d refs)\n", dirty_it->second.journal_sector,
|
||||
|
@@ -182,7 +182,8 @@ void blockstore_impl_t::cancel_all_writes(blockstore_op_t *op, blockstore_dirty_
|
||||
bool found = false;
|
||||
for (auto other_op: submit_queue)
|
||||
{
|
||||
if (!found && other_op == op)
|
||||
// <op> may be present in queue multiple times due to moving operations in submit_queue
|
||||
if (other_op == op)
|
||||
found = true;
|
||||
else if (found && other_op->oid == op->oid &&
|
||||
(other_op->opcode == BS_OP_WRITE || other_op->opcode == BS_OP_WRITE_STABLE))
|
||||
@@ -448,6 +449,12 @@ int blockstore_impl_t::continue_write(blockstore_op_t *op)
|
||||
resume_2:
|
||||
// Only for the immediate_commit mode: prepare and submit big_write journal entry
|
||||
{
|
||||
blockstore_journal_check_t space_check(this);
|
||||
if (!space_check.check_available(op, 1,
|
||||
sizeof(journal_entry_big_write) + dsk.clean_entry_bitmap_size, JOURNAL_STABILIZE_RESERVATION))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
BS_SUBMIT_CHECK_SQES(1);
|
||||
auto dirty_it = dirty_db.find((obj_ver_id){
|
||||
.oid = op->oid,
|
||||
|
@@ -221,9 +221,11 @@ void cluster_client_t::erase_op(cluster_op_t *op)
|
||||
if (op_queue_tail == op)
|
||||
op_queue_tail = op->prev;
|
||||
op->next = op->prev = NULL;
|
||||
std::function<void(cluster_op_t*)>(op->callback)(op);
|
||||
if (!(flags & OP_IMMEDIATE_COMMIT))
|
||||
inc_wait(opcode, flags, next, -1);
|
||||
// Call callback at the end to avoid inconsistencies in prev_wait
|
||||
// if the callback adds more operations itself
|
||||
std::function<void(cluster_op_t*)>(op->callback)(op);
|
||||
}
|
||||
|
||||
void cluster_client_t::continue_ops(bool up_retry)
|
||||
|
@@ -424,6 +424,7 @@ int disk_tool_t::write_json_journal(json11::Json entries)
|
||||
.stripe = sscanf_json(NULL, rec["stripe"]),
|
||||
},
|
||||
.version = rec["ver"].uint64_value(),
|
||||
.len = (uint32_t)rec["len"].uint64_value(),
|
||||
.location = sscanf_json(NULL, rec["loc"]),
|
||||
};
|
||||
fromhexstr(rec["bitmap"].string_value(), dsk.clean_entry_bitmap_size, ((uint8_t*)ne) + sizeof(journal_entry_big_write));
|
||||
|
@@ -478,15 +478,18 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||
{
|
||||
if (write_osd_set[role] != 0)
|
||||
{
|
||||
write_parity = 1;
|
||||
write_parity++;
|
||||
if (write_osd_set[role] != read_osd_set[role])
|
||||
{
|
||||
start = 0;
|
||||
end = chunk_size;
|
||||
for (int r2 = pg_minsize; r2 < role; r2++)
|
||||
{
|
||||
stripes[r2].write_start = start;
|
||||
stripes[r2].write_end = end;
|
||||
if (write_osd_set[r2] != 0)
|
||||
{
|
||||
stripes[r2].write_start = start;
|
||||
stripes[r2].write_end = end;
|
||||
}
|
||||
}
|
||||
}
|
||||
stripes[role].write_start = start;
|
||||
@@ -555,7 +558,7 @@ void* calc_rmw(void *request_buf, osd_rmw_stripe_t *stripes, uint64_t *read_osd_
|
||||
}
|
||||
}
|
||||
// Allocate read buffers
|
||||
void *rmw_buf = alloc_read_buffer(stripes, pg_size, (write_parity ? pg_size-pg_minsize : 0) * (end - start));
|
||||
void *rmw_buf = alloc_read_buffer(stripes, pg_size, write_parity * (end - start));
|
||||
// Position write buffers
|
||||
uint64_t buf_pos = 0, in_pos = 0;
|
||||
for (int role = 0; role < pg_size; role++)
|
||||
@@ -804,13 +807,11 @@ void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||
calc_rmw_parity_copy_mod(stripes, pg_size, pg_minsize, read_osd_set, write_osd_set, chunk_size, bitmap_granularity, start, end);
|
||||
if (end != 0)
|
||||
{
|
||||
int i;
|
||||
for (i = pg_minsize; i < pg_size; i++)
|
||||
{
|
||||
int write_parity = 0;
|
||||
for (int i = pg_minsize; i < pg_size; i++)
|
||||
if (write_osd_set[i] != 0)
|
||||
break;
|
||||
}
|
||||
if (i < pg_size)
|
||||
write_parity++;
|
||||
if (write_parity > 0)
|
||||
{
|
||||
// Calculate new coding chunks
|
||||
buf_len_t bufs[pg_size][3];
|
||||
@@ -830,8 +831,11 @@ void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||
}
|
||||
for (int i = pg_minsize; i < pg_size; i++)
|
||||
{
|
||||
bufs[i][nbuf[i]++] = { .buf = stripes[i].write_buf, .len = end-start };
|
||||
positions[i] = start;
|
||||
if (write_osd_set[i] != 0)
|
||||
{
|
||||
bufs[i][nbuf[i]++] = { .buf = stripes[i].write_buf, .len = end-start };
|
||||
positions[i] = start;
|
||||
}
|
||||
}
|
||||
uint32_t pos = start;
|
||||
while (pos < end)
|
||||
@@ -839,31 +843,37 @@ void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||
uint32_t next_end = end;
|
||||
for (int i = 0; i < pg_size; i++)
|
||||
{
|
||||
assert(curbuf[i] < nbuf[i]);
|
||||
assert(bufs[i][curbuf[i]].buf);
|
||||
data_ptrs[i] = (uint8_t*)bufs[i][curbuf[i]].buf + pos-positions[i];
|
||||
uint32_t this_end = bufs[i][curbuf[i]].len + positions[i];
|
||||
if (next_end > this_end)
|
||||
next_end = this_end;
|
||||
if (i < pg_minsize || write_osd_set[i] != 0)
|
||||
{
|
||||
assert(curbuf[i] < nbuf[i]);
|
||||
assert(bufs[i][curbuf[i]].buf);
|
||||
data_ptrs[i] = (uint8_t*)bufs[i][curbuf[i]].buf + pos-positions[i];
|
||||
uint32_t this_end = bufs[i][curbuf[i]].len + positions[i];
|
||||
if (next_end > this_end)
|
||||
next_end = this_end;
|
||||
}
|
||||
}
|
||||
assert(next_end > pos);
|
||||
for (int i = 0; i < pg_size; i++)
|
||||
{
|
||||
uint32_t this_end = bufs[i][curbuf[i]].len + positions[i];
|
||||
if (next_end >= this_end)
|
||||
if (i < pg_minsize || write_osd_set[i] != 0)
|
||||
{
|
||||
positions[i] += bufs[i][curbuf[i]].len;
|
||||
curbuf[i]++;
|
||||
uint32_t this_end = bufs[i][curbuf[i]].len + positions[i];
|
||||
if (next_end >= this_end)
|
||||
{
|
||||
positions[i] += bufs[i][curbuf[i]].len;
|
||||
curbuf[i]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef WITH_ISAL
|
||||
ec_encode_data(
|
||||
next_end-pos, pg_minsize, pg_size-pg_minsize, matrix->isal_data,
|
||||
next_end-pos, pg_minsize, write_parity, matrix->isal_data,
|
||||
(uint8_t**)data_ptrs, (uint8_t**)data_ptrs+pg_minsize
|
||||
);
|
||||
#else
|
||||
jerasure_matrix_encode(
|
||||
pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->je_data,
|
||||
pg_minsize, write_parity, OSD_JERASURE_W, matrix->je_data,
|
||||
(char**)data_ptrs, (char**)data_ptrs+pg_minsize, next_end-pos
|
||||
);
|
||||
#endif
|
||||
@@ -871,16 +881,19 @@ void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
||||
}
|
||||
for (int i = 0; i < pg_size; i++)
|
||||
{
|
||||
data_ptrs[i] = stripes[i].bmp_buf;
|
||||
if (i < pg_minsize || write_osd_set[i] != 0)
|
||||
{
|
||||
data_ptrs[i] = stripes[i].bmp_buf;
|
||||
}
|
||||
}
|
||||
#ifdef WITH_ISAL
|
||||
ec_encode_data(
|
||||
bitmap_size, pg_minsize, pg_size-pg_minsize, matrix->isal_data,
|
||||
bitmap_size, pg_minsize, write_parity, matrix->isal_data,
|
||||
(uint8_t**)data_ptrs, (uint8_t**)data_ptrs+pg_minsize
|
||||
);
|
||||
#else
|
||||
jerasure_matrix_encode(
|
||||
pg_minsize, pg_size-pg_minsize, OSD_JERASURE_W, matrix->je_data,
|
||||
pg_minsize, write_parity, OSD_JERASURE_W, matrix->je_data,
|
||||
(char**)data_ptrs, (char**)data_ptrs+pg_minsize, bitmap_size
|
||||
);
|
||||
#endif
|
||||
|
@@ -20,6 +20,7 @@ void test11();
|
||||
void test12();
|
||||
void test13();
|
||||
void test14();
|
||||
void test15();
|
||||
|
||||
int main(int narg, char *args[])
|
||||
{
|
||||
@@ -47,6 +48,8 @@ int main(int narg, char *args[])
|
||||
test13();
|
||||
// Test 14
|
||||
test14();
|
||||
// Test 15
|
||||
test15();
|
||||
// End
|
||||
printf("all ok\n");
|
||||
return 0;
|
||||
@@ -706,7 +709,7 @@ void test13()
|
||||
|
||||
/***
|
||||
|
||||
13. basic jerasure 2+1 test
|
||||
14. basic jerasure 2+1 test
|
||||
calc_rmw(offset=128K-4K, len=8K, osd_set=[1,2,0], write_set=[1,2,3])
|
||||
= {
|
||||
read: [ [ 0, 128K ], [ 0, 128K ], [ 0, 0 ] ],
|
||||
@@ -727,13 +730,13 @@ void test14()
|
||||
osd_num_t write_osd_set[3] = { 1, 2, 3 };
|
||||
osd_rmw_stripe_t stripes[3] = {};
|
||||
unsigned bitmaps[3] = { 0 };
|
||||
// Test 13.0
|
||||
// Test 14.0
|
||||
void *write_buf = malloc_or_die(8192);
|
||||
split_stripes(2, 128*1024, 128*1024-4096, 8192, stripes);
|
||||
assert(stripes[0].req_start == 128*1024-4096 && stripes[0].req_end == 128*1024);
|
||||
assert(stripes[1].req_start == 0 && stripes[1].req_end == 4096);
|
||||
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||
// Test 13.1
|
||||
// Test 14.1
|
||||
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 3, 2, 3, write_osd_set, 128*1024, bmp);
|
||||
for (int i = 0; i < 3; i++)
|
||||
stripes[i].bmp_buf = bitmaps+i;
|
||||
@@ -750,7 +753,7 @@ void test14()
|
||||
assert(stripes[0].write_buf == write_buf);
|
||||
assert(stripes[1].write_buf == (uint8_t*)write_buf+4096);
|
||||
assert(stripes[2].write_buf == rmw_buf);
|
||||
// Test 13.2 - encode
|
||||
// Test 14.2 - encode
|
||||
set_pattern(write_buf, 8192, PATTERN3);
|
||||
set_pattern(stripes[0].read_buf, 128*1024-4096, PATTERN1);
|
||||
set_pattern(stripes[1].read_buf, 128*1024-4096, PATTERN2);
|
||||
@@ -767,7 +770,7 @@ void test14()
|
||||
assert(stripes[0].write_buf == write_buf);
|
||||
assert(stripes[1].write_buf == (uint8_t*)write_buf+4096);
|
||||
assert(stripes[2].write_buf == rmw_buf);
|
||||
// Test 13.3 - decode and verify
|
||||
// Test 14.3 - decode and verify
|
||||
osd_num_t read_osd_set[4] = { 0, 2, 3 };
|
||||
memset(stripes, 0, sizeof(stripes));
|
||||
split_stripes(2, 128*1024, 0, 128*1024, stripes);
|
||||
@@ -802,3 +805,74 @@ void test14()
|
||||
free(write_buf);
|
||||
use_ec(3, 2, false);
|
||||
}
|
||||
|
||||
/***
|
||||
|
||||
15. EC 2+2 partial overwrite with 1 missing stripe
|
||||
calc_rmw(offset=64K+28K, len=4K, osd_set=[1,2,3,0], write_set=[1,2,3,0])
|
||||
= {
|
||||
read: [ [ 28K, 32K ], [ 0, 0 ], [ 0, 0 ], [ 0, 0 ] ],
|
||||
write: [ [ 0, 0 ], [ 28K, 32K ], [ 28K, 32K ], [ 0, 0 ] ],
|
||||
input buffer: [ write1 ],
|
||||
rmw buffer: [ write2, read0 ],
|
||||
}
|
||||
|
||||
***/
|
||||
|
||||
void test15()
|
||||
{
|
||||
const int bmp = 64*1024 / 4096 / 8;
|
||||
use_ec(4, 2, true);
|
||||
osd_num_t osd_set[4] = { 1, 2, 3, 0 };
|
||||
osd_num_t write_osd_set[4] = { 1, 2, 3, 0 };
|
||||
osd_rmw_stripe_t stripes[4] = {};
|
||||
unsigned bitmaps[4] = { 0 };
|
||||
// Test 15.0
|
||||
void *write_buf = malloc_or_die(4096);
|
||||
split_stripes(2, 64*1024, (64+28)*1024, 4096, stripes);
|
||||
assert(stripes[0].req_start == 0 && stripes[0].req_end == 0);
|
||||
assert(stripes[1].req_start == 28*1024 && stripes[1].req_end == 32*1024);
|
||||
assert(stripes[2].req_start == 0 && stripes[2].req_end == 0);
|
||||
assert(stripes[3].req_start == 0 && stripes[3].req_end == 0);
|
||||
// Test 15.1
|
||||
void *rmw_buf = calc_rmw(write_buf, stripes, osd_set, 4, 2, 3, write_osd_set, 64*1024, bmp);
|
||||
for (int i = 0; i < 4; i++)
|
||||
stripes[i].bmp_buf = bitmaps+i;
|
||||
assert(rmw_buf);
|
||||
assert(stripes[0].read_start == 28*1024 && stripes[0].read_end == 32*1024);
|
||||
assert(stripes[1].read_start == 0 && stripes[1].read_end == 0);
|
||||
assert(stripes[2].read_start == 0 && stripes[2].read_end == 0);
|
||||
assert(stripes[3].read_start == 0 && stripes[3].read_end == 0);
|
||||
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
|
||||
assert(stripes[1].write_start == 28*1024 && stripes[1].write_end == 32*1024);
|
||||
assert(stripes[2].write_start == 28*1024 && stripes[2].write_end == 32*1024);
|
||||
assert(stripes[3].write_start == 0 && stripes[3].write_end == 0);
|
||||
assert(stripes[0].read_buf == (uint8_t*)rmw_buf+4*1024);
|
||||
assert(stripes[1].read_buf == NULL);
|
||||
assert(stripes[2].read_buf == NULL);
|
||||
assert(stripes[3].read_buf == NULL);
|
||||
assert(stripes[0].write_buf == NULL);
|
||||
assert(stripes[1].write_buf == (uint8_t*)write_buf);
|
||||
assert(stripes[2].write_buf == rmw_buf);
|
||||
assert(stripes[3].write_buf == NULL);
|
||||
// Test 15.2 - encode
|
||||
set_pattern(write_buf, 4*1024, PATTERN1);
|
||||
set_pattern(stripes[0].read_buf, 4*1024, PATTERN2);
|
||||
memset(stripes[0].bmp_buf, 0, bmp);
|
||||
memset(stripes[1].bmp_buf, 0, bmp);
|
||||
calc_rmw_parity_ec(stripes, 4, 2, osd_set, write_osd_set, 64*1024, bmp);
|
||||
assert(*(uint32_t*)stripes[2].bmp_buf == 0x80);
|
||||
assert(stripes[0].write_start == 0 && stripes[0].write_end == 0);
|
||||
assert(stripes[1].write_start == 28*1024 && stripes[1].write_end == 32*1024);
|
||||
assert(stripes[2].write_start == 28*1024 && stripes[2].write_end == 32*1024);
|
||||
assert(stripes[3].write_start == 0 && stripes[3].write_end == 0);
|
||||
assert(stripes[0].write_buf == NULL);
|
||||
assert(stripes[1].write_buf == (uint8_t*)write_buf);
|
||||
assert(stripes[2].write_buf == rmw_buf);
|
||||
assert(stripes[3].write_buf == NULL);
|
||||
check_pattern(stripes[2].write_buf, 4*1024, PATTERN1^PATTERN2); // first parity is always xor :)
|
||||
// Done
|
||||
free(rmw_buf);
|
||||
free(write_buf);
|
||||
use_ec(3, 2, false);
|
||||
}
|
||||
|
28
src/test_crc32.cpp
Normal file
28
src/test_crc32.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright (c) Vitaliy Filippov, 2019+
|
||||
// License: VNPL-1.1 (see README.md for details)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "malloc_or_die.h"
|
||||
#include "errno.h"
|
||||
#include "crc32c.h"
|
||||
|
||||
int main(int narg, char *args[])
|
||||
{
|
||||
int bufsize = 65536;
|
||||
uint8_t *buf = (uint8_t*)malloc_or_die(bufsize);
|
||||
uint32_t csum = 0;
|
||||
while (1)
|
||||
{
|
||||
int r = read(0, buf, bufsize);
|
||||
if (r <= 0 && errno != EAGAIN && errno != EINTR)
|
||||
break;
|
||||
csum = crc32c(csum, buf, r);
|
||||
}
|
||||
free(buf);
|
||||
printf("%08x\n", csum);
|
||||
return 0;
|
||||
}
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
||||
|
||||
Name: Vitastor
|
||||
Description: Vitastor client library
|
||||
Version: 0.8.0
|
||||
Version: 0.8.1
|
||||
Libs: -L${libdir} -lvitastor_client
|
||||
Cflags: -I${includedir}
|
||||
|
||||
|
Reference in New Issue
Block a user