Compare commits
16 Commits
rm-left-on
...
v0.8.5
Author | SHA1 | Date | |
---|---|---|---|
d125fb1f30 | |||
9d3fd72298 | |||
8b552a01f9 | |||
0385b2f9e8 | |||
749c837045 | |||
98001d845b | |||
c96bcae74b | |||
9f4e34a8cc | |||
81fc8bb94c | |||
bc465c16de | |||
8763e9211c | |||
9e1a80bd17 | |||
3e280f2f08 | |||
fe87b4076b | |||
a38957c1a7 | |||
137309cf29 |
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)
|
|||||||
|
|
||||||
project(vitastor)
|
project(vitastor)
|
||||||
|
|
||||||
set(VERSION "0.8.3")
|
set(VERSION "0.8.5")
|
||||||
|
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
VERSION ?= v0.8.3
|
VERSION ?= v0.8.5
|
||||||
|
|
||||||
all: build push
|
all: build push
|
||||||
|
|
||||||
|
@@ -49,7 +49,7 @@ spec:
|
|||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
allowPrivilegeEscalation: true
|
allowPrivilegeEscalation: true
|
||||||
image: vitalif/vitastor-csi:v0.8.3
|
image: vitalif/vitastor-csi:v0.8.5
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -116,7 +116,7 @@ spec:
|
|||||||
privileged: true
|
privileged: true
|
||||||
capabilities:
|
capabilities:
|
||||||
add: ["SYS_ADMIN"]
|
add: ["SYS_ADMIN"]
|
||||||
image: vitalif/vitastor-csi:v0.8.3
|
image: vitalif/vitastor-csi:v0.8.5
|
||||||
args:
|
args:
|
||||||
- "--node=$(NODE_ID)"
|
- "--node=$(NODE_ID)"
|
||||||
- "--endpoint=$(CSI_ENDPOINT)"
|
- "--endpoint=$(CSI_ENDPOINT)"
|
||||||
|
@@ -5,7 +5,7 @@ package vitastor
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
vitastorCSIDriverName = "csi.vitastor.io"
|
vitastorCSIDriverName = "csi.vitastor.io"
|
||||||
vitastorCSIDriverVersion = "0.8.3"
|
vitastorCSIDriverVersion = "0.8.5"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Config struct fills the parameters of request or user input
|
// Config struct fills the parameters of request or user input
|
||||||
|
4
debian/changelog
vendored
4
debian/changelog
vendored
@@ -1,10 +1,10 @@
|
|||||||
vitastor (0.8.3-1) unstable; urgency=medium
|
vitastor (0.8.5-1) unstable; urgency=medium
|
||||||
|
|
||||||
* Bugfixes
|
* Bugfixes
|
||||||
|
|
||||||
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
-- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
|
||||||
|
|
||||||
vitastor (0.8.3-1) unstable; urgency=medium
|
vitastor (0.8.5-1) unstable; urgency=medium
|
||||||
|
|
||||||
* Implement NFS proxy
|
* Implement NFS proxy
|
||||||
* Add documentation
|
* Add documentation
|
||||||
|
8
debian/vitastor.Dockerfile
vendored
8
debian/vitastor.Dockerfile
vendored
@@ -34,8 +34,8 @@ RUN set -e -x; \
|
|||||||
mkdir -p /root/packages/vitastor-$REL; \
|
mkdir -p /root/packages/vitastor-$REL; \
|
||||||
rm -rf /root/packages/vitastor-$REL/*; \
|
rm -rf /root/packages/vitastor-$REL/*; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
cp -r /root/vitastor vitastor-0.8.3; \
|
cp -r /root/vitastor vitastor-0.8.5; \
|
||||||
cd vitastor-0.8.3; \
|
cd vitastor-0.8.5; \
|
||||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||||
@@ -48,8 +48,8 @@ RUN set -e -x; \
|
|||||||
rm -rf a b; \
|
rm -rf a b; \
|
||||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||||
cd /root/packages/vitastor-$REL; \
|
cd /root/packages/vitastor-$REL; \
|
||||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.3.orig.tar.xz vitastor-0.8.3; \
|
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.5.orig.tar.xz vitastor-0.8.5; \
|
||||||
cd vitastor-0.8.3; \
|
cd vitastor-0.8.5; \
|
||||||
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
|
||||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
## Debian
|
## Debian
|
||||||
|
|
||||||
- Trust Vitastor package signing key:
|
- Trust Vitastor package signing key:
|
||||||
`wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -`
|
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||||
- Add Vitastor package repository to your /etc/apt/sources.list:
|
- Add Vitastor package repository to your /etc/apt/sources.list:
|
||||||
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
||||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||||
@@ -20,8 +20,8 @@
|
|||||||
## CentOS
|
## CentOS
|
||||||
|
|
||||||
- Add Vitastor package repository:
|
- Add Vitastor package repository:
|
||||||
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm`
|
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
|
||||||
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm`
|
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
|
||||||
- Enable EPEL: `yum/dnf install epel-release`
|
- Enable EPEL: `yum/dnf install epel-release`
|
||||||
- Enable additional CentOS repositories:
|
- Enable additional CentOS repositories:
|
||||||
- CentOS 7: `yum install centos-release-scl`
|
- CentOS 7: `yum install centos-release-scl`
|
||||||
|
@@ -9,7 +9,7 @@
|
|||||||
## Debian
|
## Debian
|
||||||
|
|
||||||
- Добавьте ключ репозитория Vitastor:
|
- Добавьте ключ репозитория Vitastor:
|
||||||
`wget -q -O - https://vitastor.io/debian/pubkey | sudo apt-key add -`
|
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||||
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
||||||
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
- Debian 11 (Bullseye/Sid): `deb https://vitastor.io/debian bullseye main`
|
||||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||||
@@ -20,8 +20,8 @@
|
|||||||
## CentOS
|
## CentOS
|
||||||
|
|
||||||
- Добавьте в систему репозиторий Vitastor:
|
- Добавьте в систему репозиторий Vitastor:
|
||||||
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm`
|
- CentOS 7: `yum install https://vitastor.io/rpms/centos/7/vitastor-release.rpm`
|
||||||
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release-1.0-1.el8.noarch.rpm`
|
- CentOS 8: `dnf install https://vitastor.io/rpms/centos/8/vitastor-release.rpm`
|
||||||
- Включите EPEL: `yum/dnf install epel-release`
|
- Включите EPEL: `yum/dnf install epel-release`
|
||||||
- Включите дополнительные репозитории CentOS:
|
- Включите дополнительные репозитории CentOS:
|
||||||
- CentOS 7: `yum install centos-release-scl`
|
- CentOS 7: `yum install centos-release-scl`
|
||||||
|
@@ -14,6 +14,7 @@ It supports the following commands:
|
|||||||
- [df](#df)
|
- [df](#df)
|
||||||
- [ls](#ls)
|
- [ls](#ls)
|
||||||
- [create](#create)
|
- [create](#create)
|
||||||
|
- [snap-create](#create)
|
||||||
- [modify](#modify)
|
- [modify](#modify)
|
||||||
- [rm](#rm)
|
- [rm](#rm)
|
||||||
- [flatten](#flatten)
|
- [flatten](#flatten)
|
||||||
@@ -123,6 +124,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
|||||||
|
|
||||||
Create a snapshot of image `<name>` (either form can be used). May be used live if only a single writer is active.
|
Create a snapshot of image `<name>` (either form can be used). May be used live if only a single writer is active.
|
||||||
|
|
||||||
|
See also [how to export snapshots](qemu.en.md#exporting-snapshots).
|
||||||
|
|
||||||
## modify
|
## modify
|
||||||
|
|
||||||
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
||||||
|
@@ -15,6 +15,7 @@ vitastor-cli - интерфейс командной строки для адм
|
|||||||
- [df](#df)
|
- [df](#df)
|
||||||
- [ls](#ls)
|
- [ls](#ls)
|
||||||
- [create](#create)
|
- [create](#create)
|
||||||
|
- [snap-create](#create)
|
||||||
- [modify](#modify)
|
- [modify](#modify)
|
||||||
- [rm](#rm)
|
- [rm](#rm)
|
||||||
- [flatten](#flatten)
|
- [flatten](#flatten)
|
||||||
@@ -126,6 +127,8 @@ vitastor-cli snap-create [-p|--pool <id|name>] <image>@<snapshot>
|
|||||||
Создать снимок образа `<name>` (можно использовать любую форму команды). Снимок можно создавать без остановки
|
Создать снимок образа `<name>` (можно использовать любую форму команды). Снимок можно создавать без остановки
|
||||||
клиентов, если пишущий клиент максимум 1.
|
клиентов, если пишущий клиент максимум 1.
|
||||||
|
|
||||||
|
Смотрите также информацию о том, [как экспортировать снимки](qemu.ru.md#экспорт-снимков).
|
||||||
|
|
||||||
## modify
|
## modify
|
||||||
|
|
||||||
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
`vitastor-cli modify <name> [--rename <new-name>] [--resize <size>] [--readonly | --readwrite] [-f|--force]`
|
||||||
|
@@ -46,3 +46,40 @@ qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=192.168.7
|
|||||||
|
|
||||||
You can also specify `:pool=<POOL>:inode=<INODE>:size=<SIZE>` instead of `:image=<IMAGE>`
|
You can also specify `:pool=<POOL>:inode=<INODE>:size=<SIZE>` instead of `:image=<IMAGE>`
|
||||||
if you don't want to use inode metadata.
|
if you don't want to use inode metadata.
|
||||||
|
|
||||||
|
### Exporting snapshots
|
||||||
|
|
||||||
|
Starting with 0.8.4, you can also export individual layers (snapshot diffs) using `qemu-img`.
|
||||||
|
|
||||||
|
Suppose you have an image `testimg` and a snapshot `testimg@0` created with `vitastor-cli snap-create testimg@0`.
|
||||||
|
|
||||||
|
Then you can export the `testimg@0` snapshot and the data written to `testimg` after creating
|
||||||
|
the snapshot separately using the following commands (key points are using `skip-parents=1` and
|
||||||
|
the `-B backing_file` option):
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg@0' \
|
||||||
|
-O qcow2 testimg_0.qcow2
|
||||||
|
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg:skip-parents=1' \
|
||||||
|
-O qcow2 -o 'cluster_size=4k' -B testimg_0.qcow2 testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
In fact, with `cluster_size=4k` any QCOW2 file can be used instead of `-B testimg_0.qcow2`, even an empty one.
|
||||||
|
|
||||||
|
The QCOW2 `cluster_size=4k` option is required if you want `testimg.qcow2` to contain only the data
|
||||||
|
overwritten **exactly** in the child layer. With the default 64 KB QCOW2 cluster size you'll
|
||||||
|
get a bit of extra data from parent layers, i.e. a 4 KB overwrite will result in `testimg.qcow2`
|
||||||
|
containing 64 KB of data. And this extra data will be taken by `qemu-img` from the file passed
|
||||||
|
in the `-B` option, so you really need a 4 KB cluster size if you use an empty image in `-B`.
|
||||||
|
|
||||||
|
After this procedure you'll get two chained QCOW2 images. To detach `testimg.qcow2` from
|
||||||
|
its parent, run:
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img rebase -u -b '' testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
This can be used for backups. Just note that exporting an image that is currently being written to
|
||||||
|
is of course unsafe and doesn't produce a consistent result, so only export snapshots if you do this
|
||||||
|
on a live VM.
|
||||||
|
@@ -50,3 +50,40 @@ qemu-img convert -f qcow2 debian10.qcow2 -p -O raw 'vitastor:etcd_host=10.115.0.
|
|||||||
|
|
||||||
Если вы не хотите обращаться к образу по имени, вместо `:image=<IMAGE>` можно указать номер пула, номер инода и размер:
|
Если вы не хотите обращаться к образу по имени, вместо `:image=<IMAGE>` можно указать номер пула, номер инода и размер:
|
||||||
`:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
|
`:pool=<POOL>:inode=<INODE>:size=<SIZE>`.
|
||||||
|
|
||||||
|
### Экспорт снимков
|
||||||
|
|
||||||
|
Начиная с 0.8.4 вы можете экспортировать отдельные слои (изменения в снимках) с помощью `qemu-img`.
|
||||||
|
|
||||||
|
Допустим, что у вас есть образ `testimg` и его снимок `testimg@0`, созданный с помощью `vitastor-cli snap-create testimg@0`.
|
||||||
|
|
||||||
|
Тогда вы можете выгрузить снимок `testimg@0` и данные, изменённые в `testimg` после создания снимка, отдельно,
|
||||||
|
с помощью следующих команд (ключевые моменты - использование `skip-parents=1` и опции `-B backing_file.qcow2`):
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg@0' \
|
||||||
|
-O qcow2 testimg_0.qcow2
|
||||||
|
|
||||||
|
qemu-img convert -f raw 'vitastor:etcd_host=192.168.7.2\:2379/v3:image=testimg:skip-parents=1' \
|
||||||
|
-O qcow2 -o 'cluster_size=4k' -B testimg_0.qcow2 testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
На самом деле, с `cluster_size=4k` вместо `-B testimg_0.qcow2` можно использовать любой qcow2-файл,
|
||||||
|
даже пустой.
|
||||||
|
|
||||||
|
Опция QCOW2 `cluster_size=4k` нужна, если вы хотите, чтобы `testimg.qcow2` содержал **в точности**
|
||||||
|
данные, перезаписанные в дочернем слое. С размером кластера QCOW2 по умолчанию, составляющим 64 КБ,
|
||||||
|
вы получите немного "лишних" данных из родительских слоёв - перезапись 4 КБ будет приводить к тому,
|
||||||
|
что в `testimg.qcow2` будет появляться 64 КБ данных. Причём "лишние" данные qemu-img будет брать
|
||||||
|
как раз из файла, указанного в опции `-B`, так что если там указан пустой образ, кластер обязан быть 4 КБ.
|
||||||
|
|
||||||
|
После данной процедуры вы получите два QCOW2-образа, связанных в цепочку. Чтобы "отцепить" образ
|
||||||
|
`testimg.qcow2` от базового, выполните:
|
||||||
|
|
||||||
|
```
|
||||||
|
qemu-img rebase -u -b '' testimg.qcow2
|
||||||
|
```
|
||||||
|
|
||||||
|
Это можно использовать для резервного копирования. Только помните, что экспортировать образ, в который
|
||||||
|
в то же время идёт запись, небезопасно - результат чтения не будет целостным. Так что если вы работаете
|
||||||
|
с активными виртуальными машинами, экспортируйте только их снимки, но не сам образ.
|
||||||
|
@@ -550,8 +550,8 @@ function random_combinations(osd_tree, pg_size, count, ordered)
|
|||||||
seed ^= seed << 5;
|
seed ^= seed << 5;
|
||||||
return seed + 2147483648;
|
return seed + 2147483648;
|
||||||
};
|
};
|
||||||
const hosts = Object.keys(osd_tree).sort();
|
|
||||||
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
|
const osds = Object.keys(osd_tree).reduce((a, c) => { a[c] = Object.keys(osd_tree[c]).sort(); return a; }, {});
|
||||||
|
const hosts = Object.keys(osd_tree).sort().filter(h => osds[h].length > 0);
|
||||||
const r = {};
|
const r = {};
|
||||||
// Generate random combinations including each OSD at least once
|
// Generate random combinations including each OSD at least once
|
||||||
for (let h = 0; h < hosts.length; h++)
|
for (let h = 0; h < hosts.length; h++)
|
||||||
|
@@ -79,7 +79,7 @@ StartLimitInterval=0
|
|||||||
RestartSec=10
|
RestartSec=10
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=local.target
|
WantedBy=multi-user.target
|
||||||
`);
|
`);
|
||||||
await system(`useradd etcd`);
|
await system(`useradd etcd`);
|
||||||
await system(`systemctl daemon-reload`);
|
await system(`systemctl daemon-reload`);
|
||||||
|
@@ -16,6 +16,11 @@ use PVE::Tools qw(run_command);
|
|||||||
|
|
||||||
use base qw(PVE::Storage::Plugin);
|
use base qw(PVE::Storage::Plugin);
|
||||||
|
|
||||||
|
if (@PVE::Storage::Plugin::SHARED_STORAGE)
|
||||||
|
{
|
||||||
|
push @PVE::Storage::Plugin::SHARED_STORAGE, 'vitastor';
|
||||||
|
}
|
||||||
|
|
||||||
sub api
|
sub api
|
||||||
{
|
{
|
||||||
# Trick it :)
|
# Trick it :)
|
||||||
@@ -133,9 +138,11 @@ sub properties
|
|||||||
sub options
|
sub options
|
||||||
{
|
{
|
||||||
return {
|
return {
|
||||||
|
shared => { optional => 1 },
|
||||||
|
content => { optional => 1 },
|
||||||
nodes => { optional => 1 },
|
nodes => { optional => 1 },
|
||||||
disable => { optional => 1 },
|
disable => { optional => 1 },
|
||||||
vitastor_etcd_address => { optional => 1},
|
vitastor_etcd_address => { optional => 1 },
|
||||||
vitastor_etcd_prefix => { optional => 1 },
|
vitastor_etcd_prefix => { optional => 1 },
|
||||||
vitastor_config_path => { optional => 1 },
|
vitastor_config_path => { optional => 1 },
|
||||||
vitastor_prefix => { optional => 1 },
|
vitastor_prefix => { optional => 1 },
|
||||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
|||||||
from cinder.volume import driver
|
from cinder.volume import driver
|
||||||
from cinder.volume import volume_utils
|
from cinder.volume import volume_utils
|
||||||
|
|
||||||
VERSION = '0.8.3'
|
VERSION = '0.8.5'
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@@ -25,4 +25,4 @@ rm fio
|
|||||||
mv fio-copy fio
|
mv fio-copy fio
|
||||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||||
tar --transform 's#^#vitastor-0.8.3/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.3$(rpm --eval '%dist').tar.gz *
|
tar --transform 's#^#vitastor-0.8.5/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.5$(rpm --eval '%dist').tar.gz *
|
||||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.8.3.el7.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.8.5.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.8.3
|
Version: 0.8.5
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.8.3.el7.tar.gz
|
Source0: vitastor-0.8.5.el7.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
@@ -35,6 +35,7 @@ Summary: Vitastor - OSD
|
|||||||
Requires: libJerasure2
|
Requires: libJerasure2
|
||||||
Requires: libisa-l
|
Requires: libisa-l
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
Requires: vitastor-client = %{version}-%{release}
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
Requires: util-linux
|
Requires: util-linux
|
||||||
Requires: parted
|
Requires: parted
|
||||||
@@ -59,6 +60,7 @@ scheduling cluster-level operations.
|
|||||||
%package -n vitastor-client
|
%package -n vitastor-client
|
||||||
Summary: Vitastor - client
|
Summary: Vitastor - client
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
|
|
||||||
|
|
||||||
%description -n vitastor-client
|
%description -n vitastor-client
|
||||||
|
@@ -35,7 +35,7 @@ ADD . /root/vitastor
|
|||||||
RUN set -e; \
|
RUN set -e; \
|
||||||
cd /root/vitastor/rpm; \
|
cd /root/vitastor/rpm; \
|
||||||
sh build-tarball.sh; \
|
sh build-tarball.sh; \
|
||||||
cp /root/vitastor-0.8.3.el8.tar.gz ~/rpmbuild/SOURCES; \
|
cp /root/vitastor-0.8.5.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||||
cd ~/rpmbuild/SPECS/; \
|
cd ~/rpmbuild/SPECS/; \
|
||||||
rpmbuild -ba vitastor.spec; \
|
rpmbuild -ba vitastor.spec; \
|
||||||
|
@@ -1,11 +1,11 @@
|
|||||||
Name: vitastor
|
Name: vitastor
|
||||||
Version: 0.8.3
|
Version: 0.8.5
|
||||||
Release: 1%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Vitastor, a fast software-defined clustered block storage
|
Summary: Vitastor, a fast software-defined clustered block storage
|
||||||
|
|
||||||
License: Vitastor Network Public License 1.1
|
License: Vitastor Network Public License 1.1
|
||||||
URL: https://vitastor.io/
|
URL: https://vitastor.io/
|
||||||
Source0: vitastor-0.8.3.el8.tar.gz
|
Source0: vitastor-0.8.5.el8.tar.gz
|
||||||
|
|
||||||
BuildRequires: liburing-devel >= 0.6
|
BuildRequires: liburing-devel >= 0.6
|
||||||
BuildRequires: gperftools-devel
|
BuildRequires: gperftools-devel
|
||||||
@@ -34,6 +34,7 @@ Summary: Vitastor - OSD
|
|||||||
Requires: libJerasure2
|
Requires: libJerasure2
|
||||||
Requires: libisa-l
|
Requires: libisa-l
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
Requires: vitastor-client = %{version}-%{release}
|
Requires: vitastor-client = %{version}-%{release}
|
||||||
Requires: util-linux
|
Requires: util-linux
|
||||||
Requires: parted
|
Requires: parted
|
||||||
@@ -57,6 +58,7 @@ scheduling cluster-level operations.
|
|||||||
%package -n vitastor-client
|
%package -n vitastor-client
|
||||||
Summary: Vitastor - client
|
Summary: Vitastor - client
|
||||||
Requires: liburing >= 0.6
|
Requires: liburing >= 0.6
|
||||||
|
Requires: liburing < 2
|
||||||
|
|
||||||
|
|
||||||
%description -n vitastor-client
|
%description -n vitastor-client
|
||||||
|
@@ -15,7 +15,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
|||||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_definitions(-DVERSION="0.8.3")
|
add_definitions(-DVERSION="0.8.5")
|
||||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||||
if (${WITH_ASAN})
|
if (${WITH_ASAN})
|
||||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||||
@@ -145,7 +145,6 @@ add_library(vitastor_client SHARED
|
|||||||
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
|
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
|
||||||
target_link_libraries(vitastor_client
|
target_link_libraries(vitastor_client
|
||||||
vitastor_common
|
vitastor_common
|
||||||
tcmalloc_minimal
|
|
||||||
${LIBURING_LIBRARIES}
|
${LIBURING_LIBRARIES}
|
||||||
${IBVERBS_LIBRARIES}
|
${IBVERBS_LIBRARIES}
|
||||||
)
|
)
|
||||||
|
@@ -403,7 +403,7 @@ struct snap_merger_t
|
|||||||
op->opcode = OSD_OP_READ_BITMAP;
|
op->opcode = OSD_OP_READ_BITMAP;
|
||||||
op->inode = target;
|
op->inode = target;
|
||||||
op->offset = offset;
|
op->offset = offset;
|
||||||
op->len = 0;
|
op->len = target_block_size;
|
||||||
op->callback = [this](cluster_op_t *op)
|
op->callback = [this](cluster_op_t *op)
|
||||||
{
|
{
|
||||||
if (op->retval < 0)
|
if (op->retval < 0)
|
||||||
|
@@ -143,7 +143,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
|
|||||||
if (!op->prev_wait)
|
if (!op->prev_wait)
|
||||||
continue_sync(op);
|
continue_sync(op);
|
||||||
}
|
}
|
||||||
else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) */
|
else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP) */
|
||||||
{
|
{
|
||||||
for (auto prev = op_queue_head; prev && prev != op; prev = prev->next)
|
for (auto prev = op_queue_head; prev && prev != op; prev = prev->next)
|
||||||
{
|
{
|
||||||
@@ -151,7 +151,8 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
|
|||||||
{
|
{
|
||||||
op->prev_wait++;
|
op->prev_wait++;
|
||||||
}
|
}
|
||||||
else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ || prev->opcode == OSD_OP_READ_BITMAP)
|
else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ ||
|
||||||
|
prev->opcode == OSD_OP_READ_BITMAP || prev->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
{
|
{
|
||||||
// Flushes are always in the beginning (we're scanning from the beginning of the queue)
|
// Flushes are always in the beginning (we're scanning from the beginning of the queue)
|
||||||
break;
|
break;
|
||||||
@@ -171,7 +172,8 @@ void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *n
|
|||||||
auto n2 = next->next;
|
auto n2 = next->next;
|
||||||
if (next->opcode == OSD_OP_SYNC && !(flags & OP_IMMEDIATE_COMMIT) ||
|
if (next->opcode == OSD_OP_SYNC && !(flags & OP_IMMEDIATE_COMMIT) ||
|
||||||
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
|
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
|
||||||
(next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP) && (flags & OP_FLUSH_BUFFER))
|
(next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP ||
|
||||||
|
next->opcode == OSD_OP_READ_CHAIN_BITMAP) && (flags & OP_FLUSH_BUFFER))
|
||||||
{
|
{
|
||||||
next->prev_wait += inc;
|
next->prev_wait += inc;
|
||||||
assert(next->prev_wait >= 0);
|
assert(next->prev_wait >= 0);
|
||||||
@@ -337,7 +339,8 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes
|
|||||||
// And now they have to be resliced!
|
// And now they have to be resliced!
|
||||||
for (auto op = op_queue_head; op; op = op->next)
|
for (auto op = op_queue_head; op; op = op->next)
|
||||||
{
|
{
|
||||||
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) &&
|
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ ||
|
||||||
|
op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP) &&
|
||||||
INODE_POOL(op->cur_inode) == pool_item.first)
|
INODE_POOL(op->cur_inode) == pool_item.first)
|
||||||
{
|
{
|
||||||
op->needs_reslice = true;
|
op->needs_reslice = true;
|
||||||
@@ -409,7 +412,7 @@ void cluster_client_t::on_ready(std::function<void(void)> fn)
|
|||||||
void cluster_client_t::execute(cluster_op_t *op)
|
void cluster_client_t::execute(cluster_op_t *op)
|
||||||
{
|
{
|
||||||
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
|
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
|
||||||
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_WRITE)
|
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_WRITE)
|
||||||
{
|
{
|
||||||
op->retval = -EINVAL;
|
op->retval = -EINVAL;
|
||||||
std::function<void(cluster_op_t*)>(op->callback)(op);
|
std::function<void(cluster_op_t*)>(op->callback)(op);
|
||||||
@@ -441,7 +444,7 @@ void cluster_client_t::execute(cluster_op_t *op)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Check alignment
|
// Check alignment
|
||||||
if ((op->opcode == OSD_OP_READ || op->opcode == OSD_OP_WRITE) && !op->len ||
|
if (!op->len && (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP || op->opcode == OSD_OP_WRITE) ||
|
||||||
op->offset % pool_it->second.bitmap_granularity || op->len % pool_it->second.bitmap_granularity)
|
op->offset % pool_it->second.bitmap_granularity || op->len % pool_it->second.bitmap_granularity)
|
||||||
{
|
{
|
||||||
op->retval = -EINVAL;
|
op->retval = -EINVAL;
|
||||||
@@ -702,8 +705,7 @@ resume_3:
|
|||||||
// Finished successfully
|
// Finished successfully
|
||||||
// Even if the PG count has changed in meanwhile we treat it as success
|
// Even if the PG count has changed in meanwhile we treat it as success
|
||||||
// because if some operations were invalid for the new PG count we'd get errors
|
// because if some operations were invalid for the new PG count we'd get errors
|
||||||
bool is_read = op->opcode == OSD_OP_READ;
|
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
if (is_read)
|
|
||||||
{
|
{
|
||||||
// Check parent inode
|
// Check parent inode
|
||||||
auto ino_it = st_cli.inode_config.find(op->cur_inode);
|
auto ino_it = st_cli.inode_config.find(op->cur_inode);
|
||||||
@@ -727,6 +729,11 @@ resume_3:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
op->retval = op->len;
|
op->retval = op->len;
|
||||||
|
if (op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
|
{
|
||||||
|
auto & pool_cfg = st_cli.pool_config.at(INODE_POOL(op->inode));
|
||||||
|
op->retval = op->len / pool_cfg.bitmap_granularity;
|
||||||
|
}
|
||||||
erase_op(op);
|
erase_op(op);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@@ -750,7 +757,10 @@ resume_3:
|
|||||||
{
|
{
|
||||||
for (int i = 0; i < op->parts.size(); i++)
|
for (int i = 0; i < op->parts.size(); i++)
|
||||||
{
|
{
|
||||||
op->parts[i].flags = PART_RETRY;
|
if (!(op->parts[i].flags & PART_DONE))
|
||||||
|
{
|
||||||
|
op->parts[i].flags = PART_RETRY;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
goto resume_2;
|
goto resume_2;
|
||||||
}
|
}
|
||||||
@@ -809,23 +819,19 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
|||||||
uint64_t last_stripe = op->len > 0 ? ((op->offset + op->len - 1) / pg_block_size) * pg_block_size : first_stripe;
|
uint64_t last_stripe = op->len > 0 ? ((op->offset + op->len - 1) / pg_block_size) * pg_block_size : first_stripe;
|
||||||
op->retval = 0;
|
op->retval = 0;
|
||||||
op->parts.resize((last_stripe - first_stripe) / pg_block_size + 1);
|
op->parts.resize((last_stripe - first_stripe) / pg_block_size + 1);
|
||||||
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
|
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
{
|
{
|
||||||
// Allocate memory for the bitmap
|
// Allocate memory for the bitmap
|
||||||
unsigned object_bitmap_size = (((op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : op->len) / pool_cfg.bitmap_granularity + 7) / 8);
|
unsigned object_bitmap_size = ((op->len / pool_cfg.bitmap_granularity + 7) / 8);
|
||||||
object_bitmap_size = (object_bitmap_size < 8 ? 8 : object_bitmap_size);
|
object_bitmap_size = (object_bitmap_size < 8 ? 8 : object_bitmap_size);
|
||||||
unsigned bitmap_mem = object_bitmap_size + (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8 * pg_data_size) * op->parts.size();
|
unsigned bitmap_mem = object_bitmap_size + (pool_cfg.data_block_size / pool_cfg.bitmap_granularity / 8 * pg_data_size) * op->parts.size();
|
||||||
if (op->bitmap_buf_size < bitmap_mem)
|
if (!op->bitmap_buf || op->bitmap_buf_size < bitmap_mem)
|
||||||
{
|
{
|
||||||
op->bitmap_buf = realloc_or_die(op->bitmap_buf, bitmap_mem);
|
op->bitmap_buf = realloc_or_die(op->bitmap_buf, bitmap_mem);
|
||||||
if (!op->bitmap_buf_size)
|
|
||||||
{
|
|
||||||
// First allocation
|
|
||||||
memset(op->bitmap_buf, 0, object_bitmap_size);
|
|
||||||
}
|
|
||||||
op->part_bitmaps = (uint8_t*)op->bitmap_buf + object_bitmap_size;
|
op->part_bitmaps = (uint8_t*)op->bitmap_buf + object_bitmap_size;
|
||||||
op->bitmap_buf_size = bitmap_mem;
|
op->bitmap_buf_size = bitmap_mem;
|
||||||
}
|
}
|
||||||
|
memset(op->bitmap_buf, 0, bitmap_mem);
|
||||||
}
|
}
|
||||||
int iov_idx = 0;
|
int iov_idx = 0;
|
||||||
size_t iov_pos = 0;
|
size_t iov_pos = 0;
|
||||||
@@ -876,13 +882,14 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
|
|||||||
if (end == begin)
|
if (end == begin)
|
||||||
op->done_count++;
|
op->done_count++;
|
||||||
}
|
}
|
||||||
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
|
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
|
||||||
{
|
{
|
||||||
add_iov(end-begin, false, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
|
add_iov(end-begin, false, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
|
||||||
}
|
}
|
||||||
op->parts[i].parent = op;
|
op->parts[i].parent = op;
|
||||||
op->parts[i].offset = begin;
|
op->parts[i].offset = begin;
|
||||||
op->parts[i].len = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
|
op->parts[i].len = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP ||
|
||||||
|
op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
|
||||||
op->parts[i].pg_num = pg_num;
|
op->parts[i].pg_num = pg_num;
|
||||||
op->parts[i].osd_num = 0;
|
op->parts[i].osd_num = 0;
|
||||||
op->parts[i].flags = 0;
|
op->parts[i].flags = 0;
|
||||||
@@ -929,7 +936,7 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
|||||||
pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
|
pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
|
||||||
);
|
);
|
||||||
uint64_t meta_rev = 0;
|
uint64_t meta_rev = 0;
|
||||||
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
|
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_READ_CHAIN_BITMAP && op->opcode != OSD_OP_DELETE)
|
||||||
{
|
{
|
||||||
auto ino_it = st_cli.inode_config.find(op->inode);
|
auto ino_it = st_cli.inode_config.find(op->inode);
|
||||||
if (ino_it != st_cli.inode_config.end())
|
if (ino_it != st_cli.inode_config.end())
|
||||||
@@ -942,7 +949,7 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
|||||||
.header = {
|
.header = {
|
||||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||||
.id = next_op_id(),
|
.id = next_op_id(),
|
||||||
.opcode = op->opcode == OSD_OP_READ_BITMAP ? OSD_OP_READ : op->opcode,
|
.opcode = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP ? OSD_OP_READ : op->opcode,
|
||||||
},
|
},
|
||||||
.inode = op->cur_inode,
|
.inode = op->cur_inode,
|
||||||
.offset = part->offset,
|
.offset = part->offset,
|
||||||
@@ -950,8 +957,10 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
|||||||
.meta_revision = meta_rev,
|
.meta_revision = meta_rev,
|
||||||
.version = op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE ? op->version : 0,
|
.version = op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE ? op->version : 0,
|
||||||
} },
|
} },
|
||||||
.bitmap = (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? (uint8_t*)op->part_bitmaps + pg_bitmap_size*i : NULL),
|
.bitmap = (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP
|
||||||
.bitmap_len = (unsigned)(op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? pg_bitmap_size : 0),
|
? (uint8_t*)op->part_bitmaps + pg_bitmap_size*i : NULL),
|
||||||
|
.bitmap_len = (unsigned)(op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP
|
||||||
|
? pg_bitmap_size : 0),
|
||||||
.callback = [this, part](osd_op_t *op_part)
|
.callback = [this, part](osd_op_t *op_part)
|
||||||
{
|
{
|
||||||
handle_op_part(part);
|
handle_op_part(part);
|
||||||
@@ -1130,11 +1139,11 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
// OK
|
// OK
|
||||||
if (!(op->flags & OP_IMMEDIATE_COMMIT))
|
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && !(op->flags & OP_IMMEDIATE_COMMIT))
|
||||||
dirty_osds.insert(part->osd_num);
|
dirty_osds.insert(part->osd_num);
|
||||||
part->flags |= PART_DONE;
|
part->flags |= PART_DONE;
|
||||||
op->done_count++;
|
op->done_count++;
|
||||||
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
|
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_READ_CHAIN_BITMAP)
|
||||||
{
|
{
|
||||||
copy_part_bitmap(op, part);
|
copy_part_bitmap(op, part);
|
||||||
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
|
||||||
@@ -1158,7 +1167,12 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
|
|||||||
);
|
);
|
||||||
uint32_t object_offset = (part->op.req.rw.offset - op->offset) / pool_cfg.bitmap_granularity;
|
uint32_t object_offset = (part->op.req.rw.offset - op->offset) / pool_cfg.bitmap_granularity;
|
||||||
uint32_t part_offset = (part->op.req.rw.offset % pg_block_size) / pool_cfg.bitmap_granularity;
|
uint32_t part_offset = (part->op.req.rw.offset % pg_block_size) / pool_cfg.bitmap_granularity;
|
||||||
uint32_t part_len = (op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : part->op.req.rw.len) / pool_cfg.bitmap_granularity;
|
uint32_t op_len = op->len / pool_cfg.bitmap_granularity;
|
||||||
|
uint32_t part_len = pg_block_size/pool_cfg.bitmap_granularity - part_offset;
|
||||||
|
if (part_len > op_len-object_offset)
|
||||||
|
{
|
||||||
|
part_len = op_len-object_offset;
|
||||||
|
}
|
||||||
if (!(object_offset & 0x7) && !(part_offset & 0x7) && (part_len >= 8))
|
if (!(object_offset & 0x7) && !(part_offset & 0x7) && (part_len >= 8))
|
||||||
{
|
{
|
||||||
// Copy bytes
|
// Copy bytes
|
||||||
|
@@ -11,6 +11,7 @@
|
|||||||
#define INODE_LIST_DONE 1
|
#define INODE_LIST_DONE 1
|
||||||
#define INODE_LIST_HAS_UNSTABLE 2
|
#define INODE_LIST_HAS_UNSTABLE 2
|
||||||
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
|
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
|
||||||
|
#define OSD_OP_READ_CHAIN_BITMAP 0x102
|
||||||
|
|
||||||
#define OSD_OP_IGNORE_READONLY 0x08
|
#define OSD_OP_IGNORE_READONLY 0x08
|
||||||
|
|
||||||
@@ -30,7 +31,7 @@ struct cluster_op_part_t
|
|||||||
|
|
||||||
struct cluster_op_t
|
struct cluster_op_t
|
||||||
{
|
{
|
||||||
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP
|
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP, OSD_OP_READ_CHAIN_BITMAP
|
||||||
uint64_t inode;
|
uint64_t inode;
|
||||||
uint64_t offset;
|
uint64_t offset;
|
||||||
uint64_t len;
|
uint64_t len;
|
||||||
@@ -39,9 +40,13 @@ struct cluster_op_t
|
|||||||
uint64_t version = 0;
|
uint64_t version = 0;
|
||||||
// now only OSD_OP_IGNORE_READONLY is supported
|
// now only OSD_OP_IGNORE_READONLY is supported
|
||||||
uint64_t flags = 0;
|
uint64_t flags = 0;
|
||||||
|
// negative retval is an error number
|
||||||
|
// write and read return len on success
|
||||||
|
// sync and delete return 0 on success
|
||||||
|
// read_bitmap and read_chain_bitmap return the length of bitmap in bits(!)
|
||||||
int retval;
|
int retval;
|
||||||
osd_op_buf_list_t iov;
|
osd_op_buf_list_t iov;
|
||||||
// READ and READ_BITMAP return the bitmap here
|
// READ, READ_BITMAP, READ_CHAIN_BITMAP return the bitmap here
|
||||||
void *bitmap_buf = NULL;
|
void *bitmap_buf = NULL;
|
||||||
std::function<void(cluster_op_t*)> callback;
|
std::function<void(cluster_op_t*)> callback;
|
||||||
~cluster_op_t();
|
~cluster_op_t();
|
||||||
|
@@ -168,8 +168,8 @@ resume_3:
|
|||||||
auto it = std::lower_bound(pg.target_history.begin(), pg.target_history.end(), history_set);
|
auto it = std::lower_bound(pg.target_history.begin(), pg.target_history.end(), history_set);
|
||||||
if (it == pg.target_history.end() || *it != history_set)
|
if (it == pg.target_history.end() || *it != history_set)
|
||||||
pg.target_history.insert(it, history_set);
|
pg.target_history.insert(it, history_set);
|
||||||
pg.history_changed = true;
|
|
||||||
}
|
}
|
||||||
|
pg.history_changed = true;
|
||||||
report_pg_states();
|
report_pg_states();
|
||||||
resume_10:
|
resume_10:
|
||||||
if (pg.epoch > pg.reported_epoch)
|
if (pg.epoch > pg.reported_epoch)
|
||||||
|
@@ -945,7 +945,7 @@ void calc_rmw_parity_ec(osd_rmw_stripe_t *stripes, int pg_size, int pg_minsize,
|
|||||||
{
|
{
|
||||||
if (write_osd_set[i])
|
if (write_osd_set[i])
|
||||||
{
|
{
|
||||||
memcpy(subm + item_size*pg_minsize*j, matrix_data + item_size*pg_minsize*(i-pg_minsize), item_size*pg_minsize);
|
memcpy((uint8_t*)subm + item_size*pg_minsize*j, (uint8_t*)matrix_data + item_size*pg_minsize*(i-pg_minsize), item_size*pg_minsize);
|
||||||
j++;
|
j++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -53,6 +53,7 @@ typedef struct VitastorClient
|
|||||||
char *etcd_host;
|
char *etcd_host;
|
||||||
char *etcd_prefix;
|
char *etcd_prefix;
|
||||||
char *image;
|
char *image;
|
||||||
|
int skip_parents;
|
||||||
uint64_t inode;
|
uint64_t inode;
|
||||||
uint64_t pool;
|
uint64_t pool;
|
||||||
uint64_t size;
|
uint64_t size;
|
||||||
@@ -63,6 +64,10 @@ typedef struct VitastorClient
|
|||||||
int rdma_gid_index;
|
int rdma_gid_index;
|
||||||
int rdma_mtu;
|
int rdma_mtu;
|
||||||
QemuMutex mutex;
|
QemuMutex mutex;
|
||||||
|
|
||||||
|
uint64_t last_bitmap_inode, last_bitmap_offset, last_bitmap_len;
|
||||||
|
uint32_t last_bitmap_granularity;
|
||||||
|
uint8_t *last_bitmap;
|
||||||
} VitastorClient;
|
} VitastorClient;
|
||||||
|
|
||||||
typedef struct VitastorRPC
|
typedef struct VitastorRPC
|
||||||
@@ -72,6 +77,9 @@ typedef struct VitastorRPC
|
|||||||
QEMUIOVector *iov;
|
QEMUIOVector *iov;
|
||||||
long ret;
|
long ret;
|
||||||
int complete;
|
int complete;
|
||||||
|
uint64_t inode, offset, len;
|
||||||
|
uint32_t bitmap_granularity;
|
||||||
|
uint8_t *bitmap;
|
||||||
} VitastorRPC;
|
} VitastorRPC;
|
||||||
|
|
||||||
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
static void vitastor_co_init_task(BlockDriverState *bs, VitastorRPC *task);
|
||||||
@@ -147,6 +155,7 @@ static void vitastor_parse_filename(const char *filename, QDict *options, Error
|
|||||||
if (!strcmp(name, "inode") ||
|
if (!strcmp(name, "inode") ||
|
||||||
!strcmp(name, "pool") ||
|
!strcmp(name, "pool") ||
|
||||||
!strcmp(name, "size") ||
|
!strcmp(name, "size") ||
|
||||||
|
!strcmp(name, "skip-parents") ||
|
||||||
!strcmp(name, "use-rdma") ||
|
!strcmp(name, "use-rdma") ||
|
||||||
!strcmp(name, "rdma-port_num") ||
|
!strcmp(name, "rdma-port_num") ||
|
||||||
!strcmp(name, "rdma-gid-index") ||
|
!strcmp(name, "rdma-gid-index") ||
|
||||||
@@ -227,13 +236,16 @@ static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandle
|
|||||||
|
|
||||||
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||||
{
|
{
|
||||||
|
VitastorRPC task;
|
||||||
VitastorClient *client = bs->opaque;
|
VitastorClient *client = bs->opaque;
|
||||||
|
void *image = NULL;
|
||||||
int64_t ret = 0;
|
int64_t ret = 0;
|
||||||
qemu_mutex_init(&client->mutex);
|
qemu_mutex_init(&client->mutex);
|
||||||
client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
|
client->config_path = g_strdup(qdict_get_try_str(options, "config-path"));
|
||||||
// FIXME: Rename to etcd_address
|
// FIXME: Rename to etcd_address
|
||||||
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
|
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd-host"));
|
||||||
client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
|
client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd-prefix"));
|
||||||
|
client->skip_parents = qdict_get_try_int(options, "skip-parents", 0);
|
||||||
client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
|
client->use_rdma = qdict_get_try_int(options, "use-rdma", -1);
|
||||||
client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
|
client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma-device"));
|
||||||
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
client->rdma_port_num = qdict_get_try_int(options, "rdma-port-num", 0);
|
||||||
@@ -243,23 +255,25 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|||||||
vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||||
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||||
);
|
);
|
||||||
client->image = g_strdup(qdict_get_try_str(options, "image"));
|
image = client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||||
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
client->readonly = (flags & BDRV_O_RDWR) ? 1 : 0;
|
||||||
|
// Get image metadata (size and readonly flag) or just wait until the client is ready
|
||||||
|
if (!image)
|
||||||
|
client->image = (char*)"x";
|
||||||
|
task.complete = 0;
|
||||||
|
task.bs = bs;
|
||||||
|
if (qemu_in_coroutine())
|
||||||
|
{
|
||||||
|
vitastor_co_get_metadata(&task);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
||||||
|
BDRV_POLL_WHILE(bs, !task.complete);
|
||||||
|
}
|
||||||
|
client->image = image;
|
||||||
if (client->image)
|
if (client->image)
|
||||||
{
|
{
|
||||||
// Get image metadata (size and readonly flag)
|
|
||||||
VitastorRPC task;
|
|
||||||
task.complete = 0;
|
|
||||||
task.bs = bs;
|
|
||||||
if (qemu_in_coroutine())
|
|
||||||
{
|
|
||||||
vitastor_co_get_metadata(&task);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
|
||||||
BDRV_POLL_WHILE(bs, !task.complete);
|
|
||||||
}
|
|
||||||
client->watch = (void*)task.ret;
|
client->watch = (void*)task.ret;
|
||||||
client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
|
client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
|
||||||
client->size = vitastor_c_inode_get_size(client->watch);
|
client->size = vitastor_c_inode_get_size(client->watch);
|
||||||
@@ -284,6 +298,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|||||||
client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
|
client->inode = (client->inode & (((uint64_t)1 << (64-POOL_ID_BITS)) - 1)) | (client->pool << (64-POOL_ID_BITS));
|
||||||
}
|
}
|
||||||
client->size = qdict_get_try_int(options, "size", 0);
|
client->size = qdict_get_try_int(options, "size", 0);
|
||||||
|
vitastor_c_close_watch(client->proxy, (void*)task.ret);
|
||||||
}
|
}
|
||||||
if (!client->size)
|
if (!client->size)
|
||||||
{
|
{
|
||||||
@@ -305,6 +320,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
|||||||
qdict_del(options, "inode");
|
qdict_del(options, "inode");
|
||||||
qdict_del(options, "pool");
|
qdict_del(options, "pool");
|
||||||
qdict_del(options, "size");
|
qdict_del(options, "size");
|
||||||
|
qdict_del(options, "skip-parents");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -321,6 +337,8 @@ static void vitastor_close(BlockDriverState *bs)
|
|||||||
g_free(client->etcd_prefix);
|
g_free(client->etcd_prefix);
|
||||||
if (client->image)
|
if (client->image)
|
||||||
g_free(client->image);
|
g_free(client->image);
|
||||||
|
free(client->last_bitmap);
|
||||||
|
client->last_bitmap = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
|
||||||
@@ -486,6 +504,13 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
|||||||
vitastor_co_init_task(bs, &task);
|
vitastor_co_init_task(bs, &task);
|
||||||
task.iov = iov;
|
task.iov = iov;
|
||||||
|
|
||||||
|
if (client->last_bitmap)
|
||||||
|
{
|
||||||
|
// Invalidate last bitmap on write
|
||||||
|
free(client->last_bitmap);
|
||||||
|
client->last_bitmap = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
|
||||||
qemu_mutex_lock(&client->mutex);
|
qemu_mutex_lock(&client->mutex);
|
||||||
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
vitastor_c_write(client->proxy, inode, offset, bytes, 0, iov->iov, iov->niov, vitastor_co_generic_bh_cb, &task);
|
||||||
@@ -499,6 +524,140 @@ static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
|||||||
return task.ret;
|
return task.ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
||||||
|
#if QEMU_VERSION_MAJOR >= 2 || QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7
|
||||||
|
static void vitastor_co_read_bitmap_cb(void *opaque, long retval, uint8_t *bitmap)
|
||||||
|
{
|
||||||
|
VitastorRPC *task = opaque;
|
||||||
|
VitastorClient *client = task->bs->opaque;
|
||||||
|
task->ret = retval;
|
||||||
|
task->complete = 1;
|
||||||
|
if (retval >= 0)
|
||||||
|
{
|
||||||
|
task->bitmap = bitmap;
|
||||||
|
if (client->last_bitmap_inode == task->inode &&
|
||||||
|
client->last_bitmap_offset == task->offset &&
|
||||||
|
client->last_bitmap_len == task->len)
|
||||||
|
{
|
||||||
|
free(client->last_bitmap);
|
||||||
|
client->last_bitmap = bitmap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (qemu_coroutine_self() != task->co)
|
||||||
|
{
|
||||||
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
|
||||||
|
aio_co_wake(task->co);
|
||||||
|
#else
|
||||||
|
qemu_coroutine_enter(task->co, NULL);
|
||||||
|
qemu_aio_release(task);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report whether the range starting at `offset` is allocated, using the inode allocation bitmap.
//   want_zero - true: return the precise extent that shares the state of the first granule;
//               false: treat the whole [offset, offset+bytes) range as one extent which is
//               reported as allocated if any of its granules is allocated
//   pnum      - receives the length of the reported extent in bytes
//   map, file - set to `offset` and `bs` only when the extent is allocated
// The bitmap last read is cached in the client and reused while it covers the request.
static int coroutine_fn vitastor_co_block_status(
    BlockDriverState *bs, bool want_zero, int64_t offset, int64_t bytes,
    int64_t *pnum, int64_t *map, BlockDriverState **file)
{
    // Allocated => return BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID
    // Not allocated => return 0
    // Error => return -errno
    // Set pnum to length of the extent, `*map` = `offset`, `*file` = `bs`
    VitastorRPC task;
    VitastorClient *client = bs->opaque;
    uint64_t inode = client->watch ? vitastor_c_inode_get_num(client->watch) : client->inode;
    uint8_t bit = 0;
    if (client->last_bitmap && client->last_bitmap_inode == inode &&
        client->last_bitmap_offset <= offset &&
        client->last_bitmap_offset+client->last_bitmap_len >= (want_zero ? offset+1 : offset+bytes))
    {
        // Use the previously read bitmap
        task.bitmap_granularity = client->last_bitmap_granularity;
        task.offset = client->last_bitmap_offset;
        task.len = client->last_bitmap_len;
        task.bitmap = client->last_bitmap;
    }
    else
    {
        // Read bitmap from this position, rounding to full inode PG blocks
        uint32_t block_size = vitastor_c_inode_get_block_size(client->proxy, inode);
        if (!block_size)
            return -EAGAIN;
        // Init coroutine
        vitastor_co_init_task(bs, &task);
        free(client->last_bitmap);
        task.inode = client->last_bitmap_inode = inode;
        task.bitmap_granularity = client->last_bitmap_granularity = vitastor_c_inode_get_bitmap_granularity(client->proxy, inode);
        task.offset = client->last_bitmap_offset = offset / block_size * block_size;
        task.len = client->last_bitmap_len = (offset+bytes+block_size-1) / block_size * block_size - task.offset;
        task.bitmap = client->last_bitmap = NULL;
        qemu_mutex_lock(&client->mutex);
        vitastor_c_read_bitmap(client->proxy, task.inode, task.offset, task.len, !client->skip_parents, vitastor_co_read_bitmap_cb, &task);
        qemu_mutex_unlock(&client->mutex);
        while (!task.complete)
        {
            qemu_coroutine_yield();
        }
        if (task.ret < 0)
        {
            // Error
            return task.ret;
        }
    }
    if (want_zero)
    {
        // Get precise mapping with all holes
        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
        uint64_t bmp_len = task.len / task.bitmap_granularity;
        uint64_t bmp_end = bmp_pos+1;
        bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
        // Extend the extent while the following granules have the same state
        while (bmp_end < bmp_len && ((task.bitmap[bmp_end >> 3] >> (bmp_end & 0x7)) & 1) == bit)
        {
            bmp_end++;
        }
        *pnum = (bmp_end-bmp_pos) * task.bitmap_granularity;
    }
    else
    {
        // Get larger allocated extents, possibly with false positives
        uint64_t bmp_pos = (offset-task.offset) / task.bitmap_granularity;
        // bmp_end is an absolute bit index (one past the last granule of the request), rounded up
        // so that a trailing partial granule is also checked. It must NOT be made relative to
        // bmp_pos because the loop below compares it with bmp_pos directly.
        uint64_t bmp_end = (offset+bytes-task.offset + task.bitmap_granularity-1) / task.bitmap_granularity;
        // Stop at the first set bit: one allocated granule is enough to mark the whole range
        while (!bit && bmp_pos < bmp_end)
        {
            if (!(bmp_pos & 7) && bmp_end >= bmp_pos+8)
            {
                // Byte-aligned position with at least 8 bits left: test a whole byte at once
                bit = task.bitmap[bmp_pos >> 3] != 0;
                bmp_pos += 8;
            }
            else
            {
                bit = (task.bitmap[bmp_pos >> 3] >> (bmp_pos & 0x7)) & 1;
                bmp_pos++;
            }
        }
        *pnum = bytes;
    }
    if (bit)
    {
        *map = offset;
        *file = bs;
    }
    return (bit ? (BDRV_BLOCK_DATA|BDRV_BLOCK_OFFSET_VALID) : 0);
}
|
||||||
|
#endif
|
||||||
|
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
||||||
|
// QEMU 1.7-2.11
|
||||||
|
// Sector-based adapter over vitastor_co_block_status() for the older block driver API.
// Converts the sector range to bytes, always requests the precise mapping (want_zero = 1)
// and converts the resulting extent length back to sectors in *pnum.
// Returns whatever vitastor_co_block_status() returns.
static int64_t coroutine_fn vitastor_co_get_block_status(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
    int64_t map = 0;
    int64_t pnumbytes = 0;
    // `file` is already a BlockDriverState** output parameter: forward it as is so that the
    // byte-based helper stores `bs` into the caller's variable, not into our local copy
    // NOTE(review): `map` is discarded - confirm whether this API generation expects the offset
    // to be encoded in the return value when BDRV_BLOCK_OFFSET_VALID is set
    int r = vitastor_co_block_status(bs, 1, sector_num*BDRV_SECTOR_SIZE, nb_sectors*BDRV_SECTOR_SIZE, &pnumbytes, &map, file);
    *pnum = pnumbytes/BDRV_SECTOR_SIZE;
    return r;
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
|
#if !( QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7 )
|
||||||
static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
|
static int coroutine_fn vitastor_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov)
|
||||||
{
|
{
|
||||||
@@ -606,6 +765,15 @@ static BlockDriver bdrv_vitastor = {
|
|||||||
.bdrv_co_truncate = vitastor_co_truncate,
|
.bdrv_co_truncate = vitastor_co_truncate,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined VITASTOR_C_API_VERSION && VITASTOR_C_API_VERSION >= 1
|
||||||
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 12
|
||||||
|
// For snapshot export
|
||||||
|
.bdrv_co_block_status = vitastor_co_block_status,
|
||||||
|
#elif QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR >= 7 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR < 12
|
||||||
|
.bdrv_co_get_block_status = vitastor_co_get_block_status,
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
|
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 7
|
||||||
.bdrv_co_preadv = vitastor_co_preadv,
|
.bdrv_co_preadv = vitastor_co_preadv,
|
||||||
.bdrv_co_pwritev = vitastor_co_pwritev,
|
.bdrv_co_pwritev = vitastor_co_pwritev,
|
||||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
|
|||||||
|
|
||||||
Name: Vitastor
|
Name: Vitastor
|
||||||
Description: Vitastor client library
|
Description: Vitastor client library
|
||||||
Version: 0.8.3
|
Version: 0.8.5
|
||||||
Libs: -L${libdir} -lvitastor_client
|
Libs: -L${libdir} -lvitastor_client
|
||||||
Cflags: -I${includedir}
|
Cflags: -I${includedir}
|
||||||
|
|
||||||
|
@@ -207,6 +207,28 @@ void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint6
|
|||||||
client->cli->execute(op);
|
client->cli->execute(op);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start an asynchronous read of the allocation bitmap for [offset, offset+len) of an inode.
// A non-zero with_parents selects OSD_OP_READ_CHAIN_BITMAP, zero selects OSD_OP_READ_BITMAP.
// When the operation finishes, cb(opaque, retval, bitmap) is called:
//   - retval >= 0: the bitmap buffer is detached from the operation and passed to cb,
//     so it is not owned by the operation object anymore when that object is deleted
//   - retval < 0: bitmap is NULL
void vitastor_c_read_bitmap(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
    int with_parents, VitastorReadBitmapHandler cb, void *opaque)
{
    cluster_op_t *bitmap_op = new cluster_op_t;
    if (with_parents)
    {
        bitmap_op->opcode = OSD_OP_READ_CHAIN_BITMAP;
    }
    else
    {
        bitmap_op->opcode = OSD_OP_READ_BITMAP;
    }
    bitmap_op->inode = inode;
    bitmap_op->offset = offset;
    bitmap_op->len = len;
    bitmap_op->callback = [cb, opaque](cluster_op_t *finished)
    {
        uint8_t *result = NULL;
        if (finished->retval >= 0)
        {
            // Hand the buffer over to the callback instead of leaving it attached to the operation
            result = (uint8_t*)finished->bitmap_buf;
            finished->bitmap_buf = NULL;
        }
        cb(opaque, finished->retval, result);
        delete finished;
    };
    client->cli->execute(bitmap_op);
}
|
||||||
|
|
||||||
void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque)
|
void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque)
|
||||||
{
|
{
|
||||||
cluster_op_t *op = new cluster_op_t;
|
cluster_op_t *op = new cluster_op_t;
|
||||||
@@ -245,6 +267,25 @@ uint64_t vitastor_c_inode_get_num(void *handle)
|
|||||||
return watch->cfg.num;
|
return watch->cfg.num;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return data_block_size multiplied by the number of data chunks of the pool that holds the inode
// (1 for replicated pools, pg_size - parity_chunks otherwise).
// Returns 0 when the pool of the inode is not present in the client's pool configuration.
uint32_t vitastor_c_inode_get_block_size(vitastor_c *client, uint64_t inode_num)
{
    auto & pools = client->cli->st_cli.pool_config;
    auto found = pools.find(INODE_POOL(inode_num));
    if (found == pools.end())
    {
        return 0;
    }
    auto & cfg = found->second;
    uint32_t data_chunks = 1;
    if (cfg.scheme != POOL_SCHEME_REPLICATED)
    {
        data_chunks = cfg.pg_size-cfg.parity_chunks;
    }
    return cfg.data_block_size * data_chunks;
}
|
||||||
|
|
||||||
|
// Return the bitmap granularity configured for the pool that holds the inode,
// or 0 when that pool is not present in the client's pool configuration.
uint32_t vitastor_c_inode_get_bitmap_granularity(vitastor_c *client, uint64_t inode_num)
{
    auto & pools = client->cli->st_cli.pool_config;
    auto found = pools.find(INODE_POOL(inode_num));
    if (found != pools.end())
    {
        // FIXME: READ_BITMAP may fail if the parent bitmap granularity differs from the inode bitmap granularity
        return found->second.bitmap_granularity;
    }
    return 0;
}
|
||||||
|
|
||||||
int vitastor_c_inode_get_readonly(void *handle)
|
int vitastor_c_inode_get_readonly(void *handle)
|
||||||
{
|
{
|
||||||
inode_watch_t *watch = (inode_watch_t*)handle;
|
inode_watch_t *watch = (inode_watch_t*)handle;
|
||||||
|
@@ -6,6 +6,9 @@
|
|||||||
#ifndef VITASTOR_QEMU_PROXY_H
|
#ifndef VITASTOR_QEMU_PROXY_H
|
||||||
#define VITASTOR_QEMU_PROXY_H
|
#define VITASTOR_QEMU_PROXY_H
|
||||||
|
|
||||||
|
// C API wrapper version
|
||||||
|
#define VITASTOR_C_API_VERSION 1
|
||||||
|
|
||||||
#ifndef POOL_ID_BITS
|
#ifndef POOL_ID_BITS
|
||||||
#define POOL_ID_BITS 16
|
#define POOL_ID_BITS 16
|
||||||
#endif
|
#endif
|
||||||
@@ -21,6 +24,7 @@ typedef struct vitastor_c vitastor_c;
|
|||||||
|
|
||||||
typedef void VitastorReadHandler(void *opaque, long retval, uint64_t version);
|
typedef void VitastorReadHandler(void *opaque, long retval, uint64_t version);
|
||||||
typedef void VitastorIOHandler(void *opaque, long retval);
|
typedef void VitastorIOHandler(void *opaque, long retval);
|
||||||
|
typedef void VitastorReadBitmapHandler(void *opaque, long retval, uint8_t *bitmap);
|
||||||
|
|
||||||
// QEMU
|
// QEMU
|
||||||
typedef void IOHandler(void *opaque);
|
typedef void IOHandler(void *opaque);
|
||||||
@@ -42,11 +46,15 @@ void vitastor_c_read(vitastor_c *client, uint64_t inode, uint64_t offset, uint64
|
|||||||
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
|
struct iovec *iov, int iovcnt, VitastorReadHandler cb, void *opaque);
|
||||||
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
|
void vitastor_c_write(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len, uint64_t check_version,
|
||||||
struct iovec *iov, int iovcnt, VitastorIOHandler cb, void *opaque);
|
struct iovec *iov, int iovcnt, VitastorIOHandler cb, void *opaque);
|
||||||
|
void vitastor_c_read_bitmap(vitastor_c *client, uint64_t inode, uint64_t offset, uint64_t len,
|
||||||
|
int with_parents, VitastorReadBitmapHandler cb, void *opaque);
|
||||||
void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque);
|
void vitastor_c_sync(vitastor_c *client, VitastorIOHandler cb, void *opaque);
|
||||||
void vitastor_c_watch_inode(vitastor_c *client, char *image, VitastorIOHandler cb, void *opaque);
|
void vitastor_c_watch_inode(vitastor_c *client, char *image, VitastorIOHandler cb, void *opaque);
|
||||||
void vitastor_c_close_watch(vitastor_c *client, void *handle);
|
void vitastor_c_close_watch(vitastor_c *client, void *handle);
|
||||||
uint64_t vitastor_c_inode_get_size(void *handle);
|
uint64_t vitastor_c_inode_get_size(void *handle);
|
||||||
uint64_t vitastor_c_inode_get_num(void *handle);
|
uint64_t vitastor_c_inode_get_num(void *handle);
|
||||||
|
uint32_t vitastor_c_inode_get_block_size(vitastor_c *client, uint64_t inode_num);
|
||||||
|
uint32_t vitastor_c_inode_get_bitmap_granularity(vitastor_c *client, uint64_t inode_num);
|
||||||
int vitastor_c_inode_get_readonly(void *handle);
|
int vitastor_c_inode_get_readonly(void *handle);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@@ -22,6 +22,16 @@ LD_PRELOAD="build/src/libfio_vitastor.so" \
|
|||||||
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
LD_PRELOAD="build/src/libfio_vitastor.so" \
|
||||||
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -rw=read -etcd=$ETCD_URL -pool=1 -inode=3 -size=32M
|
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -rw=read -etcd=$ETCD_URL -pool=1 -inode=3 -size=32M
|
||||||
|
|
||||||
|
qemu-img convert -p \
|
||||||
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=2:size=$((32*1024*1024)):skip-parents=1" \
|
||||||
|
-O qcow2 ./testdata/layer0.qcow2
|
||||||
|
|
||||||
|
qemu-img create -f qcow2 ./testdata/empty.qcow2 32M
|
||||||
|
|
||||||
|
qemu-img convert -p \
|
||||||
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024)):skip-parents=1" \
|
||||||
|
-O qcow2 -o 'cluster_size=4k' -B empty.qcow2 ./testdata/layer1.qcow2
|
||||||
|
|
||||||
qemu-img convert -S 4096 -p \
|
qemu-img convert -S 4096 -p \
|
||||||
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:pool=1:inode=3:size=$((32*1024*1024))" \
|
||||||
-O raw ./testdata/merged.bin
|
-O raw ./testdata/merged.bin
|
||||||
@@ -52,4 +62,18 @@ qemu-img convert -S 4096 -p \
|
|||||||
|
|
||||||
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
|
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
|
||||||
|
|
||||||
|
# Test merge by qemu-img
|
||||||
|
|
||||||
|
qemu-img rebase -u -b layer0.qcow2 ./testdata/layer1.qcow2
|
||||||
|
|
||||||
|
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
||||||
|
|
||||||
|
cmp ./testdata/merged.bin ./testdata/rebased.bin
|
||||||
|
|
||||||
|
qemu-img rebase -u -b '' ./testdata/layer1.qcow2
|
||||||
|
|
||||||
|
qemu-img convert -S 4096 -f qcow2 ./testdata/layer1.qcow2 -O raw ./testdata/rebased.bin
|
||||||
|
|
||||||
|
cmp ./testdata/layer1.bin ./testdata/rebased.bin
|
||||||
|
|
||||||
format_green OK
|
format_green OK
|
||||||
|
Reference in New Issue
Block a user