forked from vitalif/vitastor
Compare commits
20 Commits
| SHA1 |
|---|
| 5ef8bed75f |
| 8669998e5e |
| b457327e77 |
| f7fa9d5e34 |
| 49b88b01f9 |
| 71688bcb59 |
| 552e207d2b |
| 5464821fa5 |
| 6917a32ca8 |
| f8722a8bd5 |
| 9c2f69c9fa |
| 1a93e3f33a |
| 3f35744052 |
| 66f14ac019 |
| 1364009931 |
| d7e30b8353 |
| cb437913d3 |
| 472bce58ab |
| 7a71e7ef01 |
| c71e5e7bbd |
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)
 
 project(vitastor)
 
-set(VERSION "0.8.1")
+set(VERSION "0.8.2")
 
 add_subdirectory(src)
@@ -1,4 +1,4 @@
-VERSION ?= v0.8.1
+VERSION ?= v0.8.2
 
 all: build push
 
@@ -49,7 +49,7 @@ spec:
 capabilities:
 add: ["SYS_ADMIN"]
 allowPrivilegeEscalation: true
-image: vitalif/vitastor-csi:v0.8.1
+image: vitalif/vitastor-csi:v0.8.2
 args:
 - "--node=$(NODE_ID)"
 - "--endpoint=$(CSI_ENDPOINT)"
@@ -116,7 +116,7 @@ spec:
 privileged: true
 capabilities:
 add: ["SYS_ADMIN"]
-image: vitalif/vitastor-csi:v0.8.1
+image: vitalif/vitastor-csi:v0.8.2
 args:
 - "--node=$(NODE_ID)"
 - "--endpoint=$(CSI_ENDPOINT)"
@@ -5,7 +5,7 @@ package vitastor
 
 const (
 vitastorCSIDriverName = "csi.vitastor.io"
-vitastorCSIDriverVersion = "0.8.1"
+vitastorCSIDriverVersion = "0.8.2"
 )
 
 // Config struct fills the parameters of request or user input
4 debian/changelog (vendored)
@@ -1,10 +1,10 @@
-vitastor (0.8.1-1) unstable; urgency=medium
+vitastor (0.8.2-1) unstable; urgency=medium
 
 * Bugfixes
 
 -- Vitaliy Filippov <vitalif@yourcmc.ru> Fri, 03 Jun 2022 02:09:44 +0300
 
-vitastor (0.8.1-1) unstable; urgency=medium
+vitastor (0.8.2-1) unstable; urgency=medium
 
 * Implement NFS proxy
 * Add documentation
2 debian/pve-storage-vitastor.install (vendored)
@@ -1 +1 @@
-patches/PVE_VitastorPlugin.pm usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm
+patches/VitastorPlugin.pm usr/share/perl5/PVE/Storage/Custom/
8 debian/vitastor.Dockerfile (vendored)
@@ -34,8 +34,8 @@ RUN set -e -x; \
 mkdir -p /root/packages/vitastor-$REL; \
 rm -rf /root/packages/vitastor-$REL/*; \
 cd /root/packages/vitastor-$REL; \
-cp -r /root/vitastor vitastor-0.8.1; \
-cd vitastor-0.8.1; \
+cp -r /root/vitastor vitastor-0.8.2; \
+cd vitastor-0.8.2; \
 ln -s /root/fio-build/fio-*/ ./fio; \
 FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
 ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
@@ -48,8 +48,8 @@ RUN set -e -x; \
 rm -rf a b; \
 echo "dep:fio=$FIO" > debian/fio_version; \
 cd /root/packages/vitastor-$REL; \
-tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.1.orig.tar.xz vitastor-0.8.1; \
-cd vitastor-0.8.1; \
+tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.8.2.orig.tar.xz vitastor-0.8.2; \
+cd vitastor-0.8.2; \
 V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
 DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
 DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
@@ -6,10 +6,10 @@
 
 # Proxmox VE
 
-To enable Vitastor support in Proxmox Virtual Environment (6.4 and 7.1 are supported):
+To enable Vitastor support in Proxmox Virtual Environment (6.4-7.3 are supported):
 
-- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts
-  (buster for 6.4, bullseye for 7.1)
+- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
+  buster for 6.4, bullseye for 7.3, pve7.1 for 7.1, pve7.2 for 7.2
 - Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
 - Define storage in `/etc/pve/storage.cfg` (see below)
 - Block network access from VMs to Vitastor network (to OSDs and etcd),
@@ -35,5 +35,5 @@ vitastor: vitastor
 vitastor_nbd 0
 ```
 
-\* Note: you can also manually copy [patches/PVE_VitastorPlugin.pm](patches/PVE_VitastorPlugin.pm) to Proxmox hosts
+\* Note: you can also manually copy [patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to Proxmox hosts
 as `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm` instead of installing pve-storage-vitastor.
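The bullet list above compresses the whole install into one step each, so here is a minimal sketch of what it amounts to on a PVE 7.3 / Debian bullseye host. The package names come from the README text above; the repository URL and the omission of key setup are assumptions, so verify both against the Vitastor installation documentation:

```
# Minimal sketch for Proxmox VE 7.3 (Debian bullseye).
# The repository URL is an assumption and GPG key setup is omitted —
# check the Vitastor installation docs for the authoritative instructions.
echo 'deb https://vitastor.io/debian bullseye main' > /etc/apt/sources.list.d/vitastor.list
apt-get update
# Package names as listed in the README section above:
apt-get install -y vitastor-client pve-qemu-kvm pve-storage-vitastor
```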
@@ -6,10 +6,10 @@
 
 # Proxmox
 
-To connect Vitastor to Proxmox Virtual Environment (versions 6.4 and 7.1 are supported):
+To connect Vitastor to Proxmox Virtual Environment (versions 6.4-7.3 are supported):
 
-- Add the corresponding Vitastor Debian repository into sources.list on the Proxmox hosts
-  (buster for 6.4, bullseye for 7.1)
+- Add the corresponding Vitastor Debian repository into sources.list on the Proxmox hosts:
+  buster for 6.4, bullseye for 7.3, pve7.1 for 7.1, pve7.2 for 7.2
 - Install the vitastor-client, pve-qemu-kvm and pve-storage-vitastor packages (* or see the note) from the Vitastor repository
 - Define the storage type in `/etc/pve/storage.cfg` (see below)
 - Be sure to block access from virtual machines to the Vitastor network (OSDs and etcd), because Vitastor does not (yet) support authentication
@@ -35,5 +35,5 @@ vitastor: vitastor
 ```
 
 \* Note: instead of installing the pve-storage-vitastor package, you can manually copy the file
-[patches/PVE_VitastorPlugin.pm](patches/PVE_VitastorPlugin.pm) to the Proxmox hosts as
+[patches/VitastorPlugin.pm](patches/VitastorPlugin.pm) to the Proxmox hosts as
 `/usr/share/perl5/PVE/Storage/Custom/VitastorPlugin.pm`.
@@ -50,7 +50,7 @@ from cinder.volume import configuration
 from cinder.volume import driver
 from cinder.volume import volume_utils
 
-VERSION = '0.8.1'
+VERSION = '0.8.2'
 
 LOG = logging.getLogger(__name__)
 
169 patches/pve-qemu-6.2-vitastor.patch (new file)
@@ -0,0 +1,169 @@
|
||||
Index: qemu/block/meson.build
|
||||
===================================================================
|
||||
--- qemu.orig/block/meson.build
|
||||
+++ qemu/block/meson.build
|
||||
@@ -91,6 +91,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
Index: qemu/meson.build
|
||||
===================================================================
|
||||
--- qemu.orig/meson.build
|
||||
+++ qemu/meson.build
|
||||
@@ -838,6 +838,26 @@ if not get_option('rbd').auto() or have_
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -1459,6 +1479,7 @@ config_host_data.set('CONFIG_LINUX_AIO',
|
||||
config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
|
||||
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||
config_host_data.set('CONFIG_SECCOMP', seccomp.found())
|
||||
@@ -3424,6 +3445,7 @@ if spice_protocol.found()
|
||||
summary_info += {' spice server support': spice}
|
||||
endif
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'xfsctl support': config_host.has_key('CONFIG_XFS')}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
Index: qemu/meson_options.txt
|
||||
===================================================================
|
||||
--- qemu.orig/meson_options.txt
|
||||
+++ qemu/meson_options.txt
|
||||
@@ -121,6 +121,8 @@ option('lzo', type : 'feature', value :
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('gtk', type : 'feature', value : 'auto',
|
||||
description: 'GTK+ user interface')
|
||||
option('sdl', type : 'feature', value : 'auto',
|
||||
Index: qemu/qapi/block-core.json
|
||||
===================================================================
|
||||
--- qemu.orig/qapi/block-core.json
|
||||
+++ qemu/qapi/block-core.json
|
||||
@@ -3179,7 +3179,7 @@
|
||||
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
'pbs',
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
|
||||
|
||||
##
|
||||
# @BlockdevOptionsFile:
|
||||
@@ -4125,6 +4125,28 @@
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -4520,6 +4542,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'vmdk': 'BlockdevOptionsGenericCOWFormat',
|
||||
'vpc': 'BlockdevOptionsGenericFormat',
|
||||
'vvfat': 'BlockdevOptionsVVFAT'
|
||||
@@ -4910,6 +4933,17 @@
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -5108,6 +5142,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
Index: qemu/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
===================================================================
|
||||
--- qemu.orig/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ qemu/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -31,7 +31,7 @@
|
||||
--with-git=meson \
|
||||
--with-git-submodules=update \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -183,6 +183,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
169 patches/pve-qemu-7.1-vitastor.patch (new file)
@@ -0,0 +1,169 @@
|
||||
Index: qemu/block/meson.build
|
||||
===================================================================
|
||||
--- qemu.orig/block/meson.build
|
||||
+++ qemu/block/meson.build
|
||||
@@ -111,6 +111,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
Index: qemu/meson.build
|
||||
===================================================================
|
||||
--- qemu.orig/meson.build
|
||||
+++ qemu/meson.build
|
||||
@@ -967,6 +967,26 @@ if not get_option('rbd').auto() or have_
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'), kwargs: static_kwargs)
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -1802,6 +1822,7 @@ config_host_data.set('CONFIG_NUMA', numa
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_SDL', sdl.found())
|
||||
config_host_data.set('CONFIG_SDL_IMAGE', sdl_image.found())
|
||||
@@ -3965,6 +3986,7 @@ if spice_protocol.found()
|
||||
summary_info += {' spice server support': spice}
|
||||
endif
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
Index: qemu/meson_options.txt
|
||||
===================================================================
|
||||
--- qemu.orig/meson_options.txt
|
||||
+++ qemu/meson_options.txt
|
||||
@@ -167,6 +167,8 @@ option('lzo', type : 'feature', value :
|
||||
description: 'lzo compression support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
Index: qemu/qapi/block-core.json
|
||||
===================================================================
|
||||
--- qemu.orig/qapi/block-core.json
|
||||
+++ qemu/qapi/block-core.json
|
||||
@@ -3209,7 +3209,7 @@
|
||||
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
'pbs',
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor', 'vmdk', 'vpc', 'vvfat' ] }
|
||||
|
||||
##
|
||||
# @BlockdevOptionsFile:
|
||||
@@ -4149,6 +4149,28 @@
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -4593,6 +4615,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'vmdk': 'BlockdevOptionsGenericCOWFormat',
|
||||
'vpc': 'BlockdevOptionsGenericFormat',
|
||||
'vvfat': 'BlockdevOptionsVVFAT'
|
||||
@@ -4985,6 +5008,17 @@
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -5182,6 +5216,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
Index: qemu/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
===================================================================
|
||||
--- qemu.orig/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
+++ qemu/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||
@@ -31,7 +31,7 @@
|
||||
--with-git=meson \
|
||||
--with-git-submodules=update \
|
||||
--target-list="x86_64-softmmu" \
|
||||
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||
--audio-drv-list="" \
|
||||
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||
--with-coroutine=ucontext \
|
||||
@@ -179,6 +179,7 @@
|
||||
--enable-opengl \
|
||||
--enable-pie \
|
||||
--enable-rbd \
|
||||
+--enable-vitastor \
|
||||
--enable-rdma \
|
||||
--enable-seccomp \
|
||||
--enable-snappy \
|
@@ -9,7 +9,7 @@ for i in "$DIR"/qemu-*-vitastor.patch "$DIR"/pve-qemu-*-vitastor.patch; do
 echo '===================================================================' >> $i
 echo '--- /dev/null' >> $i
 echo '+++ a/block/vitastor.c' >> $i
-echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/qemu_driver.c)' @@' >> $i
+echo '@@ -0,0 +1,'$(wc -l "$DIR"/../src/qemu_driver.c | cut -d ' ' -f 1)' @@' >> $i
 cat "$DIR"/../src/qemu_driver.c | sed 's/^/+/' >> $i
 fi
 done
@@ -25,4 +25,4 @@ rm fio
 mv fio-copy fio
 FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
 perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
-tar --transform 's#^#vitastor-0.8.1/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.1$(rpm --eval '%dist').tar.gz *
+tar --transform 's#^#vitastor-0.8.2/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.8.2$(rpm --eval '%dist').tar.gz *
@@ -58,7 +58,7 @@
 +BuildRequires: gperftools-devel
 +BuildRequires: libusbx-devel >= 1.0.21
 %if %{have_usbredir}
-BuildRequires: usbredir-devel >= 0.8.1
+BuildRequires: usbredir-devel >= 0.8.2
 %endif
 @@ -856,12 +861,13 @@ BuildRequires: virglrenderer-devel
 # For smartcard NSS support
@@ -35,7 +35,7 @@ ADD . /root/vitastor
 RUN set -e; \
 cd /root/vitastor/rpm; \
 sh build-tarball.sh; \
-cp /root/vitastor-0.8.1.el7.tar.gz ~/rpmbuild/SOURCES; \
+cp /root/vitastor-0.8.2.el7.tar.gz ~/rpmbuild/SOURCES; \
 cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
 cd ~/rpmbuild/SPECS/; \
 rpmbuild -ba vitastor.spec; \
@@ -1,11 +1,11 @@
 Name: vitastor
-Version: 0.8.1
+Version: 0.8.2
 Release: 1%{?dist}
 Summary: Vitastor, a fast software-defined clustered block storage
 
 License: Vitastor Network Public License 1.1
 URL: https://vitastor.io/
-Source0: vitastor-0.8.1.el7.tar.gz
+Source0: vitastor-0.8.2.el7.tar.gz
 
 BuildRequires: liburing-devel >= 0.6
 BuildRequires: gperftools-devel
@@ -35,7 +35,7 @@ ADD . /root/vitastor
 RUN set -e; \
 cd /root/vitastor/rpm; \
 sh build-tarball.sh; \
-cp /root/vitastor-0.8.1.el8.tar.gz ~/rpmbuild/SOURCES; \
+cp /root/vitastor-0.8.2.el8.tar.gz ~/rpmbuild/SOURCES; \
 cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
 cd ~/rpmbuild/SPECS/; \
 rpmbuild -ba vitastor.spec; \
@@ -1,11 +1,11 @@
 Name: vitastor
-Version: 0.8.1
+Version: 0.8.2
 Release: 1%{?dist}
 Summary: Vitastor, a fast software-defined clustered block storage
 
 License: Vitastor Network Public License 1.1
 URL: https://vitastor.io/
-Source0: vitastor-0.8.1.el8.tar.gz
+Source0: vitastor-0.8.2.el8.tar.gz
 
 BuildRequires: liburing-devel >= 0.6
 BuildRequires: gperftools-devel
@@ -15,7 +15,7 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVERSION="0.8.1")
|
||||
add_definitions(-DVERSION="0.8.2")
|
||||
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
|
||||
if (${WITH_ASAN})
|
||||
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
|
||||
|
@@ -35,24 +35,14 @@ journal_flusher_co::journal_flusher_co()
|
||||
{
|
||||
bs->live = true;
|
||||
if (data->res != data->iov.iov_len)
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"data read operation failed during flush ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
|
||||
"). can't continue, sorry :-("
|
||||
);
|
||||
}
|
||||
bs->disk_error_abort("read operation during flush", data->res, data->iov.iov_len);
|
||||
wait_count--;
|
||||
};
|
||||
simple_callback_w = [this](ring_data_t* data)
|
||||
{
|
||||
bs->live = true;
|
||||
if (data->res != data->iov.iov_len)
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"write operation failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
|
||||
"). state "+std::to_string(wait_state)+". in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
|
||||
);
|
||||
}
|
||||
bs->disk_error_abort("write operation during flush", data->res, data->iov.iov_len);
|
||||
wait_count--;
|
||||
};
|
||||
}
|
||||
@@ -306,6 +296,8 @@ bool journal_flusher_co::loop()
|
||||
goto resume_20;
|
||||
else if (wait_state == 21)
|
||||
goto resume_21;
|
||||
else if (wait_state == 22)
|
||||
goto resume_22;
|
||||
resume_0:
|
||||
if (flusher->flush_queue.size() < flusher->min_flusher_count && !flusher->trim_wanted ||
|
||||
!flusher->flush_queue.size() || !flusher->dequeuing)
|
||||
@@ -511,6 +503,13 @@ resume_1:
|
||||
);
|
||||
wait_count++;
|
||||
}
|
||||
// Wait for data writes before fsyncing it
|
||||
resume_22:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = 22;
|
||||
return false;
|
||||
}
|
||||
// Sync data before writing metadata
|
||||
resume_16:
|
||||
resume_17:
|
||||
@@ -521,7 +520,7 @@ resume_1:
|
||||
return false;
|
||||
}
|
||||
resume_5:
|
||||
// And metadata writes, but only after data writes complete
|
||||
// Submit metadata writes, but only when data is written and fsynced
|
||||
if (!bs->inmemory_meta && meta_new.it->second.state == 0 || wait_count > 0)
|
||||
{
|
||||
// metadata sector is still being read or data is still being written, wait for it
|
||||
@@ -616,7 +615,8 @@ resume_1:
|
||||
for (it = v.begin(); it != v.end(); it++)
|
||||
{
|
||||
// Free it if it's not taken from the journal
|
||||
if (it->buf && (!bs->journal.inmemory || it->buf < bs->journal.buffer || it->buf >= bs->journal.buffer + bs->journal.len))
|
||||
if (it->buf && (!bs->journal.inmemory || it->buf < bs->journal.buffer ||
|
||||
it->buf >= (uint8_t*)bs->journal.buffer + bs->journal.len))
|
||||
{
|
||||
free(it->buf);
|
||||
}
|
||||
|
@@ -306,17 +306,6 @@ void blockstore_impl_t::check_wait(blockstore_op_t *op)
|
||||
// do not submit
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("Still waiting for a journal buffer\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
PRIV(op)->wait_for = 0;
|
||||
}
|
||||
else if (PRIV(op)->wait_for == WAIT_FREE)
|
||||
{
|
||||
if (!data_alloc->get_free_count() && flusher->is_active())
|
||||
{
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("Still waiting for free space on the data device\n");
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
@@ -687,3 +676,16 @@ void blockstore_impl_t::dump_diagnostics()
|
||||
journal.dump_diagnostics();
|
||||
flusher->dump_diagnostics();
|
||||
}
|
||||
|
||||
void blockstore_impl_t::disk_error_abort(const char *op, int retval, int expected)
|
||||
{
|
||||
if (retval == -EAGAIN)
|
||||
{
|
||||
fprintf(stderr, "EAGAIN error received from a disk %s during flush."
|
||||
" It must never happen with io_uring and indicates a kernel bug."
|
||||
" Please upgrade your kernel. Aborting.\n", op);
|
||||
exit(1);
|
||||
}
|
||||
fprintf(stderr, "Disk %s failed: result is %d, expected %d. Can't continue, sorry :-(\n", op, retval, expected);
|
||||
exit(1);
|
||||
}
|
||||
|
@@ -160,8 +160,6 @@ struct __attribute__((__packed__)) dirty_entry
|
||||
#define WAIT_JOURNAL 3
|
||||
// Suspend operation until the next journal sector buffer is free
|
||||
#define WAIT_JOURNAL_BUFFER 4
|
||||
// Suspend operation until there is some free space on the data device
|
||||
#define WAIT_FREE 5
|
||||
|
||||
struct fulfill_read_t
|
||||
{
|
||||
@@ -294,6 +292,7 @@ class blockstore_impl_t
|
||||
// Journaling
|
||||
void prepare_journal_sector_write(int sector, blockstore_op_t *op);
|
||||
void handle_journal_write(ring_data_t *data, uint64_t flush_id);
|
||||
void disk_error_abort(const char *op, int retval, int expected);
|
||||
|
||||
// Asynchronous init
|
||||
int initialized;
|
||||
|
@@ -48,14 +48,12 @@ void blockstore_init_meta::handle_event(ring_data_t *data, int buf_num)
|
||||
|
||||
int blockstore_init_meta::loop()
|
||||
{
|
||||
if (wait_state == 1)
|
||||
goto resume_1;
|
||||
else if (wait_state == 2)
|
||||
goto resume_2;
|
||||
else if (wait_state == 3)
|
||||
goto resume_3;
|
||||
else if (wait_state == 4)
|
||||
goto resume_4;
|
||||
if (wait_state == 1) goto resume_1;
|
||||
else if (wait_state == 2) goto resume_2;
|
||||
else if (wait_state == 3) goto resume_3;
|
||||
else if (wait_state == 4) goto resume_4;
|
||||
else if (wait_state == 5) goto resume_5;
|
||||
else if (wait_state == 6) goto resume_6;
|
||||
printf("Reading blockstore metadata\n");
|
||||
if (bs->inmemory_meta)
|
||||
metadata_buffer = bs->metadata_buffer;
|
||||
@@ -140,6 +138,7 @@ resume_1:
|
||||
// Skip superblock
|
||||
md_offset = bs->dsk.meta_block_size;
|
||||
next_offset = md_offset;
|
||||
entries_per_block = bs->dsk.meta_block_size / bs->dsk.clean_entry_size;
|
||||
// Read the rest of the metadata
|
||||
resume_2:
|
||||
if (next_offset < bs->dsk.meta_len && submitted == 0)
|
||||
@@ -179,17 +178,15 @@ resume_2:
|
||||
if (bufs[i].state == INIT_META_READ_DONE)
|
||||
{
|
||||
// Handle result
|
||||
unsigned entries_per_block = bs->dsk.meta_block_size / bs->dsk.clean_entry_size;
|
||||
bool changed = false;
|
||||
for (uint64_t sector = 0; sector < bufs[i].size; sector += bs->dsk.meta_block_size)
|
||||
{
|
||||
// handle <count> entries
|
||||
changed = changed || handle_entries(
|
||||
bufs[i].buf + sector, entries_per_block,
|
||||
((bufs[i].offset + sector - md_offset) / bs->dsk.meta_block_size) * entries_per_block
|
||||
);
|
||||
if (handle_meta_block(bufs[i].buf + sector, entries_per_block,
|
||||
((bufs[i].offset + sector - md_offset) / bs->dsk.meta_block_size) * entries_per_block))
|
||||
changed = true;
|
||||
}
|
||||
if (changed && !bs->inmemory_meta)
|
||||
if (changed && !bs->inmemory_meta && !bs->readonly)
|
||||
{
|
||||
// write the modified buffer back
|
||||
GET_SQE();
|
||||
@@ -211,6 +208,43 @@ resume_2:
|
||||
wait_state = 2;
|
||||
return 1;
|
||||
}
|
||||
if (entries_to_zero.size() && !bs->inmemory_meta && !bs->readonly)
|
||||
{
|
||||
// we have to zero out additional entries
|
||||
for (i = 0; i < entries_to_zero.size(); )
|
||||
{
|
||||
next_offset = entries_to_zero[i]/entries_per_block;
|
||||
for (j = i; j < entries_to_zero.size() && entries_to_zero[j]/entries_per_block == next_offset; j++) {}
|
||||
GET_SQE();
|
||||
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
submitted++;
|
||||
resume_5:
|
||||
if (submitted > 0)
|
||||
{
|
||||
wait_state = 5;
|
||||
return 1;
|
||||
}
|
||||
for (; i < j; i++)
|
||||
{
|
||||
uint64_t pos = (entries_to_zero[i] % entries_per_block);
|
||||
memset((uint8_t*)metadata_buffer + pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
|
||||
}
|
||||
GET_SQE();
|
||||
data->iov = { metadata_buffer, bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
submitted++;
|
||||
resume_6:
|
||||
if (submitted > 0)
|
||||
{
|
||||
wait_state = 6;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
entries_to_zero.clear();
|
||||
}
|
||||
// metadata read finished
|
||||
printf("Metadata entries loaded: %lu, free blocks: %lu / %lu\n", entries_loaded, bs->data_alloc->get_free_count(), bs->dsk.block_count);
|
||||
if (!bs->inmemory_meta)
|
||||
@@ -236,10 +270,13 @@ resume_2:
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool blockstore_init_meta::handle_entries(uint8_t *buf, uint64_t count, uint64_t done_cnt)
|
||||
bool blockstore_init_meta::handle_meta_block(uint8_t *buf, uint64_t entries_per_block, uint64_t done_cnt)
|
||||
{
|
||||
bool updated = false;
|
||||
for (uint64_t i = 0; i < count; i++)
|
||||
uint64_t max_i = entries_per_block;
|
||||
if (max_i > bs->dsk.block_count-done_cnt)
|
||||
max_i = bs->dsk.block_count-done_cnt;
|
||||
for (uint64_t i = 0; i < max_i; i++)
|
||||
{
|
||||
clean_disk_entry *entry = (clean_disk_entry*)(buf + i*bs->dsk.clean_entry_size);
|
||||
if (!bs->inmemory_meta && bs->dsk.clean_entry_bitmap_size)
|
||||
@@ -255,17 +292,35 @@ bool blockstore_init_meta::handle_entries(uint8_t *buf, uint64_t count, uint64_t
|
||||
if (clean_it != clean_db.end())
|
||||
{
|
||||
// free the previous block
|
||||
// here we have to zero out the entry because otherwise we'll hit
|
||||
// here we have to zero out the previous entry because otherwise we'll hit
|
||||
// "tried to overwrite non-zero metadata entry" later
|
||||
updated = true;
|
||||
memset(entry, 0, bs->dsk.clean_entry_size);
|
||||
uint64_t old_clean_loc = clean_it->second.location >> bs->dsk.block_order;
|
||||
if (bs->inmemory_meta)
|
||||
{
|
||||
uint64_t sector = (old_clean_loc / entries_per_block) * bs->dsk.meta_block_size;
|
||||
uint64_t pos = (old_clean_loc % entries_per_block);
|
||||
clean_disk_entry *old_entry = (clean_disk_entry*)((uint8_t*)bs->metadata_buffer + sector + pos*bs->dsk.clean_entry_size);
|
||||
memset(old_entry, 0, bs->dsk.clean_entry_size);
|
||||
}
|
||||
else if (old_clean_loc >= done_cnt)
|
||||
{
|
||||
updated = true;
|
||||
uint64_t sector = ((old_clean_loc - done_cnt) / entries_per_block) * bs->dsk.meta_block_size;
|
||||
uint64_t pos = (old_clean_loc % entries_per_block);
|
||||
clean_disk_entry *old_entry = (clean_disk_entry*)(buf + sector + pos*bs->dsk.clean_entry_size);
|
||||
memset(old_entry, 0, bs->dsk.clean_entry_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
entries_to_zero.push_back(clean_it->second.location >> bs->dsk.block_order);
|
||||
}
|
||||
#ifdef BLOCKSTORE_DEBUG
|
||||
printf("Free block %lu from %lx:%lx v%lu (new location is %lu)\n",
|
||||
clean_it->second.location >> bs->dsk.block_order,
|
||||
old_clean_loc,
|
||||
clean_it->first.inode, clean_it->first.stripe, clean_it->second.version,
|
||||
done_cnt+i);
|
||||
#endif
|
||||
bs->data_alloc->set(clean_it->second.location >> bs->dsk.block_order, false);
|
||||
bs->data_alloc->set(old_clean_loc, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@@ -24,7 +24,10 @@ class blockstore_init_meta
|
||||
uint64_t md_offset = 0;
|
||||
uint64_t next_offset = 0;
|
||||
uint64_t entries_loaded = 0;
|
||||
bool handle_entries(uint8_t *buf, uint64_t count, uint64_t done_cnt);
|
||||
unsigned entries_per_block = 0;
|
||||
int i = 0, j = 0;
|
||||
std::vector<uint64_t> entries_to_zero;
|
||||
bool handle_meta_block(uint8_t *buf, uint64_t count, uint64_t done_cnt);
|
||||
void handle_event(ring_data_t *data, int buf_num);
|
||||
public:
|
||||
blockstore_init_meta(blockstore_impl_t *bs);
|
||||
|
@@ -198,10 +198,7 @@ void blockstore_impl_t::handle_journal_write(ring_data_t *data, uint64_t flush_i
 if (data->res != data->iov.iov_len)
 {
-// FIXME: our state becomes corrupted after a write error. maybe do something better than just die
-throw std::runtime_error(
-"journal write failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
-"). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
-);
+disk_error_abort("journal write", data->res, data->iov.iov_len);
 }
 auto fl_it = journal.flushing_ops.upper_bound((pending_journaling_t){ .flush_id = flush_id });
 if (fl_it != journal.flushing_ops.end() && fl_it->flush_id == flush_id)
@@ -261,12 +261,6 @@ int blockstore_impl_t::dequeue_write(blockstore_op_t *op)
|
||||
if (loc == UINT64_MAX)
|
||||
{
|
||||
// no space
|
||||
if (flusher->is_active())
|
||||
{
|
||||
// hope that some space will be available after flush
|
||||
PRIV(op)->wait_for = WAIT_FREE;
|
||||
return 0;
|
||||
}
|
||||
cancel_all_writes(op, dirty_it, -ENOSPC);
|
||||
return 2;
|
||||
}
|
||||
@@ -592,10 +586,7 @@ void blockstore_impl_t::handle_write_event(ring_data_t *data, blockstore_op_t *o
|
||||
if (data->res != data->iov.iov_len)
|
||||
{
|
||||
// FIXME: our state becomes corrupted after a write error. maybe do something better than just die
|
||||
throw std::runtime_error(
|
||||
"write operation failed ("+std::to_string(data->res)+" != "+std::to_string(data->iov.iov_len)+
|
||||
"). in-memory state is corrupted. AAAAAAAaaaaaaaaa!!!111"
|
||||
);
|
||||
disk_error_abort("data write", data->res, data->iov.iov_len);
|
||||
}
|
||||
PRIV(op)->pending_ops--;
|
||||
assert(PRIV(op)->pending_ops >= 0);
|
||||
|
@@ -941,7 +941,7 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
|
||||
.req = { .rw = {
|
||||
.header = {
|
||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||
.id = op_id++,
|
||||
.id = next_op_id(),
|
||||
.opcode = op->opcode == OSD_OP_READ_BITMAP ? OSD_OP_READ : op->opcode,
|
||||
},
|
||||
.inode = op->cur_inode,
|
||||
@@ -1069,7 +1069,7 @@ void cluster_client_t::send_sync(cluster_op_t *op, cluster_op_part_t *part)
|
||||
.req = {
|
||||
.hdr = {
|
||||
.magic = SECONDARY_OSD_OP_MAGIC,
|
||||
.id = op_id++,
|
||||
.id = next_op_id(),
|
||||
.opcode = OSD_OP_SYNC,
|
||||
},
|
||||
},
|
||||
@@ -1181,5 +1181,5 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
|
||||
|
||||
uint64_t cluster_client_t::next_op_id()
|
||||
{
|
||||
return op_id++;
|
||||
return msgr.next_subop_id++;
|
||||
}
|
||||
|
@@ -85,7 +85,6 @@ class cluster_client_t
 int up_wait_retry_interval = 500; // ms
 
 int retry_timeout_id = 0;
-uint64_t op_id = 1;
 std::vector<cluster_op_t*> offline_ops;
 cluster_op_t *op_queue_head = NULL, *op_queue_tail = NULL;
 std::map<object_id, cluster_buffer_t> dirty_buffers;
@@ -196,7 +196,7 @@ void cluster_client_t::send_list(inode_list_osd_t *cur_list)
 .sec_list = {
 .header = {
 .magic = SECONDARY_OSD_OP_MAGIC,
-.id = op_id++,
+.id = next_op_id(),
 .opcode = OSD_OP_SEC_LIST,
 },
 .list_pg = cur_list->pg->pg_num,
@@ -52,11 +52,12 @@ static const char *help_text =
|
||||
" --disable_data_fsync 0 Disable data device cache and fsync (default off)\n"
|
||||
" --disable_meta_fsync 0 Disable metadata device cache and fsync (default off)\n"
|
||||
" --disable_journal_fsync 0 Disable journal device cache and fsync (default off)\n"
|
||||
" --hdd Enable HDD defaults (1M block, 1G journal, throttling)\n"
|
||||
" --force Bypass partition safety checks (for emptiness and so on)\n"
|
||||
" \n"
|
||||
" Options (both modes):\n"
|
||||
" --journal_size 1G/32M Set journal size (area or partition size)\n"
|
||||
" --block_size 1M/128k Set blockstore object size\n"
|
||||
" --journal_size 32M/1G Set journal size (area or partition size)\n"
|
||||
" --block_size 128k/1M Set blockstore object size\n"
|
||||
" --bitmap_granularity 4k Set bitmap granularity\n"
|
||||
" --data_device_block 4k Override data device block size\n"
|
||||
" --meta_device_block 4k Override metadata device block size\n"
|
||||
@@ -109,8 +110,9 @@ static const char *help_text =
|
||||
" Commands are passed to systemctl with vitastor-osd@<num> units as arguments.\n"
|
||||
" When --now is added to enable/disable, OSDs are also immediately started/stopped.\n"
|
||||
"\n"
|
||||
"vitastor-disk read-sb <device>\n"
|
||||
"vitastor-disk read-sb [--force] <device>\n"
|
||||
" Try to read Vitastor OSD superblock from <device> and print it in JSON format.\n"
|
||||
" --force allows to bypass \"does not refer to the device itself\" errors.\n"
|
||||
"\n"
|
||||
"vitastor-disk write-sb <device>\n"
|
||||
" Read JSON from STDIN and write it into Vitastor OSD superblock on <device>.\n"
|
||||
@@ -195,6 +197,10 @@ int main(int argc, char *argv[])
|
||||
{
|
||||
self.options["hybrid"] = "1";
|
||||
}
|
||||
else if (!strcmp(argv[i], "--hdd"))
|
||||
{
|
||||
self.options["hdd"] = "1";
|
||||
}
|
||||
else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
|
||||
{
|
||||
cmd.insert(cmd.begin(), (char*)"help");
|
||||
|
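As a usage note for the `read-sb` help-text change above: with the new `--force` flag the command reads a superblock even when it does not refer to the device it was read from. A hypothetical invocation (the device name is illustrative, not taken from this diff) would be:

```
# Print the OSD superblock of /dev/sdb1 as JSON; --force bypasses the
# "does not refer to the device itself" error mentioned in the help text.
# /dev/sdb1 is an illustrative device name.
vitastor-disk read-sb --force /dev/sdb1
```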
@@ -111,7 +111,7 @@ struct disk_tool_t
|
||||
int systemd_start_stop_osds(std::vector<std::string> cmd, std::vector<std::string> devices);
|
||||
int pre_exec_osd(std::string device);
|
||||
|
||||
json11::Json read_osd_superblock(std::string device, bool expect_exist = true);
|
||||
json11::Json read_osd_superblock(std::string device, bool expect_exist = true, bool ignore_nonref = false);
|
||||
uint32_t write_osd_superblock(std::string device, json11::Json params);
|
||||
|
||||
int prepare_one(std::map<std::string, std::string> options, int is_hdd = -1);
|
||||
|
@@ -83,10 +83,15 @@ int disk_tool_t::dump_journal()
|
||||
auto pos = journal_pos;
|
||||
int r = process_journal_block(data, [this, pos](int num, journal_entry *je)
|
||||
{
|
||||
if (json && first2)
|
||||
if (json)
|
||||
{
|
||||
if (dump_with_blocks)
|
||||
printf("%s{\"offset\":\"0x%lx\",\"entries\":[\n", first ? "" : ",\n", pos);
|
||||
{
|
||||
if (first2)
|
||||
printf("%s{\"offset\":\"0x%lx\",\"entries\":[\n", first ? "" : ",\n", pos);
|
||||
}
|
||||
else if (!first)
|
||||
printf("%s", ",\n");
|
||||
first = false;
|
||||
}
|
||||
dump_journal_entry(num, je, json);
|
||||
|
@@ -130,7 +130,7 @@ void disk_tool_t::dump_meta_header(blockstore_meta_header_v1_t *hdr)
|
||||
void disk_tool_t::dump_meta_entry(uint64_t block_num, clean_disk_entry *entry, uint8_t *bitmap)
|
||||
{
|
||||
printf(
|
||||
#define ENTRY_FMT "{\"block\":%lu,\"pool\":%u,\"inode\":%lu,\"stripe\":%lu,\"version\":%lu"
|
||||
#define ENTRY_FMT "{\"block\":%lu,\"pool\":%u,\"inode\":\"0x%lx\",\"stripe\":\"0x%lx\",\"version\":%lu"
|
||||
(first ? ENTRY_FMT : (",\n" ENTRY_FMT)),
|
||||
#undef ENTRY_FMT
|
||||
block_num, INODE_POOL(entry->oid.inode), INODE_NO_POOL(entry->oid.inode),
|
||||
|
@@ -539,7 +539,7 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
|
||||
fprintf(stderr, "Device list (positional arguments) and --hybrid are incompatible with --data_device\n");
|
||||
return 1;
|
||||
}
|
||||
return prepare_one(options);
|
||||
return prepare_one(options, options.find("hdd") != options.end() ? 1 : 0);
|
||||
}
|
||||
if (!devices.size())
|
||||
{
|
||||
@@ -549,12 +549,12 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
|
||||
options.erase("data_device");
|
||||
options.erase("meta_device");
|
||||
options.erase("journal_device");
|
||||
bool hybrid = options.find("hybrid") != options.end();
|
||||
auto devinfo = collect_devices(devices);
|
||||
if (!devinfo.size())
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
bool hybrid = options.find("hybrid") != options.end();
|
||||
uint64_t osd_per_disk = stoull_full(options["osd_per_disk"]);
|
||||
if (!osd_per_disk)
|
||||
osd_per_disk = 1;
|
||||
@@ -612,7 +612,8 @@ int disk_tool_t::prepare(std::vector<std::string> devices)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
prepare_one(options, dev.is_hdd ? 1 : 0);
|
||||
// Treat all disks as SSDs if not in the hybrid mode
|
||||
prepare_one(options, hybrid && dev.is_hdd ? 1 : 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -54,7 +54,7 @@ int disk_tool_t::udev_import(std::string device)
|
||||
|
||||
int disk_tool_t::read_sb(std::string device)
|
||||
{
|
||||
json11::Json sb = read_osd_superblock(device);
|
||||
json11::Json sb = read_osd_superblock(device, true, options.find("force") != options.end());
|
||||
if (sb.is_null())
|
||||
{
|
||||
return 1;
|
||||
@@ -123,7 +123,7 @@ uint32_t disk_tool_t::write_osd_superblock(std::string device, json11::Json para
|
||||
return sb_size;
|
||||
}
|
||||
|
||||
json11::Json disk_tool_t::read_osd_superblock(std::string device, bool expect_exist)
|
||||
json11::Json disk_tool_t::read_osd_superblock(std::string device, bool expect_exist, bool ignore_nonref)
|
||||
{
|
||||
vitastor_disk_superblock_t *sb = NULL;
|
||||
uint8_t *buf = NULL;
|
||||
@@ -226,7 +226,7 @@ json11::Json disk_tool_t::read_osd_superblock(std::string device, bool expect_ex
|
||||
{
|
||||
device_type = "journal";
|
||||
}
|
||||
else
|
||||
else if (!ignore_nonref)
|
||||
{
|
||||
if (expect_exist)
|
||||
fprintf(stderr, "Invalid OSD superblock on %s: does not refer to the device itself\n", device.c_str());
|
||||
|
@@ -145,10 +145,10 @@ int disable_cache(std::string dev)
 closedir(dir);
 // Check cache_type
 scsi_disk += "/cache_type";
-std::string cache_type = read_file(scsi_disk);
+std::string cache_type = trim(read_file(scsi_disk));
 if (cache_type == "")
 return 1;
-if (cache_type == "write back")
+if (cache_type != "write through")
 {
 int fd = open(scsi_disk.c_str(), O_WRONLY);
 if (fd < 0 || write_blocking(fd, (void*)"write through", strlen("write through")) != strlen("write through"))
@@ -80,12 +80,20 @@ void osd_messenger_t::init()
|
||||
};
|
||||
op->callback = [this, cl](osd_op_t *op)
|
||||
{
|
||||
auto cl_it = clients.find(op->peer_fd);
|
||||
if (cl_it == clients.end() || cl_it->second != cl)
|
||||
{
|
||||
// client is already dropped
|
||||
delete op;
|
||||
return;
|
||||
}
|
||||
int fail_fd = (op->reply.hdr.retval != 0 ? op->peer_fd : -1);
|
||||
auto fail_osd_num = cl->osd_num;
|
||||
cl->ping_time_remaining = 0;
|
||||
delete op;
|
||||
if (fail_fd >= 0)
|
||||
{
|
||||
fprintf(stderr, "Ping failed for OSD %lu (client %d), disconnecting peer\n", cl->osd_num, cl->peer_fd);
|
||||
fprintf(stderr, "Ping failed for OSD %lu (client %d), disconnecting peer\n", fail_osd_num, fail_fd);
|
||||
stop_client(fail_fd, true);
|
||||
}
|
||||
};
|
||||
|
@@ -9,6 +9,8 @@
|
||||
#include "str_util.h"
|
||||
#include "osd.h"
|
||||
|
||||
#define SELF_FD -1
|
||||
|
||||
// Peering loop
|
||||
void osd_t::handle_peers()
|
||||
{
|
||||
@@ -317,7 +319,7 @@ void osd_t::submit_sync_and_list_subop(osd_num_t role_osd, pg_peering_state_t *p
|
||||
// Self
|
||||
osd_op_t *op = new osd_op_t();
|
||||
op->op_type = 0;
|
||||
op->peer_fd = -1;
|
||||
op->peer_fd = SELF_FD;
|
||||
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
||||
op->bs_op = new blockstore_op_t();
|
||||
op->bs_op->opcode = BS_OP_SYNC;
|
||||
@@ -336,8 +338,8 @@ void osd_t::submit_sync_and_list_subop(osd_num_t role_osd, pg_peering_state_t *p
|
||||
ps->list_ops.erase(role_osd);
|
||||
submit_list_subop(role_osd, ps);
|
||||
};
|
||||
bs->enqueue_op(op->bs_op);
|
||||
ps->list_ops[role_osd] = op;
|
||||
bs->enqueue_op(op->bs_op);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -371,8 +373,8 @@ void osd_t::submit_sync_and_list_subop(osd_num_t role_osd, pg_peering_state_t *p
|
||||
ps->list_ops.erase(role_osd);
|
||||
submit_list_subop(role_osd, ps);
|
||||
};
|
||||
msgr.outbox_push(op);
|
||||
ps->list_ops[role_osd] = op;
|
||||
msgr.outbox_push(op);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -383,7 +385,7 @@ void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
|
||||
// Self
|
||||
osd_op_t *op = new osd_op_t();
|
||||
op->op_type = 0;
|
||||
op->peer_fd = -1;
|
||||
op->peer_fd = SELF_FD;
|
||||
clock_gettime(CLOCK_REALTIME, &op->tv_begin);
|
||||
op->bs_op = new blockstore_op_t();
|
||||
op->bs_op->opcode = BS_OP_LIST;
|
||||
@@ -415,8 +417,8 @@ void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
|
||||
op->bs_op = NULL;
|
||||
delete op;
|
||||
};
|
||||
bs->enqueue_op(op->bs_op);
|
||||
ps->list_ops[role_osd] = op;
|
||||
bs->enqueue_op(op->bs_op);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -463,14 +465,14 @@ void osd_t::submit_list_subop(osd_num_t role_osd, pg_peering_state_t *ps)
|
||||
ps->list_ops.erase(role_osd);
|
||||
delete op;
|
||||
};
|
||||
msgr.outbox_push(op);
|
||||
ps->list_ops[role_osd] = op;
|
||||
msgr.outbox_push(op);
|
||||
}
|
||||
}
|
||||
|
||||
void osd_t::discard_list_subop(osd_op_t *list_op)
|
||||
{
|
||||
if (list_op->peer_fd == 0)
|
||||
if (list_op->peer_fd == SELF_FD)
|
||||
{
|
||||
// Self
|
||||
list_op->bs_op->callback = [list_op](blockstore_op_t *bs_op)
|
||||
|
@@ -206,6 +206,25 @@ static void coroutine_fn vitastor_co_get_metadata(VitastorRPC *task)
|
||||
}
|
||||
}
|
||||
|
||||
static void vitastor_aio_set_fd_handler(void *ctx, int fd, int unused1, IOHandler *fd_read, IOHandler *fd_write, void *unused2, void *opaque)
|
||||
{
|
||||
aio_set_fd_handler(ctx, fd,
|
||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 5 || QEMU_VERSION_MAJOR >= 3
|
||||
0 /*is_external*/,
|
||||
#endif
|
||||
fd_read, fd_write,
|
||||
#if QEMU_VERSION_MAJOR == 1 && QEMU_VERSION_MINOR <= 6 || QEMU_VERSION_MAJOR < 1
|
||||
NULL /*io_flush*/,
|
||||
#endif
|
||||
#if QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 9 || QEMU_VERSION_MAJOR >= 3
|
||||
NULL /*io_poll*/,
|
||||
#endif
|
||||
#if QEMU_VERSION_MAJOR >= 7
|
||||
NULL /*io_poll_ready*/,
|
||||
#endif
|
||||
opaque);
|
||||
}
|
||||
|
||||
static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp)
|
||||
{
|
||||
VitastorClient *client = bs->opaque;
|
||||
@@ -221,7 +240,7 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||
client->rdma_gid_index = qdict_get_try_int(options, "rdma-gid-index", 0);
|
||||
client->rdma_mtu = qdict_get_try_int(options, "rdma-mtu", 0);
|
||||
client->proxy = vitastor_c_create_qemu(
|
||||
(QEMUSetFDHandler*)aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
vitastor_aio_set_fd_handler, bdrv_get_aio_context(bs), client->config_path, client->etcd_host, client->etcd_prefix,
|
||||
client->use_rdma, client->rdma_device, client->rdma_port_num, client->rdma_gid_index, client->rdma_mtu, 0
|
||||
);
|
||||
client->image = g_strdup(qdict_get_try_str(options, "image"));
|
||||
@@ -238,9 +257,9 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
|
||||
}
|
||||
else
|
||||
{
|
||||
qemu_coroutine_enter(qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
||||
bdrv_coroutine_enter(bs, qemu_coroutine_create((void(*)(void*))vitastor_co_get_metadata, &task));
|
||||
BDRV_POLL_WHILE(bs, !task.complete);
|
||||
}
|
||||
BDRV_POLL_WHILE(bs, !task.complete);
|
||||
client->watch = (void*)task.ret;
|
||||
client->readonly = client->readonly || vitastor_c_inode_get_readonly(client->watch);
|
||||
client->size = vitastor_c_inode_get_size(client->watch);
|
||||
@@ -428,7 +447,13 @@ static void vitastor_co_read_cb(void *opaque, long retval, uint64_t version)
|
||||
vitastor_co_generic_bh_cb(opaque, retval);
|
||||
}
|
||||
|
||||
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags)
|
||||
static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs,
|
||||
#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
|
||||
int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
|
||||
#else
|
||||
uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
|
||||
#endif
|
||||
)
|
||||
{
|
||||
VitastorClient *client = bs->opaque;
|
||||
VitastorRPC task;
|
||||
@@ -448,7 +473,13 @@ static int coroutine_fn vitastor_co_preadv(BlockDriverState *bs, uint64_t offset
|
||||
return task.ret;
|
||||
}
|
||||
|
||||
static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags)
|
||||
static int coroutine_fn vitastor_co_pwritev(BlockDriverState *bs,
|
||||
#if QEMU_VERSION_MAJOR >= 7 || QEMU_VERSION_MAJOR == 6 && QEMU_VERSION_MINOR >= 2
|
||||
int64_t offset, int64_t bytes, QEMUIOVector *iov, BdrvRequestFlags flags
|
||||
#else
|
||||
uint64_t offset, uint64_t bytes, QEMUIOVector *iov, int flags
|
||||
#endif
|
||||
)
|
||||
{
|
||||
VitastorClient *client = bs->opaque;
|
||||
VitastorRPC task;
|
||||
|
@@ -6,7 +6,7 @@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@
 
 Name: Vitastor
 Description: Vitastor client library
-Version: 0.8.1
+Version: 0.8.2
 Libs: -L${libdir} -lvitastor_client
 Cflags: -I${includedir}
 
@@ -24,6 +24,7 @@ typedef void VitastorIOHandler(void *opaque, long retval);
 
 // QEMU
 typedef void IOHandler(void *opaque);
+// is_external and poll_fn are not required, but are here for compatibility
 typedef void QEMUSetFDHandler(void *ctx, int fd, int is_external, IOHandler *fd_read, IOHandler *fd_write, void *poll_fn, void *opaque);
 
 vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,