Compare commits
122 Commits
kv-readahe
...
v2.4.0
Author | SHA1 | Date | |
---|---|---|---|
59f87c3e30 | |||
eba383f66f | |||
4e5e8822c0 | |||
60933c1d00 | |||
![]() |
1ad6933953 | ||
![]() |
8a250f4fca | ||
![]() |
94ddf20667 | ||
![]() |
5f18496c04 | ||
08a3dcd587 | |||
3c5b9d2744 | |||
cff08d2c72 | |||
1e1f395947 | |||
e6c2628960 | |||
887f7c1530 | |||
2c6bddd831 | |||
e1715c33bb | |||
2ef80bf0b8 | |||
85ba710718 | |||
c16b0e7f92 | |||
b3d388228a | |||
bcde9de7da | |||
52bc3261e9 | |||
2d42f29385 | |||
17240c6144 | |||
9e627a4414 | |||
90b1019636 | |||
df604afbd5 | |||
47c7aa62de | |||
9f2dc48d0f | |||
6d951b21fb | |||
552f28cb3e | |||
e87b6e26f7 | |||
0c89886374 | |||
e79bef8751 | |||
ad76f84e1c | |||
db827cb34c | |||
e5c6d85ea1 | |||
6cc44c1f54 | |||
c20450c1f1 | |||
db63e58b3d | |||
31b7021330 | |||
2ebe3a468c | |||
9892fccfb0 | |||
0be86a306d | |||
d77a775948 | |||
8cc82bab39 | |||
f9d5e33ddd | |||
![]() |
f83418d93e | ||
fbf14fb0cb | |||
fb1c3e00f4 | |||
![]() |
d8332171e9 | ||
c24cc9bf0b | |||
9f57c75acf | |||
53b12641d1 | |||
![]() |
5c5c8825dc | ||
3a261ac3fc | |||
04514435de | |||
07303020fc | |||
feaf7a15cf | |||
29dda5066f | |||
1de53ef7e6 | |||
4793dbe9c3 | |||
918ea34af2 | |||
2db8184cd8 | |||
0e964b3c8c | |||
1b9296ff6c | |||
6bf136c199 | |||
b529f77264 | |||
bf9519dcdc | |||
4ba687738b | |||
8427f6fe46 | |||
efa6bc3e70 | |||
da33e9b12d | |||
![]() |
265127c1a7 | ||
2b30acfc1d | |||
7fbc38ef29 | |||
e5070e991a | |||
625552c441 | |||
78c95c94f6 | |||
488e20bf55 | |||
25d6281b3e | |||
1676e50b3a | |||
8049e3c14a | |||
93a30efd86 | |||
83fb121f36 | |||
afc97b757b | |||
68905cbf41 | |||
3fff667f13 | |||
980aec1d9b | |||
f515fcce62 | |||
97bb809b54 | |||
6022a61329 | |||
a3c1996101 | |||
8d2a1f0297 | |||
91cbc313c2 | |||
f0a025428e | |||
67071158bd | |||
cd028612c8 | |||
f390e73dae | |||
de2539c491 | |||
957a4fce7e | |||
f201ecdd51 | |||
4afb617f59 | |||
d3fde0569f | |||
438b64f6c3 | |||
2b0a802ea1 | |||
0dd49c1d67 | |||
410170db96 | |||
7d8523e0e5 | |||
db915184c6 | |||
5ae6fea49c | |||
95ec750b8c | |||
90b1de307b | |||
7e6a95c678 | |||
b2416afb28 | |||
66dc116f60 | |||
0cb8629ab6 | |||
b7322a405a | |||
5692630005 | |||
00ced7cea7 | |||
ebdb75e287 | |||
f397fe9c6a |
@@ -20,7 +20,7 @@ RUN echo 'deb http://deb.debian.org/debian bullseye-backports main' >> /etc/apt/
|
||||
|
||||
RUN apt-get update
|
||||
RUN apt-get -y install etcd qemu-system-x86 qemu-block-extra qemu-utils fio libasan5 \
|
||||
liburing1 liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||
libgoogle-perftools-dev devscripts libjerasure-dev cmake libibverbs-dev libisal-dev
|
||||
RUN apt-get -y build-dep fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
||||
RUN apt-get update && apt-get -y install jq lp-solve sudo nfs-common fdisk parted
|
||||
RUN apt-get --download-only source fio qemu=`dpkg -s qemu-system-x86|grep ^Version:|awk '{print $2}'`
|
||||
|
@@ -144,6 +144,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_change_pg_count_online:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_change_pg_count_online.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_change_pg_size:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -684,6 +702,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_write_iothreads:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: TEST_NAME=iothreads GLOBAL_CONFIG=',"client_iothread_count":4' /root/vitastor/tests/test_write.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_write_no_same:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -720,6 +756,24 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_local_read:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 10
|
||||
run: TEST_NAME=local_read POOLCFG='"local_reads":"random",' /root/vitastor/tests/test_heal.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_ec:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -756,6 +810,60 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_reweight_half:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_reweight_half.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_pool2:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_pool2.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_read_bitmap:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_read_bitmap.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_heal_csum_32k_dmj:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
@@ -900,24 +1008,6 @@ jobs:
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_snapshot_pool2:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
container: ${{env.TEST_IMAGE}}:${{github.sha}}
|
||||
steps:
|
||||
- name: Run test
|
||||
id: test
|
||||
timeout-minutes: 3
|
||||
run: /root/vitastor/tests/test_snapshot_pool2.sh
|
||||
- name: Print logs
|
||||
if: always() && steps.test.outcome == 'failure'
|
||||
run: |
|
||||
for i in /root/vitastor/testdata/*.log /root/vitastor/testdata/*.txt; do
|
||||
echo "-------- $i --------"
|
||||
cat $i
|
||||
echo ""
|
||||
done
|
||||
|
||||
test_osd_tags:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
|
@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(vitastor)
|
||||
|
||||
set(VITASTOR_VERSION "2.1.0")
|
||||
set(VITASTOR_VERSION "2.4.0")
|
||||
|
||||
add_subdirectory(src)
|
||||
|
@@ -19,7 +19,7 @@ Vitastor нацелен в первую очередь на SSD и SSD+HDD кл
|
||||
TCP и RDMA и на хорошем железе может достигать задержки 4 КБ чтения и записи на уровне ~0.1 мс,
|
||||
что примерно в 10 раз быстрее, чем Ceph и другие популярные программные СХД.
|
||||
|
||||
Vitastor поддерживает QEMU-драйвер, протоколы NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
|
||||
Vitastor поддерживает QEMU-драйвер, протоколы UBLK, NBD и NFS, драйверы OpenStack, OpenNebula, Proxmox, Kubernetes.
|
||||
Другие драйверы могут также быть легко реализованы.
|
||||
|
||||
Подробности смотрите в документации по ссылкам. Можете начать отсюда: [Быстрый старт](docs/intro/quickstart.ru.md).
|
||||
@@ -64,8 +64,9 @@ Vitastor поддерживает QEMU-драйвер, протоколы NBD и
|
||||
- [vitastor-cli](docs/usage/cli.ru.md) (консольный интерфейс)
|
||||
- [vitastor-disk](docs/usage/disk.ru.md) (управление дисками)
|
||||
- [fio](docs/usage/fio.ru.md) для тестов производительности
|
||||
- [NBD](docs/usage/nbd.ru.md) для монтирования ядром
|
||||
- [QEMU и qemu-img](docs/usage/qemu.ru.md)
|
||||
- [UBLK](docs/usage/ublk.ru.md) для монтирования ядром
|
||||
- [NBD](docs/usage/nbd.ru.md) - старый интерфейс для монтирования ядром
|
||||
- [QEMU, qemu-img и VDUSE](docs/usage/qemu.ru.md)
|
||||
- [NFS](docs/usage/nfs.ru.md) кластерная файловая система и псевдо-ФС прокси
|
||||
- [Администрирование](docs/usage/admin.ru.md)
|
||||
- Производительность
|
||||
|
@@ -19,7 +19,7 @@ supports TCP and RDMA and may achieve 4 KB read and write latency as low as ~0.1
|
||||
with proper hardware which is ~10 times faster than other popular SDS's like Ceph
|
||||
or internal systems of public clouds.
|
||||
|
||||
Vitastor supports QEMU, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
|
||||
Vitastor supports QEMU, UBLK, NBD, NFS protocols, OpenStack, OpenNebula, Proxmox, Kubernetes drivers.
|
||||
More drivers may be created easily.
|
||||
|
||||
Read more details in the documentation. You can start from here: [Quick Start](docs/intro/quickstart.en.md).
|
||||
@@ -64,8 +64,9 @@ Read more details in the documentation. You can start from here: [Quick Start](d
|
||||
- [vitastor-cli](docs/usage/cli.en.md) (command-line interface)
|
||||
- [vitastor-disk](docs/usage/disk.en.md) (disk management tool)
|
||||
- [fio](docs/usage/fio.en.md) for benchmarks
|
||||
- [NBD](docs/usage/nbd.en.md) for kernel mounts
|
||||
- [QEMU and qemu-img](docs/usage/qemu.en.md)
|
||||
- [UBLK](docs/usage/ublk.en.md) for kernel mounts
|
||||
- [NBD](docs/usage/nbd.en.md) - old interface for kernel mounts
|
||||
- [QEMU, qemu-img and VDUSE](docs/usage/qemu.en.md)
|
||||
- [NFS](docs/usage/nfs.en.md) clustered file system and pseudo-FS proxy
|
||||
- [Administration](docs/usage/admin.en.md)
|
||||
- Performance
|
||||
|
@@ -36,7 +36,7 @@ RUN (echo deb http://vitastor.io/debian bookworm main > /etc/apt/sources.list.d/
|
||||
((echo 'Package: *'; echo 'Pin: origin "vitastor.io"'; echo 'Pin-Priority: 1000') > /etc/apt/preferences.d/vitastor.pref) && \
|
||||
wget -q -O /etc/apt/trusted.gpg.d/vitastor.gpg https://vitastor.io/debian/pubkey.gpg && \
|
||||
apt-get update && \
|
||||
apt-get install -y vitastor-client && \
|
||||
apt-get install -y vitastor-client ibverbs-providers && \
|
||||
wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-utils_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
|
||||
wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-block-extra_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
|
||||
dpkg -x qemu-utils*.deb tmp1 && \
|
||||
|
49
csi/Dockerfile.local
Normal file
49
csi/Dockerfile.local
Normal file
@@ -0,0 +1,49 @@
|
||||
# Compile stage
|
||||
FROM golang:bookworm AS build
|
||||
|
||||
ADD go.sum go.mod /app/
|
||||
RUN cd /app; CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go mod download -x
|
||||
ADD . /app
|
||||
RUN perl -i -e '$/ = undef; while(<>) { s/\n\s*(\{\s*\n)/$1\n/g; s/\}(\s*\n\s*)else\b/$1} else/g; print; }' `find /app -name '*.go'` && \
|
||||
cd /app && \
|
||||
CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go build -o vitastor-csi
|
||||
|
||||
# Final stage
|
||||
FROM debian:bookworm
|
||||
|
||||
LABEL maintainers="Vitaliy Filippov <vitalif@yourcmc.ru>"
|
||||
LABEL description="Vitastor CSI Driver"
|
||||
|
||||
ENV NODE_ID=""
|
||||
ENV CSI_ENDPOINT=""
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y wget && \
|
||||
(echo "APT::Install-Recommends false;" > /etc/apt/apt.conf) && \
|
||||
apt-get update && \
|
||||
apt-get install -y e2fsprogs xfsprogs kmod iproute2 \
|
||||
# NFS mount dependencies
|
||||
nfs-common netbase \
|
||||
# dependencies of qemu-storage-daemon
|
||||
libnuma1 liburing2 libglib2.0-0 libfuse3-3 libaio1 libzstd1 libnettle8 \
|
||||
libgmp10 libhogweed6 libp11-kit0 libidn2-0 libunistring2 libtasn1-6 libpcre2-8-0 libffi8 && \
|
||||
apt-get clean && \
|
||||
(echo options nbd nbds_max=128 > /etc/modprobe.d/nbd.conf)
|
||||
|
||||
COPY --from=build /app/vitastor-csi /bin/
|
||||
|
||||
ADD deb /deb
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install /deb/vitastor-client_*.deb && \
|
||||
wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-utils_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
|
||||
wget https://vitastor.io/archive/qemu/qemu-bookworm-9.2.2%2Bds-1%2Bvitastor4/qemu-block-extra_9.2.2%2Bds-1%2Bvitastor4_amd64.deb && \
|
||||
dpkg -x qemu-utils*.deb tmp1 && \
|
||||
dpkg -x qemu-block-extra*.deb tmp1 && \
|
||||
cp -a tmp1/usr/bin/qemu-storage-daemon /usr/bin/ && \
|
||||
mkdir -p /usr/lib/x86_64-linux-gnu/qemu && \
|
||||
cp -a tmp1/usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/ && \
|
||||
rm -rf tmp1 *.deb && \
|
||||
apt-get clean
|
||||
|
||||
ENTRYPOINT ["/bin/vitastor-csi"]
|
@@ -1,4 +1,4 @@
|
||||
VITASTOR_VERSION ?= v2.1.0
|
||||
VITASTOR_VERSION ?= v2.4.0
|
||||
|
||||
all: build push
|
||||
|
||||
|
@@ -49,7 +49,7 @@ spec:
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
allowPrivilegeEscalation: true
|
||||
image: vitalif/vitastor-csi:v2.1.0
|
||||
image: vitalif/vitastor-csi:v2.4.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -121,7 +121,7 @@ spec:
|
||||
privileged: true
|
||||
capabilities:
|
||||
add: ["SYS_ADMIN"]
|
||||
image: vitalif/vitastor-csi:v2.1.0
|
||||
image: vitalif/vitastor-csi:v2.4.0
|
||||
args:
|
||||
- "--node=$(NODE_ID)"
|
||||
- "--endpoint=$(CSI_ENDPOINT)"
|
||||
|
@@ -5,7 +5,7 @@ package vitastor
|
||||
|
||||
const (
|
||||
vitastorCSIDriverName = "csi.vitastor.io"
|
||||
vitastorCSIDriverVersion = "2.1.0"
|
||||
vitastorCSIDriverVersion = "2.4.0"
|
||||
)
|
||||
|
||||
// Config struct fills the parameters of request or user input
|
||||
|
@@ -33,7 +33,7 @@ import (
|
||||
type NodeServer struct
|
||||
{
|
||||
*Driver
|
||||
useVduse bool
|
||||
method MountMethod
|
||||
stateDir string
|
||||
nfsStageDir string
|
||||
mounter mount.Interface
|
||||
@@ -81,16 +81,23 @@ func NewNodeServer(driver *Driver) *NodeServer
|
||||
}
|
||||
ns := &NodeServer{
|
||||
Driver: driver,
|
||||
useVduse: checkVduseSupport(),
|
||||
method: selectMountMethod(),
|
||||
stateDir: stateDir,
|
||||
nfsStageDir: nfsStageDir,
|
||||
mounter: mount.New(""),
|
||||
volumeLocks: make(map[string]bool),
|
||||
}
|
||||
ns.cond = sync.NewCond(&ns.mu)
|
||||
if (ns.useVduse)
|
||||
if (ns.method == MOUNT_VDUSE)
|
||||
{
|
||||
ns.restoreVduseDaemons()
|
||||
}
|
||||
else if (ns.method == MOUNT_UBLK)
|
||||
{
|
||||
ns.restoreUblkDaemons()
|
||||
}
|
||||
if (ns.method == MOUNT_VDUSE || ns.method == MOUNT_UBLK)
|
||||
{
|
||||
dur, err := time.ParseDuration(os.Getenv("RESTART_INTERVAL"))
|
||||
if (err != nil)
|
||||
{
|
||||
@@ -136,7 +143,14 @@ func (ns *NodeServer) restarter()
|
||||
for
|
||||
{
|
||||
<-ticker.C
|
||||
ns.restoreVduseDaemons()
|
||||
if (ns.method == MOUNT_VDUSE)
|
||||
{
|
||||
ns.restoreVduseDaemons()
|
||||
}
|
||||
else if (ns.method == MOUNT_UBLK)
|
||||
{
|
||||
ns.restoreUblkDaemons()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,6 +245,78 @@ func (ns *NodeServer) checkVduseState(stateFile string, devs map[string]interfac
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *NodeServer) restoreUblkDaemons()
|
||||
{
|
||||
pattern := ns.stateDir+"vitastor-ublk-*.json"
|
||||
stateFiles, err := filepath.Glob(pattern)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to list %s: %v", pattern, err)
|
||||
}
|
||||
if (len(stateFiles) == 0)
|
||||
{
|
||||
return
|
||||
}
|
||||
for _, stateFile := range stateFiles
|
||||
{
|
||||
deviceNum := stateFile[len(ns.stateDir) + len("vitastor-ublk-") :]
|
||||
deviceNum = deviceNum[0:len(deviceNum)-5]
|
||||
ns.checkUblkState(deviceNum)
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *NodeServer) checkUblkState(deviceNum string)
|
||||
{
|
||||
// Check if the ublk daemon is still active
|
||||
|
||||
// Read state file
|
||||
stateFile := ns.stateDir + "vitastor-ublk-" + deviceNum + ".json"
|
||||
stateJSON, err := os.ReadFile(stateFile)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Warningf("error reading state file %v: %v", stateFile, err)
|
||||
return
|
||||
}
|
||||
var state DeviceState
|
||||
err = json.Unmarshal(stateJSON, &state)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Warningf("state file %v contains invalid JSON (error %v): %v", stateFile, err, string(stateJSON))
|
||||
return
|
||||
}
|
||||
|
||||
// Lock volume
|
||||
ns.lockVolume(state.ConfigPath+":block:"+state.Image)
|
||||
defer ns.unlockVolume(state.ConfigPath+":block:"+state.Image)
|
||||
|
||||
// Recheck state file after locking
|
||||
_, err = os.ReadFile(stateFile)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Warningf("state file %v disappeared, skipping volume", stateFile)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the vitastor-ublk process is still active
|
||||
pidFile := ns.stateDir + "vitastor-ublk-" + deviceNum + ".pid"
|
||||
exists := false
|
||||
proc, err := findByPidFile(pidFile)
|
||||
if (err == nil)
|
||||
{
|
||||
exists = proc.Signal(syscall.Signal(0)) == nil
|
||||
}
|
||||
if (!exists)
|
||||
{
|
||||
// Restart daemon
|
||||
klog.Warningf("recovering UBLK device /dev/ublkb%v for volume %v", deviceNum, state.Image)
|
||||
_, err = mapUblk(ns.stateDir, state.Image, state.ConfigPath, state.Readonly, "/dev/ublkb"+deviceNum)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Warningf("failed to recover ublk device for volume %v: %v", state.Image, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ns *NodeServer) restoreNfsDaemons()
|
||||
{
|
||||
pattern := ns.stateDir+"vitastor-nfs-*.json"
|
||||
@@ -417,14 +503,18 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
|
||||
}
|
||||
|
||||
var devicePath, vdpaId string
|
||||
if (!ns.useVduse)
|
||||
if (ns.method == MOUNT_UBLK)
|
||||
{
|
||||
devicePath, err = mapNbd(volName, ctxVars, false)
|
||||
devicePath, err = mapUblk(ns.stateDir, volName, ctxVars["configPath"], false, "")
|
||||
}
|
||||
else
|
||||
else if (ns.method == MOUNT_VDUSE)
|
||||
{
|
||||
devicePath, vdpaId, err = mapVduse(ns.stateDir, volName, ctxVars, false)
|
||||
}
|
||||
else /* if (ns.method == MOUNT_NBD) */
|
||||
{
|
||||
devicePath, err = mapNbd(volName, ctxVars, false)
|
||||
}
|
||||
if (err != nil)
|
||||
{
|
||||
return nil, err
|
||||
@@ -439,7 +529,8 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
|
||||
else
|
||||
{
|
||||
// Check existing format
|
||||
existingFormat, err := diskMounter.GetDiskFormat(devicePath)
|
||||
var existingFormat string
|
||||
existingFormat, err = diskMounter.GetDiskFormat(devicePath)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to get disk format for path %s, error: %v", err)
|
||||
@@ -495,10 +586,6 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
|
||||
case "xfs":
|
||||
_, err = systemCombined("xfs_growfs", devicePath)
|
||||
}
|
||||
if (err != nil)
|
||||
{
|
||||
goto unmap
|
||||
}
|
||||
}
|
||||
}
|
||||
if (err != nil)
|
||||
@@ -512,14 +599,18 @@ func (ns *NodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStageVol
|
||||
return &csi.NodeStageVolumeResponse{}, nil
|
||||
|
||||
unmap:
|
||||
if (!ns.useVduse || len(devicePath) >= 8 && devicePath[0:8] == "/dev/nbd")
|
||||
if (ns.method == MOUNT_UBLK)
|
||||
{
|
||||
unmapNbd(devicePath)
|
||||
unmapUblk(ns.stateDir, devicePath)
|
||||
}
|
||||
else
|
||||
else if (ns.method == MOUNT_VDUSE)
|
||||
{
|
||||
unmapVduseById(ns.stateDir, vdpaId)
|
||||
}
|
||||
else /* if (ns.method == MOUNT_NBD) */
|
||||
{
|
||||
unmapNbd(devicePath)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -545,7 +636,7 @@ func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag
|
||||
defer ns.unlockVolume(ctxVars["configPath"]+":block:"+volName)
|
||||
|
||||
targetPath := req.GetStagingTargetPath()
|
||||
devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
|
||||
devicePath, err := GetDeviceNameFromMount(targetPath)
|
||||
if (err != nil)
|
||||
{
|
||||
if (os.IsNotExist(err))
|
||||
@@ -582,14 +673,18 @@ func (ns *NodeServer) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnstag
|
||||
// unmap device
|
||||
if (len(refList) == 0)
|
||||
{
|
||||
if (!ns.useVduse)
|
||||
if (ns.method == MOUNT_UBLK)
|
||||
{
|
||||
unmapNbd(devicePath)
|
||||
unmapUblk(ns.stateDir, devicePath)
|
||||
}
|
||||
else
|
||||
else if (ns.method == MOUNT_VDUSE)
|
||||
{
|
||||
unmapVduse(ns.stateDir, devicePath)
|
||||
}
|
||||
else /* if (ns.method == MOUNT_NBD) */
|
||||
{
|
||||
unmapNbd(devicePath)
|
||||
}
|
||||
}
|
||||
|
||||
return &csi.NodeUnstageVolumeResponse{}, nil
|
||||
@@ -897,7 +992,7 @@ func (ns *NodeServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpu
|
||||
}
|
||||
|
||||
targetPath := req.GetTargetPath()
|
||||
devicePath, _, err := mount.GetDeviceNameFromMount(ns.mounter, targetPath)
|
||||
devicePath, err := GetDeviceNameFromMount(targetPath)
|
||||
if (err != nil)
|
||||
{
|
||||
if (os.IsNotExist(err))
|
||||
|
231
csi/src/utils.go
231
csi/src/utils.go
@@ -16,10 +16,20 @@ import (
|
||||
"syscall"
|
||||
|
||||
"k8s.io/klog"
|
||||
"k8s.io/utils/mount"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
)
|
||||
|
||||
// MountMethod identifies the mechanism used to attach a volume as a block device.
type MountMethod int

const (
    // MOUNT_NBD (0) maps volumes via NBD
    MOUNT_NBD MountMethod = iota
    // MOUNT_VDUSE (1) maps volumes via VDUSE
    MOUNT_VDUSE
    // MOUNT_UBLK (2) maps volumes via UBLK
    MOUNT_UBLK
)
|
||||
|
||||
func Contains(list []string, s string) bool
|
||||
{
|
||||
for i := 0; i < len(list); i++
|
||||
@@ -32,29 +42,26 @@ func Contains(list []string, s string) bool
|
||||
return false
|
||||
}
|
||||
|
||||
func checkVduseSupport() bool
|
||||
func selectMountMethod() MountMethod
|
||||
{
|
||||
// Check UBLK support (ublk_drv kernel module)
|
||||
if (checkModule("ublk_drv"))
|
||||
{
|
||||
klog.Infof("UBLK support enabled successfully")
|
||||
return MOUNT_UBLK
|
||||
}
|
||||
klog.Errorf(
|
||||
"Your host apparently has no UBLK support. UBLK support disabled."+
|
||||
" For UBLK you need at least Linux 6.0 and the ublk_drv kernel module.",
|
||||
)
|
||||
// Check VDUSE support (vdpa, vduse, virtio-vdpa kernel modules)
|
||||
vduse := true
|
||||
for _, mod := range []string{"vdpa", "vduse", "virtio-vdpa"}
|
||||
{
|
||||
_, err := os.Stat("/sys/module/"+mod)
|
||||
if (err != nil)
|
||||
if (!checkModule(mod))
|
||||
{
|
||||
if (!errors.Is(err, os.ErrNotExist))
|
||||
{
|
||||
klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
|
||||
}
|
||||
c := exec.Command("/sbin/modprobe", mod)
|
||||
c.Stdout = os.Stderr
|
||||
c.Stderr = os.Stderr
|
||||
err := c.Run()
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
|
||||
vduse = false
|
||||
break
|
||||
}
|
||||
vduse = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// Check that vdpa tool functions
|
||||
@@ -69,18 +76,38 @@ func checkVduseSupport() bool
|
||||
vduse = false
|
||||
}
|
||||
}
|
||||
if (!vduse)
|
||||
{
|
||||
klog.Errorf(
|
||||
"Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
|
||||
" For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
|
||||
)
|
||||
}
|
||||
else
|
||||
if (vduse)
|
||||
{
|
||||
klog.Infof("VDUSE support enabled successfully")
|
||||
return MOUNT_VDUSE
|
||||
}
|
||||
return vduse
|
||||
klog.Errorf(
|
||||
"Your host apparently has no VDUSE support. VDUSE support disabled, NBD will be used to map devices."+
|
||||
" For VDUSE you need at least Linux 5.15 and the following kernel modules: vdpa, virtio-vdpa, vduse.",
|
||||
)
|
||||
return MOUNT_NBD
|
||||
}
|
||||
|
||||
func checkModule(mod string) bool
|
||||
{
|
||||
_, err := os.Stat("/sys/module/"+mod)
|
||||
if (err != nil)
|
||||
{
|
||||
if (!errors.Is(err, os.ErrNotExist))
|
||||
{
|
||||
klog.Errorf("failed to check /sys/module/%s: %v", mod, err)
|
||||
}
|
||||
c := exec.Command("/sbin/modprobe", mod)
|
||||
c.Stdout = os.Stderr
|
||||
c.Stderr = os.Stderr
|
||||
err := c.Run()
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("/sbin/modprobe %s failed: %v", mod, err)
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func mapNbd(volName string, ctxVars map[string]string, readonly bool) (string, error)
|
||||
@@ -217,6 +244,7 @@ func mapVduse(stateDir string, volName string, ctxVars map[string]string, readon
|
||||
stateJSON, _ := json.Marshal(&DeviceState{
|
||||
ConfigPath: ctxVars["configPath"],
|
||||
VdpaId: vdpaId,
|
||||
|
||||
Image: volName,
|
||||
Blockdev: blockdev,
|
||||
Readonly: readonly,
|
||||
@@ -309,6 +337,117 @@ func unmapVduseById(stateDir, vdpaId string)
|
||||
}
|
||||
}
|
||||
|
||||
func mapUblk(stateDir string, volName string, configPath string, readonly bool, recoverDev string) (string, error)
|
||||
{
|
||||
pidFile := ""
|
||||
if (recoverDev != "")
|
||||
{
|
||||
if (len(recoverDev) < 10 || recoverDev[0:10] != "/dev/ublkb")
|
||||
{
|
||||
return "", fmt.Errorf("recover: %s does not start with /dev/ublkb", recoverDev)
|
||||
}
|
||||
pidFile = stateDir + "vitastor-ublk-" + recoverDev[10:] + ".pid"
|
||||
}
|
||||
else
|
||||
{
|
||||
pidFd, err := os.CreateTemp(stateDir, "vitastor-tmp-*.pid")
|
||||
if (err != nil)
|
||||
{
|
||||
return "", err
|
||||
}
|
||||
pidFile = pidFd.Name()
|
||||
pidFd.Close()
|
||||
}
|
||||
// Map device via vitastor-ublk
|
||||
args := []string{
|
||||
"map", "--image", volName, "--pidfile", pidFile,
|
||||
}
|
||||
if (configPath != "")
|
||||
{
|
||||
args = append(args, "--config_path", configPath)
|
||||
}
|
||||
if (readonly)
|
||||
{
|
||||
args = append(args, "--readonly")
|
||||
}
|
||||
if (recoverDev != "")
|
||||
{
|
||||
args = append(args, "--recover", recoverDev)
|
||||
}
|
||||
stdout, stderr, err := system("/usr/bin/vitastor-ublk", args...)
|
||||
if (err != nil)
|
||||
{
|
||||
return "", err
|
||||
}
|
||||
devicePath := strings.TrimSpace(string(stdout))
|
||||
if (devicePath == "")
|
||||
{
|
||||
return "", fmt.Errorf("vitastor-ublk did not return the name of the device. output: %s", stderr)
|
||||
}
|
||||
if (len(devicePath) >= 10 && devicePath[0:10] == "/dev/ublkb")
|
||||
{
|
||||
// Generate state file
|
||||
devNum := devicePath[10:]
|
||||
pidNew := stateDir + "vitastor-ublk-" + devNum + ".pid"
|
||||
if (pidFile != pidNew)
|
||||
{
|
||||
err := os.Rename(pidFile, pidNew)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("Failed to rename PID file %s to %s: %v", pidFile, pidNew, err)
|
||||
}
|
||||
else
|
||||
{
|
||||
pidFile = pidNew
|
||||
}
|
||||
}
|
||||
stateFile := stateDir + "vitastor-ublk-" + devNum + ".json"
|
||||
stateJSON, _ := json.Marshal(&DeviceState{
|
||||
ConfigPath: configPath,
|
||||
Image: volName,
|
||||
Readonly: readonly,
|
||||
PidFile: pidFile,
|
||||
})
|
||||
err = os.WriteFile(stateFile, stateJSON, 0600)
|
||||
if (err == nil)
|
||||
{
|
||||
klog.Infof("Attached volume %s via UBLK as %s", volName, devicePath)
|
||||
return devicePath, nil
|
||||
}
|
||||
os.Remove(stateFile)
|
||||
}
|
||||
killErr := killByPidFile(pidFile)
|
||||
if (killErr != nil)
|
||||
{
|
||||
klog.Errorf("Failed to kill started vitastor-ublk: %v", killErr)
|
||||
}
|
||||
os.Remove(pidFile)
|
||||
return "", err
|
||||
}
|
||||
|
||||
func unmapUblk(stateDir, devicePath string)
|
||||
{
|
||||
if (len(devicePath) < 10 || devicePath[0:10] != "/dev/ublkb")
|
||||
{
|
||||
klog.Errorf("%s does not start with /dev/ublkb", devicePath)
|
||||
return
|
||||
}
|
||||
unmapOut, unmapErr := exec.Command("/usr/bin/vitastor-ublk", "unmap", devicePath).CombinedOutput()
|
||||
if (unmapErr != nil)
|
||||
{
|
||||
klog.Errorf("failed to unmap UBLK device %s: %s, error: %v", devicePath, unmapOut, unmapErr)
|
||||
}
|
||||
for _, ext := range []string{"json", "pid"}
|
||||
{
|
||||
fn := stateDir + "vitastor-ublk-" + devicePath[10:] + "." + ext
|
||||
err := os.Remove(fn)
|
||||
if (err != nil)
|
||||
{
|
||||
klog.Errorf("failed to remove %s: %v", fn, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func system(program string, args ...string) ([]byte, []byte, error)
|
||||
{
|
||||
klog.Infof("Running "+program+" "+strings.Join(args, " "))
|
||||
@@ -340,3 +479,43 @@ func systemCombined(program string, args ...string) ([]byte, error)
|
||||
}
|
||||
return out.Bytes(), nil
|
||||
}
|
||||
|
||||
func GetDeviceNameFromMount(mountPath string) (string, error)
|
||||
{
|
||||
// Use /proc/self/mountinfo to correctly parse bind mounts for block device files
|
||||
mps, err := mount.ParseMountInfo("/proc/self/mountinfo")
|
||||
if (err != nil)
|
||||
{
|
||||
return "", err
|
||||
}
|
||||
|
||||
slTarget, err := filepath.EvalSymlinks(mountPath)
|
||||
if (err != nil)
|
||||
{
|
||||
slTarget = mountPath
|
||||
}
|
||||
|
||||
device := ""
|
||||
for _, mp := range mps
|
||||
{
|
||||
if (mp.MountPoint == slTarget)
|
||||
{
|
||||
device = mp.Source
|
||||
if (device[0] != '/' && mp.Root != "/")
|
||||
{
|
||||
// Handle {Source=udev Root=/vdb MountPoint=/var/lib/kubelet/tralaleylo/tralala}
|
||||
for _, other := range mps
|
||||
{
|
||||
if (other.Root == "/" && other.Source == mp.Source)
|
||||
{
|
||||
device = other.MountPoint + mp.Root
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
7
debian/build-vitastor-bookworm.sh
vendored
7
debian/build-vitastor-bookworm.sh
vendored
@@ -1,7 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=debian --build-arg REL=bookworm -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=bookworm -t vitastor-buildenv:bookworm -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=bookworm -v `dirname $0`/../:/root/vitastor vitastor-buildenv:bookworm /root/vitastor/debian/vitastor-build.sh
|
||||
|
7
debian/build-vitastor-bullseye.sh
vendored
7
debian/build-vitastor-bullseye.sh
vendored
@@ -1,7 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=bullseye -t vitastor-buildenv:bullseye -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=bullseye -v `dirname $0`/../:/root/vitastor vitastor-buildenv:bullseye /root/vitastor/debian/vitastor-build.sh
|
||||
|
7
debian/build-vitastor-buster.sh
vendored
7
debian/build-vitastor-buster.sh
vendored
@@ -1,7 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=debian --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=buster -t vitastor-buildenv:buster -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=buster -v `dirname $0`/../:/root/vitastor vitastor-buildenv:buster /root/vitastor/debian/vitastor-build.sh
|
||||
|
4
debian/build-vitastor-trixie.sh
vendored
Executable file
4
debian/build-vitastor-trixie.sh
vendored
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
docker build --build-arg DISTRO=debian --build-arg REL=trixie -t vitastor-buildenv:trixie -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=trixie -v `dirname $0`/../:/root/vitastor vitastor-buildenv:trixie /root/vitastor/debian/vitastor-build.sh
|
8
debian/build-vitastor-ubuntu-jammy.sh
vendored
8
debian/build-vitastor-ubuntu-jammy.sh
vendored
@@ -1,7 +1,5 @@
|
||||
#!/bin/bash
|
||||
# Ubuntu 22.04 Jammy Jellyfish
|
||||
|
||||
cat < vitastor.Dockerfile > ../Dockerfile
|
||||
cd ..
|
||||
mkdir -p packages
|
||||
sudo podman build --build-arg DISTRO=ubuntu --build-arg REL=jammy -v `pwd`/packages:/root/packages -f Dockerfile .
|
||||
rm Dockerfile
|
||||
docker build --build-arg DISTRO=ubuntu --build-arg REL=jammy -t vitastor-buildenv:jammy -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=jammy -v `dirname $0`/../:/root/vitastor vitastor-buildenv:jammy /root/vitastor/debian/vitastor-build.sh
|
||||
|
5
debian/build-vitastor-ubuntu-noble.sh
vendored
Executable file
5
debian/build-vitastor-ubuntu-noble.sh
vendored
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/bash
|
||||
# Ubuntu 24.04 Noble Numbat
|
||||
|
||||
docker build --build-arg DISTRO=ubuntu --build-arg REL=noble -t vitastor-buildenv:noble -f vitastor-buildenv.Dockerfile .
|
||||
docker run -i --rm -e REL=noble -v `dirname $0`/../:/root/vitastor vitastor-buildenv:noble /root/vitastor/debian/vitastor-build.sh
|
2
debian/changelog
vendored
2
debian/changelog
vendored
@@ -1,4 +1,4 @@
|
||||
vitastor (2.1.0-1) unstable; urgency=medium
|
||||
vitastor (2.4.0-1) unstable; urgency=medium
|
||||
|
||||
* Bugfixes
|
||||
|
||||
|
4
debian/control
vendored
4
debian/control
vendored
@@ -2,9 +2,9 @@ Source: vitastor
|
||||
Section: admin
|
||||
Priority: optional
|
||||
Maintainer: Vitaliy Filippov <vitalif@yourcmc.ru>
|
||||
Build-Depends: debhelper, liburing-dev (>= 0.6), g++ (>= 8), libstdc++6 (>= 8),
|
||||
Build-Depends: debhelper, g++ (>= 8), libstdc++6 (>= 8),
|
||||
linux-libc-dev, libgoogle-perftools-dev, libjerasure-dev, libgf-complete-dev,
|
||||
libibverbs-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
|
||||
libibverbs-dev, librdmacm-dev, libisal-dev, cmake, pkg-config, libnl-3-dev, libnl-genl-3-dev,
|
||||
node-bindings <!nocheck>, node-gyp, node-nan
|
||||
Standards-Version: 4.5.0
|
||||
Homepage: https://vitastor.io/
|
||||
|
2
debian/patched-qemu.Dockerfile
vendored
2
debian/patched-qemu.Dockerfile
vendored
@@ -26,7 +26,7 @@ RUN if [ "$REL" = "buster" -o "$REL" = "bullseye" -o "$REL" = "bookworm" ]; then
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y install fio libgoogle-perftools-dev devscripts
|
||||
RUN DEBIAN_FRONTEND=noninteractive TZ=Europe/Moscow apt-get -y build-dep qemu
|
||||
# To build a custom version
|
||||
#RUN cp /root/packages/qemu-orig/* /root
|
||||
|
60
debian/vitastor-build.sh
vendored
Executable file
60
debian/vitastor-build.sh
vendored
Executable file
@@ -0,0 +1,60 @@
|
||||
#!/bin/bash
|
||||
# To be run inside buildenv docker
|
||||
|
||||
set -e -x
|
||||
|
||||
[ -e /usr/lib/x86_64-linux-gnu/pkgconfig/libisal.pc ] || cp /root/vitastor/debian/libisal.pc /usr/lib/x86_64-linux-gnu/pkgconfig
|
||||
|
||||
mkdir -p /root/fio-build/
|
||||
cd /root/fio-build/
|
||||
rm -rf /root/fio-build/*
|
||||
dpkg-source -x /root/fio*.dsc
|
||||
|
||||
FULLVER=`head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'`
|
||||
VER=${FULLVER%%-*}
|
||||
rm -rf /root/vitastor-$VER
|
||||
mkdir /root/vitastor-$VER
|
||||
cd /root/vitastor
|
||||
cp -a $(ls | grep -v packages) /root/vitastor-$VER
|
||||
|
||||
rm -rf /root/vitastor/packages/vitastor-$REL
|
||||
mkdir -p /root/vitastor/packages/vitastor-$REL
|
||||
mv /root/vitastor-$VER /root/vitastor/packages/vitastor-$REL/
|
||||
|
||||
cd /root/vitastor/packages/vitastor-$REL/vitastor-$VER
|
||||
|
||||
rm -rf fio
|
||||
ln -s /root/fio-build/fio-*/ ./fio
|
||||
FIO=`head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'`
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h
|
||||
sh copy-fio-includes.sh
|
||||
rm fio
|
||||
mkdir -p a b debian/patches
|
||||
mv fio-copy b/fio
|
||||
diff -NaurpbB a b > debian/patches/fio-headers.patch || true
|
||||
echo fio-headers.patch >> debian/patches/series
|
||||
rm -rf a b
|
||||
|
||||
echo "dep:fio=$FIO" > debian/fio_version
|
||||
|
||||
cd /root/vitastor/packages/vitastor-$REL/vitastor-$VER
|
||||
mkdir mon/node_modules
|
||||
cd mon/node_modules
|
||||
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx
|
||||
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx
|
||||
|
||||
cd /root/vitastor/packages/vitastor-$REL
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER
|
||||
cd vitastor-$VER
|
||||
DEBEMAIL="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa
|
||||
rm -rf /root/vitastor/packages/vitastor-$REL/vitastor-*/
|
||||
|
||||
# Why does ubuntu rename debug packages to *.ddeb?
|
||||
cd /root/vitastor/packages/vitastor-$REL
|
||||
if ls *.ddeb >/dev/null; then
|
||||
perl -i -pe 's/\.ddeb/.deb/' *.buildinfo *.changes
|
||||
for i in *.ddeb; do
|
||||
mv $i ${i%%.ddeb}.deb
|
||||
done
|
||||
fi
|
31
debian/vitastor-buildenv.Dockerfile
vendored
Normal file
31
debian/vitastor-buildenv.Dockerfile
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
# Build environment for building Vitastor packages for Debian inside a container
|
||||
# cd ..
|
||||
# docker build --build-arg DISTRO=debian --build-arg REL=bullseye -f debian/vitastor-buildenv.Dockerfile -t vitastor-buildenv:bullseye .
|
||||
# docker run --rm -e REL=bullseye -v ./:/root/vitastor vitastor-buildenv:bullseye /root/vitastor/debian/vitastor-build.sh
|
||||
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
FROM $DISTRO:$REL
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN set -e -x; \
|
||||
if [ "$REL" = "buster" ]; then \
|
||||
perl -i -pe 's/deb.debian.org/archive.debian.org/' /etc/apt/sources.list; \
|
||||
apt-get update; \
|
||||
apt-get -y install wget; \
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
|
||||
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
|
||||
fi; \
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/*.sources || true; \
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install fio libgoogle-perftools-dev devscripts libjerasure-dev cmake \
|
||||
libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
|
||||
apt-get -y build-dep fio && \
|
||||
apt-get --download-only source fio
|
1
debian/vitastor-client.install
vendored
1
debian/vitastor-client.install
vendored
@@ -2,6 +2,7 @@ usr/bin/vita
|
||||
usr/bin/vitastor-cli
|
||||
usr/bin/vitastor-rm
|
||||
usr/bin/vitastor-nbd
|
||||
usr/bin/vitastor-ublk
|
||||
usr/bin/vitastor-nfs
|
||||
usr/bin/vitastor-kv
|
||||
usr/bin/vitastor-kv-stress
|
||||
|
65
debian/vitastor.Dockerfile
vendored
65
debian/vitastor.Dockerfile
vendored
@@ -1,65 +0,0 @@
|
||||
# Build Vitastor packages for Debian inside a container
|
||||
# cd ..; podman build --build-arg DISTRO=debian --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
|
||||
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
FROM $DISTRO:$REL
|
||||
ARG DISTRO=debian
|
||||
ARG REL=
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN set -e -x; \
|
||||
if [ "$REL" = "buster" ]; then \
|
||||
apt-get update; \
|
||||
apt-get -y install wget; \
|
||||
wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg; \
|
||||
echo "deb https://vitastor.io/debian $REL main" >> /etc/apt/sources.list; \
|
||||
fi; \
|
||||
grep '^deb ' /etc/apt/sources.list | perl -pe 's/^deb/deb-src/' >> /etc/apt/sources.list; \
|
||||
perl -i -pe 's/Types: deb$/Types: deb deb-src/' /etc/apt/sources.list.d/debian.sources || true; \
|
||||
echo 'APT::Install-Recommends false;' >> /etc/apt/apt.conf; \
|
||||
echo 'APT::Install-Suggests false;' >> /etc/apt/apt.conf
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get -y install fio liburing-dev libgoogle-perftools-dev devscripts libjerasure-dev cmake \
|
||||
libibverbs-dev librdmacm-dev libisal-dev libnl-3-dev libnl-genl-3-dev curl nodejs npm node-nan node-bindings && \
|
||||
apt-get -y build-dep fio && \
|
||||
apt-get --download-only source fio
|
||||
|
||||
ADD . /root/vitastor
|
||||
RUN set -e -x; \
|
||||
[ -e /usr/lib/x86_64-linux-gnu/pkgconfig/libisal.pc ] || cp /root/vitastor/debian/libisal.pc /usr/lib/x86_64-linux-gnu/pkgconfig; \
|
||||
mkdir -p /root/fio-build/; \
|
||||
cd /root/fio-build/; \
|
||||
rm -rf /root/fio-build/*; \
|
||||
dpkg-source -x /root/fio*.dsc; \
|
||||
mkdir -p /root/packages/vitastor-$REL; \
|
||||
rm -rf /root/packages/vitastor-$REL/*; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
FULLVER=$(head -n1 /root/vitastor/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
VER=${FULLVER%%-*}; \
|
||||
cp -r /root/vitastor vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
ln -s /root/fio-build/fio-*/ ./fio; \
|
||||
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
|
||||
ls /usr/include/linux/raw.h || cp ./debian/raw.h /usr/include/linux/raw.h; \
|
||||
sh copy-fio-includes.sh; \
|
||||
rm fio; \
|
||||
mkdir -p a b debian/patches; \
|
||||
mv fio-copy b/fio; \
|
||||
diff -NaurpbB a b > debian/patches/fio-headers.patch || true; \
|
||||
echo fio-headers.patch >> debian/patches/series; \
|
||||
rm -rf a b; \
|
||||
echo "dep:fio=$FIO" > debian/fio_version; \
|
||||
cd /root/packages/vitastor-$REL/vitastor-$VER; \
|
||||
mkdir mon/node_modules; \
|
||||
cd mon/node_modules; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/antietcd/archive/master.tar.gz | tar -zx; \
|
||||
curl -s https://git.yourcmc.ru/vitalif/tinyraft/archive/master.tar.gz | tar -zx; \
|
||||
cd /root/packages/vitastor-$REL; \
|
||||
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_$VER.orig.tar.xz vitastor-$VER; \
|
||||
cd vitastor-$VER; \
|
||||
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$FULLVER""$REL" "Rebuild for $REL"; \
|
||||
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \
|
||||
rm -rf /root/packages/vitastor-$REL/vitastor-*/
|
@@ -3,7 +3,7 @@
|
||||
FROM debian:bookworm
|
||||
|
||||
ADD etc/apt /etc/apt/
|
||||
RUN apt-get update && apt-get -y install vitastor udev systemd qemu-system-x86 qemu-system-common qemu-block-extra qemu-utils jq nfs-common && apt-get clean
|
||||
RUN apt-get update && apt-get -y install vitastor ibverbs-providers udev systemd qemu-system-x86 qemu-system-common qemu-block-extra qemu-utils jq nfs-common && apt-get clean
|
||||
ADD sleep.sh /usr/bin/
|
||||
ADD install.sh /usr/bin/
|
||||
ADD scripts /opt/scripts/
|
||||
|
@@ -1,4 +1,4 @@
|
||||
VITASTOR_VERSION ?= v2.1.0
|
||||
VITASTOR_VERSION ?= v2.4.0
|
||||
|
||||
all: build push
|
||||
|
||||
|
3
docker/etc/apt/preferences
Normal file
3
docker/etc/apt/preferences
Normal file
@@ -0,0 +1,3 @@
|
||||
Package: *
|
||||
Pin: release n=bookworm-backports
|
||||
Pin-Priority: 500
|
@@ -4,7 +4,7 @@
|
||||
#
|
||||
|
||||
# Desired Vitastor version
|
||||
VITASTOR_VERSION=v2.1.0
|
||||
VITASTOR_VERSION=v2.4.0
|
||||
|
||||
# Additional arguments for all containers
|
||||
# For example, you may want to specify a custom logging driver here
|
||||
|
@@ -24,6 +24,10 @@ affect their interaction with the cluster.
|
||||
- [nbd_max_devices](#nbd_max_devices)
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
- [hostname](#hostname)
|
||||
- [ublk_queue_depth](#ublk_queue_depth)
|
||||
- [ublk_max_io_size](#ublk_max_io_size)
|
||||
- [qemu_file_mirror_path](#qemu_file_mirror_path)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
@@ -215,3 +219,37 @@ just one OSD becomes 100 % full!
|
||||
However, unlike in Ceph, 100 % full Vitastor OSDs don't crash (in Ceph they're
|
||||
unable to start at all), so you'll be able to recover from "out of space" errors
|
||||
without destroying and recreating OSDs.
|
||||
|
||||
## hostname
|
||||
|
||||
- Type: string
|
||||
- Can be changed online: yes
|
||||
|
||||
Clients use host name to find their distance to OSDs when [localized reads](pool.en.md#local_reads)
|
||||
are enabled. By default, the standard [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html)
|
||||
function is used to determine host name, but you can also override it with this parameter.
|
||||
|
||||
## ublk_queue_depth
|
||||
|
||||
- Type: integer
|
||||
- Default: 256
|
||||
|
||||
Default queue depth for [Vitastor ublk servers](../usage/ublk.en.md).
|
||||
|
||||
## ublk_max_io_size
|
||||
|
||||
- Type: integer
|
||||
|
||||
Default maximum I/O size for Vitastor [ublk servers](../usage/ublk.en.md).
|
||||
The larger of 1 MB and the pool block size multiplied by the EC data chunk count is used if not specified.
|
||||
|
||||
## qemu_file_mirror_path
|
||||
|
||||
- Type: string
|
||||
|
||||
When set to an FS directory path (for example, `/mnt/vitastor/`), `qemu-img info` and similar
|
||||
QAPI commands return the name of the image inside this directory instead of normal
|
||||
`vitastor://?image=abc` URI as `filename`.
|
||||
|
||||
This allows you to then mount this path using [vitastor-nfs](../usage/nfs.en.md) and trick
|
||||
third-party systems like Veeam which rely on `filename` in the image info but don't support Vitastor.
|
||||
|
@@ -24,6 +24,10 @@
|
||||
- [nbd_max_devices](#nbd_max_devices)
|
||||
- [nbd_max_part](#nbd_max_part)
|
||||
- [osd_nearfull_ratio](#osd_nearfull_ratio)
|
||||
- [hostname](#hostname)
|
||||
- [ublk_queue_depth](#ublk_queue_depth)
|
||||
- [ublk_max_io_size](#ublk_max_io_size)
|
||||
- [qemu_file_mirror_path](#qemu_file_mirror_path)
|
||||
|
||||
## client_iothread_count
|
||||
|
||||
@@ -219,3 +223,40 @@ RDMA и хотите повысить пиковую производитель
|
||||
заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
|
||||
восстановить работу кластера после ошибок отсутствия свободного места
|
||||
без уничтожения и пересоздания OSD.
|
||||
|
||||
## hostname
|
||||
|
||||
- Тип: строка
|
||||
- Можно менять на лету: да
|
||||
|
||||
Клиенты используют имя хоста для определения расстояния до OSD, когда включены
|
||||
[локальные чтения](pool.ru.md#local_reads). По умолчанию для определения имени
|
||||
хоста используется стандартная функция [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html),
|
||||
но вы также можете задать имя хоста вручную данным параметром.
|
||||
|
||||
## ublk_queue_depth
|
||||
|
||||
- Тип: целое число
|
||||
- Значение по умолчанию: 256
|
||||
|
||||
Глубина очереди по умолчанию для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
|
||||
## ublk_max_io_size
|
||||
|
||||
- Тип: целое число
|
||||
|
||||
Максимальный размер запроса ввода-вывода для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
Если не задан, используется максимум из 1 МБ и размера блока пула, умноженного на число частей
|
||||
данных EC-пула.
|
||||
|
||||
## qemu_file_mirror_path
|
||||
|
||||
- Тип: строка
|
||||
|
||||
Если установить эту опцию равной пути к каталогу в ФС, команда `qemu-img info` и подобные
|
||||
команды QAPI будут возвращать в поле `filename` имя образа внутри заданного каталога вместо
|
||||
обычного адреса типа `vitastor://?image=abc`.
|
||||
|
||||
Это позволяет смонтировать этот путь с помощью [vitastor-nfs](../usage/nfs.ru.md) и обмануть
|
||||
сторонние системы типа Veeam, которые полагаются на поле `filename` в информации об образе QEMU,
|
||||
но не поддерживают Vitastor.
|
||||
|
@@ -74,7 +74,7 @@ Consider `use_rdmacm` for such networks.
|
||||
## use_rdmacm
|
||||
|
||||
- Type: boolean
|
||||
- Default: true
|
||||
- Default: false
|
||||
|
||||
Use an alternative implementation of RDMA through RDMA-CM (Connection
|
||||
Manager). Works with all RDMA networks: Infiniband, iWARP and
|
||||
|
@@ -74,7 +74,7 @@ RDMA-устройства, но они не имеют соединения с
|
||||
## use_rdmacm
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
- Значение по умолчанию: true
|
||||
- Значение по умолчанию: false
|
||||
|
||||
Использовать альтернативную реализацию RDMA на основе RDMA-CM (Connection
|
||||
Manager). Работает со всеми типами RDMA-сетей: Infiniband, iWARP и
|
||||
|
@@ -63,6 +63,8 @@ with an OSD restart or, for some of them, even without restarting by updating co
|
||||
- [discard_on_start](#discard_on_start)
|
||||
- [min_discard_size](#min_discard_size)
|
||||
- [allow_net_split](#allow_net_split)
|
||||
- [enable_pg_locks](#enable_pg_locks)
|
||||
- [pg_lock_retry_interval_ms](#pg_lock_retry_interval_ms)
|
||||
|
||||
## bind_address
|
||||
|
||||
@@ -647,3 +649,20 @@ The downside is that it increases the probability of writing data into just pg_m
|
||||
OSDs during failover which can lead to PGs becoming incomplete after additional outages.
|
||||
|
||||
The old behaviour in versions up to 2.0.0 was equivalent to enabling allow_net_split.
|
||||
|
||||
## enable_pg_locks
|
||||
|
||||
- Type: boolean
|
||||
|
||||
Vitastor 2.2.0 introduces a new layer of split-brain prevention in
|
||||
addition to etcd: PG locks. They prevent split-brain even in abnormal theoretical cases
|
||||
when etcd is extremely laggy. As a new feature, by default, PG locks are only enabled
|
||||
for pools where they're required - pools with [localized reads](pool.en.md#local_reads).
|
||||
Use this parameter to enable or disable this function for all pools.
|
||||
|
||||
## pg_lock_retry_interval_ms
|
||||
|
||||
- Type: milliseconds
|
||||
- Default: 100
|
||||
|
||||
Retry interval for failed PG lock attempts.
|
||||
|
@@ -64,6 +64,8 @@
|
||||
- [discard_on_start](#discard_on_start)
|
||||
- [min_discard_size](#min_discard_size)
|
||||
- [allow_net_split](#allow_net_split)
|
||||
- [enable_pg_locks](#enable_pg_locks)
|
||||
- [pg_lock_retry_interval_ms](#pg_lock_retry_interval_ms)
|
||||
|
||||
## bind_address
|
||||
|
||||
@@ -679,3 +681,21 @@ pg_minsize OSD во время переключений, что может по
|
||||
неполными (incomplete), если упадут ещё какие-то OSD.
|
||||
|
||||
Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
|
||||
|
||||
## enable_pg_locks
|
||||
|
||||
- Тип: булево (да/нет)
|
||||
|
||||
В Vitastor 2.2.0 появился новый слой защиты от сплитбрейна в дополнение к etcd -
|
||||
блокировки PG. Они гарантируют порядок даже в теоретических ненормальных случаях,
|
||||
когда etcd очень сильно тормозит. Так как функция новая, по умолчанию она включается
|
||||
только для пулов, в которых она необходима - а именно, в пулах с включёнными
|
||||
[локальными чтениями](pool.ru.md#local_reads). Ну а с помощью данного параметра
|
||||
можно включить блокировки PG для всех пулов.
|
||||
|
||||
## pg_lock_retry_interval_ms
|
||||
|
||||
- Тип: миллисекунды
|
||||
- Значение по умолчанию: 100
|
||||
|
||||
Интервал повтора неудачных попыток блокировки PG.
|
||||
|
@@ -34,6 +34,7 @@ Parameters:
|
||||
- [failure_domain](#failure_domain)
|
||||
- [level_placement](#level_placement)
|
||||
- [raw_placement](#raw_placement)
|
||||
- [local_reads](#local_reads)
|
||||
- [max_osd_combinations](#max_osd_combinations)
|
||||
- [block_size](#block_size)
|
||||
- [bitmap_granularity](#bitmap_granularity)
|
||||
@@ -133,8 +134,8 @@ Pool name.
|
||||
## scheme
|
||||
|
||||
- Type: string
|
||||
- Required
|
||||
- One of: "replicated", "xor", "ec" or "jerasure"
|
||||
- Required
|
||||
|
||||
Redundancy scheme used for data in this pool. "jerasure" is an alias for "ec",
|
||||
both use Reed-Solomon-Vandermonde codes based on ISA-L or jerasure libraries.
|
||||
@@ -289,6 +290,30 @@ Examples:
|
||||
- EC 4+2 in 3 DC: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
|
||||
- 1 replica in fixed DC + 2 in random DCs: `dc?=meow, dc!=1, dc!=(1,2)`
|
||||
|
||||
## local_reads
|
||||
|
||||
- Type: string
|
||||
- One of: "primary", "nearest" or "random"
|
||||
- Default: primary
|
||||
|
||||
By default, Vitastor serves all read and write requests from the primary OSD of each PG.
|
||||
But it can also serve read requests for replicated pools from secondary OSDs in clean PGs
|
||||
(active or active+left_on_dead) which may be useful if you have OSDs with different network
|
||||
latency to the client - for example, if you have a cross-datacenter setup.
|
||||
|
||||
If you set this parameter to "nearest", clients will try to read from the nearest OSD
|
||||
in the [Placement Tree](#placement-tree), i.e. from an OSD from the same host or datacenter.
|
||||
Distance to different OSDs will be calculated based on client hostname, determined
|
||||
automatically or set manually in the [hostname](client.en.md#hostname) parameter.
|
||||
|
||||
If you set this parameter to "random", clients will try to distribute read requests over
|
||||
all available secondary OSDs. This mode is mainly useful for tests and is probably not
|
||||
really required in production setups.
|
||||
|
||||
[PG locks](osd.en.md#enable_pg_locks) are required for local reads to function. However,
|
||||
PG locks are enabled automatically by default for pools with enabled local reads, so you
|
||||
don't have to enable them explicitly.
|
||||
|
||||
## max_osd_combinations
|
||||
|
||||
- Type: integer
|
||||
@@ -324,7 +349,8 @@ Read more about this parameter in [Cluster-Wide Disk Layout Parameters](layout-c
|
||||
|
||||
## immediate_commit
|
||||
|
||||
- Type: string, one of "all", "small" and "none"
|
||||
- Type: string
|
||||
- One of: "all", "small" or "none"
|
||||
- Default: none
|
||||
|
||||
Immediate commit setting for this pool. The value from /vitastor/config/global
|
||||
|
@@ -33,6 +33,7 @@
|
||||
- [failure_domain](#failure_domain)
|
||||
- [level_placement](#level_placement)
|
||||
- [raw_placement](#raw_placement)
|
||||
- [local_reads](#local_reads)
|
||||
- [max_osd_combinations](#max_osd_combinations)
|
||||
- [block_size](#block_size)
|
||||
- [bitmap_granularity](#bitmap_granularity)
|
||||
@@ -133,8 +134,8 @@ OSD игнорируется и OSD не удаляется из распред
|
||||
## scheme
|
||||
|
||||
- Тип: строка
|
||||
- Обязательный
|
||||
- Возможные значения: "replicated", "xor", "ec" или "jerasure"
|
||||
- Обязательный
|
||||
|
||||
Схема избыточности, используемая в данном пуле. "jerasure" - синоним для "ec",
|
||||
в обеих схемах используются коды Рида-Соломона-Вандермонда, реализованные на
|
||||
@@ -287,6 +288,30 @@ meow недоступен".
|
||||
- EC 4+2 в 3 датацентрах: `any, dc=1 host!=1, dc!=1, dc=3 host!=3, dc!=(1,3), dc=5 host!=5`
|
||||
- 1 копия в фиксированном ДЦ + 2 в других ДЦ: `dc?=meow, dc!=1, dc!=(1,2)`
|
||||
|
||||
## local_reads
|
||||
|
||||
- Тип: строка
|
||||
- Возможные значения: "primary", "nearest" или "random"
|
||||
- По умолчанию: primary
|
||||
|
||||
По умолчанию Vitastor обслуживает все запросы чтения и записи с первичного OSD каждой PG.
|
||||
Однако, в чистых PG (active или active+left_on_dead) реплицированных пулов также есть
|
||||
возможность обслуживать запросы чтения с вторичных OSD, что может быть полезно, если
|
||||
у вас сильно отличается время сетевого обращения от клиента к разным OSD - например,
|
||||
если у вас несколько дата-центров.
|
||||
|
||||
Если данный параметр установлен в значение "nearest", клиенты будут стараться читать с
|
||||
ближайших по [Дереву размещения](#дерево-размещения) OSD, то есть, с OSD с того же хоста
|
||||
или датацентра. Расстояние до разных OSD будет рассчитываться с помощью имени хоста клиента,
|
||||
определяемого автоматически или заданного вручную параметром [hostname](client.ru.md#hostname).
|
||||
|
||||
Если данный параметр установлен в значение "random", клиенты будут стараться распределять
|
||||
запросы чтения по всем доступным вторичным OSD. Этот режим в основном полезен для тестов,
|
||||
но, скорее всего, редко нужен в реальных инсталляциях.
|
||||
|
||||
Для работы локальных чтений требуются [блокировки PG](osd.ru.md#enable_pg_locks). Включать
|
||||
их явно не нужно - они включаются автоматически для пулов с включёнными локальными чтениями.
|
||||
|
||||
## max_osd_combinations
|
||||
|
||||
- Тип: целое число
|
||||
@@ -324,7 +349,8 @@ meow недоступен".
|
||||
|
||||
## immediate_commit
|
||||
|
||||
- Тип: строка "all", "small" или "none"
|
||||
- Тип: строка
|
||||
- Возможные значения: "all", "small" или "none"
|
||||
- По умолчанию: none
|
||||
|
||||
Настройка мгновенного коммита для данного пула. Если не задана, используется
|
||||
|
@@ -271,3 +271,48 @@
|
||||
заполненные на 100% OSD вообще не могут стартовать), так что вы сможете
|
||||
восстановить работу кластера после ошибок отсутствия свободного места
|
||||
без уничтожения и пересоздания OSD.
|
||||
- name: hostname
|
||||
type: string
|
||||
online: true
|
||||
info: |
|
||||
Clients use host name to find their distance to OSDs when [localized reads](pool.en.md#local_reads)
|
||||
are enabled. By default, the standard [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html)
|
||||
function is used to determine host name, but you can also override it with this parameter.
|
||||
info_ru: |
|
||||
Клиенты используют имя хоста для определения расстояния до OSD, когда включены
|
||||
[локальные чтения](pool.ru.md#local_reads). По умолчанию для определения имени
|
||||
хоста используется стандартная функция [gethostname](https://man7.org/linux/man-pages/man2/gethostname.2.html),
|
||||
но вы также можете задать имя хоста вручную данным параметром.
|
||||
- name: ublk_queue_depth
|
||||
type: int
|
||||
default: 256
|
||||
online: false
|
||||
info: Default queue depth for [Vitastor ublk servers](../usage/ublk.en.md).
|
||||
info_ru: Глубина очереди по умолчанию для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
- name: ublk_max_io_size
|
||||
type: int
|
||||
online: false
|
||||
info: |
|
||||
Default maximum I/O size for Vitastor [ublk servers](../usage/ublk.en.md).
|
||||
The larger of 1 MB and the pool block size multiplied by the EC data chunk count is used if not specified.
|
||||
info_ru: |
|
||||
Максимальный размер запроса ввода-вывода для [ublk-серверов Vitastor](../usage/ublk.ru.md).
|
||||
Если не задан, используется максимум из 1 МБ и размера блока пула, умноженного на число частей
|
||||
данных EC-пула.
|
||||
- name: qemu_file_mirror_path
|
||||
type: string
|
||||
info: |
|
||||
When set to an FS directory path (for example, `/mnt/vitastor/`), `qemu-img info` and similar
|
||||
QAPI commands return the name of the image inside this directory instead of normal
|
||||
`vitastor://?image=abc` URI as `filename`.
|
||||
|
||||
This allows you to then mount this path using [vitastor-nfs](../usage/nfs.en.md) and trick
|
||||
third-party systems like Veeam which rely on `filename` in the image info but don't support Vitastor.
|
||||
info_ru: |
|
||||
Если установить эту опцию равной пути к каталогу в ФС, команда `qemu-img info` и подобные
|
||||
команды QAPI будут возвращать в поле `filename` имя образа внутри заданного каталога вместо
|
||||
обычного адреса типа `vitastor://?image=abc`.
|
||||
|
||||
Это позволяет смонтировать этот путь с помощью [vitastor-nfs](../usage/nfs.ru.md) и обмануть
|
||||
сторонние системы типа Veeam, которые полагаются на поле `filename` в информации об образе QEMU,
|
||||
но не поддерживают Vitastor.
|
||||
|
@@ -24,6 +24,8 @@
|
||||
|
||||
{{../../installation/kubernetes.en.md}}
|
||||
|
||||
{{../../installation/s3.en.md}}
|
||||
|
||||
{{../../installation/source.en.md}}
|
||||
|
||||
{{../../config.en.md|indent=1}}
|
||||
@@ -54,6 +56,8 @@
|
||||
|
||||
{{../../usage/fio.en.md}}
|
||||
|
||||
{{../../usage/ublk.en.md}}
|
||||
|
||||
{{../../usage/nbd.en.md}}
|
||||
|
||||
{{../../usage/qemu.en.md}}
|
||||
|
@@ -26,6 +26,8 @@
|
||||
|
||||
{{../../installation/source.ru.md}}
|
||||
|
||||
{{../../installation/s3.ru.md}}
|
||||
|
||||
{{../../config.ru.md|indent=1}}
|
||||
|
||||
{{../../config/common.ru.md|indent=2}}
|
||||
@@ -54,6 +56,8 @@
|
||||
|
||||
{{../../usage/fio.ru.md}}
|
||||
|
||||
{{../../usage/ublk.ru.md}}
|
||||
|
||||
{{../../usage/nbd.ru.md}}
|
||||
|
||||
{{../../usage/qemu.ru.md}}
|
||||
|
@@ -51,7 +51,7 @@
|
||||
Рассмотрите включение `use_rdmacm` для таких сетей.
|
||||
- name: use_rdmacm
|
||||
type: bool
|
||||
default: true
|
||||
default: false
|
||||
info: |
|
||||
Use an alternative implementation of RDMA through RDMA-CM (Connection
|
||||
Manager). Works with all RDMA networks: Infiniband, iWARP and
|
||||
|
@@ -781,3 +781,23 @@
|
||||
неполными (incomplete), если упадут ещё какие-то OSD.
|
||||
|
||||
Старое поведение в версиях до 2.0.0 было идентично включённому allow_net_split.
|
||||
- name: enable_pg_locks
|
||||
type: bool
|
||||
info: |
|
||||
Vitastor 2.2.0 introduces a new layer of split-brain prevention in
|
||||
addition to etcd: PG locks. They prevent split-brain even in abnormal theoretical cases
|
||||
when etcd is extremely laggy. As a new feature, by default, PG locks are only enabled
|
||||
for pools where they're required - pools with [localized reads](pool.en.md#local_reads).
|
||||
Use this parameter to enable or disable this function for all pools.
|
||||
info_ru: |
|
||||
В Vitastor 2.2.0 появился новый слой защиты от сплитбрейна в дополнение к etcd -
|
||||
блокировки PG. Они гарантируют порядок даже в теоретических ненормальных случаях,
|
||||
когда etcd очень сильно тормозит. Так как функция новая, по умолчанию она включается
|
||||
только для пулов, в которых она необходима - а именно, в пулах с включёнными
|
||||
[локальными чтениями](pool.ru.md#local_reads). Ну а с помощью данного параметра
|
||||
можно включить блокировки PG для всех пулов.
|
||||
- name: pg_lock_retry_interval_ms
|
||||
type: ms
|
||||
default: 100
|
||||
info: Retry interval for failed PG lock attempts.
|
||||
info_ru: Интервал повтора неудачных попыток блокировки PG.
|
||||
|
@@ -26,9 +26,9 @@ at Vitastor Kubernetes operator: https://github.com/Antilles7227/vitastor-operat
|
||||
The instruction is very simple.
|
||||
|
||||
1. Download a Docker image of the desired version: \
|
||||
`docker pull vitastor:2.1.0`
|
||||
`docker pull vitalif/vitastor:v2.4.0`
|
||||
2. Install scripts to the host system: \
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:2.1.0 install.sh`
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitalif/vitastor:v2.4.0 install.sh`
|
||||
3. Reload udev rules: \
|
||||
`udevadm control --reload-rules`
|
||||
|
||||
|
@@ -25,9 +25,9 @@ Vitastor можно установить в Docker/Podman. При этом etcd,
|
||||
Инструкция по установке максимально простая.
|
||||
|
||||
1. Скачайте Docker-образ желаемой версии: \
|
||||
`docker pull vitastor:2.1.0`
|
||||
`docker pull vitalif/vitastor:v2.4.0`
|
||||
2. Установите скрипты в хост-систему командой: \
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitastor:2.1.0 install.sh`
|
||||
`docker run --rm -it -v /etc:/host-etc -v /usr/bin:/host-bin vitalif/vitastor:v2.4.0 install.sh`
|
||||
3. Перезагрузите правила udev: \
|
||||
`udevadm control --reload-rules`
|
||||
|
||||
|
@@ -11,12 +11,20 @@
|
||||
- Trust Vitastor package signing key:
|
||||
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||
- Add Vitastor package repository to your /etc/apt/sources.list:
|
||||
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 13 (Trixie/Sid): `deb https://vitastor.io/debian trixie main`
|
||||
- Debian 12 (Bookworm): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
|
||||
- Ubuntu 24.04 (Noble): `deb https://vitastor.io/debian noble main`
|
||||
- Add `-oldstable` to bookworm/bullseye/buster in this line to install the last
|
||||
stable version from 0.9.x branch instead of 1.x
|
||||
- To always prefer vitastor-patched QEMU and Libvirt versions, add the following to `/etc/apt/preferences`:
|
||||
```
|
||||
Package: *
|
||||
Pin: origin "vitastor.io"
|
||||
Pin-Priority: 501
|
||||
```
|
||||
- Install packages: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
@@ -42,7 +50,6 @@
|
||||
recommended because io_uring is a relatively new technology and there is
|
||||
at least one bug which reproduces with io_uring and HP SmartArray
|
||||
controllers in 5.4
|
||||
- liburing 0.4 or newer
|
||||
- lp_solve
|
||||
- etcd 3.4.15 or newer. Earlier versions won't work because of various bugs,
|
||||
for example [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
||||
|
@@ -11,12 +11,20 @@
|
||||
- Добавьте ключ репозитория Vitastor:
|
||||
`wget https://vitastor.io/debian/pubkey.gpg -O /etc/apt/trusted.gpg.d/vitastor.gpg`
|
||||
- Добавьте репозиторий Vitastor в /etc/apt/sources.list:
|
||||
- Debian 12 (Bookworm/Sid): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 13 (Trixie/Sid): `deb https://vitastor.io/debian trixie main`
|
||||
- Debian 12 (Bookworm): `deb https://vitastor.io/debian bookworm main`
|
||||
- Debian 11 (Bullseye): `deb https://vitastor.io/debian bullseye main`
|
||||
- Debian 10 (Buster): `deb https://vitastor.io/debian buster main`
|
||||
- Ubuntu 22.04 (Jammy): `deb https://vitastor.io/debian jammy main`
|
||||
- Ubuntu 24.04 (Noble): `deb https://vitastor.io/debian noble main`
|
||||
- Добавьте `-oldstable` к слову bookworm/bullseye/buster в этой строке, чтобы
|
||||
установить последнюю стабильную версию из ветки 0.9.x вместо 1.x
|
||||
- Чтобы всегда предпочитались версии пакетов QEMU и Libvirt с патчами Vitastor, добавьте в `/etc/apt/preferences`:
|
||||
```
|
||||
Package: *
|
||||
Pin: origin "vitastor.io"
|
||||
Pin-Priority: 501
|
||||
```
|
||||
- Установите пакеты: `apt update; apt install vitastor lp-solve etcd linux-image-amd64 qemu-system-x86`
|
||||
|
||||
## CentOS
|
||||
@@ -41,7 +49,6 @@
|
||||
- Ядро Linux 5.4 или новее, для поддержки io_uring. Рекомендуется даже 5.8,
|
||||
так как io_uring - относительно новый интерфейс и в версиях до 5.8 встречались
|
||||
некоторые баги, например, зависание с io_uring и контроллером HP SmartArray
|
||||
- liburing 0.4 или новее
|
||||
- lp_solve
|
||||
- etcd 3.4.15 или новее. Более старые версии не будут работать из-за разных багов,
|
||||
например, [#12402](https://github.com/etcd-io/etcd/pull/12402).
|
||||
|
@@ -6,10 +6,10 @@
|
||||
|
||||
# Proxmox VE
|
||||
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.1 are supported):
|
||||
To enable Vitastor support in Proxmox Virtual Environment (6.4-8.x are supported):
|
||||
|
||||
- Add the corresponding Vitastor Debian repository into sources.list on Proxmox hosts:
|
||||
bookworm for 8.1, pve8.0 for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
||||
trixie for 9.0+, bookworm for 8.1+, pve8.0 for 8.0, bullseye for 7.4, pve7.3 for 7.3, pve7.2 for 7.2, pve7.1 for 7.1, buster for 6.4
|
||||
- Install vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* or see note) packages from Vitastor repository
|
||||
- Define storage in `/etc/pve/storage.cfg` (see below)
|
||||
- Block network access from VMs to Vitastor network (to OSDs and etcd),
|
||||
|
@@ -6,10 +6,10 @@
|
||||
|
||||
# Proxmox VE
|
||||
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.1):
|
||||
Чтобы подключить Vitastor к Proxmox Virtual Environment (поддерживаются версии 6.4-8.x):
|
||||
|
||||
- Добавьте соответствующий Debian-репозиторий Vitastor в sources.list на хостах Proxmox:
|
||||
bookworm для 8.1, pve8.0 для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
||||
trixie для 9.0+, bookworm для 8.1+, pve8.0 для 8.0, bullseye для 7.4, pve7.3 для 7.3, pve7.2 для 7.2, pve7.1 для 7.1, buster для 6.4
|
||||
- Установите пакеты vitastor-client, pve-qemu-kvm, pve-storage-vitastor (* или см. сноску) из репозитория Vitastor
|
||||
- Определите тип хранилища в `/etc/pve/storage.cfg` (см. ниже)
|
||||
- Обязательно заблокируйте доступ от виртуальных машин к сети Vitastor (OSD и etcd), т.к. Vitastor (пока) не поддерживает аутентификацию
|
||||
|
@@ -15,7 +15,7 @@
|
||||
- gcc and g++ 8 or newer, clang 10 or newer, or other compiler with C++11 plus
|
||||
designated initializers support from C++20
|
||||
- CMake
|
||||
- liburing, jerasure headers and libraries
|
||||
- jerasure headers and libraries
|
||||
- ISA-L, libibverbs and librdmacm headers and libraries (optional)
|
||||
- tcmalloc (google-perftools-dev)
|
||||
|
||||
|
@@ -15,7 +15,7 @@
|
||||
- gcc и g++ >= 8, либо clang >= 10, либо другой компилятор с поддержкой C++11 плюс
|
||||
назначенных инициализаторов (designated initializers) из C++20
|
||||
- CMake
|
||||
- Заголовки и библиотеки liburing, jerasure
|
||||
- Заголовки и библиотеки jerasure
|
||||
- Опционально - заголовки и библиотеки ISA-L, libibverbs, librdmacm
|
||||
- tcmalloc (google-perftools-dev)
|
||||
|
||||
|
@@ -125,6 +125,13 @@ All other client-side components are based on the client library:
|
||||
all current read/write operations to it fail with EPIPE error and are retried by clients.
|
||||
- After completing all secondary read/write requests, primary OSD sends the response to
|
||||
the client.
|
||||
- When [localized reads](../config/pool.en.md#local_reads) are enabled for a PG in a
|
||||
replicated pool, and the PG is in an active and clean state (active or
|
||||
active+left_on_dead), the client can send the request to one of the secondary OSDs instead
|
||||
of the primary. Secondary OSD checks the [PG lock](../config/osd.en.md#enable_pg_locks)
|
||||
and handles the request locally without communicating with the primary. PG lock is required
|
||||
for the secondary OSD to know for sure that the PG is in clean state and not switching
|
||||
primary at the moment.
|
||||
|
||||
### Nuances of request handling
|
||||
|
||||
|
@@ -125,6 +125,12 @@
|
||||
и если любое из этих соединений отключается, PG перезапускается, а все текущие запросы чтения
|
||||
и записи в неё завершаются с ошибкой EPIPE, после чего повторяются клиентами.
|
||||
- После завершения всех вторичных операций чтения/записи первичный OSD отправляет ответ клиенту.
|
||||
- Если в реплицированном пуле включены [локализованные чтения](../config/pool.ru.md#local_reads),
|
||||
а PG находится в чистом активном состоянии (active или active+left_on_dead), клиент может
|
||||
послать запрос к одному из вторичных OSD вместо первичного. Вторичный OSD проверяет
|
||||
[блокировку PG](../config/osd.ru.md#enable_pg_locks) и обрабатывает запрос локально, не
|
||||
обращаясь к первичному. Блокировка PG здесь нужна, чтобы вторичный OSD мог точно знать,
|
||||
что PG находится в чистом состоянии и не переключается на другой первичный OSD.
|
||||
|
||||
### Особенности обработки запросов
|
||||
|
||||
|
@@ -10,8 +10,17 @@ Copyright (c) Vitaliy Filippov (vitalif [at] yourcmc.ru), 2019+
|
||||
|
||||
Join Vitastor Telegram Chat: https://t.me/vitastor
|
||||
|
||||
All server-side code (OSD, Monitor and so on) is licensed under the terms of
|
||||
Vitastor Network Public License 1.1 (VNPL 1.1), a copyleft license based on
|
||||
License: VNPL 1.1 for server-side code and dual VNPL 1.1 + GPL 2.0+ for client tools.
|
||||
|
||||
Server-side code is licensed only under the terms of VNPL.
|
||||
|
||||
Client libraries (cluster_client and so on) are dual-licensed under the same
|
||||
VNPL 1.1 and also GNU GPL 2.0 or later to allow for compatibility with GPLed
|
||||
software like QEMU and fio.
|
||||
|
||||
## VNPL
|
||||
|
||||
Vitastor Network Public License 1.1 (VNPL 1.1) is a copyleft license based on
|
||||
GNU GPLv3.0 with the additional "Network Interaction" clause which requires
|
||||
opensourcing all programs directly or indirectly interacting with Vitastor
|
||||
through a computer network and expressly designed to be used in conjunction
|
||||
@@ -20,18 +29,83 @@ the terms of the same license, but also under the terms of any GPL-Compatible
|
||||
Free Software License, as listed by the Free Software Foundation.
|
||||
This is a stricter copyleft license than the Affero GPL.
|
||||
|
||||
Please note that VNPL doesn't require you to open the code of proprietary
|
||||
software running inside a VM if it's not specially designed to be used with
|
||||
Vitastor.
|
||||
The idea of VNPL is, in addition to modules linked to Vitastor code in a single
|
||||
binary file, to extend copyleft action to micro-service modules only interacting
|
||||
with it over the network.
|
||||
|
||||
Basically, you can't use the software in a proprietary environment to provide
|
||||
its functionality to users without opensourcing all intermediary components
|
||||
standing between the user and Vitastor or purchasing a commercial license
|
||||
from the author 😀.
|
||||
|
||||
Client libraries (cluster_client and so on) are dual-licensed under the same
|
||||
VNPL 1.1 and also GNU GPL 2.0 or later to allow for compatibility with GPLed
|
||||
software like QEMU and fio.
|
||||
At the same time, VNPL doesn't impose any restrictions on software *not specially designed*
|
||||
to be used with Vitastor, for example, on Windows running inside a VM with a Vitastor disk.
|
||||
|
||||
You can find the full text of VNPL-1.1 in the file [VNPL-1.1.txt](../../VNPL-1.1.txt).
|
||||
GPL 2.0 is also included in this repository as [GPL-2.0.txt](../../GPL-2.0.txt).
|
||||
## Explanation
|
||||
|
||||
Network copyleft is governed by the clause **13. Remote Network Interaction** of VNPL.
|
||||
|
||||
A program is considered to be a "Proxy Program" if it meets both conditions:
|
||||
- It is specially designed to be used with Vitastor. Basically, it means that the program
|
||||
has any functionality specific to Vitastor and thus "knows" that it works with Vitastor,
|
||||
not with something random.
|
||||
- It interacts with Vitastor directly or indirectly through any programming interface,
|
||||
including API, CLI, network or any wrapper (also considered a Proxy Program itself).
|
||||
|
||||
If, in addition to that:
|
||||
- You give any user an opportunity to interact with Vitastor directly or indirectly through
|
||||
any computer interface including the network or any number of wrappers (Proxy Programs).
|
||||
|
||||
Then VNPL requires you to publish the code of all above Proxy Programs to all above users
|
||||
under the terms of any GPL-compatible license - that is, GPL, LGPL, MIT/BSD or Apache 2,
|
||||
because "GPL compatibility" is treated as an ability to legally include licensed code in
|
||||
a GPL application.
|
||||
|
||||
So, if you have a "Proxy Program", but it's not open to the user who directly or indirectly
|
||||
interacts with Vitastor - you are forbidden to use Vitastor under the terms of VNPL and you
|
||||
need a commercial license which doesn't contain open-source requirements.
|
||||
|
||||
## Examples
|
||||
|
||||
- Vitastor Kubernetes CSI driver which creates PersistentVolumes by calling `vitastor-cli create`.
|
||||
- Yes, it interacts with Vitastor through vitastor-cli.
|
||||
- Yes, it is designed specially for use with Vitastor (it makes no sense otherwise).
|
||||
- So, CSI driver **definitely IS** a Proxy Program and must be published under the terms of
|
||||
a free software license.
|
||||
- Windows, installed in a VM with the system disk on Vitastor storage.
|
||||
- Yes, it interacts with Vitastor indirectly - it reads and writes data through the block
|
||||
device interface, emulated by QEMU.
|
||||
- No, it definitely isn't designed specially for use with Vitastor - Windows was created long
|
||||
ago before Vitastor and doesn't know anything about it.
|
||||
- So, Windows **definitely IS NOT** a Proxy Program and VNPL doesn't require opening its code.
|
||||
- Cloud control panel which makes requests to Vitastor Kubernetes CSI driver.
|
||||
- Yes, it interacts with Vitastor indirectly through the CSI driver, which is a Proxy Program.
|
||||
- May or may not be designed specially for use with Vitastor. How to determine exactly?
|
||||
Imagine that Vitastor is replaced with any other storage (for example, with a proprietary one).
|
||||
Do control panel functions change in any way? If they do (for example, if snapshots stop working),
|
||||
then the panel contains specific functionality and thus is designed specially for use with Vitastor.
|
||||
Otherwise, the panel is universal and isn't designed specially for Vitastor.
|
||||
- So, whether you are required to open-source the panel also **depends** on whether it
|
||||
contains specific functionality or not.
|
||||
|
||||
## Why?
|
||||
|
||||
Because I believe in the spirit of copyleft (Linux wouldn't have become so popular without GPL!)
|
||||
and, at the same time, I want to have a way to monetize the product.
|
||||
|
||||
Existing licenses, including AGPL, are useless for this purpose with an SDS - SDS is a very deeply
|
||||
internal software which is almost definitely invisible to the user and thus AGPL doesn't
|
||||
require anyone to open the code even if they make a proprietary fork.
|
||||
|
||||
And, in fact, the current situation in the world where GPL is thought to only restrict direct
|
||||
linking of programs into a single executable file, isn't quite correct. Nowadays, programs
|
||||
are more often linked with network API calls, not with /usr/bin/ld, and a software product
|
||||
may consist of dozens of microservices interacting with each other over the network.
|
||||
|
||||
That's why we need VNPL to keep the license sufficiently copyleft.
|
||||
|
||||
## License Texts
|
||||
|
||||
- VNPL 1.1 in English: [VNPL-1.1.txt](../../VNPL-1.1.txt)
|
||||
- VNPL 1.1 in Russian: [VNPL-1.1-RU.txt](../../VNPL-1.1-RU.txt)
|
||||
- GPL 2.0: [GPL-2.0.txt](../../GPL-2.0.txt)
|
||||
|
@@ -12,6 +12,14 @@
|
||||
|
||||
Лицензия: VNPL 1.1 на серверный код и двойная VNPL 1.1 + GPL 2.0+ на клиентский.
|
||||
|
||||
Серверные компоненты распространяются только на условиях VNPL.
|
||||
|
||||
Клиентские библиотеки распространяются на условиях двойной лицензии VNPL 1.1
|
||||
и также на условиях GNU GPL 2.0 или более поздней версии. Так сделано в целях
|
||||
совместимости с таким ПО, как QEMU и fio.
|
||||
|
||||
## VNPL
|
||||
|
||||
VNPL - "сетевой копилефт", собственная свободная копилефт-лицензия
|
||||
Vitastor Network Public License 1.1, основанная на GNU GPL 3.0 с дополнительным
|
||||
условием "Сетевого взаимодействия", требующим распространять все программы,
|
||||
@@ -29,9 +37,70 @@ Vitastor Network Public License 1.1, основанная на GNU GPL 3.0 с д
|
||||
На Windows и любое другое ПО, не разработанное *специально* для использования
|
||||
вместе с Vitastor, никакие ограничения не накладываются.
|
||||
|
||||
Клиентские библиотеки распространяются на условиях двойной лицензии VNPL 1.0
|
||||
и также на условиях GNU GPL 2.0 или более поздней версии. Так сделано в целях
|
||||
совместимости с таким ПО, как QEMU и fio.
|
||||
## Пояснение
|
||||
|
||||
Вы можете найти полный текст VNPL 1.1 на английском языке в файле [VNPL-1.1.txt](../../VNPL-1.1.txt),
|
||||
VNPL 1.1 на русском языке в файле [VNPL-1.1-RU.txt](../../VNPL-1.1-RU.txt), а GPL 2.0 в файле [GPL-2.0.txt](../../GPL-2.0.txt).
|
||||
Сетевой копилефт регулируется пунктом лицензии **13. Удалённое сетевое взаимодействие**.
|
||||
|
||||
Программа считается "прокси-программой", если верны оба условия:
|
||||
- Она создана специально для работы вместе с Vitastor. По сути это означает, что программа
|
||||
должна иметь специфичный для Vitastor функционал, то есть, "знать", что она взаимодействует
|
||||
именно с Vitastor.
|
||||
- Она прямо или косвенно взаимодействует с Vitastor через абсолютно любой программный
|
||||
интерфейс, включая любые способы вызова: API, CLI, сеть или через какую-то обёртку (в
|
||||
свою очередь тоже являющуюся прокси-программой).
|
||||
|
||||
Если в дополнение к этому также:
|
||||
- Вы предоставляете любому пользователю возможность взаимодействовать с Vitastor по сети,
|
||||
опять-таки, через любой интерфейс или любую серию "обёрток" (прокси-программ)
|
||||
|
||||
То, согласно VNPL, вы должны открыть код "прокси-программ" **таким пользователям** на условиях
|
||||
любой GPL-совместимой лицензии - то есть, GPL, LGPL, MIT/BSD или Apache 2 - "совместимость с GPL"
|
||||
понимается как возможность включать лицензируемый код в GPL-приложение.
|
||||
|
||||
Соответственно, если у вас есть "прокси-программа", но её код не открыт пользователю,
|
||||
который прямо или косвенно взаимодействует с Vitastor - вам запрещено использовать Vitastor
|
||||
на условиях VNPL и вам нужна коммерческая лицензия, не содержащая требований об открытии кода.
|
||||
|
||||
## Примеры
|
||||
|
||||
- Kubernetes CSI-драйвер Vitastor, создающий PersistentVolume с помощью вызова `vitastor-cli create`.
|
||||
- Да, взаимодействует с Vitastor через vitastor-cli.
|
||||
- Да, создавался специально для работы с Vitastor (иначе в чём же ещё его смысл).
|
||||
- Значит, CSI-драйвер **точно считается** "прокси-программой" и должен быть открыт под свободной
|
||||
лицензией.
|
||||
- Windows, установленный в виртуальную машину на диске Vitastor.
|
||||
- Да, взаимодействует с Vitastor "прямо или косвенно" - пишет и читает данные через интерфейс
|
||||
блочного устройства, эмулируемый QEMU.
|
||||
- Нет, точно не создан *специально для работы с Vitastor* - когда его создавали, никакого
|
||||
Vitastor ещё и в помине не было.
|
||||
- Значит, Windows **точно не считается** "прокси-программой" и на него требования VNPL не распространяются.
|
||||
- Панель управления облака, делающая запросы к Kubernetes CSI-драйверу Vitastor.
|
||||
- Да, взаимодействует с Vitastor косвенно через CSI-драйвер, являющийся "прокси-программой".
|
||||
- Сходу не известно, создавалась ли конкретно для работы с Vitastor. Как понять, да или нет?
|
||||
Представьте, что Vitastor заменён на любую другую систему хранения (например, на проприетарную).
|
||||
Работа панели управления изменится? Если да (например, перестанут работать снапшоты) - значит,
|
||||
панель содержит специфичный функционал и "создана специально для работы с Vitastor".
|
||||
Если нет - значит, специфичного функционала панель не содержит и в принципе она универсальна.
|
||||
- Нужно ли открывать панель - **зависит** от того, содержит она специфичный функционал или нет.
|
||||
|
||||
## Почему так?
|
||||
|
||||
Потому что я одновременно верю в дух копилефт-лицензий (Linux не стал бы так популярен,
|
||||
если бы не GPL!) и хочу иметь возможность монетизации продукта.
|
||||
|
||||
При этом использовать даже AGPL для программной СХД бессмысленно - это глубоко внутреннее
|
||||
ПО, которое пользователь почти наверняка не увидит вообще, поэтому и открывать код никому
|
||||
никогда не придётся, даже при создании производного продукта.
|
||||
|
||||
Да и в целом сложившаяся в мире ситуация, при которой действие GPL ограничивается только
|
||||
прямым связыванием в один исполняемый файл, не очень корректна. В настоящее время программы
|
||||
гораздо чаще интегрируют сетевыми вызовами, а не с помощью /usr/bin/ld, и общий программный
|
||||
продукт может состоять из нескольких десятков микросервисов, взаимодействующих по сети.
|
||||
|
||||
Поэтому для сохранения достаточной "копилефтности" и придумана VNPL.
|
||||
|
||||
## Тексты лицензий
|
||||
|
||||
- VNPL 1.1 на английском языке: [VNPL-1.1.txt](../../VNPL-1.1.txt)
|
||||
- VNPL 1.1 на русском языке: [VNPL-1.1-RU.txt](../../VNPL-1.1-RU.txt)
|
||||
- GPL 2.0: [GPL-2.0.txt](../../GPL-2.0.txt)
|
||||
|
@@ -25,6 +25,7 @@
|
||||
- Recovery of degraded blocks
|
||||
- Rebalancing (data movement between OSDs)
|
||||
- [Lazy fsync support](../config/layout-cluster.en.md#immediate_commit)
|
||||
- [Localized read support](../config/pool.en.md#local_reads) for cross-datacenter setup optimization
|
||||
- Per-OSD and per-image I/O and space usage statistics in etcd
|
||||
- Snapshots and copy-on-write image clones
|
||||
- [Write throttling to smooth random write workloads in SSD+HDD configurations](../config/osd.en.md#throttle_small_writes)
|
||||
@@ -51,7 +52,7 @@
|
||||
- Generic user-space client library
|
||||
- [Native QEMU driver](../usage/qemu.en.md)
|
||||
- [Loadable fio engine for benchmarks](../usage/fio.en.md)
|
||||
- [NBD proxy for kernel mounts](../usage/nbd.en.md)
|
||||
- [UBLK](../usage/ublk.en.md) and [NBD](../usage/nbd.en.md) servers for kernel mounts
|
||||
- [Simplified NFS proxy for file-based image access emulation (suitable for VMWare)](../usage/nfs.en.md#pseudo-fs)
|
||||
|
||||
## Roadmap
|
||||
|
@@ -25,6 +25,7 @@
|
||||
- Восстановление деградированных блоков
|
||||
- Ребаланс, то есть перемещение данных между OSD (дисками)
|
||||
- [Поддержка "ленивого" fsync (fsync не на каждую операцию)](../config/layout-cluster.ru.md#immediate_commit)
|
||||
- [Локальные чтения](../config/pool.ru.md#local_reads) для оптимизации при нескольких датацентрах
|
||||
- Сбор статистики ввода/вывода в etcd
|
||||
- Статистика операций ввода/вывода и занятого места в разрезе инодов
|
||||
- Именование инодов через хранение их метаданных в etcd
|
||||
@@ -53,7 +54,7 @@
|
||||
- Общая пользовательская клиентская библиотека для работы с кластером
|
||||
- [Драйвер диска для QEMU](../usage/qemu.ru.md)
|
||||
- [Драйвер диска для утилиты тестирования производительности fio](../usage/fio.ru.md)
|
||||
- [NBD-прокси для монтирования образов ядром](../usage/nbd.ru.md) ("блочное устройство в режиме пользователя")
|
||||
- [UBLK](../usage/ublk.ru.md) и [NBD](../usage/nbd.ru.md) серверы для монтирования образов ядром ("блочное устройство в режиме пользователя")
|
||||
- [Упрощённая NFS-прокси для эмуляции файлового доступа к образам (подходит для VMWare)](../usage/nfs.ru.md#псевдо-фс)
|
||||
|
||||
## Планы развития
|
||||
|
@@ -14,6 +14,7 @@
|
||||
- [Removing a failed disk](#removing-a-failed-disk)
|
||||
- [Adding a disk](#adding-a-disk)
|
||||
- [Restoring from lost pool configuration](#restoring-from-lost-pool-configuration)
|
||||
- [Incompatibility problems](#incompatibility-problems)
|
||||
- [Upgrading Vitastor](#upgrading-vitastor)
|
||||
- [OSD memory usage](#osd-memory-usage)
|
||||
|
||||
@@ -166,6 +167,17 @@ done
|
||||
|
||||
After that all PGs should peer and find all previous data.
|
||||
|
||||
## Incompatibility problems
|
||||
|
||||
### ISA-L 2.31
|
||||
|
||||
⚠ It is FORBIDDEN to use Vitastor 2.1.0 and earlier versions with ISA-L 2.31 and newer if
|
||||
you use EC N+K pools and K > 1 on a CPU with GF-NI instruction support, because it WILL
|
||||
lead to **data loss** during EC recovery.
|
||||
|
||||
If you accidentally upgraded ISA-L to 2.31 but didn't upgrade Vitastor and restarted OSDs,
|
||||
then stop them as soon as possible and either update Vitastor or roll back ISA-L.
|
||||
|
||||
## Upgrading Vitastor
|
||||
|
||||
Every upcoming Vitastor version is usually compatible with previous both forward
|
||||
|
@@ -14,6 +14,7 @@
|
||||
- [Удаление неисправного диска](#удаление-неисправного-диска)
|
||||
- [Добавление диска](#добавление-диска)
|
||||
- [Восстановление потерянной конфигурации пулов](#восстановление-потерянной-конфигурации-пулов)
|
||||
- [Проблемы несовместимости](#проблемы-несовместимости)
|
||||
- [Обновление Vitastor](#обновление-vitastor)
|
||||
- [Потребление памяти OSD](#потребление-памяти-osd)
|
||||
|
||||
@@ -163,6 +164,17 @@ done
|
||||
|
||||
После этого все PG должны пройти peering и найти все предыдущие данные.
|
||||
|
||||
## Проблемы несовместимости
|
||||
|
||||
### ISA-L 2.31
|
||||
|
||||
⚠ ЗАПРЕЩЕНО использовать Vitastor 2.1.0 и более ранних версий с библиотекой ISA-L версии 2.31
|
||||
или более новой, если вы используете EC-пулы N+K и K > 1 на CPU с поддержкой инструкций GF-NI,
|
||||
так как это приведёт к **потере данных** при восстановлении из EC.
|
||||
|
||||
Если вы случайно обновили ISA-L до 2.31, но не обновили Vitastor, и успели перезапустить OSD,
|
||||
то как можно скорее остановите их все и либо обновите Vitastor, либо откатите ISA-L.
|
||||
|
||||
## Обновление Vitastor
|
||||
|
||||
Обычно каждая следующая версия Vitastor совместима с предыдущими и "вперёд", и "назад"
|
||||
|
@@ -100,12 +100,14 @@ List images (only matching `<glob>` pattern(s) if passed).
|
||||
Options:
|
||||
|
||||
```
|
||||
--exact Do not match glob patterns as names, select only exact name matches.
|
||||
-p|--pool POOL Filter images by pool ID or name
|
||||
-l|--long Also report allocated size and I/O statistics
|
||||
--del Also include delete operation statistics
|
||||
--sort FIELD Sort by specified field (name, size, used_size, <read|write|delete>_<iops|bps|lat|queue>)
|
||||
-r|--reverse Sort in descending order
|
||||
-n|--count N Only list first N items
|
||||
--tree Show image snapshot/clone tree
|
||||
```
|
||||
|
||||
Example output:
|
||||
@@ -397,6 +399,7 @@ Optional parameters:
|
||||
| `--immediate_commit none` | Put pool only on OSDs with this or larger immediate_commit (none < small < all) |
|
||||
| `--level_placement <rules>` | Use additional failure domain rules (example: "dc=112233") |
|
||||
| `--raw_placement <rules>` | Specify raw PG generation rules ([details](../config/pool.en.md#raw_placement)) |
|
||||
| `--local_reads primary` | Local read policy for replicated pools: primary, nearest or random |
|
||||
| `--primary_affinity_tags tags` | Prefer to put primary copies on OSDs with all specified tags |
|
||||
| `--scrub_interval <time>` | Enable regular scrubbing for this pool. Format: number + unit s/m/h/d/M/y |
|
||||
| `--used_for_app fs:<name>` | Mark pool as used for VitastorFS with metadata in image `<name>` |
|
||||
|
@@ -102,12 +102,14 @@ kaveri 2/1 32 0 B 10 G 0 B 100% 0%
|
||||
Опции:
|
||||
|
||||
```
|
||||
--exact Не применять ФС-шаблоны к именам, выводить только точные совпадения
|
||||
-p|--pool POOL Фильтровать образы по пулу (ID или имени)
|
||||
-l|--long Также выводить статистику занятого места и ввода-вывода
|
||||
--del Также выводить статистику операций удаления
|
||||
--sort FIELD Сортировать по заданному полю (name, size, used_size, <read|write|delete>_<iops|bps|lat|queue>)
|
||||
-r|--reverse Сортировать в обратном порядке
|
||||
-n|--count N Показывать только первые N записей
|
||||
--tree Вывести снапшоты и клоны в виде дерева
|
||||
```
|
||||
|
||||
Пример вывода:
|
||||
@@ -414,6 +416,7 @@ OSD PARENT UP SIZE USED% TAGS WEIGHT BLOCK BITMAP
|
||||
| `--immediate_commit none` | ...только OSD с этим или большим immediate_commit (none < small < all) |
|
||||
| `--level_placement <rules>` | Задать правила дополнительных доменов отказа (пример: "dc=112233") |
|
||||
| `--raw_placement <rules>` | Задать низкоуровневые правила генерации PG ([детали](../config/pool.ru.md#raw_placement)) |
|
||||
| `--local_reads primary` | Политика локальных чтений для реплик: primary, nearest или random |
|
||||
| `--primary_affinity_tags tags` | Предпочитать OSD со всеми данными тегами для роли первичных |
|
||||
| `--scrub_interval <time>` | Включить скрабы с заданным интервалом времени (число + единица s/m/h/d/M/y) |
|
||||
| `--pg_stripe_size <number>` | Увеличить блок группировки объектов по PG |
|
||||
|
@@ -73,6 +73,8 @@ Options (automatic mode):
|
||||
--max_other 10%
|
||||
Use disks for OSD data even if they already have non-Vitastor partitions,
|
||||
but only if these take up no more than this percent of disk space.
|
||||
--dry-run
|
||||
Check and print new OSD count for each disk but do not actually create them.
|
||||
```
|
||||
|
||||
Options (single-device mode):
|
||||
|
@@ -74,6 +74,8 @@ vitastor-disk - инструмент командной строки для уп
|
||||
--max_other 10%
|
||||
Использовать диски под данные OSD, даже если на них уже есть не-Vitastor-овые
|
||||
разделы, но только в случае, если они занимают не более данного процента диска.
|
||||
--dry-run
|
||||
Проверить и вывести число новых OSD для каждого диска, но не создавать их.
|
||||
```
|
||||
|
||||
Опции для режима одного OSD:
|
||||
|
@@ -89,6 +89,8 @@ POSIX features currently not implemented in VitastorFS:
|
||||
instead of actually allocated space
|
||||
- Access times (`atime`) are not tracked (like `-o noatime`)
|
||||
- Modification time (`mtime`) is updated lazily every second (like `-o lazytime`)
|
||||
- Permission enforcement is disabled by default (and Linux NFS client doesn't
|
||||
enforce them either). Use `--enforce 1` to enable it.
|
||||
|
||||
Other notable missing features which should be addressed in the future:
|
||||
- Inode ID reuse. Currently inode IDs always grow, the limit is 2^48 inodes, so
|
||||
@@ -258,4 +260,5 @@ Options:
|
||||
| `--nfspath <PATH>` | set NFS export path to \<PATH> (default is /) |
|
||||
| `--pidfile <FILE>` | write process ID to the specified file |
|
||||
| `--logfile <FILE>` | log to the specified file |
|
||||
| `--enforce 1` | enforce permissions at the server side (no by default) |
|
||||
| `--foreground 1` | stay in foreground, do not daemonize |
|
||||
|
@@ -91,6 +91,8 @@ JSON-формате :-). Для инспекции содержимого БД
|
||||
stat(2), так что `du` всегда показывает сумму размеров файлов, а не фактически занятое место
|
||||
- Времена доступа (`atime`) не отслеживаются (как будто ФС смонтирована с `-o noatime`)
|
||||
- Времена модификации (`mtime`) отслеживаются асинхронно (как будто ФС смонтирована с `-o lazytime`)
|
||||
- Привилегии доступа по умолчанию не проверяются сервером (клиент NFS Linux их также не проверяет).
|
||||
Чтобы включить проверки, используйте опцию `--enforce 1`.
|
||||
|
||||
Другие недостающие функции, которые нужно добавить в будущем:
|
||||
- Переиспользование номеров инодов. В текущей реализации номера инодов всё время
|
||||
@@ -270,4 +272,5 @@ VitastorFS из GPUDirect.
|
||||
| `--nfspath <PATH>` | установить путь NFS-экспорта в \<PATH> (по умолчанию /) |
|
||||
| `--pidfile <FILE>` | записать ID процесса в заданный файл |
|
||||
| `--logfile <FILE>` | записывать логи в заданный файл |
|
||||
| `--enforce 1` | проверять права доступа на стороне сервера (по умолчанию нет) |
|
||||
| `--foreground 1` | не уходить в фон после запуска |
|
||||
|
@@ -130,23 +130,16 @@ Linux kernel, starting with version 5.15, supports a new interface for attaching
|
||||
to the host - VDUSE (vDPA Device in Userspace). QEMU, starting with 7.2, has support for
|
||||
exporting QEMU block devices over this protocol using qemu-storage-daemon.
|
||||
|
||||
VDUSE is currently the best interface to attach Vitastor disks as kernel devices because:
|
||||
- It avoids data copies and thus achieves much better performance than [NBD](nbd.en.md)
|
||||
- It doesn't have NBD timeout problem - the device doesn't die if an operation executes for too long
|
||||
VDUSE advantages:
|
||||
|
||||
- VDUSE copies memory 1 time instead of 2, and is thus faster than [NBD](nbd.en.md) for linear read/write.
|
||||
- It doesn't have the NBD timeout problem - the device doesn't die if an operation executes for too long.
|
||||
- It doesn't have the hung device problem - if the userspace process dies it can be restarted (!)
|
||||
and block device will continue operation
|
||||
- It doesn't seem to have the device number limit
|
||||
and block device will continue operation (UBLK can do it too).
|
||||
- It doesn't seem to have the device number limit (UBLK also doesn't).
|
||||
|
||||
Example performance comparison:
|
||||
|
||||
| | direct fio | NBD | VDUSE |
|
||||
|----------------------|-------------|-------------|-------------|
|
||||
| linear write | 3.85 GB/s | 1.12 GB/s | 3.85 GB/s |
|
||||
| 4k random write Q128 | 240000 iops | 120000 iops | 178000 iops |
|
||||
| 4k random write Q1 | 9500 iops | 7620 iops | 7640 iops |
|
||||
| linear read | 4.3 GB/s | 1.8 GB/s | 2.85 GB/s |
|
||||
| 4k random read Q128 | 287000 iops | 140000 iops | 189000 iops |
|
||||
| 4k random read Q1 | 9600 iops | 7640 iops | 7780 iops |
|
||||
At the same time, VDUSE may be slower or faster than [UBLK](ublk.en.md) for linear read/write,
|
||||
and iops-wise it's sometimes even slower than NBD. See performance comparison examples at the page [UBLK](ublk.en.md).
|
||||
|
||||
To try VDUSE you need at least Linux 5.15, built with VDUSE support
|
||||
(CONFIG_VDPA=m, CONFIG_VDPA_USER=m, CONFIG_VIRTIO_VDPA=m).
|
||||
@@ -193,3 +186,12 @@ To remove the device:
|
||||
vdpa dev del test1
|
||||
kill <qemu-storage-daemon_process_PID>
|
||||
```
|
||||
|
||||
## Veeam
|
||||
|
||||
Vitastor QEMU driver has a feature that allows tricking third-party systems like Veeam which are unable to parse qemu-img
|
||||
vitastor URIs: [qemu_file_mirror_path](../config/client.en.md#qemu_file_mirror_path).
|
||||
|
||||
To make such systems work, you should set this option to an FS directory path (for example, `/mnt/vitastor/`) and
|
||||
mount this directory using [`vitastor-nfs mount --block`](../usage/nfs.en.md). It will make them access
|
||||
your images using files and, hopefully, succeed in doing their normal job :).
|
||||
|
@@ -132,24 +132,16 @@ qemu-system-x86_64 -enable-kvm -m 2048 -M accel=kvm,memory-backend=mem \
|
||||
к системе - VDUSE (vDPA Device in Userspace), а в QEMU, начиная с версии 7.2, есть поддержка
|
||||
экспорта блочных устройств QEMU по этому протоколу через qemu-storage-daemon.
|
||||
|
||||
VDUSE - на данный момент лучший интерфейс для подключения дисков Vitastor в виде блочных
|
||||
устройств на уровне ядра, ибо:
|
||||
- VDUSE не копирует данные и поэтому достигает значительно лучшей производительности, чем [NBD](nbd.ru.md)
|
||||
- Также оно не имеет проблемы NBD-таймаута - устройство не умирает, если операция выполняется слишком долго
|
||||
- Также оно не имеет проблемы подвисающих устройств - если процесс-обработчик умирает, его можно
|
||||
перезапустить (!) и блочное устройство продолжит работать
|
||||
- По-видимому, у него нет предела числа подключаемых в систему устройств
|
||||
Преимущества VDUSE:
|
||||
|
||||
Пример сравнения производительности:
|
||||
- VDUSE копирует данные 1 раз, а не 2, и поэтому он быстрее, чем [NBD](nbd.ru.md) при линейном доступе.
|
||||
- VDUSE не имеет проблемы NBD-таймаута - устройство не умирает, если операция выполняется слишком долго.
|
||||
- VDUSE не имеет проблемы подвисающих устройств - если процесс-обработчик умирает, его можно
|
||||
перезапустить (!) и блочное устройство продолжит работать (в UBLK это тоже поддерживается).
|
||||
- По-видимому, у него нет предела числа подключаемых в систему устройств (в UBLK лимита тоже нет).
|
||||
|
||||
| | Прямой fio | NBD | VDUSE |
|
||||
|--------------------------|-------------|-------------|-------------|
|
||||
| линейная запись | 3.85 GB/s | 1.12 GB/s | 3.85 GB/s |
|
||||
| 4k случайная запись Q128 | 240000 iops | 120000 iops | 178000 iops |
|
||||
| 4k случайная запись Q1 | 9500 iops | 7620 iops | 7640 iops |
|
||||
| линейное чтение | 4.3 GB/s | 1.8 GB/s | 2.85 GB/s |
|
||||
| 4k случайное чтение Q128 | 287000 iops | 140000 iops | 189000 iops |
|
||||
| 4k случайное чтение Q1 | 9600 iops | 7640 iops | 7780 iops |
|
||||
Однако, при линейном доступе VDUSE может быть медленнее UBLK (а может быть и быстрее), а по iops
|
||||
VDUSE иногда даже медленнее NBD. Пример сравнения производительности смотрите на странице [UBLK](ublk.ru.md).
|
||||
|
||||
Чтобы попробовать VDUSE, вам нужно ядро Linux как минимум версии 5.15, собранное с поддержкой
|
||||
VDUSE (CONFIG_VDPA=m, CONFIG_VDPA_USER=m, CONFIG_VIRTIO_VDPA=m).
|
||||
@@ -196,3 +188,12 @@ vdpa dev add name test1 mgmtdev vduse
|
||||
vdpa dev del test1
|
||||
kill <PID_процесса_qemu-storage-daemon>
|
||||
```
|
||||
|
||||
## Veeam
|
||||
|
||||
Драйвер Vitastor QEMU имеет функцию, которая позволяет обманывать сторонние системы типа Veeam, которые
|
||||
не могут сами по себе разобрать адреса дисков в vitastor: [qemu_file_mirror_path](../config/client.ru.md#qemu_file_mirror_path).
|
||||
|
||||
Чтобы заставить такие системы работать, вам нужно установить эту опцию равной пути к некоторому каталогу
|
||||
в ФС (например, `/mnt/vitastor/`) и примонтировать этот каталог с помощью [`vitastor-nfs mount --block`](../usage/nfs.ru.md).
|
||||
Они начнут обращаться к образам как к файлам и, вероятно, смогут заработать корректно :).
|
||||
|
116
docs/usage/ublk.en.md
Normal file
116
docs/usage/ublk.en.md
Normal file
@@ -0,0 +1,116 @@
|
||||
[Documentation](../../README.md#documentation) → Usage → UBLK
|
||||
|
||||
-----
|
||||
|
||||
[Читать на русском](ublk.ru.md)
|
||||
|
||||
# UBLK
|
||||
|
||||
[ublk](https://docs.kernel.org/block/ublk.html) is a new io_uring-based Linux interface
|
||||
for user-space block device drivers, available since Linux 6.0.
|
||||
|
||||
It's not zero-copy, but it's still a fast implementation, outperforming both [NBD](nbd.en.md)
|
||||
and [VDUSE](qemu.en.md#vduse) iops-wise and may or may not outperform VDUSE in linear I/O MB/s.
|
||||
ublk also allows recovering devices even if the server (vitastor-ublk process) dies.
|
||||
|
||||
## Example performance comparison
|
||||
|
||||
TCP (100G), 3 hosts each with 6 NVMe OSDs, 3 replicas, single client
|
||||
|
||||
| | direct fio | NBD | VDUSE | UBLK |
|
||||
|----------------------|-------------|-------------|------------|-------------|
|
||||
| linear write | 3807 MB/s | 1832 MB/s | 3226 MB/s | 3027 MB/s |
|
||||
| linear read | 3067 MB/s | 1885 MB/s | 1800 MB/s | 2076 MB/s |
|
||||
| 4k random write Q128 | 128624 iops | 91060 iops | 94621 iops | 149450 iops |
|
||||
| 4k random read Q128 | 117769 iops | 153408 iops | 93157 iops | 171987 iops |
|
||||
| 4k random write Q1 | 8090 iops | 6442 iops | 6316 iops | 7272 iops |
|
||||
| 4k random read Q1 | 9474 iops | 7200 iops | 6840 iops | 8038 iops |
|
||||
|
||||
RDMA (100G), 3 hosts each with 6 NVMe OSDs, 3 replicas, single client
|
||||
|
||||
| | direct fio | NBD | VDUSE | UBLK |
|
||||
|----------------------|-------------|-------------|-------------|-------------|
|
||||
| linear write | 6998 MB/s | 1878 MB/s | 4249 MB/s | 3140 MB/s |
|
||||
| linear read | 8628 MB/s | 3389 MB/s | 5062 MB/s | 3674 MB/s |
|
||||
| 4k random write Q128 | 222541 iops | 181589 iops | 138281 iops | 218222 iops |
|
||||
| 4k random read Q128 | 412647 iops | 239987 iops | 151663 iops | 269583 iops |
|
||||
| 4k random write Q1 | 11601 iops | 8592 iops | 9111 iops | 10000 iops |
|
||||
| 4k random read Q1 | 10102 iops | 7788 iops | 8111 iops | 8965 iops |
|
||||
|
||||
## Commands
|
||||
|
||||
vitastor-ublk supports the following commands:
|
||||
|
||||
- [map](#map)
|
||||
- [unmap](#unmap)
|
||||
- [ls](#ls)
|
||||
|
||||
## map
|
||||
|
||||
To create a local block device for a Vitastor image run:
|
||||
|
||||
```
|
||||
vitastor-ublk map [/dev/ublkbN] --image testimg
|
||||
```
|
||||
|
||||
It will output a block device name like /dev/ublkb0 which you can then use as a normal disk.
|
||||
|
||||
You can also use `--pool <POOL> --inode <INODE> --size <SIZE>` instead of `--image <IMAGE>` if you want.
|
||||
|
||||
vitastor-ublk supports all usual Vitastor configuration options like `--config_path <path_to_config>` plus ublk-specific:
|
||||
|
||||
* `--recover` \
|
||||
Recover a mapped device if the previous ublk server is dead.
|
||||
* `--queue_depth 256` \
|
||||
Maximum queue size for the device.
|
||||
* `--max_io_size 1M` \
|
||||
Maximum single I/O size for the device. Default: `max(1 MB, pool block size * EC part count)`.
|
||||
* `--readonly` \
|
||||
Make the device read-only.
|
||||
* `--hdd` \
|
||||
Mark the device as rotational.
|
||||
* `--logfile /path/to/log/file.txt` \
|
||||
Write log messages to the specified file instead of dropping them (in background mode)
|
||||
or printing them to the standard output (in foreground mode).
|
||||
* `--dev_num N` \
|
||||
Use the specified device /dev/ublkbN instead of automatic selection (alternative syntax
|
||||
to /dev/ublkbN positional parameter).
|
||||
* `--foreground 1` \
|
||||
Stay in foreground, do not daemonize.
|
||||
|
||||
Note that `ublk_queue_depth` and `ublk_max_io_size` may also be specified
|
||||
in `/etc/vitastor/vitastor.conf` or in another configuration file specified with `--config_path`.
|
||||
|
||||
## unmap
|
||||
|
||||
To unmap the device run:
|
||||
|
||||
```
|
||||
vitastor-ublk unmap /dev/ublkb0
|
||||
```
|
||||
|
||||
## ls
|
||||
|
||||
```
|
||||
vitastor-ublk ls [--json]
|
||||
```
|
||||
|
||||
List mapped images.
|
||||
|
||||
Example output (normal format):
|
||||
|
||||
```
|
||||
/dev/ublkb0
|
||||
image: bench
|
||||
pid: 584536
|
||||
|
||||
/dev/ublkb1
|
||||
image: bench1
|
||||
pid: 584546
|
||||
```
|
||||
|
||||
Example output (JSON format):
|
||||
|
||||
```
|
||||
{"/dev/ublkb0": {"image": "bench", "pid": 584536}, "/dev/ublkb1": {"image": "bench1", "pid": 584546}}
|
||||
```
|
121
docs/usage/ublk.ru.md
Normal file
121
docs/usage/ublk.ru.md
Normal file
@@ -0,0 +1,121 @@
|
||||
[Документация](../../README-ru.md#документация) → Использование → UBLK
|
||||
|
||||
-----
|
||||
|
||||
[Read in English](ublk.en.md)
|
||||
|
||||
# UBLK
|
||||
|
||||
[ublk](https://docs.kernel.org/block/ublk.html) - это новый Linux-интерфейс на основе io_uring
|
||||
для реализации блочных устройств в пространстве пользователя, доступный, начиная с Linux 6.0.
|
||||
|
||||
ublk тоже копирует память (т.е. не является zero-copy), но по IOPS всё равно обгоняет и
|
||||
[NBD](nbd.ru.md), и [VDUSE](qemu.ru.md#vduse), и иногда может даже обгонять VDUSE по
|
||||
скорости линейного доступа. Также ublk позволяет оживлять устройства, у которых умер
|
||||
сервер (процесс-обработчик vitastor-ublk).
|
||||
|
||||
## Пример сравнения производительности
|
||||
|
||||
TCP (100G), 3 сервера с 6 NVMe OSD каждый, 3 реплики, один клиент
|
||||
|
||||
| | Прямой fio | NBD | VDUSE | UBLK |
|
||||
|--------------------------|-------------|-------------|------------|-------------|
|
||||
| линейная запись | 3807 MB/s | 1832 MB/s | 3226 MB/s | 3027 MB/s |
|
||||
| линейное чтение | 3067 MB/s | 1885 MB/s | 1800 MB/s | 2076 MB/s |
|
||||
| 4k случайная запись Q128 | 128624 iops | 91060 iops | 94621 iops | 149450 iops |
|
||||
| 4k случайное чтение Q128 | 117769 iops | 153408 iops | 93157 iops | 171987 iops |
|
||||
| 4k случайная запись Q1 | 8090 iops | 6442 iops | 6316 iops | 7272 iops |
|
||||
| 4k случайное чтение Q1 | 9474 iops | 7200 iops | 6840 iops | 8038 iops |
|
||||
|
||||
RDMA (100G), 3 сервера с 6 NVMe OSD каждый, 3 реплики, один клиент
|
||||
|
||||
| | Прямой fio | NBD | VDUSE | UBLK |
|
||||
|--------------------------|-------------|-------------|-------------|-------------|
|
||||
| линейная запись | 6998 MB/s | 1878 MB/s | 4249 MB/s | 3140 MB/s |
|
||||
| линейное чтение | 8628 MB/s | 3389 MB/s | 5062 MB/s | 3674 MB/s |
|
||||
| 4k случайная запись Q128 | 222541 iops | 181589 iops | 138281 iops | 218222 iops |
|
||||
| 4k случайное чтение Q128 | 412647 iops | 239987 iops | 151663 iops | 269583 iops |
|
||||
| 4k случайная запись Q1 | 11601 iops | 8592 iops | 9111 iops | 10000 iops |
|
||||
| 4k случайное чтение Q1 | 10102 iops | 7788 iops | 8111 iops | 8965 iops |
|
||||
|
||||
## Команды
|
||||
|
||||
vitastor-ublk поддерживает следующие команды:
|
||||
|
||||
- [map](#map)
|
||||
- [unmap](#unmap)
|
||||
- [ls](#ls)
|
||||
|
||||
## map
|
||||
|
||||
Чтобы создать локальное блочное устройство для образа, выполните команду:
|
||||
|
||||
```
|
||||
vitastor-ublk map [/dev/ublkbN] --image testimg
|
||||
```
|
||||
|
||||
Команда напечатает название блочного устройства вида /dev/ublkb0, которое потом можно
|
||||
будет использовать как обычный диск.
|
||||
|
||||
Для обращения по номеру инода, аналогично другим командам, можно использовать опции
|
||||
`--pool <POOL> --inode <INODE> --size <SIZE>` вместо `--image testimg`.
|
||||
|
||||
vitastor-ublk поддерживает все обычные опции Vitastor, например, `--config_path <path_to_config>`,
|
||||
плюс специфичные для ublk:
|
||||
|
||||
* `--recover` \
|
||||
Восстановить ранее подключённое устройство, у которого умер обработчик.
|
||||
* `--queue_depth 256` \
|
||||
Максимальная глубина очереди устройства.
|
||||
* `--max_io_size 1M` \
|
||||
Максимальный размер запроса ввода-вывода для устройства. По умолчанию: `max(1 MB, блок данных пула * число частей данных EC)`.
|
||||
* `--readonly` \
|
||||
Подключить устройство в режиме только для чтения.
|
||||
* `--hdd` \
|
||||
Пометить устройство как вращающийся жёсткий диск (флаг rotational).
|
||||
* `--logfile /path/to/log/file.txt` \
|
||||
Писать сообщения о процессе работы в заданный файл, вместо пропуска их
|
||||
при фоновом режиме запуска или печати на стандартный вывод при запуске
|
||||
в консоли с `--foreground 1`.
|
||||
* `--dev_num N` \
|
||||
Использовать заданное устройство `/dev/ublkbN` вместо автоматического подбора.
|
||||
* `--foreground 1` \
|
||||
Не уводить процесс в фоновый режим.
|
||||
|
||||
Обратите внимание, что опции `ublk_queue_depth` и `ublk_max_io_size` можно
|
||||
также задавать в `/etc/vitastor/vitastor.conf` или в другом файле конфигурации,
|
||||
заданном опцией `--config_path`.
|
||||
|
||||
## unmap
|
||||
|
||||
Для отключения устройства выполните:
|
||||
|
||||
```
|
||||
vitastor-ublk unmap /dev/ublkb0
|
||||
```
|
||||
|
||||
## ls
|
||||
|
||||
```
|
||||
vitastor-ublk ls [--json]
|
||||
```
|
||||
|
||||
Вывести подключённые устройства.
|
||||
|
||||
Пример вывода в обычном формате:
|
||||
|
||||
```
|
||||
/dev/ublkb0
|
||||
image: bench
|
||||
pid: 584536
|
||||
|
||||
/dev/ublkb1
|
||||
image: bench1
|
||||
pid: 584546
|
||||
```
|
||||
|
||||
Пример вывода в JSON-формате:
|
||||
|
||||
```
|
||||
{"/dev/ublkb0": {"image": "bench", "pid": 584536}, "/dev/ublkb1": {"image": "bench1", "pid": 584546}}
|
||||
```
|
@@ -96,6 +96,7 @@ class Mon
|
||||
}
|
||||
else
|
||||
{
|
||||
res.setHeader('Content-Type', 'text/plain; version=0.0.4; charset=utf-8');
|
||||
res.write(export_prometheus_metrics(this.state));
|
||||
}
|
||||
}
|
||||
|
@@ -15,7 +15,7 @@ function get_osd_tree(global_config, state)
|
||||
const stat = state.osd.stats[osd_num];
|
||||
const osd_cfg = state.config.osd[osd_num];
|
||||
let reweight = osd_cfg == null ? 1 : Number(osd_cfg.reweight);
|
||||
if (isNaN(reweight) || reweight < 0 || reweight > 0)
|
||||
if (isNaN(reweight) || reweight < 0 || reweight > 1)
|
||||
reweight = 1;
|
||||
if (stat && stat.size && reweight && (state.osd.state[osd_num] || Number(stat.time) >= down_time ||
|
||||
osd_cfg && osd_cfg.noout))
|
||||
@@ -110,7 +110,7 @@ function make_hier_tree(global_config, tree)
|
||||
if (!(tree[node_id].children||[]).length && (tree[node_id].size||0) <= 0)
|
||||
{
|
||||
const parent = tree[node_id].parent;
|
||||
if (parent)
|
||||
if (parent && tree[parent])
|
||||
{
|
||||
tree[parent].children = tree[parent].children.filter(c => c != tree[node_id]);
|
||||
}
|
||||
@@ -179,7 +179,7 @@ function filter_osds_by_block_layout(orig_tree, osd_stats, block_size, bitmap_gr
|
||||
if (orig_tree[osd].level === 'osd')
|
||||
{
|
||||
const osd_stat = osd_stats[osd];
|
||||
if (osd_stat && (osd_stat.bs_block_size && osd_stat.bs_block_size != block_size ||
|
||||
if (osd_stat && (osd_stat.data_block_size && osd_stat.data_block_size != block_size ||
|
||||
osd_stat.bitmap_granularity && osd_stat.bitmap_granularity != bitmap_granularity ||
|
||||
osd_stat.immediate_commit == 'small' && immediate_commit == 'all' ||
|
||||
osd_stat.immediate_commit == 'none' && immediate_commit != 'none'))
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor-mon",
|
||||
"version": "2.1.0",
|
||||
"version": "2.4.0",
|
||||
"description": "Vitastor SDS monitor service",
|
||||
"main": "mon-main.js",
|
||||
"scripts": {
|
||||
@@ -9,7 +9,7 @@
|
||||
"author": "Vitaliy Filippov",
|
||||
"license": "UNLICENSED",
|
||||
"dependencies": {
|
||||
"antietcd": "^1.1.2",
|
||||
"antietcd": "^1.1.3",
|
||||
"sprintf-js": "^1.1.2",
|
||||
"ws": "^7.2.5"
|
||||
},
|
||||
@@ -19,6 +19,6 @@
|
||||
"eslint-plugin-node": "^11.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.1.0"
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vitastor",
|
||||
"version": "2.1.0",
|
||||
"version": "2.4.0",
|
||||
"description": "Low-level native bindings to Vitastor client library",
|
||||
"main": "index.js",
|
||||
"keywords": [
|
||||
|
@@ -261,7 +261,7 @@ sub free_image
|
||||
my ($vtype, $name, $vmid, undef, undef, undef) = $class->parse_volname($volname);
|
||||
$class->deactivate_volume($storeid, $scfg, $volname);
|
||||
my $full_list = run_cli($scfg, [ 'ls', '-l' ]);
|
||||
my $list = _process_list($scfg, $storeid, $full_list);
|
||||
my $list = _process_list($scfg, $storeid, $full_list, 0);
|
||||
# Remove image and all its snapshots
|
||||
my $rm_names = {
|
||||
map { ($prefix.$_->{name} => 1) }
|
||||
@@ -269,6 +269,10 @@ sub free_image
|
||||
@$list
|
||||
};
|
||||
my $children = [ grep { $_->{parent_name} && $rm_names->{$_->{parent_name}} } @$full_list ];
|
||||
$children = [ grep {
|
||||
substr($_->{name}, 0, length($prefix.$name)) ne $prefix.$name &&
|
||||
substr($_->{name}, 0, length($prefix.$name)+1) ne $prefix.$name.'@'
|
||||
} @$children ];
|
||||
die "Image has children: ".join(', ', map {
|
||||
substr($_->{name}, 0, length $prefix) eq $prefix
|
||||
? substr($_->name, length $prefix)
|
||||
@@ -288,14 +292,15 @@ sub free_image
|
||||
|
||||
sub _process_list
|
||||
{
|
||||
my ($scfg, $storeid, $result) = @_;
|
||||
my ($scfg, $storeid, $result, $skip_snapshot) = @_;
|
||||
$skip_snapshot = 1 if !defined $skip_snapshot;
|
||||
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
|
||||
my $list = [];
|
||||
foreach my $el (@$result)
|
||||
{
|
||||
next if !$el->{name} || length($prefix) && substr($el->{name}, 0, length $prefix) ne $prefix;
|
||||
my $name = substr($el->{name}, length $prefix);
|
||||
next if $name =~ /@/;
|
||||
next if $skip_snapshot && $name =~ /@/;
|
||||
my ($owner) = $name =~ /^(?:vm|base)-(\d+)-/s;
|
||||
next if !defined $owner;
|
||||
my $parent = !defined $el->{parent_name}
|
||||
@@ -410,8 +415,8 @@ sub volume_size_info
|
||||
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
|
||||
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
|
||||
my $info = _process_list($scfg, $storeid, run_cli($scfg, [ 'ls', $prefix.$name ]))->[0];
|
||||
#return wantarray ? ($size, $format, $used, $parent, $st->ctime) : $size;
|
||||
return $info->{size};
|
||||
# (size, format, used, parent, ctime)
|
||||
return wantarray ? ($info->{size}, $info->{format}, $info->{size}, $info->{parent}, 0) : $info->{size};
|
||||
}
|
||||
|
||||
sub volume_resize
|
||||
@@ -494,4 +499,55 @@ sub rename_volume
|
||||
return "${storeid}:${base_name}${target_volname}";
|
||||
}
|
||||
|
||||
sub _monkey_patch_qemu_blockdev_options
|
||||
{
|
||||
my ($cfg, $volid, $machine_version, $options) = @_;
|
||||
my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);
|
||||
|
||||
my $scfg = PVE::Storage::storage_config($cfg, $storeid);
|
||||
|
||||
my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
|
||||
|
||||
my ($vtype) = $plugin->parse_volname($volname);
|
||||
die "cannot use volume of type '$vtype' as a QEMU blockdevice\n"
|
||||
if $vtype ne 'images' && $vtype ne 'iso' && $vtype ne 'import';
|
||||
|
||||
return $plugin->qemu_blockdev_options($scfg, $storeid, $volname, $machine_version, $options);
|
||||
}
|
||||
|
||||
sub qemu_blockdev_options
|
||||
{
|
||||
my ($class, $scfg, $storeid, $volname, $machine_version, $options) = @_;
|
||||
my $prefix = defined $scfg->{vitastor_prefix} ? $scfg->{vitastor_prefix} : 'pve/';
|
||||
my ($vtype, $name, $vmid) = $class->parse_volname($volname);
|
||||
$name .= '@'.$options->{'snapshot-name'} if $options->{'snapshot-name'};
|
||||
if ($scfg->{vitastor_nbd})
|
||||
{
|
||||
my $mapped = run_cli($scfg, [ 'ls' ], binary => '/usr/bin/vitastor-nbd');
|
||||
my ($kerneldev) = grep { $mapped->{$_}->{image} eq $prefix.$name } keys %$mapped;
|
||||
die "Image not mapped via NBD" if !$kerneldev;
|
||||
return { driver => 'host_device', filename => $kerneldev };
|
||||
}
|
||||
my $blockdev = {
|
||||
driver => 'vitastor',
|
||||
image => $prefix.$name,
|
||||
};
|
||||
if ($scfg->{vitastor_config_path})
|
||||
{
|
||||
$blockdev->{'config-path'} = $scfg->{vitastor_config_path};
|
||||
}
|
||||
if ($scfg->{vitastor_etcd_address})
|
||||
{
|
||||
# FIXME This is the only exception: etcd_address -> etcd_host for qemu
|
||||
$blockdev->{'etcd-host'} = $scfg->{vitastor_etcd_address};
|
||||
}
|
||||
if ($scfg->{vitastor_etcd_prefix})
|
||||
{
|
||||
$blockdev->{'etcd-prefix'} = $scfg->{vitastor_etcd_prefix};
|
||||
}
|
||||
return $blockdev;
|
||||
}
|
||||
|
||||
*PVE::Storage::qemu_blockdev_options = *_monkey_patch_qemu_blockdev_options;
|
||||
|
||||
1;
|
||||
|
@@ -50,7 +50,7 @@ from cinder.volume import configuration
|
||||
from cinder.volume import driver
|
||||
from cinder.volume import volume_utils
|
||||
|
||||
VITASTOR_VERSION = '2.1.0'
|
||||
VITASTOR_VERSION = '2.4.0'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
637
patches/libvirt-11.5-vitastor.diff
Normal file
637
patches/libvirt-11.5-vitastor.diff
Normal file
@@ -0,0 +1,637 @@
|
||||
diff --git a/include/libvirt/libvirt-storage.h b/include/libvirt/libvirt-storage.h
|
||||
index aaad4a3da1..5f5daa8341 100644
|
||||
--- a/include/libvirt/libvirt-storage.h
|
||||
+++ b/include/libvirt/libvirt-storage.h
|
||||
@@ -326,6 +326,7 @@ typedef enum {
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS = 1 << 17, /* (Since: 1.2.8) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE = 1 << 18, /* (Since: 3.1.0) */
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ISCSI_DIRECT = 1 << 19, /* (Since: 5.6.0) */
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR = 1 << 20, /* (Since: 5.0.0) */
|
||||
} virConnectListAllStoragePoolsFlags;
|
||||
|
||||
int virConnectListAllStoragePools(virConnectPtr conn,
|
||||
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
|
||||
index 1e24e41a48..ce359a4cf8 100644
|
||||
--- a/src/conf/domain_conf.c
|
||||
+++ b/src/conf/domain_conf.c
|
||||
@@ -7435,7 +7435,8 @@ virDomainDiskSourceNetworkParse(xmlNodePtr node,
|
||||
src->configFile = virXPathString("string(./config/@file)", ctxt);
|
||||
|
||||
if (src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTP ||
|
||||
- src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS)
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_HTTPS ||
|
||||
+ src->protocol == VIR_STORAGE_NET_PROTOCOL_VITASTOR)
|
||||
src->query = virXMLPropString(node, "query");
|
||||
|
||||
if (virDomainStorageNetworkParseHosts(node, ctxt, &src->hosts, &src->nhosts) < 0)
|
||||
@@ -31871,6 +31872,7 @@ virDomainStorageSourceTranslateSourcePool(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_POOL_MPATH:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
|
||||
index b28af7fa56..d1aae6e43e 100644
|
||||
--- a/src/conf/domain_validate.c
|
||||
+++ b/src/conf/domain_validate.c
|
||||
@@ -504,6 +504,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
@@ -576,7 +577,7 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
}
|
||||
}
|
||||
|
||||
- /* internal snapshots and config files are currently supported only with rbd: */
|
||||
+ /* internal snapshots are currently supported only with rbd: */
|
||||
if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD) {
|
||||
if (src->snapshot) {
|
||||
@@ -584,10 +585,14 @@ virDomainDiskDefValidateSourceChainOne(const virStorageSource *src)
|
||||
_("<snapshot> element is currently supported only with 'rbd' disks"));
|
||||
return -1;
|
||||
}
|
||||
-
|
||||
+ }
|
||||
+ /* config files are currently supported only with rbd and vitastor: */
|
||||
+ if (virStorageSourceGetActualType(src) != VIR_STORAGE_TYPE_NETWORK &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_RBD &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR) {
|
||||
if (src->configFile) {
|
||||
virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
- _("<config> element is currently supported only with 'rbd' disks"));
|
||||
+ _("<config> element is currently supported only with 'rbd' and 'vitastor' disks"));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
|
||||
index 183dd5db5e..dcc0d1a778 100644
|
||||
--- a/src/conf/schemas/domaincommon.rng
|
||||
+++ b/src/conf/schemas/domaincommon.rng
|
||||
@@ -2066,6 +2066,35 @@
|
||||
</element>
|
||||
</define>
|
||||
|
||||
+ <define name="diskSourceNetworkProtocolVitastor">
|
||||
+ <element name="source">
|
||||
+ <interleave>
|
||||
+ <attribute name="protocol">
|
||||
+ <value>vitastor</value>
|
||||
+ </attribute>
|
||||
+ <ref name="diskSourceCommon"/>
|
||||
+ <optional>
|
||||
+ <attribute name="name"/>
|
||||
+ </optional>
|
||||
+ <optional>
|
||||
+ <attribute name="query"/>
|
||||
+ </optional>
|
||||
+ <zeroOrMore>
|
||||
+ <ref name="diskSourceNetworkHost"/>
|
||||
+ </zeroOrMore>
|
||||
+ <optional>
|
||||
+ <element name="config">
|
||||
+ <attribute name="file">
|
||||
+ <ref name="absFilePath"/>
|
||||
+ </attribute>
|
||||
+ <empty/>
|
||||
+ </element>
|
||||
+ </optional>
|
||||
+ <empty/>
|
||||
+ </interleave>
|
||||
+ </element>
|
||||
+ </define>
|
||||
+
|
||||
<define name="diskSourceNetworkProtocolISCSI">
|
||||
<element name="source">
|
||||
<attribute name="protocol">
|
||||
@@ -2416,6 +2445,7 @@
|
||||
<ref name="diskSourceNetworkProtocolSimple"/>
|
||||
<ref name="diskSourceNetworkProtocolVxHS"/>
|
||||
<ref name="diskSourceNetworkProtocolNFS"/>
|
||||
+ <ref name="diskSourceNetworkProtocolVitastor"/>
|
||||
</choice>
|
||||
</define>
|
||||
|
||||
diff --git a/src/conf/storage_conf.c b/src/conf/storage_conf.c
|
||||
index 1dc9365bf2..a8a736be81 100644
|
||||
--- a/src/conf/storage_conf.c
|
||||
+++ b/src/conf/storage_conf.c
|
||||
@@ -56,7 +56,7 @@ VIR_ENUM_IMPL(virStoragePool,
|
||||
"logical", "disk", "iscsi",
|
||||
"iscsi-direct", "scsi", "mpath",
|
||||
"rbd", "sheepdog", "gluster",
|
||||
- "zfs", "vstorage",
|
||||
+ "zfs", "vstorage", "vitastor",
|
||||
);
|
||||
|
||||
VIR_ENUM_IMPL(virStoragePoolFormatFileSystem,
|
||||
@@ -242,6 +242,18 @@ static virStoragePoolTypeInfo poolTypeInfo[] = {
|
||||
.formatToString = virStorageFileFormatTypeToString,
|
||||
}
|
||||
},
|
||||
+ {.poolType = VIR_STORAGE_POOL_VITASTOR,
|
||||
+ .poolOptions = {
|
||||
+ .flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NETWORK |
|
||||
+ VIR_STORAGE_POOL_SOURCE_NAME),
|
||||
+ },
|
||||
+ .volOptions = {
|
||||
+ .defaultFormat = VIR_STORAGE_FILE_RAW,
|
||||
+ .formatFromString = virStorageVolumeFormatFromString,
|
||||
+ .formatToString = virStorageFileFormatTypeToString,
|
||||
+ }
|
||||
+ },
|
||||
{.poolType = VIR_STORAGE_POOL_SHEEPDOG,
|
||||
.poolOptions = {
|
||||
.flags = (VIR_STORAGE_POOL_SOURCE_HOST |
|
||||
@@ -538,6 +550,11 @@ virStoragePoolDefParseSource(xmlXPathContextPtr ctxt,
|
||||
_("element 'name' is mandatory for RBD pool"));
|
||||
return -1;
|
||||
}
|
||||
+ if (pool_type == VIR_STORAGE_POOL_VITASTOR && source->name == NULL) {
|
||||
+ virReportError(VIR_ERR_XML_ERROR, "%s",
|
||||
+ _("element 'name' is mandatory for Vitastor pool"));
|
||||
+ return -1;
|
||||
+ }
|
||||
|
||||
if (options->formatFromString) {
|
||||
g_autofree char *format = NULL;
|
||||
@@ -1127,6 +1144,7 @@ virStoragePoolDefFormatBuf(virBuffer *buf,
|
||||
/* RBD, Sheepdog, Gluster and Iscsi-direct devices are not local block devs nor
|
||||
* files, so they don't have a target */
|
||||
if (def->type != VIR_STORAGE_POOL_RBD &&
|
||||
+ def->type != VIR_STORAGE_POOL_VITASTOR &&
|
||||
def->type != VIR_STORAGE_POOL_SHEEPDOG &&
|
||||
def->type != VIR_STORAGE_POOL_GLUSTER &&
|
||||
def->type != VIR_STORAGE_POOL_ISCSI_DIRECT) {
|
||||
diff --git a/src/conf/storage_conf.h b/src/conf/storage_conf.h
|
||||
index fc67957cfe..720c07ef74 100644
|
||||
--- a/src/conf/storage_conf.h
|
||||
+++ b/src/conf/storage_conf.h
|
||||
@@ -103,6 +103,7 @@ typedef enum {
|
||||
VIR_STORAGE_POOL_GLUSTER, /* Gluster device */
|
||||
VIR_STORAGE_POOL_ZFS, /* ZFS */
|
||||
VIR_STORAGE_POOL_VSTORAGE, /* Virtuozzo Storage */
|
||||
+ VIR_STORAGE_POOL_VITASTOR, /* Vitastor */
|
||||
|
||||
VIR_STORAGE_POOL_LAST,
|
||||
} virStoragePoolType;
|
||||
@@ -454,6 +455,7 @@ VIR_ENUM_DECL(virStoragePartedFs);
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SCSI | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_MPATH | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_RBD | \
|
||||
+ VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER | \
|
||||
VIR_CONNECT_LIST_STORAGE_POOLS_ZFS | \
|
||||
diff --git a/src/conf/storage_source_conf.c b/src/conf/storage_source_conf.c
|
||||
index 8a063be244..dd9c7f11a2 100644
|
||||
--- a/src/conf/storage_source_conf.c
|
||||
+++ b/src/conf/storage_source_conf.c
|
||||
@@ -89,6 +89,7 @@ VIR_ENUM_IMPL(virStorageNetProtocol,
|
||||
"ssh",
|
||||
"vxhs",
|
||||
"nfs",
|
||||
+ "vitastor",
|
||||
);
|
||||
|
||||
|
||||
@@ -1314,6 +1315,7 @@ virStorageSourceNetworkDefaultPort(virStorageNetProtocol protocol)
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
return 24007;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
/* we don't provide a default for RBD */
|
||||
return 0;
|
||||
diff --git a/src/conf/storage_source_conf.h b/src/conf/storage_source_conf.h
|
||||
index ebddf28cd6..873a2be65c 100644
|
||||
--- a/src/conf/storage_source_conf.h
|
||||
+++ b/src/conf/storage_source_conf.h
|
||||
@@ -130,6 +130,7 @@ typedef enum {
|
||||
VIR_STORAGE_NET_PROTOCOL_SSH,
|
||||
VIR_STORAGE_NET_PROTOCOL_VXHS,
|
||||
VIR_STORAGE_NET_PROTOCOL_NFS,
|
||||
+ VIR_STORAGE_NET_PROTOCOL_VITASTOR,
|
||||
|
||||
VIR_STORAGE_NET_PROTOCOL_LAST
|
||||
} virStorageNetProtocol;
|
||||
diff --git a/src/conf/virstorageobj.c b/src/conf/virstorageobj.c
|
||||
index 59fa5da372..4739167f5f 100644
|
||||
--- a/src/conf/virstorageobj.c
|
||||
+++ b/src/conf/virstorageobj.c
|
||||
@@ -1438,6 +1438,7 @@ virStoragePoolObjSourceFindDuplicateCb(const void *payload,
|
||||
return 1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
@@ -1921,6 +1922,8 @@ virStoragePoolObjMatch(virStoragePoolObj *obj,
|
||||
(obj->def->type == VIR_STORAGE_POOL_MPATH)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_RBD) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_RBD)) ||
|
||||
+ (MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR) &&
|
||||
+ (obj->def->type == VIR_STORAGE_POOL_VITASTOR)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG) &&
|
||||
(obj->def->type == VIR_STORAGE_POOL_SHEEPDOG)) ||
|
||||
(MATCH(VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER) &&
|
||||
diff --git a/src/libvirt-storage.c b/src/libvirt-storage.c
|
||||
index db7660aac4..561df34709 100644
|
||||
--- a/src/libvirt-storage.c
|
||||
+++ b/src/libvirt-storage.c
|
||||
@@ -94,6 +94,7 @@ virStoragePoolGetConnect(virStoragePoolPtr pool)
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SCSI
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_MPATH
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_RBD
|
||||
+ * VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_SHEEPDOG
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_GLUSTER
|
||||
* VIR_CONNECT_LIST_STORAGE_POOLS_ZFS
|
||||
diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c
|
||||
index bdd30dd65a..5353e00b4a 100644
|
||||
--- a/src/libxl/libxl_conf.c
|
||||
+++ b/src/libxl/libxl_conf.c
|
||||
@@ -1081,6 +1081,7 @@ libxlMakeNetworkDiskSrcStr(virStorageSource *src,
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c
|
||||
index ec8de30c01..61eab9606d 100644
|
||||
--- a/src/libxl/xen_xl.c
|
||||
+++ b/src/libxl/xen_xl.c
|
||||
@@ -1461,6 +1461,7 @@ xenFormatXLDiskSrcNet(virStorageSource *src)
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
virReportError(VIR_ERR_NO_SUPPORT,
|
||||
diff --git a/src/qemu/qemu_block.c b/src/qemu/qemu_block.c
|
||||
index 32568d4ae6..e625fa0720 100644
|
||||
--- a/src/qemu/qemu_block.c
|
||||
+++ b/src/qemu/qemu_block.c
|
||||
@@ -731,6 +731,38 @@ qemuBlockStorageSourceGetRBDProps(virStorageSource *src,
|
||||
}
|
||||
|
||||
|
||||
+static virJSONValue *
|
||||
+qemuBlockStorageSourceGetVitastorProps(virStorageSource *src)
|
||||
+{
|
||||
+ virJSONValue *ret = NULL;
|
||||
+ virStorageNetHostDef *host;
|
||||
+ size_t i;
|
||||
+ g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
|
||||
+ g_autofree char *etcd = NULL;
|
||||
+
|
||||
+ for (i = 0; i < src->nhosts; i++) {
|
||||
+ host = src->hosts + i;
|
||||
+ if ((virStorageNetHostTransport)host->transport != VIR_STORAGE_NET_HOST_TRANS_TCP) {
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ virBufferAsprintf(&buf, i > 0 ? ",%s:%u" : "%s:%u", host->name, host->port);
|
||||
+ }
|
||||
+ if (src->nhosts > 0) {
|
||||
+ etcd = virBufferContentAndReset(&buf);
|
||||
+ }
|
||||
+
|
||||
+ if (virJSONValueObjectAdd(&ret,
|
||||
+ "S:etcd-host", etcd,
|
||||
+ "S:etcd-prefix", src->query,
|
||||
+ "S:config-path", src->configFile,
|
||||
+ "s:image", src->path,
|
||||
+ NULL) < 0)
|
||||
+ return NULL;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static virJSONValue *
|
||||
qemuBlockStorageSourceGetSshProps(virStorageSource *src)
|
||||
{
|
||||
@@ -1082,6 +1114,12 @@ qemuBlockStorageSourceGetBackendProps(virStorageSource *src,
|
||||
return NULL;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(fileprops = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
driver = "ssh";
|
||||
if (!(fileprops = qemuBlockStorageSourceGetSshProps(src)))
|
||||
@@ -1985,6 +2023,7 @@ qemuBlockGetBackingStoreString(virStorageSource *src,
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_VXHS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NFS:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
@@ -2365,6 +2404,12 @@ qemuBlockStorageSourceCreateGetStorageProps(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ driver = "vitastor";
|
||||
+ if (!(location = qemuBlockStorageSourceGetVitastorProps(src)))
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SSH:
|
||||
if (srcPriv->nbdkitProcess) {
|
||||
/* disk creation not yet supported with nbdkit, and even if it
|
||||
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
|
||||
index 0d2548d8d4..91121d6e1f 100644
|
||||
--- a/src/qemu/qemu_domain.c
|
||||
+++ b/src/qemu/qemu_domain.c
|
||||
@@ -4526,7 +4526,8 @@ qemuDomainValidateStorageSource(virStorageSource *src,
|
||||
if (src->query &&
|
||||
(actualType != VIR_STORAGE_TYPE_NETWORK ||
|
||||
(src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTPS &&
|
||||
- src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP))) {
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_HTTP &&
|
||||
+ src->protocol != VIR_STORAGE_NET_PROTOCOL_VITASTOR))) {
|
||||
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
|
||||
_("query is supported only with HTTP(S) protocols"));
|
||||
return -1;
|
||||
@@ -8954,6 +8955,7 @@ qemuDomainPrepareStorageSourceTLS(virStorageSource *src,
|
||||
break;
|
||||
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/qemu/qemu_snapshot.c b/src/qemu/qemu_snapshot.c
|
||||
index 8128154749..afb339b9b0 100644
|
||||
--- a/src/qemu/qemu_snapshot.c
|
||||
+++ b/src/qemu/qemu_snapshot.c
|
||||
@@ -662,6 +662,7 @@ qemuSnapshotPrepareDiskExternalInactive(virDomainSnapshotDiskDef *snapdisk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
@@ -887,6 +888,7 @@ qemuSnapshotPrepareDiskInternal(virDomainDiskDef *disk,
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NBD:
|
||||
case VIR_STORAGE_NET_PROTOCOL_RBD:
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_GLUSTER:
|
||||
case VIR_STORAGE_NET_PROTOCOL_ISCSI:
|
||||
diff --git a/src/storage/storage_driver.c b/src/storage/storage_driver.c
|
||||
index e19e032427..59f91f4710 100644
|
||||
--- a/src/storage/storage_driver.c
|
||||
+++ b/src/storage/storage_driver.c
|
||||
@@ -1626,6 +1626,7 @@ storageVolLookupByPathCallback(virStoragePoolObj *obj,
|
||||
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_SHEEPDOG:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
diff --git a/src/storage_file/storage_source_backingstore.c b/src/storage_file/storage_source_backingstore.c
|
||||
index 80681924ea..8a3ade9ec0 100644
|
||||
--- a/src/storage_file/storage_source_backingstore.c
|
||||
+++ b/src/storage_file/storage_source_backingstore.c
|
||||
@@ -287,6 +287,75 @@ virStorageSourceParseRBDColonString(const char *rbdstr,
|
||||
}
|
||||
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseVitastorColonString(const char *colonstr,
|
||||
+ virStorageSource *src)
|
||||
+{
|
||||
+ char *p, *e, *next;
|
||||
+ g_autofree char *options = NULL;
|
||||
+
|
||||
+ /* optionally skip the "vitastor:" prefix if provided */
|
||||
+ if (STRPREFIX(colonstr, "vitastor:"))
|
||||
+ colonstr += strlen("vitastor:");
|
||||
+
|
||||
+ options = g_strdup(colonstr);
|
||||
+
|
||||
+ p = options;
|
||||
+ while (*p) {
|
||||
+ /* find : delimiter or end of string */
|
||||
+ for (e = p; *e && *e != ':'; ++e) {
|
||||
+ if (*e == '\\') {
|
||||
+ e++;
|
||||
+ if (*e == '\0')
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (*e == '\0') {
|
||||
+ next = e; /* last kv pair */
|
||||
+ } else {
|
||||
+ next = e + 1;
|
||||
+ *e = '\0';
|
||||
+ }
|
||||
+
|
||||
+ if (STRPREFIX(p, "image=")) {
|
||||
+ src->path = g_strdup(p + strlen("image="));
|
||||
+ } else if (STRPREFIX(p, "etcd-prefix=")) {
|
||||
+ src->query = g_strdup(p + strlen("etcd-prefix="));
|
||||
+ } else if (STRPREFIX(p, "config-path=")) {
|
||||
+ src->configFile = g_strdup(p + strlen("config-path="));
|
||||
+ } else if (STRPREFIX(p, "etcd-host=")) {
|
||||
+ char *h, *sep;
|
||||
+
|
||||
+ h = p + strlen("etcd-host=");
|
||||
+ while (h < e) {
|
||||
+ for (sep = h; sep < e; ++sep) {
|
||||
+ if (*sep == '\\' && (sep[1] == ',' ||
|
||||
+ sep[1] == ';' ||
|
||||
+ sep[1] == ' ')) {
|
||||
+ *sep = '\0';
|
||||
+ sep += 2;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (virStorageSourceRBDAddHost(src, h) < 0)
|
||||
+ return -1;
|
||||
+
|
||||
+ h = sep;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ p = next;
|
||||
+ }
|
||||
+
|
||||
+ if (!src->path) {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseNBDColonString(const char *nbdstr,
|
||||
virStorageSource *src)
|
||||
@@ -399,6 +468,11 @@ virStorageSourceParseBackingColon(virStorageSource *src,
|
||||
return -1;
|
||||
break;
|
||||
|
||||
+ case VIR_STORAGE_NET_PROTOCOL_VITASTOR:
|
||||
+ if (virStorageSourceParseVitastorColonString(path, src) < 0)
|
||||
+ return -1;
|
||||
+ break;
|
||||
+
|
||||
case VIR_STORAGE_NET_PROTOCOL_SHEEPDOG:
|
||||
case VIR_STORAGE_NET_PROTOCOL_LAST:
|
||||
case VIR_STORAGE_NET_PROTOCOL_NONE:
|
||||
@@ -975,6 +1049,54 @@ virStorageSourceParseBackingJSONRBD(virStorageSource *src,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+static int
|
||||
+virStorageSourceParseBackingJSONVitastor(virStorageSource *src,
|
||||
+ virJSONValue *json,
|
||||
+ const char *jsonstr G_GNUC_UNUSED,
|
||||
+ int opaque G_GNUC_UNUSED)
|
||||
+{
|
||||
+ const char *filename;
|
||||
+ const char *image = virJSONValueObjectGetString(json, "image");
|
||||
+ const char *conf = virJSONValueObjectGetString(json, "config-path");
|
||||
+ const char *etcd_prefix = virJSONValueObjectGetString(json, "etcd-prefix");
|
||||
+ virJSONValue *servers = virJSONValueObjectGetArray(json, "server");
|
||||
+ size_t nservers;
|
||||
+ size_t i;
|
||||
+
|
||||
+ src->type = VIR_STORAGE_TYPE_NETWORK;
|
||||
+ src->protocol = VIR_STORAGE_NET_PROTOCOL_VITASTOR;
|
||||
+
|
||||
+ /* legacy syntax passed via 'filename' option */
|
||||
+ if ((filename = virJSONValueObjectGetString(json, "filename")))
|
||||
+ return virStorageSourceParseVitastorColonString(filename, src);
|
||||
+
|
||||
+ if (!image) {
|
||||
+ virReportError(VIR_ERR_INVALID_ARG, "%s",
|
||||
+ _("missing image name in Vitastor backing volume "
|
||||
+ "JSON specification"));
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ src->path = g_strdup(image);
|
||||
+ src->configFile = g_strdup(conf);
|
||||
+ src->query = g_strdup(etcd_prefix);
|
||||
+
|
||||
+ if (servers) {
|
||||
+ nservers = virJSONValueArraySize(servers);
|
||||
+
|
||||
+ src->hosts = g_new0(virStorageNetHostDef, nservers);
|
||||
+ src->nhosts = nservers;
|
||||
+
|
||||
+ for (i = 0; i < nservers; i++) {
|
||||
+ if (virStorageSourceParseBackingJSONInetSocketAddress(src->hosts + i,
|
||||
+ virJSONValueArrayGet(servers, i)) < 0)
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
static int
|
||||
virStorageSourceParseBackingJSONRaw(virStorageSource *src,
|
||||
virJSONValue *json,
|
||||
@@ -1152,6 +1274,7 @@ static const struct virStorageSourceJSONDriverParser jsonParsers[] = {
|
||||
{"sheepdog", false, virStorageSourceParseBackingJSONSheepdog, 0},
|
||||
{"ssh", false, virStorageSourceParseBackingJSONSSH, 0},
|
||||
{"rbd", false, virStorageSourceParseBackingJSONRBD, 0},
|
||||
+ {"vitastor", false, virStorageSourceParseBackingJSONVitastor, 0},
|
||||
{"raw", true, virStorageSourceParseBackingJSONRaw, 0},
|
||||
{"nfs", false, virStorageSourceParseBackingJSONNFS, 0},
|
||||
{"vxhs", false, virStorageSourceParseBackingJSONVxHS, 0},
|
||||
diff --git a/src/test/test_driver.c b/src/test/test_driver.c
|
||||
index 25335d9002..cf54069fbe 100644
|
||||
--- a/src/test/test_driver.c
|
||||
+++ b/src/test/test_driver.c
|
||||
@@ -7340,6 +7340,7 @@ testStorageVolumeTypeForPool(int pooltype)
|
||||
case VIR_STORAGE_POOL_ISCSI_DIRECT:
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_RBD:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
return VIR_STORAGE_VOL_NETWORK;
|
||||
case VIR_STORAGE_POOL_LOGICAL:
|
||||
case VIR_STORAGE_POOL_DISK:
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-fs.xml b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
index eee75af746..8bd0a57bdd 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-fs.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='no'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolcapsschemadata/poolcaps-full.xml b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
index 805950a937..852df0de16 100644
|
||||
--- a/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
+++ b/tests/storagepoolcapsschemadata/poolcaps-full.xml
|
||||
@@ -204,4 +204,11 @@
|
||||
</enum>
|
||||
</volOptions>
|
||||
</pool>
|
||||
+ <pool type='vitastor' supported='yes'>
|
||||
+ <volOptions>
|
||||
+ <defaultFormat type='raw'/>
|
||||
+ <enum name='targetFormatType'>
|
||||
+ </enum>
|
||||
+ </volOptions>
|
||||
+ </pool>
|
||||
</storagepoolCapabilities>
|
||||
diff --git a/tests/storagepoolxml2argvtest.c b/tests/storagepoolxml2argvtest.c
|
||||
index d5c2531ab8..b19308ac38 100644
|
||||
--- a/tests/storagepoolxml2argvtest.c
|
||||
+++ b/tests/storagepoolxml2argvtest.c
|
||||
@@ -57,6 +57,7 @@ testCompareXMLToArgvFiles(bool shouldFail,
|
||||
case VIR_STORAGE_POOL_GLUSTER:
|
||||
case VIR_STORAGE_POOL_ZFS:
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
default:
|
||||
VIR_TEST_DEBUG("pool type '%s' has no xml2argv test", defTypeStr);
|
||||
diff --git a/tools/virsh-pool.c b/tools/virsh-pool.c
|
||||
index 2010ef1356..072e2ff9e8 100644
|
||||
--- a/tools/virsh-pool.c
|
||||
+++ b/tools/virsh-pool.c
|
||||
@@ -1187,6 +1187,9 @@ cmdPoolList(vshControl *ctl, const vshCmd *cmd G_GNUC_UNUSED)
|
||||
case VIR_STORAGE_POOL_VSTORAGE:
|
||||
flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VSTORAGE;
|
||||
break;
|
||||
+ case VIR_STORAGE_POOL_VITASTOR:
|
||||
+ flags |= VIR_CONNECT_LIST_STORAGE_POOLS_VITASTOR;
|
||||
+ break;
|
||||
case VIR_STORAGE_POOL_LAST:
|
||||
break;
|
||||
}
|
172
patches/pve-qemu-10.0-vitastor.patch
Normal file
172
patches/pve-qemu-10.0-vitastor.patch
Normal file
@@ -0,0 +1,172 @@
|
||||
Index: pve-qemu-kvm-10.0.2/block/meson.build
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/block/meson.build
|
||||
+++ pve-qemu-kvm-10.0.2/block/meson.build
|
||||
@@ -126,6 +126,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
Index: pve-qemu-kvm-10.0.2/meson.build
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/meson.build
|
||||
+++ pve-qemu-kvm-10.0.2/meson.build
|
||||
@@ -1622,6 +1622,26 @@ if not get_option('rbd').auto() or have_
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2514,6 +2534,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4812,6 +4833,7 @@ summary_info += {'fdt support': fd
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
Index: pve-qemu-kvm-10.0.2/meson_options.txt
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/meson_options.txt
|
||||
+++ pve-qemu-kvm-10.0.2/meson_options.txt
|
||||
@@ -202,6 +202,8 @@ option('pvg', type: 'feature', value: 'a
|
||||
description: 'macOS paravirtualized graphics support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
Index: pve-qemu-kvm-10.0.2/qapi/block-core.json
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/qapi/block-core.json
|
||||
+++ pve-qemu-kvm-10.0.2/qapi/block-core.json
|
||||
@@ -3599,7 +3599,7 @@
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
'pbs',
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4725,6 +4725,28 @@
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
+##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
# An enumeration of replication modes.
|
||||
@@ -5194,6 +5216,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5674,6 +5697,20 @@
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
+##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
# Subformat options for VMDK images
|
||||
@@ -5895,6 +5932,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
Index: pve-qemu-kvm-10.0.2/scripts/meson-buildoptions.sh
|
||||
===================================================================
|
||||
--- pve-qemu-kvm-10.0.2.orig/scripts/meson-buildoptions.sh
|
||||
+++ pve-qemu-kvm-10.0.2/scripts/meson-buildoptions.sh
|
||||
@@ -175,6 +175,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' qpl Query Processing Library support'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rust Rust support'
|
||||
@@ -458,6 +459,8 @@ _meson_option_parse() {
|
||||
--disable-qpl) printf "%s" -Dqpl=disabled ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
172
patches/qemu-10.0-vitastor.patch
Normal file
172
patches/qemu-10.0-vitastor.patch
Normal file
@@ -0,0 +1,172 @@
|
||||
diff --git a/block/meson.build b/block/meson.build
|
||||
index 34b1b2a306..24ca0f1e52 100644
|
||||
--- a/block/meson.build
|
||||
+++ b/block/meson.build
|
||||
@@ -114,6 +114,7 @@ foreach m : [
|
||||
[libnfs, 'nfs', files('nfs.c')],
|
||||
[libssh, 'ssh', files('ssh.c')],
|
||||
[rbd, 'rbd', files('rbd.c')],
|
||||
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||
]
|
||||
if m[0].found()
|
||||
module_ss = ss.source_set()
|
||||
diff --git a/meson.build b/meson.build
|
||||
index 41f68d3806..29eaed9ba4 100644
|
||||
--- a/meson.build
|
||||
+++ b/meson.build
|
||||
@@ -1622,6 +1622,26 @@ if not get_option('rbd').auto() or have_block
|
||||
endif
|
||||
endif
|
||||
|
||||
+vitastor = not_found
|
||||
+if not get_option('vitastor').auto() or have_block
|
||||
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||
+ required: get_option('vitastor'))
|
||||
+ if libvitastor_client.found()
|
||||
+ if cc.links('''
|
||||
+ #include <vitastor_c.h>
|
||||
+ int main(void) {
|
||||
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
+ return 0;
|
||||
+ }''', dependencies: libvitastor_client)
|
||||
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||
+ elif get_option('vitastor').enabled()
|
||||
+ error('could not link libvitastor_client')
|
||||
+ else
|
||||
+ warning('could not link libvitastor_client, disabling')
|
||||
+ endif
|
||||
+ endif
|
||||
+endif
|
||||
+
|
||||
glusterfs = not_found
|
||||
glusterfs_ftruncate_has_stat = false
|
||||
glusterfs_iocb_has_stat = false
|
||||
@@ -2506,6 +2526,7 @@ endif
|
||||
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||
@@ -4813,6 +4834,7 @@ summary_info += {'fdt support': fdt_opt == 'internal' ? 'internal' : fdt}
|
||||
summary_info += {'libcap-ng support': libcap_ng}
|
||||
summary_info += {'bpf support': libbpf}
|
||||
summary_info += {'rbd support': rbd}
|
||||
+summary_info += {'vitastor support': vitastor}
|
||||
summary_info += {'smartcard support': cacard}
|
||||
summary_info += {'U2F support': u2f}
|
||||
summary_info += {'libusb': libusb}
|
||||
diff --git a/meson_options.txt b/meson_options.txt
|
||||
index 59d973bca0..a3e7123980 100644
|
||||
--- a/meson_options.txt
|
||||
+++ b/meson_options.txt
|
||||
@@ -202,6 +202,8 @@ option('pvg', type: 'feature', value: 'auto',
|
||||
description: 'macOS paravirtualized graphics support')
|
||||
option('rbd', type : 'feature', value : 'auto',
|
||||
description: 'Ceph block device driver')
|
||||
+option('vitastor', type : 'feature', value : 'auto',
|
||||
+ description: 'Vitastor block device driver')
|
||||
option('opengl', type : 'feature', value : 'auto',
|
||||
description: 'OpenGL support')
|
||||
option('rdma', type : 'feature', value : 'auto',
|
||||
diff --git a/qapi/block-core.json b/qapi/block-core.json
|
||||
index b1937780e1..a511193620 100644
|
||||
--- a/qapi/block-core.json
|
||||
+++ b/qapi/block-core.json
|
||||
@@ -3216,7 +3216,7 @@
|
||||
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
|
||||
'raw', 'rbd',
|
||||
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||
@@ -4299,6 +4299,28 @@
|
||||
'*key-secret': 'str',
|
||||
'*server': ['InetSocketAddressBase'] } }
|
||||
|
||||
+##
|
||||
+# @BlockdevOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific block device options for vitastor
|
||||
+#
|
||||
+# @image: Image name
|
||||
+# @inode: Inode number
|
||||
+# @pool: Pool ID
|
||||
+# @size: Desired image size in bytes
|
||||
+# @config-path: Path to Vitastor configuration
|
||||
+# @etcd-host: etcd connection address(es)
|
||||
+# @etcd-prefix: etcd key/value prefix
|
||||
+##
|
||||
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||
+ 'data': { '*inode': 'uint64',
|
||||
+ '*pool': 'uint64',
|
||||
+ '*size': 'uint64',
|
||||
+ '*image': 'str',
|
||||
+ '*config-path': 'str',
|
||||
+ '*etcd-host': 'str',
|
||||
+ '*etcd-prefix': 'str' } }
|
||||
+
|
||||
##
|
||||
# @ReplicationMode:
|
||||
#
|
||||
@@ -4767,6 +4789,7 @@
|
||||
'throttle': 'BlockdevOptionsThrottle',
|
||||
'vdi': 'BlockdevOptionsGenericFormat',
|
||||
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||
'virtio-blk-vfio-pci':
|
||||
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||
'if': 'CONFIG_BLKIO' },
|
||||
@@ -5240,6 +5263,20 @@
|
||||
'*cluster-size' : 'size',
|
||||
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||
|
||||
+##
|
||||
+# @BlockdevCreateOptionsVitastor:
|
||||
+#
|
||||
+# Driver specific image creation options for Vitastor.
|
||||
+#
|
||||
+# @location: Where to store the new image file. This location cannot
|
||||
+# point to a snapshot.
|
||||
+#
|
||||
+# @size: Size of the virtual disk in bytes
|
||||
+##
|
||||
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||
+ 'size': 'size' } }
|
||||
+
|
||||
##
|
||||
# @BlockdevVmdkSubformat:
|
||||
#
|
||||
@@ -5462,6 +5499,7 @@
|
||||
'ssh': 'BlockdevCreateOptionsSsh',
|
||||
'vdi': 'BlockdevCreateOptionsVdi',
|
||||
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||
'vpc': 'BlockdevCreateOptionsVpc'
|
||||
} }
|
||||
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
|
||||
index 3e8e00852b..45aff3b6a9 100644
|
||||
--- a/scripts/meson-buildoptions.sh
|
||||
+++ b/scripts/meson-buildoptions.sh
|
||||
@@ -175,6 +175,7 @@ meson_options_help() {
|
||||
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||
printf "%s\n" ' qpl Query Processing Library support'
|
||||
printf "%s\n" ' rbd Ceph block device driver'
|
||||
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||
printf "%s\n" ' replication replication support'
|
||||
printf "%s\n" ' rust Rust support'
|
||||
@@ -458,6 +459,8 @@ _meson_option_parse() {
|
||||
--disable-qpl) printf "%s" -Dqpl=disabled ;;
|
||||
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
@@ -7,22 +7,24 @@ set -e
|
||||
VITASTOR=$(dirname $0)
|
||||
VITASTOR=$(realpath "$VITASTOR/..")
|
||||
|
||||
EL=$(rpm --eval '%dist')
|
||||
if [ "$EL" = ".el8" ]; then
|
||||
REL=$(rpm --eval '%dist')
|
||||
REL=${REL##.}
|
||||
if [ "$REL" = "el8" ]; then
|
||||
# CentOS 8
|
||||
. /opt/rh/gcc-toolset-9/enable
|
||||
elif [ "$EL" = ".el7" ]; then
|
||||
elif [ "$REL" = "el7" ]; then
|
||||
# CentOS 7
|
||||
. /opt/rh/devtoolset-9/enable
|
||||
fi
|
||||
cd ~/rpmbuild/SPECS
|
||||
rpmbuild -bp fio.spec
|
||||
cd $VITASTOR
|
||||
VER=$(grep ^Version: rpm/vitastor-el7.spec | awk '{print $2}')
|
||||
VER=$(grep ^Version: rpm/vitastor-$REL.spec | awk '{print $2}')
|
||||
rm -rf fio
|
||||
ln -s ~/rpmbuild/BUILD/fio*/ fio
|
||||
sh copy-fio-includes.sh
|
||||
rm fio
|
||||
mv fio-copy fio
|
||||
FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
|
||||
tar --transform "s#^#vitastor-$VER/#" --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-$VER$(rpm --eval '%dist').tar.gz *
|
||||
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-$REL.spec
|
||||
tar --transform "s#^#vitastor-$VER/#" --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-$VER.$REL.tar.gz $(ls | grep -v packages)
|
||||
|
16
rpm/vitastor-build.sh
Executable file
16
rpm/vitastor-build.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e -x
|
||||
REL=$(rpm --eval '%dist')
|
||||
REL=${REL##.}
|
||||
cd /root/vitastor/rpm
|
||||
./build-tarball.sh
|
||||
VER=$(grep ^Version: vitastor-$REL.spec | awk '{print $2}')
|
||||
cp /root/vitastor-$VER.$REL.tar.gz ~/rpmbuild/SOURCES
|
||||
cp vitastor-$REL.spec ~/rpmbuild/SPECS/vitastor.spec
|
||||
cd ~/rpmbuild/SPECS/
|
||||
rpmbuild -ba vitastor.spec
|
||||
mkdir -p /root/vitastor/packages/vitastor-$REL
|
||||
rm -rf /root/vitastor/packages/vitastor-$REL/*
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/vitastor/packages/vitastor-$REL/
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/vitastor/packages/vitastor-$REL/
|
@@ -1,5 +1,8 @@
|
||||
# Build packages for CentOS 7 inside a container
|
||||
# cd ..; podman build -t vitastor-el7 -v `pwd`/packages:/root/packages -f rpm/vitastor-el7.Dockerfile .
|
||||
# cd ..
|
||||
# docker build -t vitastor-buildenv:el7 -f rpm/vitastor-el7.Dockerfile .
|
||||
# docker run -i --rm -v ./:/root/vitastor vitastor-buildenv:el7 /root/vitastor/rpm/vitastor-build.sh
|
||||
|
||||
# localedef -i ru_RU -f UTF-8 ru_RU.UTF-8
|
||||
|
||||
FROM centos:7
|
||||
@@ -7,7 +10,9 @@ FROM centos:7
|
||||
WORKDIR /root
|
||||
|
||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||
RUN sed -i 's/^mirrorlist=/#mirrorlist=/; s!#baseurl=http://mirror.centos.org/centos/\$releasever!baseurl=http://vault.centos.org/7.9.2009!' /etc/yum.repos.d/*.repo
|
||||
RUN yum -y --enablerepo=extras install centos-release-scl epel-release yum-utils rpm-build
|
||||
RUN perl -i -pe 's!mirrorlist=!#mirrorlist=!s; s!#\s*baseurl=http://mirror.centos.org!baseurl=http://vault.centos.org!' /etc/yum.repos.d/CentOS-SCLo-scl*.repo
|
||||
RUN yum -y install https://vitastor.io/rpms/centos/7/vitastor-release-1.0-1.el7.noarch.rpm
|
||||
RUN yum -y install devtoolset-9-gcc-c++ devtoolset-9-libatomic-devel gcc make cmake gperftools-devel \
|
||||
fio rh-nodejs12 jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libnl3-devel
|
||||
@@ -16,32 +21,3 @@ RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
|
||||
RUN cd ~/rpmbuild/SPECS && yum-builddep -y fio.spec
|
||||
RUN yum -y install cmake3
|
||||
|
||||
ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
|
||||
|
||||
RUN set -e; \
|
||||
rpm -i liburing*.src.rpm; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
. /opt/rh/devtoolset-9/enable; \
|
||||
rpmbuild -ba liburing.spec; \
|
||||
mkdir -p /root/packages/liburing-el7; \
|
||||
rm -rf /root/packages/liburing-el7/*; \
|
||||
cp ~/rpmbuild/RPMS/*/liburing* /root/packages/liburing-el7/; \
|
||||
cp ~/rpmbuild/SRPMS/liburing* /root/packages/liburing-el7/
|
||||
|
||||
RUN rpm -i `ls /root/packages/liburing-el7/liburing-*.x86_64.rpm | grep -v debug`
|
||||
|
||||
ADD . /root/vitastor
|
||||
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
VER=$(grep ^Version: vitastor-el7.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el7.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
mkdir -p /root/packages/vitastor-el7; \
|
||||
rm -rf /root/packages/vitastor-el7/*; \
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/packages/vitastor-el7/; \
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el7/
|
||||
|
@@ -1,13 +1,12 @@
|
||||
Name: vitastor
|
||||
Version: 2.1.0
|
||||
Version: 2.4.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-2.1.0.el7.tar.gz
|
||||
Source0: vitastor-2.4.0.el7.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
BuildRequires: devtoolset-9-gcc-c++
|
||||
BuildRequires: rh-nodejs12
|
||||
@@ -35,8 +34,6 @@ size with configurable redundancy (replication or erasure codes/XOR).
|
||||
Summary: Vitastor - OSD
|
||||
Requires: libJerasure2
|
||||
Requires: libisa-l
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: util-linux
|
||||
Requires: parted
|
||||
@@ -60,8 +57,6 @@ scheduling cluster-level operations.
|
||||
|
||||
%package -n vitastor-client
|
||||
Summary: Vitastor - client
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
|
||||
|
||||
%description -n vitastor-client
|
||||
@@ -82,7 +77,7 @@ Vitastor library headers for development.
|
||||
Summary: Vitastor - fio drivers
|
||||
Group: Development/Libraries
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: fio = 3.7-1.el7
|
||||
Requires: fio = 3.7-2.el7
|
||||
|
||||
|
||||
%description -n vitastor-fio
|
||||
@@ -169,6 +164,7 @@ chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
%files -n vitastor-client
|
||||
%_bindir/vitastor-nbd
|
||||
%_bindir/vitastor-ublk
|
||||
%_bindir/vitastor-nfs
|
||||
%_bindir/vitastor-cli
|
||||
%_bindir/vitastor-rm
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# Build packages for CentOS 8 inside a container
|
||||
# cd ..; podman build -t vitastor-el8 -v `pwd`/packages:/root/packages -f rpm/vitastor-el8.Dockerfile .
|
||||
# cd ..
|
||||
# docker build -t vitastor-buildenv:el8 -f rpm/vitastor-el8.Dockerfile .
|
||||
# docker run -i --rm -v ./:/root/vitastor vitastor-buildenv:el8 /root/vitastor/rpm/vitastor-build.sh
|
||||
|
||||
FROM centos:8
|
||||
|
||||
@@ -15,32 +17,3 @@ RUN dnf -y install gcc-toolset-9 gcc-toolset-9-gcc-c++ gperftools-devel \
|
||||
RUN dnf download --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec
|
||||
|
||||
ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
|
||||
|
||||
RUN set -e; \
|
||||
rpm -i liburing*.src.rpm; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
. /opt/rh/gcc-toolset-9/enable; \
|
||||
rpmbuild -ba liburing.spec; \
|
||||
mkdir -p /root/packages/liburing-el8; \
|
||||
rm -rf /root/packages/liburing-el8/*; \
|
||||
cp ~/rpmbuild/RPMS/*/liburing* /root/packages/liburing-el8/; \
|
||||
cp ~/rpmbuild/SRPMS/liburing* /root/packages/liburing-el8/
|
||||
|
||||
RUN rpm -i `ls /root/packages/liburing-el8/liburing-*.x86_64.rpm | grep -v debug`
|
||||
|
||||
ADD . /root/vitastor
|
||||
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
VER=$(grep ^Version: vitastor-el8.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el8.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
mkdir -p /root/packages/vitastor-el8; \
|
||||
rm -rf /root/packages/vitastor-el8/*; \
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/packages/vitastor-el8/; \
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el8/
|
||||
|
@@ -1,13 +1,12 @@
|
||||
Name: vitastor
|
||||
Version: 2.1.0
|
||||
Version: 2.4.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-2.1.0.el8.tar.gz
|
||||
Source0: vitastor-2.4.0.el8.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
BuildRequires: gcc-toolset-9-gcc-c++
|
||||
BuildRequires: nodejs >= 10
|
||||
@@ -34,8 +33,6 @@ size with configurable redundancy (replication or erasure codes/XOR).
|
||||
Summary: Vitastor - OSD
|
||||
Requires: libJerasure2
|
||||
Requires: libisa-l
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: util-linux
|
||||
Requires: parted
|
||||
@@ -58,8 +55,6 @@ scheduling cluster-level operations.
|
||||
|
||||
%package -n vitastor-client
|
||||
Summary: Vitastor - client
|
||||
Requires: liburing >= 0.6
|
||||
Requires: liburing < 2
|
||||
|
||||
|
||||
%description -n vitastor-client
|
||||
@@ -80,7 +75,7 @@ Vitastor library headers for development.
|
||||
Summary: Vitastor - fio drivers
|
||||
Group: Development/Libraries
|
||||
Requires: vitastor-client = %{version}-%{release}
|
||||
Requires: fio = 3.7-3.el8
|
||||
Requires: fio = 3.19-3.el8
|
||||
|
||||
|
||||
%description -n vitastor-fio
|
||||
@@ -166,6 +161,7 @@ chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
%files -n vitastor-client
|
||||
%_bindir/vitastor-nbd
|
||||
%_bindir/vitastor-ublk
|
||||
%_bindir/vitastor-nfs
|
||||
%_bindir/vitastor-cli
|
||||
%_bindir/vitastor-rm
|
||||
|
@@ -1,5 +1,7 @@
|
||||
# Build packages for AlmaLinux 9 inside a container
|
||||
# cd ..; podman build -t vitastor-el9 -v `pwd`/packages:/root/packages -f rpm/vitastor-el9.Dockerfile .
|
||||
# cd ..
|
||||
# docker build -t vitastor-buildenv:el9 -f rpm/vitastor-el9.Dockerfile .
|
||||
# docker run -i --rm -v ./:/root/vitastor vitastor-buildenv:el9 /root/vitastor/rpm/vitastor-build.sh
|
||||
|
||||
FROM almalinux:9
|
||||
|
||||
@@ -8,22 +10,7 @@ WORKDIR /root
|
||||
RUN sed -i 's/enabled=0/enabled=1/' /etc/yum.repos.d/*.repo
|
||||
RUN dnf -y install epel-release dnf-plugins-core
|
||||
RUN dnf -y install https://vitastor.io/rpms/centos/9/vitastor-release-1.0-1.el9.noarch.rpm
|
||||
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive liburing-devel cmake libnl3-devel
|
||||
RUN dnf -y install gcc-c++ gperftools-devel fio nodejs rpm-build jerasure-devel libisa-l-devel gf-complete-devel rdma-core-devel libarchive cmake libnl3-devel
|
||||
RUN dnf download --source fio
|
||||
RUN rpm --nomd5 -i fio*.src.rpm
|
||||
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --spec fio.spec
|
||||
|
||||
ADD . /root/vitastor
|
||||
|
||||
RUN set -e; \
|
||||
cd /root/vitastor/rpm; \
|
||||
sh build-tarball.sh; \
|
||||
VER=$(grep ^Version: vitastor-el9.spec | awk '{print $2}'); \
|
||||
cp /root/vitastor-$VER.el9.tar.gz ~/rpmbuild/SOURCES; \
|
||||
cp vitastor-el9.spec ~/rpmbuild/SPECS/vitastor.spec; \
|
||||
cd ~/rpmbuild/SPECS/; \
|
||||
rpmbuild -ba vitastor.spec; \
|
||||
mkdir -p /root/packages/vitastor-el9; \
|
||||
rm -rf /root/packages/vitastor-el9/*; \
|
||||
cp ~/rpmbuild/RPMS/*/*vitastor* /root/packages/vitastor-el9/; \
|
||||
cp ~/rpmbuild/SRPMS/vitastor* /root/packages/vitastor-el9/
|
||||
|
@@ -1,13 +1,12 @@
|
||||
Name: vitastor
|
||||
Version: 2.1.0
|
||||
Version: 2.4.0
|
||||
Release: 1%{?dist}
|
||||
Summary: Vitastor, a fast software-defined clustered block storage
|
||||
|
||||
License: Vitastor Network Public License 1.1
|
||||
URL: https://vitastor.io/
|
||||
Source0: vitastor-2.1.0.el9.tar.gz
|
||||
Source0: vitastor-2.4.0.el9.tar.gz
|
||||
|
||||
BuildRequires: liburing-devel >= 0.6
|
||||
BuildRequires: gperftools-devel
|
||||
BuildRequires: gcc-c++
|
||||
BuildRequires: nodejs >= 10
|
||||
@@ -159,6 +158,7 @@ chown vitastor:vitastor /var/lib/vitastor
|
||||
|
||||
%files -n vitastor-client
|
||||
%_bindir/vitastor-nbd
|
||||
%_bindir/vitastor-ublk
|
||||
%_bindir/vitastor-nfs
|
||||
%_bindir/vitastor-cli
|
||||
%_bindir/vitastor-rm
|
||||
|
@@ -12,6 +12,7 @@ set(WITH_QEMU false CACHE BOOL "Build QEMU driver inside Vitastor source tree")
|
||||
set(WITH_FIO true CACHE BOOL "Build FIO driver")
|
||||
set(QEMU_PLUGINDIR qemu CACHE STRING "QEMU plugin directory suffix (qemu-kvm on RHEL)")
|
||||
set(WITH_ASAN false CACHE BOOL "Build with AddressSanitizer")
|
||||
set(WITH_SYSTEM_LIBURING false CACHE BOOL "Use system liburing")
|
||||
if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
if(EXISTS "/etc/debian_version")
|
||||
set(CMAKE_INSTALL_LIBDIR "lib/${CMAKE_LIBRARY_ARCHITECTURE}")
|
||||
@@ -19,13 +20,16 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
|
||||
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
|
||||
endif()
|
||||
|
||||
add_definitions(-DVITASTOR_VERSION="2.1.0")
|
||||
add_definitions(-D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_definitions(-DVITASTOR_VERSION="2.4.0")
|
||||
add_definitions(-D_GNU_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -fno-omit-frame-pointer -fvisibility=hidden -I ${CMAKE_SOURCE_DIR}/src)
|
||||
add_link_options(-fno-omit-frame-pointer)
|
||||
if (${WITH_ASAN})
|
||||
add_definitions(-fsanitize=address)
|
||||
add_link_options(-fsanitize=address -fno-omit-frame-pointer)
|
||||
endif (${WITH_ASAN})
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvisibility-inlines-hidden")
|
||||
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -fvisibility-inlines-hidden")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fvisibility-inlines-hidden")
|
||||
|
||||
set(CMAKE_BUILD_TYPE RelWithDebInfo)
|
||||
string(REGEX REPLACE "([\\/\\-]O)[^ \t\r\n]*" "\\13" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||
@@ -49,7 +53,6 @@ endmacro(install_symlink)
|
||||
check_include_file("linux/nbd-netlink.h" HAVE_NBD_NETLINK_H)
|
||||
|
||||
find_package(PkgConfig)
|
||||
pkg_check_modules(LIBURING REQUIRED liburing)
|
||||
if (${WITH_QEMU})
|
||||
pkg_check_modules(GLIB REQUIRED glib-2.0)
|
||||
endif (${WITH_QEMU})
|
||||
@@ -66,6 +69,15 @@ if (RDMACM_LIBRARIES)
|
||||
add_definitions(-DWITH_RDMACM)
|
||||
endif (RDMACM_LIBRARIES)
|
||||
|
||||
if (${WITH_SYSTEM_LIBURING})
|
||||
pkg_check_modules(LIBURING REQUIRED liburing>=2.10)
|
||||
include_directories(${LIBURING_INCLUDE_DIRS})
|
||||
else()
|
||||
include_directories(${CMAKE_SOURCE_DIR}/src/liburing/include)
|
||||
add_subdirectory(liburing)
|
||||
set(LIBURING_LIBRARIES uring)
|
||||
endif (${WITH_SYSTEM_LIBURING})
|
||||
|
||||
add_custom_target(build_tests)
|
||||
add_custom_target(test
|
||||
COMMAND
|
||||
@@ -86,7 +98,6 @@ include_directories(
|
||||
${CMAKE_SOURCE_DIR}/src/test
|
||||
${CMAKE_SOURCE_DIR}/src/util
|
||||
/usr/include/jerasure
|
||||
${LIBURING_INCLUDE_DIRS}
|
||||
${IBVERBS_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
@@ -101,7 +112,7 @@ add_subdirectory(test)
|
||||
|
||||
### Install
|
||||
|
||||
install(TARGETS vitastor-osd vitastor-disk vitastor-nbd vitastor-nfs vitastor-cli vitastor-kv vitastor-kv-stress RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
install(TARGETS vitastor-osd vitastor-disk vitastor-nbd vitastor-ublk vitastor-nfs vitastor-cli vitastor-kv vitastor-kv-stress RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
install_symlink(vitastor-disk ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vitastor-dump-journal)
|
||||
install_symlink(vitastor-cli ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vitastor-rm)
|
||||
install_symlink(vitastor-cli ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vita)
|
||||
|
@@ -9,6 +9,7 @@ add_library(vitastor_blk SHARED
|
||||
)
|
||||
target_link_libraries(vitastor_blk
|
||||
${LIBURING_LIBRARIES}
|
||||
${ISAL_LIBRARIES}
|
||||
tcmalloc_minimal
|
||||
# for timerfd_manager
|
||||
vitastor_common
|
||||
|
@@ -42,8 +42,7 @@
|
||||
#define BS_OP_DELETE 6
|
||||
#define BS_OP_LIST 7
|
||||
#define BS_OP_ROLLBACK 8
|
||||
#define BS_OP_SYNC_STAB_ALL 9
|
||||
#define BS_OP_MAX 9
|
||||
#define BS_OP_MAX 8
|
||||
|
||||
#define BS_OP_PRIVATE_DATA_SIZE 256
|
||||
|
||||
@@ -113,14 +112,6 @@ Input:
|
||||
Output:
|
||||
- retval = 0 or negative error number (-ENOENT if no such version for stabilize)
|
||||
|
||||
## BS_OP_SYNC_STAB_ALL
|
||||
|
||||
ONLY FOR TESTS! Sync and mark all unstable object versions as stable, at once.
|
||||
|
||||
Input: Nothing except opcode
|
||||
Output:
|
||||
- retval = 0 or negative error number (-EINVAL)
|
||||
|
||||
## BS_OP_LIST
|
||||
|
||||
Get a list of all objects in this Blockstore.
|
||||
@@ -144,10 +135,10 @@ Output:
|
||||
|
||||
*/
|
||||
|
||||
struct blockstore_op_t
|
||||
struct __attribute__ ((visibility("default"))) blockstore_op_t
|
||||
{
|
||||
// operation
|
||||
uint64_t opcode;
|
||||
uint64_t opcode = 0;
|
||||
// finish callback
|
||||
std::function<void (blockstore_op_t*)> callback;
|
||||
union __attribute__((__packed__))
|
||||
@@ -171,9 +162,9 @@ struct blockstore_op_t
|
||||
uint32_t list_stable_limit;
|
||||
};
|
||||
};
|
||||
void *buf;
|
||||
void *bitmap;
|
||||
int retval;
|
||||
void *buf = NULL;
|
||||
void *bitmap = NULL;
|
||||
int retval = 0;
|
||||
|
||||
uint8_t private_data[BS_OP_PRIVATE_DATA_SIZE];
|
||||
};
|
||||
@@ -182,7 +173,7 @@ typedef std::map<std::string, std::string> blockstore_config_t;
|
||||
|
||||
class blockstore_impl_t;
|
||||
|
||||
class blockstore_t
|
||||
class __attribute__((visibility("default"))) blockstore_t
|
||||
{
|
||||
blockstore_impl_t *impl;
|
||||
public:
|
||||
|
@@ -361,6 +361,10 @@ bool journal_flusher_co::loop()
|
||||
else if (wait_state == 28) goto resume_28;
|
||||
else if (wait_state == 29) goto resume_29;
|
||||
else if (wait_state == 30) goto resume_30;
|
||||
else if (wait_state == 31) goto resume_31;
|
||||
else if (wait_state == 32) goto resume_32;
|
||||
else if (wait_state == 33) goto resume_33;
|
||||
else if (wait_state == 34) goto resume_34;
|
||||
resume_0:
|
||||
if (flusher->flush_queue.size() < flusher->min_flusher_count && !flusher->trim_wanted ||
|
||||
!flusher->flush_queue.size() || !flusher->dequeuing)
|
||||
@@ -486,13 +490,14 @@ resume_2:
|
||||
resume_10:
|
||||
resume_11:
|
||||
resume_12:
|
||||
resume_13:
|
||||
if (fill_incomplete && !clear_incomplete_csum_block_bits(5))
|
||||
return false;
|
||||
// Wait for journal data reads if the journal is not inmemory
|
||||
resume_13:
|
||||
resume_14:
|
||||
if (wait_journal_count > 0)
|
||||
{
|
||||
wait_state = wait_base+13;
|
||||
wait_state = wait_base+14;
|
||||
return false;
|
||||
}
|
||||
if (bs->dsk.csum_block_size)
|
||||
@@ -509,31 +514,26 @@ resume_2:
|
||||
{
|
||||
if (it->copy_flags == COPY_BUF_JOURNAL || it->copy_flags == (COPY_BUF_JOURNAL|COPY_BUF_COALESCED))
|
||||
{
|
||||
await_sqe(14);
|
||||
await_sqe(15);
|
||||
data->iov = (struct iovec){ it->buf, (size_t)it->len };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, bs->dsk.data_fd, &data->iov, 1, bs->dsk.data_offset + clean_loc + it->offset
|
||||
);
|
||||
wait_count++;
|
||||
}
|
||||
}
|
||||
// Wait for data writes and metadata reads
|
||||
resume_15:
|
||||
resume_16:
|
||||
if (!wait_meta_reads(15))
|
||||
resume_17:
|
||||
if (!wait_meta_reads(16))
|
||||
return false;
|
||||
// Sync data before writing metadata
|
||||
resume_17:
|
||||
resume_18:
|
||||
resume_19:
|
||||
if (copy_count && !fsync_batch(false, 17))
|
||||
resume_20:
|
||||
if (copy_count && !fsync_batch(false, 18))
|
||||
return false;
|
||||
// Modify the new metadata entry
|
||||
update_metadata_entry();
|
||||
// Update clean_db - it must be equal to the metadata entry
|
||||
update_clean_db();
|
||||
// And write metadata entries
|
||||
if (old_clean_loc != UINT64_MAX && old_clean_loc != clean_loc)
|
||||
{
|
||||
// zero out old metadata entry
|
||||
@@ -548,26 +548,56 @@ resume_2:
|
||||
}
|
||||
}
|
||||
memset((uint8_t*)meta_old.buf + meta_old.pos*bs->dsk.clean_entry_size, 0, bs->dsk.clean_entry_size);
|
||||
resume_20:
|
||||
if (meta_old.sector != meta_new.sector && !write_meta_block(meta_old, 20))
|
||||
return false;
|
||||
}
|
||||
if (meta_old.sector != meta_new.sector)
|
||||
{
|
||||
resume_21:
|
||||
if (!write_meta_block(meta_new, 21))
|
||||
return false;
|
||||
if (flusher->inflight_meta_sectors.find(meta_old.sector) != flusher->inflight_meta_sectors.end())
|
||||
{
|
||||
wait_state = wait_base+21;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.insert(meta_old.sector);
|
||||
resume_22:
|
||||
if (!write_meta_block(meta_old, 22))
|
||||
return false;
|
||||
resume_23:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = wait_base+23;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.erase(meta_old.sector);
|
||||
}
|
||||
}
|
||||
resume_24:
|
||||
if (flusher->inflight_meta_sectors.find(meta_new.sector) != flusher->inflight_meta_sectors.end())
|
||||
{
|
||||
wait_state = wait_base+24;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.insert(meta_new.sector);
|
||||
// Modify the new metadata entry
|
||||
update_metadata_entry();
|
||||
// Update clean_db - it must be equal to the metadata entry
|
||||
update_clean_db();
|
||||
// And write metadata entries
|
||||
resume_25:
|
||||
if (!write_meta_block(meta_new, 25))
|
||||
return false;
|
||||
resume_26:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = wait_base+22;
|
||||
wait_state = wait_base+26;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.erase(meta_new.sector);
|
||||
// Done, free all buffers
|
||||
free_buffers();
|
||||
// And sync metadata (in batches - not per each operation!)
|
||||
resume_23:
|
||||
resume_24:
|
||||
resume_25:
|
||||
if (!fsync_batch(true, 23))
|
||||
resume_27:
|
||||
resume_28:
|
||||
resume_29:
|
||||
if (!fsync_batch(true, 27))
|
||||
return false;
|
||||
// Free the data block only when metadata is synced
|
||||
free_data_blocks();
|
||||
@@ -590,12 +620,12 @@ resume_2:
|
||||
if (bs->journal_trim_interval && !((++flusher->journal_trim_counter) % bs->journal_trim_interval) ||
|
||||
flusher->trim_wanted > 0)
|
||||
{
|
||||
resume_26:
|
||||
resume_27:
|
||||
resume_28:
|
||||
resume_29:
|
||||
resume_30:
|
||||
if (!trim_journal(26))
|
||||
resume_31:
|
||||
resume_32:
|
||||
resume_33:
|
||||
resume_34:
|
||||
if (!trim_journal(30))
|
||||
return false;
|
||||
}
|
||||
// All done
|
||||
@@ -716,7 +746,7 @@ bool journal_flusher_co::write_meta_block(flusher_meta_write_t & meta_block, int
|
||||
await_sqe(0);
|
||||
data->iov = (struct iovec){ meta_block.buf, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + meta_block.sector
|
||||
);
|
||||
wait_count++;
|
||||
@@ -734,6 +764,7 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base)
|
||||
else if (wait_state == wait_base+5) goto resume_5;
|
||||
else if (wait_state == wait_base+6) goto resume_6;
|
||||
else if (wait_state == wait_base+7) goto resume_7;
|
||||
else if (wait_state == wait_base+8) goto resume_8;
|
||||
cleared_incomplete = false;
|
||||
for (auto it = v.begin(); it != v.end(); it++)
|
||||
{
|
||||
@@ -754,11 +785,18 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base)
|
||||
if (!wait_meta_reads(wait_base+0))
|
||||
return false;
|
||||
resume_2:
|
||||
if (wait_journal_count > 0)
|
||||
if (flusher->inflight_meta_sectors.find(meta_new.sector) != flusher->inflight_meta_sectors.end())
|
||||
{
|
||||
wait_state = wait_base+2;
|
||||
return false;
|
||||
}
|
||||
flusher->inflight_meta_sectors.insert(meta_new.sector);
|
||||
resume_3:
|
||||
if (wait_journal_count > 0)
|
||||
{
|
||||
wait_state = wait_base+3;
|
||||
return false;
|
||||
}
|
||||
// Verify data checksums
|
||||
for (i = v.size()-1; i >= 0 && (v[i].copy_flags & COPY_BUF_CSUM_FILL); i--)
|
||||
{
|
||||
@@ -837,19 +875,20 @@ bool journal_flusher_co::clear_incomplete_csum_block_bits(int wait_base)
|
||||
}
|
||||
}
|
||||
// Write and fsync the modified metadata entry
|
||||
resume_3:
|
||||
if (!write_meta_block(meta_new, wait_base+3))
|
||||
return false;
|
||||
resume_4:
|
||||
if (!write_meta_block(meta_new, wait_base+4))
|
||||
return false;
|
||||
resume_5:
|
||||
if (wait_count > 0)
|
||||
{
|
||||
wait_state = wait_base+4;
|
||||
wait_state = wait_base+5;
|
||||
return false;
|
||||
}
|
||||
resume_5:
|
||||
flusher->inflight_meta_sectors.erase(meta_new.sector);
|
||||
resume_6:
|
||||
resume_7:
|
||||
if (!fsync_batch(true, wait_base+5))
|
||||
resume_8:
|
||||
if (!fsync_batch(true, wait_base+6))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -1090,7 +1129,7 @@ bool journal_flusher_co::read_dirty(int wait_base)
|
||||
vi.buf = memalign_or_die(MEM_ALIGNMENT, vi.len);
|
||||
data->iov = (struct iovec){ vi.buf, (size_t)vi.len };
|
||||
data->callback = simple_callback_r;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe, bs->dsk.data_fd, &data->iov, 1, bs->dsk.data_offset + old_clean_loc + vi.offset
|
||||
);
|
||||
wait_count++;
|
||||
@@ -1122,7 +1161,7 @@ bool journal_flusher_co::read_dirty(int wait_base)
|
||||
await_sqe(1);
|
||||
data->iov = (struct iovec){ v[i].buf, (size_t)v[i].len };
|
||||
data->callback = simple_callback_rj;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + v[i].disk_offset
|
||||
);
|
||||
wait_journal_count++;
|
||||
@@ -1215,7 +1254,7 @@ bool journal_flusher_co::modify_meta_read(uint64_t meta_loc, flusher_meta_write_
|
||||
data->iov = (struct iovec){ wr.it->second.buf, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = simple_callback_r;
|
||||
wr.submitted = true;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bs->dsk.meta_block_size + wr.sector
|
||||
);
|
||||
wait_count++;
|
||||
@@ -1313,7 +1352,7 @@ bool journal_flusher_co::fsync_batch(bool fsync_meta, int wait_base)
|
||||
await_sqe(0);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_fsync(sqe, fsync_meta ? bs->dsk.meta_fd : bs->dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, fsync_meta ? bs->dsk.meta_fd : bs->dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
cur_sync->state = 1;
|
||||
wait_count++;
|
||||
resume_2:
|
||||
@@ -1383,7 +1422,7 @@ bool journal_flusher_co::trim_journal(int wait_base)
|
||||
((journal_entry_start*)flusher->journal_superblock)->crc32 = je_crc32((journal_entry*)flusher->journal_superblock);
|
||||
data->iov = (struct iovec){ flusher->journal_superblock, (size_t)bs->dsk.journal_block_size };
|
||||
data->callback = simple_callback_w;
|
||||
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
wait_count++;
|
||||
resume_2:
|
||||
if (wait_count > 0)
|
||||
@@ -1394,7 +1433,7 @@ bool journal_flusher_co::trim_journal(int wait_base)
|
||||
if (!bs->disable_journal_fsync)
|
||||
{
|
||||
await_sqe(3);
|
||||
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback_w;
|
||||
wait_count++;
|
||||
|
@@ -119,7 +119,8 @@ class journal_flusher_t
|
||||
|
||||
std::map<uint64_t, meta_sector_t> meta_sectors;
|
||||
std::deque<object_id> flush_queue;
|
||||
std::map<object_id, uint64_t> flush_versions; // FIXME: consider unordered_map?
|
||||
std::unordered_map<object_id, uint64_t> flush_versions;
|
||||
std::unordered_set<uint64_t> inflight_meta_sectors;
|
||||
|
||||
bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
|
||||
bool try_find_other(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
|
||||
|
@@ -343,44 +343,6 @@ void blockstore_impl_t::enqueue_op(blockstore_op_t *op)
|
||||
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
|
||||
return;
|
||||
}
|
||||
if (op->opcode == BS_OP_SYNC_STAB_ALL)
|
||||
{
|
||||
std::function<void(blockstore_op_t*)> *old_callback = new std::function<void(blockstore_op_t*)>(op->callback);
|
||||
op->opcode = BS_OP_SYNC;
|
||||
op->callback = [this, old_callback](blockstore_op_t *op)
|
||||
{
|
||||
if (op->retval >= 0 && unstable_writes.size() > 0)
|
||||
{
|
||||
op->opcode = BS_OP_STABLE;
|
||||
op->len = unstable_writes.size();
|
||||
obj_ver_id *vers = new obj_ver_id[op->len];
|
||||
op->buf = vers;
|
||||
int i = 0;
|
||||
for (auto it = unstable_writes.begin(); it != unstable_writes.end(); it++, i++)
|
||||
{
|
||||
vers[i] = {
|
||||
.oid = it->first,
|
||||
.version = it->second,
|
||||
};
|
||||
}
|
||||
unstable_writes.clear();
|
||||
op->callback = [old_callback](blockstore_op_t *op)
|
||||
{
|
||||
obj_ver_id *vers = (obj_ver_id*)op->buf;
|
||||
delete[] vers;
|
||||
op->buf = NULL;
|
||||
(*old_callback)(op);
|
||||
delete old_callback;
|
||||
};
|
||||
this->enqueue_op(op);
|
||||
}
|
||||
else
|
||||
{
|
||||
(*old_callback)(op);
|
||||
delete old_callback;
|
||||
}
|
||||
};
|
||||
}
|
||||
if ((op->opcode == BS_OP_WRITE || op->opcode == BS_OP_WRITE_STABLE || op->opcode == BS_OP_DELETE) && !enqueue_write(op))
|
||||
{
|
||||
ringloop->set_immediate([op]() { std::function<void (blockstore_op_t*)>(op->callback)(op); });
|
||||
|
@@ -19,6 +19,7 @@
|
||||
#include <deque>
|
||||
#include <new>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "cpp-btree/btree_map.h"
|
||||
|
||||
|
@@ -66,7 +66,7 @@ int blockstore_init_meta::loop()
|
||||
last_read_offset = 0;
|
||||
data->iov = { metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
io_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_1:
|
||||
@@ -104,7 +104,7 @@ resume_1:
|
||||
last_read_offset = 0;
|
||||
data->iov = (struct iovec){ metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_3:
|
||||
@@ -213,12 +213,12 @@ resume_2:
|
||||
data->iov = { bufs[i].buf, (size_t)bufs[i].size };
|
||||
data->callback = [this, i](ring_data_t *data) { handle_event(data, i); };
|
||||
if (!zero_on_init)
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
io_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
else
|
||||
{
|
||||
// Fill metadata with zeroes
|
||||
memset(data->iov.iov_base, 0, data->iov.iov_len);
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
}
|
||||
bs->ringloop->submit();
|
||||
break;
|
||||
@@ -245,7 +245,7 @@ resume_2:
|
||||
assert(bufs[i].size <= 0x7fffffff);
|
||||
data->iov = { bufs[i].buf, (size_t)bufs[i].size };
|
||||
data->callback = [this, i](ring_data_t *data) { handle_event(data, i); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + bufs[i].offset);
|
||||
bs->ringloop->submit();
|
||||
bufs[i].state = INIT_META_WRITING;
|
||||
submitted++;
|
||||
@@ -274,7 +274,7 @@ resume_2:
|
||||
last_read_offset = (1+next_offset)*bs->dsk.meta_block_size;
|
||||
data->iov = { metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
io_uring_prep_readv(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_5:
|
||||
@@ -291,7 +291,7 @@ resume_5:
|
||||
GET_SQE();
|
||||
data->iov = { metadata_buffer, (size_t)bs->dsk.meta_block_size };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
my_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
io_uring_prep_writev(sqe, bs->dsk.meta_fd, &data->iov, 1, bs->dsk.meta_offset + (1+next_offset)*bs->dsk.meta_block_size);
|
||||
bs->ringloop->submit();
|
||||
submitted++;
|
||||
resume_6:
|
||||
@@ -313,7 +313,7 @@ resume_6:
|
||||
if (zero_on_init && !bs->disable_meta_fsync)
|
||||
{
|
||||
GET_SQE();
|
||||
my_uring_prep_fsync(sqe, bs->dsk.meta_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.meta_fd, IORING_FSYNC_DATASYNC);
|
||||
last_read_offset = 0;
|
||||
data->iov = { 0 };
|
||||
data->callback = [this](ring_data_t *data) { handle_event(data, -1); };
|
||||
@@ -495,7 +495,7 @@ int blockstore_init_journal::loop()
|
||||
data = ((ring_data_t*)sqe->user_data);
|
||||
data->iov = { submitted_buf, (size_t)bs->journal.block_size };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
io_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
bs->ringloop->submit();
|
||||
wait_count = 1;
|
||||
resume_1:
|
||||
@@ -536,7 +536,7 @@ resume_1:
|
||||
GET_SQE();
|
||||
data->iov = (struct iovec){ submitted_buf, (size_t)(2*bs->journal.block_size) };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
io_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset);
|
||||
wait_count++;
|
||||
bs->ringloop->submit();
|
||||
resume_6:
|
||||
@@ -548,7 +548,7 @@ resume_1:
|
||||
if (!bs->disable_journal_fsync)
|
||||
{
|
||||
GET_SQE();
|
||||
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback;
|
||||
wait_count++;
|
||||
@@ -636,7 +636,7 @@ resume_1:
|
||||
(size_t)(end - journal_pos < JOURNAL_BUFFER_SIZE ? end - journal_pos : JOURNAL_BUFFER_SIZE),
|
||||
};
|
||||
data->callback = [this](ring_data_t *data1) { handle_event(data1); };
|
||||
my_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
||||
io_uring_prep_readv(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + journal_pos);
|
||||
bs->ringloop->submit();
|
||||
}
|
||||
while (done.size() > 0)
|
||||
@@ -651,7 +651,7 @@ resume_1:
|
||||
GET_SQE();
|
||||
data->iov = { init_write_buf, (size_t)bs->journal.block_size };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||
io_uring_prep_writev(sqe, bs->dsk.journal_fd, &data->iov, 1, bs->journal.offset + init_write_sector);
|
||||
wait_count++;
|
||||
bs->ringloop->submit();
|
||||
resume_7:
|
||||
@@ -665,7 +665,7 @@ resume_1:
|
||||
GET_SQE();
|
||||
data->iov = { 0 };
|
||||
data->callback = simple_callback;
|
||||
my_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, bs->dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
wait_count++;
|
||||
bs->ringloop->submit();
|
||||
}
|
||||
|
@@ -190,7 +190,7 @@ void blockstore_impl_t::prepare_journal_sector_write(int cur_sector, blockstore_
|
||||
(size_t)journal.block_size
|
||||
};
|
||||
data->callback = [this, flush_id = journal.submit_id](ring_data_t *data) { handle_journal_write(data, flush_id); };
|
||||
my_uring_prep_writev(
|
||||
io_uring_prep_writev(
|
||||
sqe, dsk.journal_fd, &data->iov, 1, journal.offset + journal.sector_info[cur_sector].offset
|
||||
);
|
||||
}
|
||||
@@ -326,31 +326,3 @@ void journal_t::dump_diagnostics()
|
||||
journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
|
||||
);
|
||||
}
|
||||
|
||||
// Zero-initialized (static storage) buffer that crc32c_pad() feeds to crc32c()
// as the source of padding bytes, at most 4096 bytes per call.
// NOTE(review): declared as 4096 uint64_t elements (32 KiB), while the visible
// user never reads more than 4096 bytes of it - confirm whether the size is intended.
static uint64_t zero_page[4096];
|
||||
|
||||
uint32_t crc32c_pad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad)
|
||||
{
|
||||
uint32_t r = prev_crc;
|
||||
while (left_pad >= 4096)
|
||||
{
|
||||
r = crc32c(r, zero_page, 4096);
|
||||
left_pad -= 4096;
|
||||
}
|
||||
if (left_pad > 0)
|
||||
r = crc32c(r, zero_page, left_pad);
|
||||
r = crc32c(r, buf, len);
|
||||
while (right_pad >= 4096)
|
||||
{
|
||||
r = crc32c(r, zero_page, 4096);
|
||||
right_pad -= 4096;
|
||||
}
|
||||
if (left_pad > 0)
|
||||
r = crc32c(r, zero_page, right_pad);
|
||||
return r;
|
||||
}
|
||||
|
||||
uint32_t crc32c_nopad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad)
|
||||
{
|
||||
return crc32c(0, buf, len);
|
||||
}
|
||||
|
@@ -216,6 +216,3 @@ struct blockstore_journal_check_t
|
||||
};
|
||||
|
||||
journal_entry* prefill_single_journal_entry(journal_t & journal, uint16_t type, uint32_t size);
|
||||
|
||||
uint32_t crc32c_pad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad);
|
||||
uint32_t crc32c_nopad(uint32_t prev_crc, const void *buf, size_t len, size_t left_pad, size_t right_pad);
|
||||
|
@@ -27,7 +27,7 @@ int blockstore_impl_t::fulfill_read_push(blockstore_op_t *op, void *buf, uint64_
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
data->iov = (struct iovec){ buf, (size_t)len };
|
||||
PRIV(op)->pending_ops++;
|
||||
my_uring_prep_readv(
|
||||
io_uring_prep_readv(
|
||||
sqe,
|
||||
IS_JOURNAL(item_state) ? dsk.journal_fd : dsk.data_fd,
|
||||
&data->iov, 1,
|
||||
@@ -356,7 +356,7 @@ bool blockstore_impl_t::read_checksum_block(blockstore_op_t *op, int rv_pos, uin
|
||||
int n_cur = n_iov-n_pos < IOV_MAX ? n_iov-n_pos : IOV_MAX;
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
PRIV(op)->pending_ops++;
|
||||
my_uring_prep_readv(sqe, submit_fd, iov + n_pos, n_cur, submit_offset + clean_loc + item_start + d_pos);
|
||||
io_uring_prep_readv(sqe, submit_fd, iov + n_pos, n_cur, submit_offset + clean_loc + item_start + d_pos);
|
||||
data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
|
||||
if (n_pos > 0 || n_pos + IOV_MAX < n_iov)
|
||||
{
|
||||
@@ -702,7 +702,7 @@ uint8_t* blockstore_impl_t::read_clean_meta_block(blockstore_op_t *op, uint64_t
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
data->iov = (struct iovec){ buf, (size_t)dsk.meta_block_size };
|
||||
PRIV(op)->pending_ops++;
|
||||
my_uring_prep_readv(sqe, dsk.meta_fd, &data->iov, 1, dsk.meta_offset + dsk.meta_block_size + sector);
|
||||
io_uring_prep_readv(sqe, dsk.meta_fd, &data->iov, 1, dsk.meta_offset + dsk.meta_block_size + sector);
|
||||
data->callback = [this, op](ring_data_t *data) { handle_read_event(data, op); };
|
||||
// return pointer to checksums + bitmap
|
||||
return buf + pos + sizeof(clean_disk_entry);
|
||||
|
@@ -101,7 +101,7 @@ resume_2:
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
|
@@ -400,7 +400,7 @@ resume_2:
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
|
@@ -58,7 +58,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||
if (!disable_data_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.data_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
@@ -149,7 +149,7 @@ int blockstore_impl_t::continue_sync(blockstore_op_t *op)
|
||||
if (!disable_journal_fsync)
|
||||
{
|
||||
BS_SUBMIT_GET_SQE(sqe, data);
|
||||
my_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
io_uring_prep_fsync(sqe, dsk.journal_fd, IORING_FSYNC_DATASYNC);
|
||||
data->iov = { 0 };
|
||||
data->callback = [this, op](ring_data_t *data) { handle_write_event(data, op); };
|
||||
PRIV(op)->min_flushed_journal_sector = PRIV(op)->max_flushed_journal_sector = 0;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user