Compare commits

..

1 Commits

92 changed files with 696 additions and 5788 deletions

View File

@@ -2,6 +2,6 @@ cmake_minimum_required(VERSION 2.8)
project(vitastor)
set(VERSION "0.6.7")
set(VERSION "0.6.4")
add_subdirectory(src)

View File

@@ -40,7 +40,7 @@ Vitastor на данный момент находится в статусе п
- Драйвер диска для QEMU (собирается вне дерева исходников QEMU)
- Драйвер диска для утилиты тестирования производительности fio (также собирается вне дерева исходников fio)
- NBD-прокси для монтирования образов ядром ("блочное устройство в режиме пользователя")
- Утилита для удаления образов/инодов (vitastor-cli rm-data)
- Утилита удаления образов/инодов (vitastor-rm)
- Пакеты для Debian и CentOS
- Статистика операций ввода/вывода и занятого места в разрезе инодов
- Именование инодов через хранение их метаданных в etcd
@@ -48,16 +48,12 @@ Vitastor на данный момент находится в статусе п
- Сглаживание производительности случайной записи в SSD+HDD конфигурациях
- Поддержка RDMA/RoCEv2 через libibverbs
- CSI-плагин для Kubernetes
- Базовая поддержка OpenStack: драйвер Cinder, патчи для Nova и libvirt
- Слияние снапшотов (vitastor-cli {snap-rm,flatten,merge})
- Консольный интерфейс для управления образами (vitastor-cli {ls,create,modify})
## Планы развития
- Поддержка удаления снапшотов (слияния слоёв)
- Более корректные скрипты разметки дисков и автоматического запуска OSD
- Другие инструменты администрирования
- Плагины для OpenNebula, Proxmox и других облачных систем
- Плагины для OpenStack, OpenNebula, Proxmox и других облачных систем
- iSCSI-прокси
- Более быстрое переключение при отказах
- Фоновая проверка целостности без контрольных сумм (сверка реплик)
@@ -374,7 +370,7 @@ Vitastor с однопоточной NBD прокси на том же стен
- Установите gcc и g++ 8.x или новее.
- Склонируйте данный репозиторий с подмодулями: `git clone https://yourcmc.ru/git/vitalif/vitastor/`.
- Желательно пересобрать QEMU с патчем, который делает необязательным запуск через LD_PRELOAD.
См `patches/qemu-*.*-vitastor.patch` - выберите версию, наиболее близкую вашей версии QEMU.
См `qemu-*.*-vitastor.patch` - выберите версию, наиболее близкую вашей версии QEMU.
- Установите QEMU 3.0 или новее, возьмите исходные коды установленного пакета, начните его пересборку,
через некоторое время остановите её и скопируйте следующие заголовки:
- `<qemu>/include` &rarr; `<vitastor>/qemu/include`
@@ -493,10 +489,10 @@ qemu-system-x86_64 -enable-kvm -m 1024
### Удалить образ
Используйте утилиту vitastor-cli rm-data. Например:
Используйте утилиту vitastor-rm. Например:
```
vitastor-cli rm-data --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
vitastor-rm --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
```
### NBD

View File

@@ -34,7 +34,7 @@ breaking changes in the future. However, the following is implemented:
- QEMU driver (built out-of-tree)
- Loadable fio engine for benchmarks (also built out-of-tree)
- NBD proxy for kernel mounts
- Inode removal tool (vitastor-cli rm-data)
- Inode removal tool (vitastor-rm)
- Packaging for Debian and CentOS
- Per-inode I/O and space usage statistics
- Inode metadata storage in etcd
@@ -42,16 +42,12 @@ breaking changes in the future. However, the following is implemented:
- Write throttling to smooth random write workloads in SSD+HDD configurations
- RDMA/RoCEv2 support via libibverbs
- CSI plugin for Kubernetes
- Basic OpenStack support: Cinder driver, Nova and libvirt patches
- Snapshot merge tool (vitastor-cli {snap-rm,flatten,merge})
- Image management CLI (vitastor-cli {ls,create,modify})
## Roadmap
- Snapshot deletion (layer merge) support
- Better OSD creation and auto-start tools
- Other administrative tools
- Plugins for OpenNebula, Proxmox and other cloud systems
- Plugins for OpenStack, OpenNebula, Proxmox and other cloud systems
- iSCSI proxy
- Faster failover
- Scrubbing without checksums (verification of replicas)
@@ -342,7 +338,7 @@ Vitastor with single-thread NBD on the same hardware:
* For QEMU 2.0+: `<qemu>/qapi-types.h` &rarr; `<vitastor>/qemu/b/qemu/qapi-types.h`
- `config-host.h` and `qapi` are required because they contain generated headers
- You can also rebuild QEMU with a patch that makes LD_PRELOAD unnecessary to load vitastor driver.
See `patches/qemu-*.*-vitastor.patch`.
See `qemu-*.*-vitastor.patch`.
- Install fio 3.7 or later, get its source and symlink it into `<vitastor>/fio`.
- Build & install Vitastor with `mkdir build && cd build && cmake .. && make -j8 && make install`.
Pay attention to the `QEMU_PLUGINDIR` cmake option - it must be set to `qemu-kvm` on RHEL.
@@ -445,10 +441,10 @@ just like in qemu-img.
### Remove inode
Use vitastor-rm / vitastor-cli rm-data. For example:
Use vitastor-rm. For example:
```
vitastor-cli rm-data --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
vitastor-rm --etcd_address 10.115.0.10:2379/v3 --pool 1 --inode 1 --parallel_osds 16 --iodepth 32
```
### NBD

View File

@@ -1,2 +1,3 @@
vitastor-csi
go.sum
Dockerfile

View File

@@ -1,7 +1,7 @@
# Compile stage
FROM golang:buster AS build
ADD go.sum go.mod /app/
ADD go.mod /app/
RUN cd /app; CGO_ENABLED=1 GOOS=linux GOARCH=amd64 go mod download -x
ADD . /app
RUN perl -i -e '$/ = undef; while(<>) { s/\n\s*(\{\s*\n)/$1\n/g; s/\}(\s*\n\s*)else\b/$1} else/g; print; }' `find /app -name '*.go'`

View File

@@ -1,4 +1,4 @@
VERSION ?= v0.6.7
VERSION ?= v0.6.4
all: build push

View File

@@ -49,7 +49,7 @@ spec:
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
image: vitalif/vitastor-csi:v0.6.7
image: vitalif/vitastor-csi:v0.6.4
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -116,7 +116,7 @@ spec:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
image: vitalif/vitastor-csi:v0.6.7
image: vitalif/vitastor-csi:v0.6.4
args:
- "--node=$(NODE_ID)"
- "--endpoint=$(CSI_ENDPOINT)"

View File

@@ -1,448 +0,0 @@
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
cloud.google.com/go v0.51.0/go.mod h1:hWtGJ6gnXH+KgDv+V0zFGDvpi07n3z8ZNj3T1RW0Gcw=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
github.com/Azure/go-autorest/autorest v0.9.0/go.mod h1:xyHB1BMZT0cuDHU7I0+g046+BFDTQ8rEZB0s4Yfa6bI=
github.com/Azure/go-autorest/autorest v0.9.6/go.mod h1:/FALq9T/kS7b5J5qsQ+RSTUdAmGFqi0vUdVNNx8q630=
github.com/Azure/go-autorest/autorest/adal v0.5.0/go.mod h1:8Z9fGy2MpX0PvDjB1pEgQTmVqjGhiHBW7RJJEciWzS0=
github.com/Azure/go-autorest/autorest/adal v0.8.2/go.mod h1:ZjhuQClTqx435SRJ2iMlOxPYt3d2C/T/7TiQCVZSn3Q=
github.com/Azure/go-autorest/autorest/date v0.1.0/go.mod h1:plvfp3oPSKwf2DNjlBjWF/7vwR+cUD/ELuzDCXwHUVA=
github.com/Azure/go-autorest/autorest/date v0.2.0/go.mod h1:vcORJHLJEh643/Ioh9+vPmf1Ij9AEBM5FuBIXLmIy0g=
github.com/Azure/go-autorest/autorest/mocks v0.1.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0=
github.com/Azure/go-autorest/autorest/mocks v0.2.0/go.mod h1:OTyCOPRA2IgIlWxVYxBee2F5Gr4kF2zd2J5cFRaIDN0=
github.com/Azure/go-autorest/autorest/mocks v0.3.0/go.mod h1:a8FDP3DYzQ4RYfVAxAN3SVSiiO77gL2j2ronKKP0syM=
github.com/Azure/go-autorest/logger v0.1.0/go.mod h1:oExouG+K6PryycPJfVSxi/koC6LSNgds39diKLz7Vrc=
github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbtp2fGCgRFtBroKn4Dk=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ=
github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/container-storage-interface/spec v1.2.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
github.com/container-storage-interface/spec v1.4.0 h1:ozAshSKxpJnYUfmkpZCTYyF/4MYeYlhdXbAvPvfGmkg=
github.com/container-storage-interface/spec v1.4.0/go.mod h1:6URME8mwIBbpVyZV93Ce5St17xBiQJQY67NDsuohiy4=
github.com/coreos/bbolt v1.3.5 h1:XFv7xaq7701j8ZSEzR28VohFYSlyakMyqNMU5FQH6Ac=
github.com/coreos/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
github.com/coreos/etcd v3.3.25+incompatible h1:0GQEw6h3YnuOVdtwygkIfJ+Omx0tZ8/QkVyXI4LkbeY=
github.com/coreos/etcd v3.3.25+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM=
github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f h1:lBNOc5arjvs8E5mO2tbpBpLoyyu8B6e44T7hJy6potg=
github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM=
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815/go.mod h1:WwZ+bS3ebgob9U8Nd0kOddGdZWjyMGR8Wziv+TBNwSE=
github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/elazarl/goproxy v0.0.0-20180725130230-947c36da3153/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/ghodss/yaml v0.0.0-20150909031657-73d445a93680/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas=
github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY=
github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0=
github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg=
github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc=
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls=
github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7 h1:5ZkaAPbicIKTF2I64qf5Fh8Aa83Q/dnOafMYV0OMwjA=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3ir6b65WBswg=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw=
github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho=
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/jonboulle/clockwork v0.2.2 h1:UOGuzwb1PwsrDAObMuhUnj0p5ULPj8V/xJ7Kx9qUBdQ=
github.com/jonboulle/clockwork v0.2.2/go.mod h1:Pkfl5aHPm1nk2H9h0bjmnJD/BcgbGXUBGnn1kMkgxc8=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3 h1:CE8S1cTafDpPvMhIxNJKvHsGVBgn1xWYf1NbHQhywc8=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0 h1:s5hAObm+yFO5uHYt5dYjxi2rXrsnmRpJx4OYvIWUaQs=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kubernetes-csi/csi-lib-utils v0.9.1 h1:sGq6ifVujfMSkfTsMZip44Ttv8SDXvsBlFk9GdYl/b8=
github.com/kubernetes-csi/csi-lib-utils v0.9.1/go.mod h1:8E2jVUX9j3QgspwHXa6LwyN7IHQDjW9jX3kwoWnSC+M=
github.com/mailru/easyjson v0.0.0-20160728113105-d5b7844b561a/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI=
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
github.com/moby/term v0.0.0-20200312100748-672ec06f55cd/go.mod h1:DdlQx2hp0Ss5/fLikoLlEeIYiATotOjgB//nb973jeo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.11.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.7.1 h1:NTGy1Ja9pByO+xAeH/qiWnLrKtr3hJPNjaVUwnjpdpA=
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.10.0 h1:RyRA7RzGXQZiW+tGMr7sxa85G1z0yOpM1qq5c8lNawc=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.1.3 h1:F0+tqvhOksq22sc6iCHF5WGlWjdwj92p0udFh1VFBS8=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/sirupsen/logrus v1.6.0 h1:UBcNElsrwanuuMsnGSlYmtmgbb23qDR5dG+6X6Oo89I=
github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=
github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js=
github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0=
github.com/spf13/afero v1.2.2/go.mod h1:9ZxEEn6pIJ8Rxe320qSDBk6AsU0r9pR7Q4OcevTdifk=
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 h1:uruHq4dN7GR16kFc5fp3d1RIYzJW5onx8Ybykw2YQFA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.etcd.io/etcd v3.3.25+incompatible h1:V1RzkZJj9LqsJRy+TUBgpWSbZXITLB819lstuTFoZOY=
go.etcd.io/etcd v3.3.25+incompatible/go.mod h1:yaeTdrJi5lOmYerz05bd8+V7KubZs8YSFZfzsF9A6aI=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.uber.org/atomic v1.4.0 h1:cxzIVoETapQEqDhQu3QfnvXAV4AlzcvUCxkVUFw3+EU=
go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
go.uber.org/multierr v1.1.0 h1:HoEmRHQPVSqub6w2z2d2EOVs2fjyFRGyofhKuyDq0QI=
go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0=
go.uber.org/zap v1.10.0 h1:ORx85nbTijNz8ljznvCMR1ZBIPKFn3jQrag10X2AsuM=
go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191206172530-e9b2fee46413/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb h1:eBmm0M9fYhWpKZLjQUUKka/LtIxf46G4fxeEz5KJr9U=
golang.org/x/net v0.0.0-20201202161906-c7110b5ffcbb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4 h1:5/PjkGUjvEU5Gl6BxmvKRPpqo2uNMv4rcHBMwzk/st8=
golang.org/x/sys v0.0.0-20200622214017-ed371f2e16b4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0 h1:/5xXl8Y5W96D+TtHSlonuFqGHIWVuyCkGJLwGh9JJFs=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20181011042414-1f849cf54d09/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/grpc v1.25.1 h1:wdKvqQk7IttEw92GoRyKG2IDrUIpgpj6H6m81yfeMW0=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.24.0 h1:UhZDfRO8JRQru4/+LlLE0BRKGF8L+PICnvYZmx/fEGA=
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
k8s.io/api v0.19.0/go.mod h1:I1K45XlvTrDjmj5LoM5LuP/KYrhWbjUKT/SoPG0qTjw=
k8s.io/apimachinery v0.19.0/go.mod h1:DnPGDnARWFvYa3pMHgSxtbZb7gpzzAZ1pTfaUNDVlmA=
k8s.io/client-go v0.19.0/go.mod h1:H9E/VT95blcFQnlyShFgnFT9ZnJOAceiUHM3MlRC+mU=
k8s.io/component-base v0.19.0/go.mod h1:dKsY8BxkA+9dZIAh2aWJLL/UdASFDNtGYTCItL4LM7Y=
k8s.io/gengo v0.0.0-20200413195148-3a45101e95ac/go.mod h1:ezvh/TsK7cY6rbqRK0oQQ8IAqLxYwwyPxAX1Pzy0ii0=
k8s.io/klog v1.0.0 h1:Pt+yjF5aB1xDSVbau4VsWe+dQNzA0qv1LlXdC2dF6Q8=
k8s.io/klog v1.0.0/go.mod h1:4Bi6QPql/J/LkTDqv7R/cd3hPo4k2DG6Ptcz060Ez5I=
k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE=
k8s.io/klog/v2 v2.2.0 h1:XRvcwJozkgZ1UQJmfMGpvRthQHOvihEhYtDfAaxMz/A=
k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y=
k8s.io/kube-openapi v0.0.0-20200805222855-6aeccd4b50c6/go.mod h1:UuqjUnNftUyPE5H64/qeyjQoUZhGpeFDVdxjTeEVN2o=
k8s.io/utils v0.0.0-20200729134348-d5654de09c73/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10 h1:u5rPykqiCpL+LBfjRkXvnK71gOgIdmq3eHUEkPrbeTI=
k8s.io/utils v0.0.0-20210305010621-2afb4311ab10/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
sigs.k8s.io/structured-merge-diff/v4 v4.0.1/go.mod h1:bJZC9H9iH24zzfZ/41RGcq60oK1F7G282QMXDPYydCw=
sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o=
sigs.k8s.io/yaml v1.2.0 h1:kr/MCeFWJWTwyaHoR9c8EjH9OumOmoF9YGiZd7lFm/Q=
sigs.k8s.io/yaml v1.2.0/go.mod h1:yfXDCHCao9+ENCvLSE62v9VSji2MKu5jeNfTrofGhJc=

View File

@@ -5,7 +5,7 @@ package vitastor
const (
vitastorCSIDriverName = "csi.vitastor.io"
vitastorCSIDriverVersion = "0.6.7"
vitastorCSIDriverVersion = "0.6.4"
)
// Config struct fills the parameters of request or user input

View File

@@ -354,9 +354,9 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
return nil, status.Error(codes.Internal, "invalid "+inodeCfgKey+" key in etcd: "+err.Error())
}
// Delete inode data by invoking vitastor-cli
// Delete inode data by invoking vitastor-rm
args := []string{
"rm-data", "--etcd_address", strings.Join(etcdUrl, ","),
"--etcd_address", strings.Join(etcdUrl, ","),
"--pool", fmt.Sprintf("%d", idx.PoolId),
"--inode", fmt.Sprintf("%d", idx.Id),
}
@@ -364,7 +364,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
{
args = append(args, "--config_path", ctxVars["configPath"])
}
c := exec.Command("/usr/bin/vitastor-cli", args...)
c := exec.Command("/usr/bin/vitastor-rm", args...)
var stderr bytes.Buffer
c.Stdout = nil
c.Stderr = &stderr
@@ -372,7 +372,7 @@ func (cs *ControllerServer) DeleteVolume(ctx context.Context, req *csi.DeleteVol
stderrStr := string(stderr.Bytes())
if (err != nil)
{
klog.Errorf("vitastor-cli rm-data failed: %s, status %s\n", stderrStr, err)
klog.Errorf("vitastor-rm failed: %s, status %s\n", stderrStr, err)
return nil, status.Error(codes.Internal, stderrStr+" (status "+err.Error()+")")
}

View File

@@ -1,7 +1,7 @@
#!/bin/bash
cat < vitastor.Dockerfile > ../Dockerfile
sed 's/$REL/bullseye/g' < vitastor.Dockerfile > ../Dockerfile
cd ..
mkdir -p packages
sudo podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f Dockerfile .
sudo podman build -v `pwd`/packages:/root/packages -f Dockerfile .
rm Dockerfile

View File

@@ -1,7 +1,7 @@
#!/bin/bash
cat < vitastor.Dockerfile > ../Dockerfile
sed 's/$REL/buster/g' < vitastor.Dockerfile > ../Dockerfile
cd ..
mkdir -p packages
sudo podman build --build-arg REL=buster -v `pwd`/packages:/root/packages -f Dockerfile .
sudo podman build -v `pwd`/packages:/root/packages -f Dockerfile .
rm Dockerfile

2
debian/changelog vendored
View File

@@ -1,4 +1,4 @@
vitastor (0.6.7-1) unstable; urgency=medium
vitastor (0.6.4-1) unstable; urgency=medium
* RDMA support
* Bugfixes

View File

@@ -1,9 +1,7 @@
# Build patched QEMU for Debian Buster or Bullseye/Sid inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/patched-qemu.Dockerfile .
ARG REL=
FROM debian:$REL
ARG REL=
WORKDIR /root
@@ -22,22 +20,20 @@ RUN apt-get update
RUN apt-get -y install qemu fio liburing1 liburing-dev libgoogle-perftools-dev devscripts
RUN apt-get -y build-dep qemu
RUN apt-get -y build-dep fio
# To build a custom version
#RUN cp /root/packages/qemu-orig/* /root
RUN apt-get --download-only source qemu
RUN apt-get --download-only source fio
ADD patches/qemu-5.0-vitastor.patch patches/qemu-5.1-vitastor.patch /root/vitastor/patches/
ADD qemu-5.0-vitastor.patch qemu-5.1-vitastor.patch /root/vitastor/
RUN set -e; \
mkdir -p /root/packages/qemu-$REL; \
rm -rf /root/packages/qemu-$REL/*; \
cd /root/packages/qemu-$REL; \
dpkg-source -x /root/qemu*.dsc; \
if [ -d /root/packages/qemu-$REL/qemu-5.0 ]; then \
cp /root/vitastor/patches/qemu-5.0-vitastor.patch /root/packages/qemu-$REL/qemu-5.0/debian/patches; \
cp /root/vitastor/qemu-5.0-vitastor.patch /root/packages/qemu-$REL/qemu-5.0/debian/patches; \
echo qemu-5.0-vitastor.patch >> /root/packages/qemu-$REL/qemu-5.0/debian/patches/series; \
else \
cp /root/vitastor/patches/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
cp /root/vitastor/qemu-5.1-vitastor.patch /root/packages/qemu-$REL/qemu-*/debian/patches; \
P=`ls -d /root/packages/qemu-$REL/qemu-*/debian/patches`; \
echo qemu-5.1-vitastor.patch >> $P/series; \
fi; \

View File

@@ -1,9 +1,7 @@
# Build Vitastor packages for Debian Buster or Bullseye/Sid inside a container
# cd ..; podman build --build-arg REL=bullseye -v `pwd`/packages:/root/packages -f debian/vitastor.Dockerfile .
ARG REL=
FROM debian:$REL
ARG REL=
WORKDIR /root
@@ -42,10 +40,10 @@ RUN set -e -x; \
mkdir -p /root/packages/vitastor-$REL; \
rm -rf /root/packages/vitastor-$REL/*; \
cd /root/packages/vitastor-$REL; \
cp -r /root/vitastor vitastor-0.6.7; \
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.6.7/qemu; \
ln -s /root/fio-build/fio-*/ vitastor-0.6.7/fio; \
cd vitastor-0.6.7; \
cp -r /root/vitastor vitastor-0.6.4; \
ln -s /root/packages/qemu-$REL/qemu-*/ vitastor-0.6.4/qemu; \
ln -s /root/fio-build/fio-*/ vitastor-0.6.4/fio; \
cd vitastor-0.6.4; \
FIO=$(head -n1 fio/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
QEMU=$(head -n1 qemu/debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
sh copy-qemu-includes.sh; \
@@ -61,8 +59,8 @@ RUN set -e -x; \
echo "dep:fio=$FIO" > debian/substvars; \
echo "dep:qemu=$QEMU" >> debian/substvars; \
cd /root/packages/vitastor-$REL; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.7.orig.tar.xz vitastor-0.6.7; \
cd vitastor-0.6.7; \
tar --sort=name --mtime='2020-01-01' --owner=0 --group=0 --exclude=debian -cJf vitastor_0.6.4.orig.tar.xz vitastor-0.6.4; \
cd vitastor-0.6.4; \
V=$(head -n1 debian/changelog | perl -pe 's/^.*\((.*?)\).*$/$1/'); \
DEBFULLNAME="Vitaliy Filippov <vitalif@yourcmc.ru>" dch -D $REL -v "$V""$REL" "Rebuild for $REL"; \
DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage --jobs=auto -sa; \

View File

@@ -1,9 +0,0 @@
# Build Docker image with Vitastor packages
FROM debian:bullseye
ADD vitastor.list /etc/apt/sources.list.d
ADD vitastor.gpg /etc/apt/trusted.gpg.d
ADD vitastor.pref /etc/apt/preferences.d
ADD apt.conf /etc/apt/
RUN apt-get update && apt-get -y install vitastor qemu-system-x86 qemu-system-common && apt-get clean

View File

@@ -1 +0,0 @@
APT::Install-Recommends false;

Binary file not shown.

View File

@@ -1 +0,0 @@
deb http://vitastor.io/debian bullseye main

View File

@@ -1,3 +0,0 @@
Package: *
Pin: origin "vitastor.io"
Pin-Priority: 1000

2
json11

Submodule json11 updated: 34781ed7ee...97f06cb20c

View File

@@ -33,10 +33,8 @@ const etcd_allow = new RegExp('^'+[
'pg/state/[1-9]\\d*/[1-9]\\d*',
'pg/stats/[1-9]\\d*/[1-9]\\d*',
'pg/history/[1-9]\\d*/[1-9]\\d*',
'pool/stats/[1-9]\\d*',
'history/last_clean_pgs',
'inode/stats/[1-9]\\d*/[1-9]\\d*',
'pool/stats/[1-9]\\d*',
'stats',
'index/image/.*',
'index/maxid/[1-9]\\d*',
@@ -83,12 +81,11 @@ const etcd_tree = {
osd_ping_timeout: 5, // seconds. min: 1
up_wait_retry_interval: 500, // ms. min: 50
// osd
etcd_report_interval: 5,
etcd_report_interval: 30, // min: 10
run_primary: true,
bind_address: "0.0.0.0",
bind_port: 0,
autosync_interval: 5,
autosync_writes: 128,
client_queue_depth: 128, // unused
recovery_queue_depth: 4,
recovery_sync_batch: 16,
@@ -232,7 +229,7 @@ const etcd_tree = {
/* <pool_id>: {
<pg_id>: {
primary: osd_num_t,
state: ("starting"|"peering"|"peered"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
state: ("starting"|"peering"|"incomplete"|"active"|"repeering"|"stopping"|"offline"|
"degraded"|"has_incomplete"|"has_degraded"|"has_misplaced"|"has_unclean"|
"has_invalid"|"left_on_dead")[],
}
@@ -265,9 +262,9 @@ const etcd_tree = {
/* <pool_id>: {
<inode_t>: {
raw_used: uint64_t, // raw used bytes on OSDs
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
read: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
write: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
delete: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
}, */
},
@@ -284,14 +281,14 @@ const etcd_tree = {
},
stats: {
/* op_stats: {
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t, lat: uint64_t },
<string>: { count: uint64_t, usec: uint64_t, bytes: uint64_t },
},
subop_stats: {
<string>: { count: uint64_t, usec: uint64_t, iops: uint64_t, lat: uint64_t },
<string>: { count: uint64_t, usec: uint64_t },
},
recovery_stats: {
degraded: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t, bps: uint64_t, iops: uint64_t },
degraded: { count: uint64_t, bytes: uint64_t },
misplaced: { count: uint64_t, bytes: uint64_t },
},
object_counts: {
object: uint64_t,
@@ -338,8 +335,6 @@ class Mon
this.etcd_prefix = this.etcd_prefix.replace(/\/\/+/g, '/').replace(/^\/?(.*[^\/])\/?$/, '/$1');
this.etcd_start_timeout = (config.etcd_start_timeout || 5) * 1000;
this.state = JSON.parse(JSON.stringify(this.constructor.etcd_tree));
this.signals_set = false;
this.on_stop_cb = () => this.on_stop().catch(console.error);
}
async start()
@@ -349,18 +344,7 @@ class Mon
await this.become_master();
await this.load_cluster_state();
await this.start_watcher(this.config.etcd_mon_retries);
for (const pool_id in this.state.config.pools)
{
if (!this.state.pool.stats[pool_id] ||
!this.state.pool.stats[pool_id].pg_real_size)
{
// Generate missing data in etcd
this.state.config.pgs.hash = null;
break;
}
}
await this.recheck_pgs();
this.schedule_update_stats();
}
async load_config()
@@ -402,6 +386,11 @@ class Mon
{
this.config.mon_stats_timeout = 100;
}
this.config.mon_stats_interval = Number(this.config.mon_stats_interval) || 5000;
if (this.config.mon_stats_interval < 100)
{
this.config.mon_stats_interval = 100;
}
// After this number of seconds, a dead OSD will be removed from PG distribution
this.config.osd_out_time = Number(this.config.osd_out_time) || 0;
if (!this.config.osd_out_time)
@@ -564,7 +553,7 @@ class Mon
const max_ttl = this.config.etcd_mon_ttl + this.config.etcd_mon_timeout/1000*this.config.etcd_mon_retries;
const res = await this.etcd_call('/lease/grant', { TTL: max_ttl }, this.config.etcd_mon_timeout, -1);
this.etcd_lease_id = res.ID;
this.lease_timer = setInterval(async () =>
setInterval(async () =>
{
const res = await this.etcd_call('/lease/keepalive', { ID: this.etcd_lease_id }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
if (!res.result.TTL)
@@ -572,19 +561,6 @@ class Mon
this.die('Lease expired');
}
}, this.config.etcd_mon_timeout);
if (!this.signals_set)
{
process.on('SIGINT', this.on_stop_cb);
process.on('SIGTERM', this.on_stop_cb);
this.signals_set = true;
}
}
async on_stop()
{
clearInterval(this.lease_timer);
await this.etcd_call('/lease/revoke', { ID: this.etcd_lease_id }, this.config.etcd_mon_timeout, this.config.etcd_mon_retries);
process.exit(0);
}
async become_master()
@@ -1083,9 +1059,7 @@ class Mon
this.state.pool.stats[pool_id] = {
used_raw_tb: (this.state.pool.stats[pool_id]||{}).used_raw_tb || 0,
total_raw_tb: optimize_result.space,
pg_real_size: pg_effsize,
raw_to_usable: pg_effsize / (pool_cfg.scheme === 'replicated'
? 1 : (pool_cfg.pg_size - (pool_cfg.parity_chunks||0))),
raw_to_usable: pg_effsize / (pool_cfg.pg_size - (pool_cfg.parity_chunks||0)),
space_efficiency: optimize_result.space/(optimize_result.total_space||1),
};
etcd_request.success.push({ requestPut: {
@@ -1198,7 +1172,7 @@ class Mon
}, this.config.mon_change_timeout || 1000);
}
sum_op_stats(timestamp, prev_stats)
sum_op_stats()
{
const op_stats = {}, subop_stats = {}, recovery_stats = {};
for (const osd in this.state.osd.stats)
@@ -1224,29 +1198,6 @@ class Mon
recovery_stats[op].bytes += BigInt(st.recovery_stats[op].bytes||0);
}
}
if (prev_stats && prev_stats.timestamp >= timestamp)
{
prev_stats = null;
}
const tm = prev_stats ? BigInt(timestamp - prev_stats.timestamp) : 0;
for (const op in op_stats)
{
op_stats[op].bps = prev_stats ? (op_stats[op].bytes - prev_stats.op_stats[op].bytes) * 1000n / tm : 0;
op_stats[op].iops = prev_stats ? (op_stats[op].count - prev_stats.op_stats[op].count) * 1000n / tm : 0;
op_stats[op].lat = prev_stats ? (op_stats[op].usec - prev_stats.op_stats[op].usec)
/ ((op_stats[op].count - prev_stats.op_stats[op].count) || 1n) : 0;
}
for (const op in subop_stats)
{
subop_stats[op].iops = prev_stats ? (subop_stats[op].count - prev_stats.subop_stats[op].count) * 1000n / tm : 0;
subop_stats[op].lat = prev_stats ? (subop_stats[op].usec - prev_stats.subop_stats[op].usec)
/ ((subop_stats[op].count - prev_stats.subop_stats[op].count) || 1n) : 0;
}
for (const op in recovery_stats)
{
recovery_stats[op].bps = prev_stats ? (recovery_stats[op].bytes - prev_stats.recovery_stats[op].bytes) * 1000n / tm : 0;
recovery_stats[op].iops = prev_stats ? (recovery_stats[op].count - prev_stats.recovery_stats[op].count) * 1000n / tm : 0;
}
return { op_stats, subop_stats, recovery_stats };
}
@@ -1273,7 +1224,7 @@ class Mon
return object_counts;
}
sum_inode_stats(prev_stats, timestamp, prev_timestamp)
sum_inode_stats()
{
const inode_stats = {};
const inode_stub = () => ({
@@ -1282,10 +1233,8 @@ class Mon
write: { count: 0n, usec: 0n, bytes: 0n },
delete: { count: 0n, usec: 0n, bytes: 0n },
});
const seen_pools = {};
for (const pool_id in this.state.config.pools)
{
seen_pools[pool_id] = true;
this.state.pool.stats[pool_id] = this.state.pool.stats[pool_id] || {};
this.state.pool.stats[pool_id].used_raw_tb = 0n;
}
@@ -1293,12 +1242,7 @@ class Mon
{
for (const pool_id in this.state.osd.space[osd_num])
{
this.state.pool.stats[pool_id] = this.state.pool.stats[pool_id] || {};
if (!seen_pools[pool_id])
{
this.state.pool.stats[pool_id].used_raw_tb = 0n;
seen_pools[pool_id] = true;
}
this.state.pool.stats[pool_id] = this.state.pool.stats[pool_id] || { used_raw_tb: 0n };
inode_stats[pool_id] = inode_stats[pool_id] || {};
for (const inode_num in this.state.osd.space[osd_num][pool_id])
{
@@ -1309,7 +1253,7 @@ class Mon
}
}
}
for (const pool_id in seen_pools)
for (const pool_id in this.state.config.pools)
{
const used = this.state.pool.stats[pool_id].used_raw_tb;
this.state.pool.stats[pool_id].used_raw_tb = Number(used)/1024/1024/1024/1024;
@@ -1332,31 +1276,43 @@ class Mon
}
}
}
if (prev_stats && prev_timestamp >= timestamp)
return inode_stats;
}
fix_stat_overflows(obj, scratch)
{
for (const k in obj)
{
prev_stats = null;
}
const tm = prev_stats ? BigInt(timestamp - prev_timestamp) : 0;
for (const pool_id in inode_stats)
{
for (const inode_num in inode_stats[pool_id])
if (typeof obj[k] == 'bigint')
{
for (const op of [ 'read', 'write', 'delete' ])
if (obj[k] >= 0x10000000000000000n)
{
const op_st = inode_stats[pool_id][inode_num][op];
const prev_st = prev_stats && prev_stats[pool_id] && prev_stats[pool_id][inode_num] && prev_stats[pool_id][inode_num][op];
op_st.bps = prev_st ? (op_st.bytes - prev_st.bytes) * 1000n / tm : 0;
op_st.iops = prev_st ? (op_st.count - prev_st.count) * 1000n / tm : 0;
op_st.lat = prev_st ? (op_st.usec - prev_st.usec) / ((op_st.count - prev_st.count) || 1n) : 0;
if (scratch[k])
{
for (const k2 in scratch)
{
obj[k2] -= scratch[k2];
scratch[k2] = 0n;
}
}
else
{
for (const k2 in obj)
{
scratch[k2] = obj[k2];
}
}
}
}
else if (typeof obj[k] == 'object')
{
this.fix_stat_overflows(obj[k], scratch[k] = (scratch[k] || {}));
}
}
return inode_stats;
}
serialize_bigints(obj)
{
obj = { ...obj };
for (const k in obj)
{
if (typeof obj[k] == 'bigint')
@@ -1365,26 +1321,22 @@ class Mon
}
else if (typeof obj[k] == 'object')
{
obj[k] = this.serialize_bigints(obj[k]);
this.serialize_bigints(obj[k]);
}
}
return obj;
}
async update_total_stats()
{
const txn = [];
const timestamp = Date.now();
const stats = this.sum_op_stats();
const object_counts = this.sum_object_counts();
let stats = this.sum_op_stats(timestamp, this.prev_stats);
let inode_stats = this.sum_inode_stats(
this.prev_stats ? this.prev_stats.inode_stats : null,
timestamp, this.prev_stats ? this.prev_stats.timestamp : null
);
this.prev_stats = { timestamp, ...stats, inode_stats };
const inode_stats = this.sum_inode_stats();
this.fix_stat_overflows(stats, (this.prev_stats = this.prev_stats || {}));
this.fix_stat_overflows(inode_stats, (this.prev_inode_stats = this.prev_inode_stats || {}));
stats.object_counts = object_counts;
stats = this.serialize_bigints(stats);
inode_stats = this.serialize_bigints(inode_stats);
this.serialize_bigints(stats);
this.serialize_bigints(inode_stats);
txn.push({ requestPut: { key: b64(this.etcd_prefix+'/stats'), value: b64(JSON.stringify(stats)) } });
for (const pool_id in inode_stats)
{
@@ -1398,11 +1350,9 @@ class Mon
}
for (const pool_id in this.state.pool.stats)
{
const pool_stats = { ...this.state.pool.stats[pool_id] };
this.serialize_bigints(pool_stats);
txn.push({ requestPut: {
key: b64(this.etcd_prefix+'/pool/stats/'+pool_id),
value: b64(JSON.stringify(pool_stats)),
value: b64(JSON.stringify(this.state.pool.stats[pool_id])),
} });
}
if (txn.length)
@@ -1415,13 +1365,20 @@ class Mon
{
if (this.stats_timer)
{
return;
clearTimeout(this.stats_timer);
this.stats_timer = null;
}
let sleep = (this.stats_update_next||0) - Date.now();
if (sleep < this.config.mon_stats_timeout)
{
sleep = this.config.mon_stats_timeout;
}
this.stats_timer = setTimeout(() =>
{
this.stats_timer = null;
this.stats_update_next = Date.now() + this.config.mon_stats_interval;
this.update_total_stats().catch(console.error);
}, this.config.mon_stats_timeout);
}, sleep);
}
parse_kv(kv)

View File

@@ -1,957 +0,0 @@
# Vitastor Driver for OpenStack Cinder
#
# --------------------------------------------
# Install as cinder/volume/drivers/vitastor.py
# --------------------------------------------
#
# Copyright 2020 Vitaliy Filippov
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Cinder Vitastor Driver"""
import binascii
import base64
import errno
import json
import math
import os
import tempfile
from castellan import key_manager
from oslo_config import cfg
from oslo_log import log as logging
from oslo_service import loopingcall
from oslo_concurrency import processutils
from oslo_utils import encodeutils
from oslo_utils import excutils
from oslo_utils import fileutils
from oslo_utils import units
import six
from six.moves.urllib import request
from cinder import exception
from cinder.i18n import _
from cinder.image import image_utils
from cinder import interface
from cinder import objects
from cinder.objects import fields
from cinder import utils
from cinder.volume import configuration
from cinder.volume import driver
from cinder.volume import volume_utils
VERSION = '0.6.7'
LOG = logging.getLogger(__name__)
VITASTOR_OPTS = [
cfg.StrOpt(
'vitastor_config_path',
default='/etc/vitastor/vitastor.conf',
help='Vitastor configuration file path'
),
cfg.StrOpt(
'vitastor_etcd_address',
default='',
help='Vitastor etcd address(es)'),
cfg.StrOpt(
'vitastor_etcd_prefix',
default='/vitastor',
help='Vitastor etcd prefix'
),
cfg.StrOpt(
'vitastor_pool_id',
default='',
help='Vitastor pool ID to use for volumes'
),
# FIXME exclusive_cinder_pool ?
]
CONF = cfg.CONF
CONF.register_opts(VITASTOR_OPTS, group = configuration.SHARED_CONF_GROUP)
class VitastorDriverException(exception.VolumeDriverException):
message = _("Vitastor Cinder driver failure: %(reason)s")
@interface.volumedriver
class VitastorDriver(driver.CloneableImageVD,
driver.ManageableVD, driver.ManageableSnapshotsVD,
driver.BaseVD):
"""Implements Vitastor volume commands."""
cfg = {}
_etcd_urls = []
def __init__(self, active_backend_id = None, *args, **kwargs):
super(VitastorDriver, self).__init__(*args, **kwargs)
self.configuration.append_config_values(VITASTOR_OPTS)
@classmethod
def get_driver_options(cls):
additional_opts = cls._get_oslo_driver_opts(
'reserved_percentage',
'max_over_subscription_ratio',
'volume_dd_blocksize'
)
return VITASTOR_OPTS + additional_opts
def do_setup(self, context):
"""Performs initialization steps that could raise exceptions."""
super(VitastorDriver, self).do_setup(context)
# Make sure configuration is in UTF-8
for attr in [ 'config_path', 'etcd_address', 'etcd_prefix', 'pool_id' ]:
val = self.configuration.safe_get('vitastor_'+attr)
if val is not None:
self.cfg[attr] = utils.convert_str(val)
self.cfg = self._load_config(self.cfg)
def _load_config(self, cfg):
# Try to load configuration file
try:
f = open(cfg['config_path'] or '/etc/vitastor/vitastor.conf')
conf = json.loads(f.read())
f.close()
for k in conf:
cfg[k] = cfg.get(k, conf[k])
except:
pass
if isinstance(cfg['etcd_address'], str):
cfg['etcd_address'] = cfg['etcd_address'].split(',')
# Sanitize etcd URLs
for i, etcd_url in enumerate(cfg['etcd_address']):
ssl = False
if etcd_url.lower().startswith('http://'):
etcd_url = etcd_url[7:]
elif etcd_url.lower().startswith('https://'):
etcd_url = etcd_url[8:]
ssl = True
if etcd_url.find('/') < 0:
etcd_url += '/v3'
if ssl:
etcd_url = 'https://'+etcd_url
else:
etcd_url = 'http://'+etcd_url
cfg['etcd_address'][i] = etcd_url
return cfg
def check_for_setup_error(self):
"""Returns an error if prerequisites aren't met."""
def _encode_etcd_key(self, key):
if not isinstance(key, bytes):
key = str(key).encode('utf-8')
return base64.b64encode(self.cfg['etcd_prefix'].encode('utf-8')+b'/'+key).decode('utf-8')
def _encode_etcd_value(self, value):
if not isinstance(value, bytes):
value = str(value).encode('utf-8')
return base64.b64encode(value).decode('utf-8')
def _encode_etcd_requests(self, obj):
for v in obj:
for rt in v:
if 'key' in v[rt]:
v[rt]['key'] = self._encode_etcd_key(v[rt]['key'])
if 'range_end' in v[rt]:
v[rt]['range_end'] = self._encode_etcd_key(v[rt]['range_end'])
if 'value' in v[rt]:
v[rt]['value'] = self._encode_etcd_value(v[rt]['value'])
def _etcd_txn(self, params):
if 'compare' in params:
for v in params['compare']:
if 'key' in v:
v['key'] = self._encode_etcd_key(v['key'])
if 'failure' in params:
self._encode_etcd_requests(params['failure'])
if 'success' in params:
self._encode_etcd_requests(params['success'])
body = json.dumps(params).encode('utf-8')
headers = {
'Content-Type': 'application/json'
}
err = None
for etcd_url in self.cfg['etcd_address']:
try:
resp = request.urlopen(request.Request(etcd_url+'/kv/txn', body, headers), timeout = 5)
data = json.loads(resp.read())
if 'responses' not in data:
data['responses'] = []
for i, resp in enumerate(data['responses']):
if 'response_range' in resp:
if 'kvs' not in resp['response_range']:
resp['response_range']['kvs'] = []
for kv in resp['response_range']['kvs']:
kv['key'] = base64.b64decode(kv['key'].encode('utf-8')).decode('utf-8')
if kv['key'].startswith(self.cfg['etcd_prefix']+'/'):
kv['key'] = kv['key'][len(self.cfg['etcd_prefix'])+1 : ]
kv['value'] = json.loads(base64.b64decode(kv['value'].encode('utf-8')))
if len(resp.keys()) != 1:
LOG.exception('unknown responses['+str(i)+'] format: '+json.dumps(resp))
else:
resp = data['responses'][i] = resp[list(resp.keys())[0]]
return data
except Exception as e:
LOG.exception('error calling etcd transaction: '+body.decode('utf-8')+'\nerror: '+str(e))
err = e
raise err
def _etcd_foreach(self, prefix, add_fn):
total = 0
batch = 1000
begin = prefix+'/'
while True:
resp = self._etcd_txn({ 'success': [
{ 'request_range': {
'key': begin,
'range_end': prefix+'0',
'limit': batch+1,
} },
] })
i = 0
while i < batch and i < len(resp['responses'][0]['kvs']):
kv = resp['responses'][0]['kvs'][i]
add_fn(kv)
i += 1
if len(resp['responses'][0]['kvs']) <= batch:
break
begin = resp['responses'][0]['kvs'][batch]['key']
return total
def _update_volume_stats(self):
location_info = json.dumps({
'config': self.configuration.vitastor_config_path,
'etcd_address': self.configuration.vitastor_etcd_address,
'etcd_prefix': self.configuration.vitastor_etcd_prefix,
'pool_id': self.configuration.vitastor_pool_id,
})
stats = {
'vendor_name': 'Vitastor',
'driver_version': self.VERSION,
'storage_protocol': 'vitastor',
'total_capacity_gb': 'unknown',
'free_capacity_gb': 'unknown',
# FIXME check if safe_get is required
'reserved_percentage': self.configuration.safe_get('reserved_percentage'),
'multiattach': True,
'thin_provisioning_support': True,
'max_over_subscription_ratio': self.configuration.safe_get('max_over_subscription_ratio'),
'location_info': location_info,
'backend_state': 'down',
'volume_backend_name': self.configuration.safe_get('volume_backend_name') or 'vitastor',
'replication_enabled': False,
}
try:
pool_stats = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'pool/stats/'+str(self.cfg['pool_id']) } }
] })
total_provisioned = 0
def add_total(kv):
nonlocal total_provisioned
if kv['key'].find('@') >= 0:
total_provisioned += kv['value']['size']
self._etcd_foreach('config/inode/'+str(self.cfg['pool_id']), lambda kv: add_total(kv))
stats['provisioned_capacity_gb'] = round(total_provisioned/1024.0/1024.0/1024.0, 2)
pool_stats = pool_stats['responses'][0]['kvs']
if len(pool_stats):
pool_stats = pool_stats[0]['value']
stats['free_capacity_gb'] = round(1024.0*(pool_stats['total_raw_tb']-pool_stats['used_raw_tb'])/pool_stats['raw_to_usable'], 2)
stats['total_capacity_gb'] = round(1024.0*pool_stats['total_raw_tb'], 2)
stats['backend_state'] = 'up'
except Exception as e:
# just log and return unknown capacities
LOG.exception('error getting vitastor pool stats: '+str(e))
self._stats = stats
def get_volume_stats(self, refresh=False):
"""Get volume stats.
If 'refresh' is True, run update the stats first.
"""
if not self._stats or refresh:
self._update_volume_stats()
return self._stats
def _next_id(self, resp):
if len(resp['kvs']) == 0:
return (1, 0)
else:
return (1 + resp['kvs'][0]['value'], resp['kvs'][0]['mod_revision'])
def create_volume(self, volume):
"""Creates a logical volume."""
size = int(volume.size) * units.Gi
# FIXME: Check if convert_str is really required
vol_name = utils.convert_str(volume.name)
if vol_name.find('@') >= 0 or vol_name.find('/') >= 0:
raise exception.VolumeBackendAPIException(data = '@ and / are forbidden in volume and snapshot names')
LOG.debug("creating volume '%s'", vol_name)
self._create_image(vol_name, { 'size': size })
if volume.encryption_key_id:
self._create_encrypted_volume(volume, volume.obj_context)
volume_update = {}
return volume_update
def _create_encrypted_volume(self, volume, context):
"""Create a new LUKS encrypted image directly in Vitastor."""
vol_name = utils.convert_str(volume.name)
f, opts = self._encrypt_opts(volume, context)
# FIXME: Check if it works at all :-)
self._execute(
'qemu-img', 'convert', '-f', 'luks', *opts,
'vitastor:image='+vol_name.replace(':', '\\:')+self._qemu_args(),
'%sM' % (volume.size * 1024)
)
f.close()
def _encrypt_opts(self, volume, context):
encryption = volume_utils.check_encryption_provider(self.db, volume, context)
# Fetch the key associated with the volume and decode the passphrase
keymgr = key_manager.API(CONF)
key = keymgr.get(context, encryption['encryption_key_id'])
passphrase = binascii.hexlify(key.get_encoded()).decode('utf-8')
# Decode the dm-crypt style cipher spec into something qemu-img can use
cipher_spec = image_utils.decode_cipher(encryption['cipher'], encryption['key_size'])
tmp_dir = volume_utils.image_conversion_dir()
f = tempfile.NamedTemporaryFile(prefix = 'luks_', dir = tmp_dir)
f.write(passphrase)
f.flush()
return (f, [
'--object', 'secret,id=luks_sec,format=raw,file=%(passfile)s' % {'passfile': f.name},
'-o', 'key-secret=luks_sec,cipher-alg=%(cipher_alg)s,cipher-mode=%(cipher_mode)s,ivgen-alg=%(ivgen_alg)s' % cipher_spec,
])
def create_snapshot(self, snapshot):
"""Creates a volume snapshot."""
vol_name = utils.convert_str(snapshot.volume_name)
snap_name = utils.convert_str(snapshot.name)
if snap_name.find('@') >= 0 or snap_name.find('/') >= 0:
raise exception.VolumeBackendAPIException(data = '@ and / are forbidden in volume and snapshot names')
self._create_snapshot(vol_name, vol_name+'@'+snap_name)
def snapshot_revert_use_temp_snapshot(self):
"""Disable the use of a temporary snapshot on revert."""
return False
def revert_to_snapshot(self, context, volume, snapshot):
"""Revert a volume to a given snapshot."""
# FIXME Delete the image, then recreate it from the snapshot
def delete_snapshot(self, snapshot):
"""Deletes a snapshot."""
vol_name = utils.convert_str(snapshot.volume_name)
snap_name = utils.convert_str(snapshot.name)
# Find the snapshot
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'index/image/'+vol_name+'@'+snap_name } },
] })
if len(resp['responses'][0]['kvs']) == 0:
raise exception.SnapshotNotFound(snapshot_id = snap_name)
inode_id = int(resp['responses'][0]['kvs'][0]['value']['id'])
pool_id = int(resp['responses'][0]['kvs'][0]['value']['pool_id'])
parents = {}
parents[(pool_id << 48) | (inode_id & 0xffffffffffff)] = True
# Check if there are child volumes
children = self._child_count(parents)
if children > 0:
raise exception.SnapshotIsBusy(snapshot_name = snap_name)
# FIXME: We can't delete snapshots because we can't merge layers yet
raise exception.VolumeBackendAPIException(data = 'Snapshot delete (layer merge) is not implemented yet')
def _child_count(self, parents):
children = 0
def add_child(kv):
nonlocal children
children += self._check_parent(kv, parents)
self._etcd_foreach('config/inode', lambda kv: add_child(kv))
return children
def _check_parent(self, kv, parents):
if 'parent_id' not in kv['value']:
return 0
parent_id = kv['value']['parent_id']
_, _, pool_id, inode_id = kv['key'].split('/')
parent_pool_id = pool_id
if 'parent_pool_id' in kv['value'] and kv['value']['parent_pool_id']:
parent_pool_id = kv['value']['parent_pool_id']
inode = (int(pool_id) << 48) | (int(inode_id) & 0xffffffffffff)
parent = (int(parent_pool_id) << 48) | (int(parent_id) & 0xffffffffffff)
if parent in parents and inode not in parents:
return 1
return 0
def create_cloned_volume(self, volume, src_vref):
"""Create a cloned volume from another volume."""
size = int(volume.size) * units.Gi
src_name = utils.convert_str(src_vref.name)
dest_name = utils.convert_str(volume.name)
if dest_name.find('@') >= 0 or dest_name.find('/') >= 0:
raise exception.VolumeBackendAPIException(data = '@ and / are forbidden in volume and snapshot names')
# FIXME Do full copy if requested (cfg.disable_clone)
if src_vref.admin_metadata.get('readonly') == 'True':
# source volume is a volume-image cache entry or other readonly volume
# clone without intermediate snapshot
src = self._get_image(src_name)
LOG.debug("creating image '%s' from '%s'", dest_name, src_name)
new_cfg = self._create_image(dest_name, {
'size': size,
'parent_id': src['idx']['id'],
'parent_pool_id': src['idx']['pool_id'],
})
return {}
clone_snap = "%s@%s.clone_snap" % (src_name, dest_name)
make_img = True
if (volume.display_name and
volume.display_name.startswith('image-') and
src_vref.project_id != volume.project_id):
# idiotic openstack creates image-volume cache entries
# as clones of normal VM volumes... :-X prevent it :-D
clone_snap = dest_name
make_img = False
LOG.debug("creating layer '%s' under '%s'", clone_snap, src_name)
new_cfg = self._create_snapshot(src_name, clone_snap, True)
if make_img:
# Then create a clone from it
new_cfg = self._create_image(dest_name, {
'size': size,
'parent_id': new_cfg['parent_id'],
'parent_pool_id': new_cfg['parent_pool_id'],
})
return {}
def create_volume_from_snapshot(self, volume, snapshot):
"""Creates a cloned volume from an existing snapshot."""
vol_name = utils.convert_str(volume.name)
snap_name = utils.convert_str(snapshot.name)
snap = self._get_image(vol_name+'@'+snap_name)
if not snap:
raise exception.SnapshotNotFound(snapshot_id = snap_name)
snap_inode_id = int(resp['responses'][0]['kvs'][0]['value']['id'])
snap_pool_id = int(resp['responses'][0]['kvs'][0]['value']['pool_id'])
size = snap['cfg']['size']
if int(volume.size):
size = int(volume.size) * units.Gi
new_cfg = self._create_image(vol_name, {
'size': size,
'parent_id': snap['idx']['id'],
'parent_pool_id': snap['idx']['pool_id'],
})
return {}
def _vitastor_args(self):
args = []
for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
v = self.configuration.safe_get('vitastor_'+k)
if v:
args.extend(['--'+k, v])
return args
def _qemu_args(self):
args = ''
for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
v = self.configuration.safe_get('vitastor_'+k)
kk = k
if kk == 'etcd_address':
# FIXME use etcd_address in qemu driver
kk = 'etcd_host'
if v:
args += ':'+kk+'='+v.replace(':', '\\:')
return args
def delete_volume(self, volume):
"""Deletes a logical volume."""
vol_name = utils.convert_str(volume.name)
# Find the volume and all its snapshots
range_end = b'index/image/' + vol_name.encode('utf-8')
range_end = range_end[0 : len(range_end)-1] + six.int2byte(range_end[len(range_end)-1] + 1)
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'index/image/'+vol_name, 'range_end': range_end } },
] })
if len(resp['responses'][0]['kvs']) == 0:
# already deleted
LOG.info("volume %s no longer exists in backend", vol_name)
return
layers = resp['responses'][0]['kvs']
layer_ids = {}
for kv in layers:
inode_id = int(kv['value']['id'])
pool_id = int(kv['value']['pool_id'])
inode_pool_id = (pool_id << 48) | (inode_id & 0xffffffffffff)
layer_ids[inode_pool_id] = True
# Check if the volume has clones and raise 'busy' if so
children = self._child_count(layer_ids)
if children > 0:
raise exception.VolumeIsBusy(volume_name = vol_name)
# Clear data
for kv in layers:
args = [
'vitastor-cli', 'rm-data', '--pool', str(kv['value']['pool_id']),
'--inode', str(kv['value']['id']), '--progress', '0',
*(self._vitastor_args())
]
try:
self._execute(*args)
except processutils.ProcessExecutionError as exc:
LOG.error("Failed to remove layer "+kv['key']+": "+exc)
raise exception.VolumeBackendAPIException(data = exc.stderr)
# Delete all layers from etcd
requests = []
for kv in layers:
requests.append({ 'request_delete_range': { 'key': kv['key'] } })
requests.append({ 'request_delete_range': { 'key': 'config/inode/'+str(kv['value']['pool_id'])+'/'+str(kv['value']['id']) } })
self._etcd_txn({ 'success': requests })
def retype(self, context, volume, new_type, diff, host):
"""Change extra type specifications for a volume."""
# FIXME Maybe (in the future) support multiple pools as different types
return True, {}
def ensure_export(self, context, volume):
"""Synchronously recreates an export for a logical volume."""
pass
def create_export(self, context, volume, connector):
"""Exports the volume."""
pass
def remove_export(self, context, volume):
"""Removes an export for a logical volume."""
pass
def _create_image(self, vol_name, cfg):
pool_s = str(self.cfg['pool_id'])
image_id = 0
while image_id == 0:
# check if the image already exists and find a free ID
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'index/image/'+vol_name } },
{ 'request_range': { 'key': 'index/maxid/'+pool_s } },
] })
if len(resp['responses'][0]['kvs']) > 0:
# already exists
raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' already exists')
image_id, id_mod = self._next_id(resp['responses'][1])
# try to create the image
resp = self._etcd_txn({ 'compare': [
{ 'target': 'MOD', 'mod_revision': id_mod, 'key': 'index/maxid/'+pool_s },
{ 'target': 'VERSION', 'version': 0, 'key': 'index/image/'+vol_name },
{ 'target': 'VERSION', 'version': 0, 'key': 'config/inode/'+pool_s+'/'+str(image_id) },
], 'success': [
{ 'request_put': { 'key': 'index/maxid/'+pool_s, 'value': image_id } },
{ 'request_put': { 'key': 'index/image/'+vol_name, 'value': json.dumps({
'id': image_id, 'pool_id': self.cfg['pool_id']
}) } },
{ 'request_put': { 'key': 'config/inode/'+pool_s+'/'+str(image_id), 'value': json.dumps({
**cfg, 'name': vol_name,
}) } },
] })
if not resp.get('succeeded'):
# repeat
image_id = 0
def _create_snapshot(self, vol_name, snap_vol_name, allow_existing = False):
while True:
# check if the image already exists and snapshot doesn't
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'index/image/'+vol_name } },
{ 'request_range': { 'key': 'index/image/'+snap_vol_name } },
] })
if len(resp['responses'][0]['kvs']) == 0:
raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' does not exist')
if len(resp['responses'][1]['kvs']) > 0:
if allow_existing:
snap_idx = resp['responses'][1]['kvs'][0]['value']
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'config/inode/'+str(snap_idx['pool_id'])+'/'+str(snap_idx['id']) } },
] })
if len(resp['responses'][0]['kvs']) == 0:
raise exception.VolumeBackendAPIException(data =
'Volume '+snap_vol_name+' is already indexed, but does not exist'
)
return resp['responses'][0]['kvs'][0]['value']
raise exception.VolumeBackendAPIException(
data = 'Volume '+snap_vol_name+' already exists'
)
vol_idx = resp['responses'][0]['kvs'][0]['value']
vol_idx_mod = resp['responses'][0]['kvs'][0]['mod_revision']
# get image inode config and find a new ID
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']) } },
{ 'request_range': { 'key': 'index/maxid/'+str(self.cfg['pool_id']) } },
] })
if len(resp['responses'][0]['kvs']) == 0:
raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' does not exist')
vol_cfg = resp['responses'][0]['kvs'][0]['value']
vol_mod = resp['responses'][0]['kvs'][0]['mod_revision']
new_id, id_mod = self._next_id(resp['responses'][1])
# try to redirect image to the new inode
new_cfg = {
**vol_cfg, 'name': vol_name, 'parent_id': vol_idx['id'], 'parent_pool_id': vol_idx['pool_id']
}
resp = self._etcd_txn({ 'compare': [
{ 'target': 'MOD', 'mod_revision': vol_idx_mod, 'key': 'index/image/'+vol_name },
{ 'target': 'MOD', 'mod_revision': vol_mod, 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']) },
{ 'target': 'MOD', 'mod_revision': id_mod, 'key': 'index/maxid/'+str(self.cfg['pool_id']) },
{ 'target': 'VERSION', 'version': 0, 'key': 'index/image/'+snap_vol_name },
{ 'target': 'VERSION', 'version': 0, 'key': 'config/inode/'+str(self.cfg['pool_id'])+'/'+str(new_id) },
], 'success': [
{ 'request_put': { 'key': 'index/maxid/'+str(self.cfg['pool_id']), 'value': new_id } },
{ 'request_put': { 'key': 'index/image/'+vol_name, 'value': json.dumps({
'id': new_id, 'pool_id': self.cfg['pool_id']
}) } },
{ 'request_put': { 'key': 'config/inode/'+str(self.cfg['pool_id'])+'/'+str(new_id), 'value': json.dumps(new_cfg) } },
{ 'request_put': { 'key': 'index/image/'+snap_vol_name, 'value': json.dumps({
'id': vol_idx['id'], 'pool_id': vol_idx['pool_id']
}) } },
{ 'request_put': { 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']), 'value': json.dumps({
**vol_cfg, 'name': snap_vol_name, 'readonly': True
}) } }
] })
if resp.get('succeeded'):
return new_cfg
def initialize_connection(self, volume, connector):
data = {
'driver_volume_type': 'vitastor',
'data': {
'config_path': self.configuration.vitastor_config_path,
'etcd_address': self.configuration.vitastor_etcd_address,
'etcd_prefix': self.configuration.vitastor_etcd_prefix,
'name': volume.name,
'logical_block_size': 512,
'physical_block_size': 4096,
}
}
LOG.debug('connection data: %s', data)
return data
def terminate_connection(self, volume, connector, **kwargs):
pass
def clone_image(self, context, volume, image_location, image_meta, image_service):
if image_location:
# Note: image_location[0] is glance image direct_url.
# image_location[1] contains the list of all locations (including
# direct_url) or None if show_multiple_locations is False in
# glance configuration.
if image_location[1]:
url_locations = [location['url'] for location in image_location[1]]
else:
url_locations = [image_location[0]]
# iterate all locations to look for a cloneable one.
for url_location in url_locations:
if url_location and url_location.startswith('cinder://'):
# The idea is to use cinder://<volume-id> Glance volumes as base images
base_vol = self.db.volume_get(context, url_location[len('cinder://') : ])
if not base_vol or base_vol.volume_type_id != volume.volume_type_id:
continue
size = int(volume.size) * units.Gi
dest_name = utils.convert_str(volume.name)
# Find or create the base snapshot
snap_cfg = self._create_snapshot(base_vol.name, base_vol.name+'@.clone_snap', True)
# Then create a clone from it
new_cfg = self._create_image(dest_name, {
'size': size,
'parent_id': snap_cfg['parent_id'],
'parent_pool_id': snap_cfg['parent_pool_id'],
})
return ({}, True)
return ({}, False)
def copy_image_to_encrypted_volume(self, context, volume, image_service, image_id):
self.copy_image_to_volume(context, volume, image_service, image_id, encrypted = True)
def copy_image_to_volume(self, context, volume, image_service, image_id, encrypted = False):
tmp_dir = volume_utils.image_conversion_dir()
with tempfile.NamedTemporaryFile(dir = tmp_dir) as tmp:
image_utils.fetch_to_raw(
context, image_service, image_id, tmp.name,
self.configuration.volume_dd_blocksize, size = volume.size
)
out_format = [ '-O', 'raw' ]
if encrypted:
key_file, opts = self._encrypt_opts(volume, context)
out_format = [ '-O', 'luks', *opts ]
dest_name = utils.convert_str(volume.name)
self._try_execute(
'qemu-img', 'convert', '-f', 'raw', tmp.name, *out_format,
'vitastor:image='+dest_name.replace(':', '\\:')+self._qemu_args()
)
if encrypted:
key_file.close()
def copy_volume_to_image(self, context, volume, image_service, image_meta):
tmp_dir = volume_utils.image_conversion_dir()
tmp_file = os.path.join(tmp_dir, volume.name + '-' + image_meta['id'])
with fileutils.remove_path_on_error(tmp_file):
vol_name = utils.convert_str(volume.name)
self._try_execute(
'qemu-img', 'convert', '-f', 'raw',
'vitastor:image='+vol_name.replace(':', '\\:')+self._qemu_args(),
'-O', 'raw', tmp_file
)
# FIXME: Copy directly if the destination image is also in Vitastor
volume_utils.upload_volume(context, image_service, image_meta, tmp_file, volume)
os.unlink(tmp_file)
def _get_image(self, vol_name):
# find the image
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'index/image/'+vol_name } },
] })
if len(resp['responses'][0]['kvs']) == 0:
return None
vol_idx = resp['responses'][0]['kvs'][0]['value']
vol_idx_mod = resp['responses'][0]['kvs'][0]['mod_revision']
# get image inode config
resp = self._etcd_txn({ 'success': [
{ 'request_range': { 'key': 'config/inode/'+str(vol_idx['pool_id'])+'/'+str(vol_idx['id']) } },
] })
if len(resp['responses'][0]['kvs']) == 0:
return None
vol_cfg = resp['responses'][0]['kvs'][0]['value']
vol_cfg_mod = resp['responses'][0]['kvs'][0]['mod_revision']
return {
'cfg': vol_cfg,
'cfg_mod': vol_cfg_mod,
'idx': vol_idx,
'idx_mod': vol_idx_mod,
}
def extend_volume(self, volume, new_size):
"""Extend an existing volume."""
vol_name = utils.convert_str(volume.name)
while True:
vol = self._get_image(vol_name)
if not vol:
raise exception.VolumeBackendAPIException(data = 'Volume '+vol_name+' does not exist')
# change size
size = int(new_size) * units.Gi
if size == vol['cfg']['size']:
break
resp = self._etcd_txn({ 'compare': [ {
'target': 'MOD',
'mod_revision': vol['cfg_mod'],
'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']),
} ], 'success': [
{ 'request_put': {
'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']),
'value': json.dumps({ **vol['cfg'], 'size': size }),
} },
] })
if resp.get('succeeded'):
break
LOG.debug(
"Extend volume from %(old_size)s GB to %(new_size)s GB.",
{'old_size': volume.size, 'new_size': new_size}
)
def _add_manageable_volume(self, kv, manageable_volumes, cinder_ids):
cfg = kv['value']
if kv['key'].find('@') >= 0:
# snapshot
return
image_id = volume_utils.extract_id_from_volume_name(cfg['name'])
image_info = {
'reference': {'source-name': image_name},
'size': int(math.ceil(float(cfg['size']) / units.Gi)),
'cinder_id': None,
'extra_info': None,
}
if image_id in cinder_ids:
image_info['cinder_id'] = image_id
image_info['safe_to_manage'] = False
image_info['reason_not_safe'] = 'already managed'
else:
image_info['safe_to_manage'] = True
image_info['reason_not_safe'] = None
manageable_volumes.append(image_info)
def get_manageable_volumes(self, cinder_volumes, marker, limit, offset, sort_keys, sort_dirs):
manageable_volumes = []
cinder_ids = [resource['id'] for resource in cinder_volumes]
# List all volumes
# FIXME: It's possible to use pagination in our case, but.. do we want it?
self._etcd_foreach('config/inode/'+str(self.cfg['pool_id']),
lambda kv: self._add_manageable_volume(kv, manageable_volumes, cinder_ids))
return volume_utils.paginate_entries_list(
manageable_volumes, marker, limit, offset, sort_keys, sort_dirs)
def _get_existing_name(existing_ref):
if not isinstance(existing_ref, dict):
existing_ref = {"source-name": existing_ref}
if 'source-name' not in existing_ref:
reason = _('Reference must contain source-name element.')
raise exception.ManageExistingInvalidReference(existing_ref=existing_ref, reason=reason)
src_name = utils.convert_str(existing_ref['source-name'])
if not src_name:
reason = _('Reference must contain source-name element.')
raise exception.ManageExistingInvalidReference(existing_ref=existing_ref, reason=reason)
return src_name
def manage_existing_get_size(self, volume, existing_ref):
"""Return size of an existing image for manage_existing.
:param volume: volume ref info to be set
:param existing_ref: {'source-name': <image name>}
"""
src_name = self._get_existing_name(existing_ref)
vol = self._get_image(src_name)
if not vol:
raise exception.VolumeBackendAPIException(data = 'Volume '+src_name+' does not exist')
return int(math.ceil(float(vol['cfg']['size']) / units.Gi))
def manage_existing(self, volume, existing_ref):
"""Manages an existing image.
Renames the image name to match the expected name for the volume.
:param volume: volume ref info to be set
:param existing_ref: {'source-name': <image name>}
"""
from_name = self._get_existing_name(existing_ref)
to_name = utils.convert_str(volume.name)
self._rename(from_name, to_name)
def _rename(self, from_name, to_name):
while True:
vol = self._get_image(from_name)
if not vol:
raise exception.VolumeBackendAPIException(data = 'Volume '+from_name+' does not exist')
to = self._get_image(to_name)
if to:
raise exception.VolumeBackendAPIException(data = 'Volume '+to_name+' already exists')
resp = self._etcd_txn({ 'compare': [
{ 'target': 'MOD', 'mod_revision': vol['idx_mod'], 'key': 'index/image/'+vol['cfg']['name'] },
{ 'target': 'MOD', 'mod_revision': vol['cfg_mod'], 'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']) },
{ 'target': 'VERSION', 'version': 0, 'key': 'index/image/'+to_name },
], 'success': [
{ 'request_delete_range': { 'key': 'index/image/'+vol['cfg']['name'] } },
{ 'request_put': { 'key': 'index/image/'+to_name, 'value': json.dumps(vol['idx']) } },
{ 'request_put': { 'key': 'config/inode/'+str(vol['idx']['pool_id'])+'/'+str(vol['idx']['id']),
'value': json.dumps({ **vol['cfg'], 'name': to_name }) } },
] })
if resp.get('succeeded'):
break
def unmanage(self, volume):
pass
def _add_manageable_snapshot(self, kv, manageable_snapshots, cinder_ids):
cfg = kv['value']
dog = kv['key'].find('@')
if dog < 0:
# snapshot
return
image_name = kv['key'][0 : dog]
snap_name = kv['key'][dog+1 : ]
snapshot_id = volume_utils.extract_id_from_snapshot_name(snap_name)
snapshot_info = {
'reference': {'source-name': snap_name},
'size': int(math.ceil(float(cfg['size']) / units.Gi)),
'cinder_id': None,
'extra_info': None,
'safe_to_manage': False,
'reason_not_safe': None,
'source_reference': {'source-name': image_name}
}
if snapshot_id in cinder_ids:
# Exclude snapshots already managed.
snapshot_info['reason_not_safe'] = ('already managed')
snapshot_info['cinder_id'] = snapshot_id
elif snap_name.endswith('.clone_snap'):
# Exclude clone snapshot.
snapshot_info['reason_not_safe'] = ('used for clone snap')
else:
snapshot_info['safe_to_manage'] = True
manageable_snapshots.append(snapshot_info)
def get_manageable_snapshots(self, cinder_snapshots, marker, limit, offset, sort_keys, sort_dirs):
"""List manageable snapshots in Vitastor."""
manageable_snapshots = []
cinder_snapshot_ids = [resource['id'] for resource in cinder_snapshots]
# List all volumes
# FIXME: It's possible to use pagination in our case, but.. do we want it?
self._etcd_foreach('config/inode/'+str(self.cfg['pool_id']),
lambda kv: self._add_manageable_volume(kv, manageable_snapshots, cinder_snapshot_ids))
return volume_utils.paginate_entries_list(
manageable_snapshots, marker, limit, offset, sort_keys, sort_dirs)
def manage_existing_snapshot_get_size(self, snapshot, existing_ref):
"""Return size of an existing image for manage_existing.
:param snapshot: snapshot ref info to be set
:param existing_ref: {'source-name': <name of snapshot>}
"""
vol_name = utils.convert_str(snapshot.volume_name)
snap_name = self._get_existing_name(existing_ref)
vol = self._get_image(vol_name+'@'+snap_name)
if not vol:
raise exception.ManageExistingInvalidReference(
existing_ref=snapshot_name, reason='Specified snapshot does not exist.'
)
return int(math.ceil(float(vol['cfg']['size']) / units.Gi))
def manage_existing_snapshot(self, snapshot, existing_ref):
"""Manages an existing snapshot.
Renames the snapshot name to match the expected name for the snapshot.
Error checking done by manage_existing_get_size is not repeated.
:param snapshot: snapshot ref info to be set
:param existing_ref: {'source-name': <name of snapshot>}
"""
vol_name = utils.convert_str(snapshot.volume_name)
snap_name = self._get_existing_name(existing_ref)
from_name = vol_name+'@'+snap_name
to_name = vol_name+'@'+utils.convert_str(snapshot.name)
self._rename(from_name, to_name)
def unmanage_snapshot(self, snapshot):
"""Removes the specified snapshot from Cinder management."""
pass
def _dumps(self, obj):
return json.dumps(obj, separators=(',', ':'), sort_keys=True)

View File

@@ -1,23 +0,0 @@
# Devstack configuration for bridged networking
[[local|localrc]]
ADMIN_PASSWORD=secret
DATABASE_PASSWORD=$ADMIN_PASSWORD
RABBIT_PASSWORD=$ADMIN_PASSWORD
SERVICE_PASSWORD=$ADMIN_PASSWORD
HOST_IP=10.0.2.15
Q_USE_SECGROUP=True
FLOATING_RANGE="10.0.2.0/24"
IPV4_ADDRS_SAFE_TO_USE="10.0.5.0/24"
Q_FLOATING_ALLOCATION_POOL=start=10.0.2.50,end=10.0.2.100
PUBLIC_NETWORK_GATEWAY=10.0.2.2
PUBLIC_INTERFACE=ens3
Q_USE_PROVIDERNET_FOR_PUBLIC=True
Q_AGENT=linuxbridge
Q_ML2_PLUGIN_MECHANISM_DRIVERS=linuxbridge
LB_PHYSICAL_INTERFACE=ens3
PUBLIC_PHYSICAL_NETWORK=default
LB_INTERFACE_MAPPINGS=default:ens3
Q_SERVICE_PLUGIN_CLASSES=
Q_ML2_PLUGIN_TYPE_DRIVERS=flat
Q_ML2_PLUGIN_EXT_DRIVERS=

View File

@@ -1,287 +0,0 @@
diff --git a/nova/virt/image/model.py b/nova/virt/image/model.py
index 971f7e9c07..70ed70d5e2 100644
--- a/nova/virt/image/model.py
+++ b/nova/virt/image/model.py
@@ -129,3 +129,22 @@ class RBDImage(Image):
self.user = user
self.password = password
self.servers = servers
+
+
+class VitastorImage(Image):
+ """Class for images in a remote Vitastor cluster"""
+
+ def __init__(self, name, etcd_address = None, etcd_prefix = None, config_path = None):
+ """Create a new Vitastor image object
+
+ :param name: name of the image
+ :param etcd_address: etcd URL(s) (optional)
+ :param etcd_prefix: etcd prefix (optional)
+ :param config_path: path to the configuration (optional)
+ """
+ super(RBDImage, self).__init__(FORMAT_RAW)
+
+ self.name = name
+ self.etcd_address = etcd_address
+ self.etcd_prefix = etcd_prefix
+ self.config_path = config_path
diff --git a/nova/virt/images.py b/nova/virt/images.py
index 5358f3766a..ebe3d6effb 100644
--- a/nova/virt/images.py
+++ b/nova/virt/images.py
@@ -41,7 +41,7 @@ IMAGE_API = glance.API()
def qemu_img_info(path, format=None):
"""Return an object containing the parsed output from qemu-img info."""
- if not os.path.exists(path) and not path.startswith('rbd:'):
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
raise exception.DiskNotFound(location=path)
info = nova.privsep.qemu.unprivileged_qemu_img_info(path, format=format)
@@ -50,7 +50,7 @@ def qemu_img_info(path, format=None):
def privileged_qemu_img_info(path, format=None, output_format='json'):
"""Return an object containing the parsed output from qemu-img info."""
- if not os.path.exists(path) and not path.startswith('rbd:'):
+ if not os.path.exists(path) and not path.startswith('rbd:') and not path.startswith('vitastor:'):
raise exception.DiskNotFound(location=path)
info = nova.privsep.qemu.privileged_qemu_img_info(path, format=format)
diff --git a/nova/virt/libvirt/config.py b/nova/virt/libvirt/config.py
index f9475776b3..51573fe41d 100644
--- a/nova/virt/libvirt/config.py
+++ b/nova/virt/libvirt/config.py
@@ -1060,6 +1060,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
self.driver_iommu = False
self.source_path = None
self.source_protocol = None
+ self.source_query = None
+ self.source_config = None
self.source_name = None
self.source_hosts = []
self.source_ports = []
@@ -1186,7 +1188,8 @@ class LibvirtConfigGuestDisk(LibvirtConfigGuestDevice):
elif self.source_type == "mount":
dev.append(etree.Element("source", dir=self.source_path))
elif self.source_type == "network" and self.source_protocol:
- source = etree.Element("source", protocol=self.source_protocol)
+ source = etree.Element("source", protocol=self.source_protocol,
+ query=self.source_query, config=self.source_config)
if self.source_name is not None:
source.set('name', self.source_name)
hosts_info = zip(self.source_hosts, self.source_ports)
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index 391231c527..34dc60dcdd 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -179,6 +179,7 @@ VOLUME_DRIVERS = {
'local': 'nova.virt.libvirt.volume.volume.LibvirtVolumeDriver',
'fake': 'nova.virt.libvirt.volume.volume.LibvirtFakeVolumeDriver',
'rbd': 'nova.virt.libvirt.volume.net.LibvirtNetVolumeDriver',
+ 'vitastor': 'nova.virt.libvirt.volume.vitastor.LibvirtVitastorVolumeDriver',
'nfs': 'nova.virt.libvirt.volume.nfs.LibvirtNFSVolumeDriver',
'smbfs': 'nova.virt.libvirt.volume.smbfs.LibvirtSMBFSVolumeDriver',
'fibre_channel': 'nova.virt.libvirt.volume.fibrechannel.LibvirtFibreChannelVolumeDriver', # noqa:E501
@@ -385,10 +386,10 @@ class LibvirtDriver(driver.ComputeDriver):
# This prevents the risk of one test setting a capability
# which bleeds over into other tests.
- # LVM and RBD require raw images. If we are not configured to
+ # LVM, RBD, Vitastor require raw images. If we are not configured to
# force convert images into raw format, then we _require_ raw
# images only.
- raw_only = ('rbd', 'lvm')
+ raw_only = ('rbd', 'lvm', 'vitastor')
requires_raw_image = (CONF.libvirt.images_type in raw_only and
not CONF.force_raw_images)
requires_ploop_image = CONF.libvirt.virt_type == 'parallels'
@@ -775,12 +776,12 @@ class LibvirtDriver(driver.ComputeDriver):
# Some imagebackends are only able to import raw disk images,
# and will fail if given any other format. See the bug
# https://bugs.launchpad.net/nova/+bug/1816686 for more details.
- if CONF.libvirt.images_type in ('rbd',):
+ if CONF.libvirt.images_type in ('rbd', 'vitastor'):
if not CONF.force_raw_images:
msg = _("'[DEFAULT]/force_raw_images = False' is not "
- "allowed with '[libvirt]/images_type = rbd'. "
+ "allowed with '[libvirt]/images_type = rbd' or 'vitastor'. "
"Please check the two configs and if you really "
- "do want to use rbd as images_type, set "
+ "do want to use rbd or vitastor as images_type, set "
"force_raw_images to True.")
raise exception.InvalidConfiguration(msg)
@@ -2603,6 +2604,16 @@ class LibvirtDriver(driver.ComputeDriver):
if connection_info['data'].get('auth_enabled'):
username = connection_info['data']['auth_username']
path = f"rbd:{volume_name}:id={username}"
+ elif connection_info['driver_volume_type'] == 'vitastor':
+ volume_name = connection_info['data']['name']
+ path = 'vitastor:image='+volume_name.replace(':', '\\:')
+ for k in [ 'config_path', 'etcd_address', 'etcd_prefix' ]:
+ if k in connection_info['data']:
+ kk = k
+ if kk == 'etcd_address':
+ # FIXME use etcd_address in qemu driver
+ kk = 'etcd_host'
+ path += ":"+kk+"="+connection_info['data'][k].replace(':', '\\:')
else:
path = 'unknown'
raise exception.DiskNotFound(location='unknown')
@@ -2827,8 +2838,8 @@ class LibvirtDriver(driver.ComputeDriver):
image_format = CONF.libvirt.snapshot_image_format or source_type
- # NOTE(bfilippov): save lvm and rbd as raw
- if image_format == 'lvm' or image_format == 'rbd':
+ # NOTE(bfilippov): save lvm and rbd and vitastor as raw
+ if image_format == 'lvm' or image_format == 'rbd' or image_format == 'vitastor':
image_format = 'raw'
metadata = self._create_snapshot_metadata(instance.image_meta,
@@ -2899,7 +2910,7 @@ class LibvirtDriver(driver.ComputeDriver):
expected_state=task_states.IMAGE_UPLOADING)
# TODO(nic): possibly abstract this out to the root_disk
- if source_type == 'rbd' and live_snapshot:
+ if (source_type == 'rbd' or source_type == 'vitastor') and live_snapshot:
# Standard snapshot uses qemu-img convert from RBD which is
# not safe to run with live_snapshot.
live_snapshot = False
@@ -4099,7 +4110,7 @@ class LibvirtDriver(driver.ComputeDriver):
# cleanup rescue volume
lvm.remove_volumes([lvmdisk for lvmdisk in self._lvm_disks(instance)
if lvmdisk.endswith('.rescue')])
- if CONF.libvirt.images_type == 'rbd':
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
filter_fn = lambda disk: (disk.startswith(instance.uuid) and
disk.endswith('.rescue'))
rbd_utils.RBDDriver().cleanup_volumes(filter_fn)
@@ -4356,6 +4367,8 @@ class LibvirtDriver(driver.ComputeDriver):
# TODO(mikal): there is a bug here if images_type has
# changed since creation of the instance, but I am pretty
# sure that this bug already exists.
+ if CONF.libvirt.images_type == 'vitastor':
+ return 'vitastor'
return 'rbd' if CONF.libvirt.images_type == 'rbd' else 'raw'
@staticmethod
@@ -4764,10 +4777,10 @@ class LibvirtDriver(driver.ComputeDriver):
finally:
# NOTE(mikal): if the config drive was imported into RBD,
# then we no longer need the local copy
- if CONF.libvirt.images_type == 'rbd':
+ if CONF.libvirt.images_type == 'rbd' or CONF.libvirt.images_type == 'vitastor':
LOG.info('Deleting local config drive %(path)s '
- 'because it was imported into RBD.',
- {'path': config_disk_local_path},
+ 'because it was imported into %(type).',
+ {'path': config_disk_local_path, 'type': CONF.libvirt.images_type},
instance=instance)
os.unlink(config_disk_local_path)
diff --git a/nova/virt/libvirt/utils.py b/nova/virt/libvirt/utils.py
index da2a6e8b8a..52c02e72f1 100644
--- a/nova/virt/libvirt/utils.py
+++ b/nova/virt/libvirt/utils.py
@@ -340,6 +340,10 @@ def find_disk(guest: libvirt_guest.Guest) -> ty.Tuple[str, ty.Optional[str]]:
disk_path = disk.source_name
if disk_path:
disk_path = 'rbd:' + disk_path
+ elif not disk_path and disk.source_protocol == 'vitastor':
+ disk_path = disk.source_name
+ if disk_path:
+ disk_path = 'vitastor:' + disk_path
if not disk_path:
raise RuntimeError(_("Can't retrieve root device path "
@@ -354,6 +358,8 @@ def get_disk_type_from_path(path: str) -> ty.Optional[str]:
return 'lvm'
elif path.startswith('rbd:'):
return 'rbd'
+ elif path.startswith('vitastor:'):
+ return 'vitastor'
elif (os.path.isdir(path) and
os.path.exists(os.path.join(path, "DiskDescriptor.xml"))):
return 'ploop'
diff --git a/nova/virt/libvirt/volume/vitastor.py b/nova/virt/libvirt/volume/vitastor.py
new file mode 100644
index 0000000000..0256df62c1
--- /dev/null
+++ b/nova/virt/libvirt/volume/vitastor.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2021+, Vitaliy Filippov <vitalif@yourcmc.ru>
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+from os_brick import exception as os_brick_exception
+from os_brick import initiator
+from os_brick.initiator import connector
+from oslo_log import log as logging
+
+import nova.conf
+from nova import utils
+from nova.virt.libvirt.volume import volume as libvirt_volume
+
+
+CONF = nova.conf.CONF
+LOG = logging.getLogger(__name__)
+
+
+class LibvirtVitastorVolumeDriver(libvirt_volume.LibvirtBaseVolumeDriver):
+ """Driver to attach Vitastor volumes to libvirt."""
+ def __init__(self, host):
+ super(LibvirtVitastorVolumeDriver, self).__init__(host, is_block_dev=False)
+
+ def connect_volume(self, connection_info, instance):
+ pass
+
+ def disconnect_volume(self, connection_info, instance):
+ pass
+
+ def get_config(self, connection_info, disk_info):
+ """Returns xml for libvirt."""
+ conf = super(LibvirtVitastorVolumeDriver, self).get_config(connection_info, disk_info)
+ conf.source_type = 'network'
+ conf.source_protocol = 'vitastor'
+ conf.source_name = connection_info['data'].get('name')
+ conf.source_query = connection_info['data'].get('etcd_prefix') or None
+ conf.source_config = connection_info['data'].get('config_path') or None
+ conf.source_hosts = []
+ conf.source_ports = []
+ addresses = connection_info['data'].get('etcd_address', '')
+ if addresses:
+ if not isinstance(addresses, list):
+ addresses = addresses.split(',')
+ for addr in addresses:
+ if addr.startswith('https://'):
+ raise NotImplementedError('Vitastor block driver does not support SSL for etcd communication yet')
+ if addr.startswith('http://'):
+ addr = addr[7:]
+ addr = addr.rstrip('/')
+ if addr.endswith('/v3'):
+ addr = addr[0:-3]
+ p = addr.find('/')
+ if p > 0:
+ raise NotImplementedError('libvirt does not support custom URL paths for Vitastor etcd yet. Use /etc/vitastor/vitastor.conf')
+ p = addr.find(':')
+ port = '2379'
+ if p > 0:
+ port = addr[p+1:]
+ addr = addr[0:p]
+ conf.source_hosts.append(addr)
+ conf.source_ports.append(port)
+ return conf
+
+ def extend_volume(self, connection_info, instance, requested_size):
+ raise NotImplementedError

View File

@@ -24,7 +24,7 @@ Index: qemu-3.1+dfsg/qapi/block-core.json
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_address: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu-3.1+dfsg/qapi/block-core.json
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_address': 'str',
+ '*etcd_prefix': 'str' } }
+
+##

View File

@@ -24,7 +24,7 @@ Index: qemu/qapi/block-core.json
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_address: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu/qapi/block-core.json
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_address': 'str',
+ '*etcd_prefix': 'str' } }
+
+##

View File

@@ -24,7 +24,7 @@ Index: qemu/qapi/block-core.json
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_address: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu/qapi/block-core.json
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_address': 'str',
+ '*etcd_prefix': 'str' } }
+
+##

View File

@@ -24,7 +24,7 @@ Index: qemu-5.1+dfsg/qapi/block-core.json
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config_path: Path to Vitastor configuration
+# @etcd_host: etcd connection address(es)
+# @etcd_address: etcd connection address(es)
+# @etcd_prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
@@ -33,7 +33,7 @@ Index: qemu-5.1+dfsg/qapi/block-core.json
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config_path': 'str',
+ '*etcd_host': 'str',
+ '*etcd_address': 'str',
+ '*etcd_prefix': 'str' } }
+
+##

View File

@@ -48,4 +48,4 @@ FIO=`rpm -qi fio | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Ve
QEMU=`rpm -qi qemu qemu-kvm | perl -e 'while(<>) { /^Epoch[\s:]+(\S+)/ && print "$1:"; /^Version[\s:]+(\S+)/ && print $1; /^Release[\s:]+(\S+)/ && print "-$1"; }'`
perl -i -pe 's/(Requires:\s*fio)([^\n]+)?/$1 = '$FIO'/' $VITASTOR/rpm/vitastor-el$EL.spec
perl -i -pe 's/(Requires:\s*qemu(?:-kvm)?)([^\n]+)?/$1 = '$QEMU'/' $VITASTOR/rpm/vitastor-el$EL.spec
tar --transform 's#^#vitastor-0.6.7/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.7$(rpm --eval '%dist').tar.gz *
tar --transform 's#^#vitastor-0.6.4/#' --exclude 'rpm/*.rpm' -czf $VITASTOR/../vitastor-0.6.4$(rpm --eval '%dist').tar.gz *

View File

@@ -11,7 +11,7 @@ RUN rm -rf /var/lib/dnf/*; dnf download --disablerepo='*' --enablerepo='centos-a
RUN rpm --nomd5 -i qemu*.src.rpm
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=PowerTools --spec qemu-kvm.spec
ADD patches/qemu-*-vitastor.patch /root/vitastor/patches/
ADD qemu-*-vitastor.patch /root/vitastor/
RUN set -e; \
mkdir -p /root/packages/qemu-el8; \
@@ -25,7 +25,7 @@ RUN set -e; \
echo "Patch$((PN+1)): qemu-4.2-vitastor.patch" >> qemu-kvm.spec; \
tail -n +2 xx01 >> qemu-kvm.spec; \
perl -i -pe 's/(^Release:\s*\d+)/$1.vitastor/' qemu-kvm.spec; \
cp /root/vitastor/patches/qemu-4.2-vitastor.patch ~/rpmbuild/SOURCES; \
cp /root/vitastor/qemu-4.2-vitastor.patch ~/rpmbuild/SOURCES; \
rpmbuild --nocheck -ba qemu-kvm.spec; \
cp ~/rpmbuild/RPMS/*/*qemu* /root/packages/qemu-el8/; \
cp ~/rpmbuild/SRPMS/*qemu* /root/packages/qemu-el8/

View File

@@ -15,8 +15,8 @@ RUN yumdownloader --disablerepo=centos-sclo-rh --source fio
RUN rpm --nomd5 -i qemu*.src.rpm
RUN rpm --nomd5 -i fio*.src.rpm
RUN rm -f /etc/yum.repos.d/CentOS-Media.repo
RUN cd ~/rpmbuild/SPECS && yum-builddep -y qemu-kvm.spec
RUN cd ~/rpmbuild/SPECS && yum-builddep -y fio.spec
RUN cd ~/rpmbuild/SPECS && yum-builddep -y --enablerepo='*' --disablerepo=centos-sclo-rh --disablerepo=centos-sclo-rh-source --disablerepo=centos-sclo-sclo-testing qemu-kvm.spec
RUN cd ~/rpmbuild/SPECS && yum-builddep -y --enablerepo='*' --disablerepo=centos-sclo-rh --disablerepo=centos-sclo-rh-source --disablerepo=centos-sclo-sclo-testing fio.spec
RUN yum -y install rdma-core-devel
ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
@@ -38,7 +38,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.6.7.el7.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.6.4.el7.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el7.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.6.7
Version: 0.6.4
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.6.7.el7.tar.gz
Source0: vitastor-0.6.4.el7.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -57,9 +57,7 @@ cp -r mon %buildroot/usr/lib/vitastor/mon
%_bindir/vitastor-dump-journal
%_bindir/vitastor-nbd
%_bindir/vitastor-osd
%_bindir/vitastor-cli
%_bindir/vitastor-rm
%_bindir/vita
%_libdir/qemu-kvm/block-vitastor.so
%_libdir/libfio_vitastor.so
%_libdir/libfio_vitastor_blk.so

View File

@@ -15,7 +15,7 @@ RUN rpm --nomd5 -i qemu*.src.rpm
RUN rpm --nomd5 -i fio*.src.rpm
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec qemu-kvm.spec
RUN cd ~/rpmbuild/SPECS && dnf builddep -y --enablerepo=powertools --spec fio.spec && dnf install -y cmake
RUN yum -y install libibverbs-devel libarchive
RUN yum -y install libibverbs-devel
ADD https://vitastor.io/rpms/liburing-el7/liburing-0.7-2.el7.src.rpm /root
@@ -36,7 +36,7 @@ ADD . /root/vitastor
RUN set -e; \
cd /root/vitastor/rpm; \
sh build-tarball.sh; \
cp /root/vitastor-0.6.7.el8.tar.gz ~/rpmbuild/SOURCES; \
cp /root/vitastor-0.6.4.el8.tar.gz ~/rpmbuild/SOURCES; \
cp vitastor-el8.spec ~/rpmbuild/SPECS/vitastor.spec; \
cd ~/rpmbuild/SPECS/; \
rpmbuild -ba vitastor.spec; \

View File

@@ -1,11 +1,11 @@
Name: vitastor
Version: 0.6.7
Version: 0.6.4
Release: 1%{?dist}
Summary: Vitastor, a fast software-defined clustered block storage
License: Vitastor Network Public License 1.1
URL: https://vitastor.io/
Source0: vitastor-0.6.7.el8.tar.gz
Source0: vitastor-0.6.4.el8.tar.gz
BuildRequires: liburing-devel >= 0.6
BuildRequires: gperftools-devel
@@ -54,9 +54,7 @@ cp -r mon %buildroot/usr/lib/vitastor
%_bindir/vitastor-dump-journal
%_bindir/vitastor-nbd
%_bindir/vitastor-osd
%_bindir/vitastor-cli
%_bindir/vitastor-rm
%_bindir/vita
%_libdir/qemu-kvm/block-vitastor.so
%_libdir/libfio_vitastor.so
%_libdir/libfio_vitastor_blk.so

View File

@@ -15,8 +15,8 @@ if("${CMAKE_INSTALL_PREFIX}" MATCHES "^/usr/local/?$")
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}")
endif()
add_definitions(-DVERSION="0.6.7")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -fdiagnostics-color=always -I ${CMAKE_SOURCE_DIR}/src)
add_definitions(-DVERSION="0.6.4")
add_definitions(-Wall -Wno-sign-compare -Wno-comment -Wno-parentheses -Wno-pointer-arith -I ${CMAKE_SOURCE_DIR}/src)
if (${WITH_ASAN})
add_definitions(-fsanitize=address -fno-omit-frame-pointer)
add_link_options(-fsanitize=address -fno-omit-frame-pointer)
@@ -36,11 +36,6 @@ string(REGEX REPLACE "([\\/\\-]D) *NDEBUG" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_F
string(REGEX REPLACE "([\\/\\-]D) *NDEBUG" "" CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL}")
string(REGEX REPLACE "([\\/\\-]D) *NDEBUG" "" CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO}")
macro(install_symlink filepath sympath)
install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${filepath} \$ENV{DESTDIR}${sympath})")
install(CODE "message(\"-- Created symlink: ${sympath} -> ${filepath}\")")
endmacro(install_symlink)
find_package(PkgConfig)
pkg_check_modules(LIBURING REQUIRED liburing)
if (${WITH_QEMU})
@@ -121,7 +116,6 @@ endif (${WITH_FIO})
# libvitastor_client.so
add_library(vitastor_client SHARED
cluster_client.cpp
cluster_client_list.cpp
vitastor_c.cpp
)
set_target_properties(vitastor_client PROPERTIES PUBLIC_HEADER "vitastor_c.h")
@@ -151,11 +145,11 @@ target_link_libraries(vitastor-nbd
vitastor_client
)
# vitastor-cli
add_executable(vitastor-cli
cli.cpp cli_ls.cpp cli_create.cpp cli_modify.cpp cli_flatten.cpp cli_merge.cpp cli_rm.cpp cli_snap_rm.cpp
# vitastor-rm
add_executable(vitastor-rm
rm_inode.cpp
)
target_link_libraries(vitastor-cli
target_link_libraries(vitastor-rm
vitastor_client
)
@@ -226,7 +220,7 @@ target_link_libraries(test_cas
# test_cluster_client
add_executable(test_cluster_client
test_cluster_client.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp cluster_client_list.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
pg_states.cpp osd_ops.cpp cluster_client.cpp msgr_op.cpp mock/messenger.cpp msgr_stop.cpp
etcd_state_client.cpp timerfd_manager.cpp ../json11/json11.cpp
)
target_compile_definitions(test_cluster_client PUBLIC -D__MOCK__)
@@ -240,9 +234,7 @@ target_include_directories(test_cluster_client PUBLIC ${CMAKE_SOURCE_DIR}/src/mo
### Install
install(TARGETS vitastor-osd vitastor-dump-journal vitastor-nbd vitastor-cli RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
install_symlink(vitastor-cli ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vitastor-rm)
install_symlink(vitastor-cli ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}/vita)
install(TARGETS vitastor-osd vitastor-dump-journal vitastor-nbd vitastor-rm RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
install(
TARGETS vitastor_blk vitastor_client
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}

View File

@@ -48,11 +48,6 @@ std::map<uint64_t, uint64_t> & blockstore_t::get_inode_space_stats()
return impl->inode_space_stats;
}
void blockstore_t::dump_diagnostics()
{
return impl->dump_diagnostics();
}
uint32_t blockstore_t::get_block_size()
{
return impl->get_block_size();
@@ -68,11 +63,6 @@ uint64_t blockstore_t::get_free_block_count()
return impl->get_free_block_count();
}
uint64_t blockstore_t::get_journal_size()
{
return impl->get_journal_size();
}
uint32_t blockstore_t::get_bitmap_granularity()
{
return impl->get_bitmap_granularity();

View File

@@ -186,15 +186,10 @@ public:
// Get per-inode space usage statistics
std::map<uint64_t, uint64_t> & get_inode_space_stats();
// Print diagnostics to stdout
void dump_diagnostics();
// FIXME rename to object_size
uint32_t get_block_size();
uint64_t get_block_count();
uint64_t get_free_block_count();
uint64_t get_journal_size();
uint32_t get_bitmap_granularity();
};

View File

@@ -182,75 +182,6 @@ void journal_flusher_t::release_trim()
trim_wanted--;
}
void journal_flusher_t::dump_diagnostics()
{
const char *unflushable_type = "";
obj_ver_id unflushable = { 0 };
// Try to find out if there is a flushable object for information
for (object_id cur_oid: flush_queue)
{
obj_ver_id cur = { .oid = cur_oid, .version = flush_versions[cur_oid] };
auto dirty_end = bs->dirty_db.find(cur);
if (dirty_end == bs->dirty_db.end())
{
// Already flushed
continue;
}
auto repeat_it = sync_to_repeat.find(cur.oid);
if (repeat_it != sync_to_repeat.end())
{
// Someone is already flushing it
unflushable_type = "locked,";
unflushable = cur;
break;
}
if (dirty_end->second.journal_sector >= bs->journal.dirty_start &&
(bs->journal.dirty_start >= bs->journal.used_start ||
dirty_end->second.journal_sector < bs->journal.used_start))
{
// Object is more recent than possible to flush
bool found = try_find_older(dirty_end, cur);
if (!found)
{
unflushable_type = "dirty,";
unflushable = cur;
break;
}
}
unflushable_type = "ok,";
unflushable = cur;
break;
}
printf(
"Flusher: queued=%ld first=%s%lx:%lx trim_wanted=%d dequeuing=%d trimming=%d cur=%d target=%d active=%d syncing=%d\n",
flush_queue.size(), unflushable_type, unflushable.oid.inode, unflushable.oid.stripe,
trim_wanted, dequeuing, trimming, cur_flusher_count, target_flusher_count,
active_flushers, syncing_flushers
);
}
bool journal_flusher_t::try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur)
{
bool found = false;
while (dirty_end != bs->dirty_db.begin())
{
dirty_end--;
if (dirty_end->first.oid != cur.oid)
{
break;
}
if (!(dirty_end->second.journal_sector >= bs->journal.dirty_start &&
(bs->journal.dirty_start >= bs->journal.used_start ||
dirty_end->second.journal_sector < bs->journal.used_start)))
{
found = true;
cur.version = dirty_end->first.version;
break;
}
}
return found;
}
#define await_sqe(label) \
resume_##label:\
sqe = bs->get_sqe();\
@@ -355,15 +286,30 @@ stop_flusher:
// And it may even block writes if we don't flush the older version
// (if it's in the beginning of the journal)...
// So first try to find an older version of the same object to flush.
bool found = flusher->try_find_older(dirty_end, cur);
bool found = false;
while (dirty_end != bs->dirty_db.begin())
{
dirty_end--;
if (dirty_end->first.oid != cur.oid)
{
break;
}
if (!(dirty_end->second.journal_sector >= bs->journal.dirty_start &&
(bs->journal.dirty_start >= bs->journal.used_start ||
dirty_end->second.journal_sector < bs->journal.used_start)))
{
found = true;
cur.version = dirty_end->first.version;
break;
}
}
if (!found)
{
// Try other objects
flusher->sync_to_repeat.erase(cur.oid);
int search_left = flusher->flush_queue.size() - 1;
#ifdef BLOCKSTORE_DEBUG
printf("Flusher overran writers (%lx:%lx v%lu, dirty_start=%08lx) - searching for older flushes (%d left)\n",
cur.oid.inode, cur.oid.stripe, cur.version, bs->journal.dirty_start, search_left);
printf("Flusher overran writers (dirty_start=%08lx) - searching for older flushes (%d left)\n", bs->journal.dirty_start, search_left);
#endif
while (search_left > 0)
{
@@ -386,12 +332,7 @@ stop_flusher:
else
{
repeat_it = flusher->sync_to_repeat.find(cur.oid);
if (repeat_it != flusher->sync_to_repeat.end())
{
if (repeat_it->second < cur.version)
repeat_it->second = cur.version;
}
else
if (repeat_it == flusher->sync_to_repeat.end())
{
flusher->sync_to_repeat[cur.oid] = 0;
break;

View File

@@ -97,9 +97,6 @@ class journal_flusher_t
std::map<uint64_t, meta_sector_t> meta_sectors;
std::deque<object_id> flush_queue;
std::map<object_id, uint64_t> flush_versions;
bool try_find_older(std::map<obj_ver_id, dirty_entry>::iterator & dirty_end, obj_ver_id & cur);
public:
journal_flusher_t(blockstore_impl_t *bs);
~journal_flusher_t();
@@ -111,5 +108,4 @@ public:
void enqueue_flush(obj_ver_id oid);
void unshift_flush(obj_ver_id oid, bool force);
void remove_flush(object_id oid);
void dump_diagnostics();
};

View File

@@ -595,9 +595,3 @@ void blockstore_impl_t::process_list(blockstore_op_t *op)
op->buf = stable;
FINISH_OP(op);
}
void blockstore_impl_t::dump_diagnostics()
{
journal.dump_diagnostics();
flusher->dump_diagnostics();
}

View File

@@ -361,12 +361,8 @@ public:
// Space usage statistics
std::map<uint64_t, uint64_t> inode_space_stats;
// Print diagnostics to stdout
void dump_diagnostics();
inline uint32_t get_block_size() { return block_size; }
inline uint64_t get_block_count() { return block_count; }
inline uint64_t get_free_block_count() { return data_alloc->get_free_count(); }
inline uint32_t get_bitmap_granularity() { return disk_alignment; }
inline uint64_t get_journal_size() { return journal.len; }
};

View File

@@ -218,19 +218,3 @@ uint64_t journal_t::get_trim_pos()
// Can't trim journal
return used_start;
}
void journal_t::dump_diagnostics()
{
auto journal_used_it = used_sectors.lower_bound(used_start);
if (journal_used_it == used_sectors.end())
{
// Journal is cleared to its end, restart from the beginning
journal_used_it = used_sectors.begin();
}
printf(
"Journal: used_start=%08lx next_free=%08lx dirty_start=%08lx trim_to=%08lx trim_to_refs=%ld\n",
used_start, next_free, dirty_start,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->first,
journal_used_it == used_sectors.end() ? 0 : journal_used_it->second
);
}

View File

@@ -180,7 +180,6 @@ struct journal_t
~journal_t();
bool trim();
uint64_t get_trim_pos();
void dump_diagnostics();
inline bool entry_fits(int size)
{
return !(block_size - in_sector_pos < size ||

View File

@@ -478,15 +478,15 @@ resume_2:
}
resume_4:
// Switch object state
#ifdef BLOCKSTORE_DEBUG
printf("Ack write %lx:%lx v%lu = state 0x%x\n", op->oid.inode, op->oid.stripe, op->version, dirty_it->second.state);
#endif
{
auto dirty_it = dirty_db.find((obj_ver_id){
.oid = op->oid,
.version = op->version,
});
assert(dirty_it != dirty_db.end());
#ifdef BLOCKSTORE_DEBUG
printf("Ack write %lx:%lx v%lu = state 0x%x\n", op->oid.inode, op->oid.stripe, op->version, dirty_it->second.state);
#endif
bool is_big = (dirty_it->second.state & BS_ST_TYPE_MASK) == BS_ST_BIG_WRITE;
bool imm = is_big ? (immediate_commit == IMMEDIATE_ALL) : (immediate_commit != IMMEDIATE_NONE);
if (imm)

View File

@@ -1,326 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
/**
* CLI tool
* Currently can (a) remove inodes and (b) merge snapshot/clone layers
*/
#include <vector>
#include <algorithm>
#include "cli.h"
#include "epoll_manager.h"
#include "cluster_client.h"
#include "pg_states.h"
#include "base64.h"
static const char *exe_name = NULL;
json11::Json::object cli_tool_t::parse_args(int narg, const char *args[])
{
json11::Json::object cfg;
json11::Json::array cmd;
cfg["progress"] = "1";
for (int i = 1; i < narg; i++)
{
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
{
help();
}
else if (args[i][0] == '-' && args[i][1] == 'l')
{
cfg["long"] = "1";
}
else if (args[i][0] == '-' && args[i][1] == 'n')
{
cfg["count"] = args[++i];
}
else if (args[i][0] == '-' && args[i][1] == 'p')
{
cfg["pool"] = args[++i];
}
else if (args[i][0] == '-' && args[i][1] == 's')
{
cfg["size"] = args[++i];
}
else if (args[i][0] == '-' && args[i][1] == 'r')
{
cfg["reverse"] = "1";
}
else if (args[i][0] == '-' && args[i][1] == 'f')
{
cfg["force"] = "1";
}
else if (args[i][0] == '-' && args[i][1] == '-')
{
const char *opt = args[i]+2;
cfg[opt] = i == narg-1 || !strcmp(opt, "json") || !strcmp(opt, "wait-list") ||
!strcmp(opt, "long") || !strcmp(opt, "del") || !strcmp(opt, "no-color") ||
!strcmp(opt, "force") || !strcmp(opt, "reverse") ||
!strcmp(opt, "writers-stopped") && strcmp("1", args[i+1]) != 0
? "1" : args[++i];
}
else
{
cmd.push_back(std::string(args[i]));
}
}
if (!cmd.size())
{
std::string exe(exe_name);
if (exe.substr(exe.size()-11) == "vitastor-rm")
{
cmd.push_back("rm-data");
}
}
cfg["command"] = cmd;
return cfg;
}
void cli_tool_t::help()
{
printf(
"Vitastor command-line tool\n"
"(c) Vitaliy Filippov, 2019+ (VNPL-1.1)\n"
"\n"
"USAGE:\n"
"%s ls [-l] [-p POOL] [--sort FIELD] [-r] [-n N] [<name> ...]\n"
" List images (only specified if <name> passed).\n"
" -p|--pool POOL Filter images by pool ID or name\n"
" -l|--long Also report allocated size and I/O statistics\n"
" --del Also include delete operation statistics\n"
" --sort FIELD Sort by specified field (name, size, used_size, <read|write|delete>_<iops|bps|lat|queue>)\n"
" -r|--reverse Sort in descending order\n"
" -n|--count N Only list first N items\n"
"\n"
"%s create -s|--size <size> [-p|--pool <id|name>] [--parent <parent_name>[@<snapshot>]] <name>\n"
" Create an image. You may use K/M/G/T suffixes for <size>. If --parent is specified,\n"
" a copy-on-write image clone is created. Parent must be a snapshot (readonly image).\n"
" Pool must be specified if there is more than one pool.\n"
"\n"
"%s create --snapshot <snapshot> [-p|--pool <id|name>] <image>\n"
"%s snap-create [-p|--pool <id|name>] <image>@<snapshot>\n"
" Create a snapshot of image <name>. May be used live if only a single writer is active.\n"
"\n"
"%s modify <name> [-s|--size <size>] [--readonly | --readwrite] [-f|--force]\n"
" Resize image or change its readonly status. Images with children can't be made read-write.\n"
" If the new size is smaller than the old size, extra data will be purged.\n"
" You should resize file system in the image, if present, before shrinking it.\n"
" -f|--force Proceed with shrinking or setting readwrite flag even if the image has children.\n"
"\n"
"%s rm <from> [<to>] [--writers-stopped]\n"
" Remove <from> or all layers between <from> and <to> (<to> must be a child of <from>),\n"
" rebasing all their children accordingly. --writers-stopped allows merging to be a bit\n"
" more effective in case of a single 'slim' read-write child and 'fat' removed parent:\n"
" the child is merged into parent and parent is renamed to child in that case.\n"
" In other cases parent layers are always merged into children.\n"
"\n"
"%s flatten <layer>\n"
" Flatten a layer, i.e. merge data and detach it from parents.\n"
"\n"
"%s rm-data --pool <pool> --inode <inode> [--wait-list] [--min-offset <offset>]\n"
" Remove inode data without changing metadata.\n"
" --wait-list Retrieve full objects listings before starting to remove objects.\n"
" Requires more memory, but allows to show correct removal progress.\n"
" --min-offset Purge only data starting with specified offset.\n"
"\n"
"%s merge-data <from> <to> [--target <target>]\n"
" Merge layer data without changing metadata. Merge <from>..<to> to <target>.\n"
" <to> must be a child of <from> and <target> may be one of the layers between\n"
" <from> and <to>, including <from> and <to>.\n"
"\n"
"GLOBAL OPTIONS:\n"
" --etcd_address <etcd_address>\n"
" --iodepth N Send N operations in parallel to each OSD when possible (default 32)\n"
" --parallel_osds M Work with M osds in parallel when possible (default 4)\n"
" --progress 1|0 Report progress (default 1)\n"
" --cas 1|0 Use online CAS writes when possible (default auto)\n"
" --no-color Disable colored output\n"
" --json JSON output\n"
,
exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name, exe_name
);
exit(0);
}
void cli_tool_t::change_parent(inode_t cur, inode_t new_parent)
{
auto cur_cfg_it = cli->st_cli.inode_config.find(cur);
if (cur_cfg_it == cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode 0x%lx disappeared\n", cur);
exit(1);
}
inode_config_t new_cfg = cur_cfg_it->second;
std::string cur_name = new_cfg.name;
std::string cur_cfg_key = base64_encode(cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(cur))+
"/"+std::to_string(INODE_NO_POOL(cur)));
new_cfg.parent_id = new_parent;
json11::Json::object cur_cfg_json = cli->st_cli.serialize_inode_cfg(&new_cfg);
waiting++;
cli->st_cli.etcd_txn(json11::Json::object {
{ "compare", json11::Json::array {
json11::Json::object {
{ "target", "MOD" },
{ "key", cur_cfg_key },
{ "result", "LESS" },
{ "mod_revision", new_cfg.mod_revision+1 },
},
} },
{ "success", json11::Json::array {
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", cur_cfg_key },
{ "value", base64_encode(json11::Json(cur_cfg_json).dump()) },
} }
},
} },
}, ETCD_SLOW_TIMEOUT, [this, new_parent, cur, cur_name](std::string err, json11::Json res)
{
if (err != "")
{
fprintf(stderr, "Error changing parent of %s: %s\n", cur_name.c_str(), err.c_str());
exit(1);
}
if (!res["succeeded"].bool_value())
{
fprintf(stderr, "Inode %s was modified during snapshot deletion\n", cur_name.c_str());
exit(1);
}
if (new_parent)
{
auto new_parent_it = cli->st_cli.inode_config.find(new_parent);
std::string new_parent_name = new_parent_it != cli->st_cli.inode_config.end()
? new_parent_it->second.name : "<unknown>";
printf(
"Parent of layer %s (inode %lu in pool %u) changed to %s (inode %lu in pool %u)\n",
cur_name.c_str(), INODE_NO_POOL(cur), INODE_POOL(cur),
new_parent_name.c_str(), INODE_NO_POOL(new_parent), INODE_POOL(new_parent)
);
}
else
{
printf(
"Parent of layer %s (inode %lu in pool %u) detached\n",
cur_name.c_str(), INODE_NO_POOL(cur), INODE_POOL(cur)
);
}
waiting--;
ringloop->wakeup();
});
}
inode_config_t* cli_tool_t::get_inode_cfg(const std::string & name)
{
for (auto & ic: cli->st_cli.inode_config)
{
if (ic.second.name == name)
{
return &ic.second;
}
}
fprintf(stderr, "Layer %s not found\n", name.c_str());
exit(1);
}
void cli_tool_t::run(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
if (!cmd.size())
{
fprintf(stderr, "command is missing\n");
exit(1);
}
else if (cmd[0] == "ls")
{
// List images
action_cb = start_ls(cfg);
}
else if (cmd[0] == "create" || cmd[0] == "snap-create")
{
// Create image/snapshot
action_cb = start_create(cfg);
}
else if (cmd[0] == "modify")
{
// Modify image
action_cb = start_modify(cfg);
}
else if (cmd[0] == "rm-data")
{
// Delete inode data
action_cb = start_rm(cfg);
}
else if (cmd[0] == "merge-data")
{
// Merge layer data without affecting metadata
action_cb = start_merge(cfg);
}
else if (cmd[0] == "flatten")
{
// Merge layer data without affecting metadata
action_cb = start_flatten(cfg);
}
else if (cmd[0] == "rm")
{
// Remove multiple snapshots and rebase their children
action_cb = start_snap_rm(cfg);
}
else
{
fprintf(stderr, "unknown command: %s\n", cmd[0].string_value().c_str());
exit(1);
}
color = !cfg["no-color"].bool_value();
json_output = cfg["json"].bool_value();
iodepth = cfg["iodepth"].uint64_value();
if (!iodepth)
iodepth = 32;
parallel_osds = cfg["parallel_osds"].uint64_value();
if (!parallel_osds)
parallel_osds = 4;
log_level = cfg["log_level"].int64_value();
progress = cfg["progress"].uint64_value() ? true : false;
list_first = cfg["wait-list"].uint64_value() ? true : false;
// Create client
ringloop = new ring_loop_t(512);
epmgr = new epoll_manager_t(ringloop);
cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
cli->on_ready([this]()
{
// Initialize job
consumer.loop = [this]()
{
if (action_cb != NULL)
{
bool done = action_cb();
if (done)
{
action_cb = NULL;
}
}
ringloop->submit();
};
ringloop->register_consumer(&consumer);
consumer.loop();
});
// Loop until it completes
while (action_cb != NULL)
{
ringloop->loop();
if (action_cb != NULL)
ringloop->wait();
}
}
int main(int narg, const char *args[])
{
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
exe_name = args[0];
cli_tool_t *p = new cli_tool_t();
p->run(cli_tool_t::parse_args(narg, args));
return 0;
}

View File

@@ -1,60 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
// Common CLI tool header
#pragma once
#include "json11/json11.hpp"
#include "object_id.h"
#include "ringloop.h"
#include <functional>
struct rm_inode_t;
struct snap_merger_t;
struct snap_flattener_t;
struct snap_remover_t;
class epoll_manager_t;
class cluster_client_t;
struct inode_config_t;
class cli_tool_t
{
public:
uint64_t iodepth = 0, parallel_osds = 0;
bool progress = true;
bool list_first = false;
bool json_output = false;
int log_level = 0;
bool color = false;
ring_loop_t *ringloop = NULL;
epoll_manager_t *epmgr = NULL;
cluster_client_t *cli = NULL;
int waiting = 0;
ring_consumer_t consumer;
std::function<bool(void)> action_cb;
void run(json11::Json cfg);
void change_parent(inode_t cur, inode_t new_parent);
inode_config_t* get_inode_cfg(const std::string & name);
static json11::Json::object parse_args(int narg, const char *args[]);
static void help();
friend struct rm_inode_t;
friend struct snap_merger_t;
friend struct snap_flattener_t;
friend struct snap_remover_t;
std::function<bool(void)> start_ls(json11::Json cfg);
std::function<bool(void)> start_create(json11::Json cfg);
std::function<bool(void)> start_modify(json11::Json cfg);
std::function<bool(void)> start_rm(json11::Json);
std::function<bool(void)> start_merge(json11::Json);
std::function<bool(void)> start_flatten(json11::Json);
std::function<bool(void)> start_snap_rm(json11::Json);
};

View File

@@ -1,514 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include <ctype.h>
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"
// Create an image, snapshot or clone
//
// Snapshot creation performs a etcd transaction which:
// - Checks that the image exists
// - Checks that the snapshot doesn't exist
// - Renames the inode to a new name with snapshot (say, testimg -> testimg@0)
// - Sets the readonly flag for the old inode
// - Creates a new inode with the same name pointing to the old inode as parent
// - Adjusts /index/image/*
//
// The same algorithm can be easily implemented in any other language or even via etcdctl,
// however we have it here for completeness
struct image_creator_t
{
cli_tool_t *parent;
pool_id_t new_pool_id = 0;
std::string new_pool_name;
std::string image_name, new_snap, new_parent;
uint64_t size;
pool_id_t old_pool_id = 0;
inode_t new_parent_id = 0;
inode_t new_id = 0, old_id = 0;
uint64_t max_id_mod_rev = 0, cfg_mod_rev = 0, idx_mod_rev = 0;
json11::Json result;
int state = 0;
bool is_done()
{
return state == 100;
}
void loop()
{
if (state >= 1)
goto resume_1;
if (new_pool_id)
{
auto & pools = parent->cli->st_cli.pool_config;
if (pools.find(new_pool_id) == pools.end())
{
new_pool_id = 0;
}
}
else if (new_pool_name != "")
{
for (auto & ic: parent->cli->st_cli.pool_config)
{
if (ic.second.name == new_pool_name)
{
new_pool_id = ic.first;
break;
}
}
}
else if (parent->cli->st_cli.pool_config.size() == 1)
{
auto it = parent->cli->st_cli.pool_config.begin();
new_pool_id = it->first;
}
if (!new_pool_id)
{
if (new_pool_name == "")
{
fprintf(stderr, "Pool name or ID is missing\n");
}
else
{
fprintf(stderr, "Pool %s does not exist\n", new_pool_name.c_str());
}
exit(1);
}
state = 1;
resume_1:
if (new_snap == "")
create_image();
else
create_snapshot();
}
void create_image()
{
if (state == 2)
goto resume_2;
else if (state == 3)
goto resume_3;
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name)
{
fprintf(stderr, "Image %s already exists\n", image_name.c_str());
exit(1);
}
}
do
{
etcd_txn(json11::Json::object {
{ "success", json11::Json::array { get_next_id() } }
});
state = 2;
resume_2:
if (parent->waiting > 0)
return;
extract_next_id(result["responses"][0]);
attempt_create();
state = 3;
resume_3:
if (parent->waiting > 0)
return;
if (!result["succeeded"].bool_value() &&
result["responses"][0]["response_range"]["kvs"].array_items().size() > 0)
{
fprintf(stderr, "Image %s already exists\n", image_name.c_str());
exit(1);
}
} while (!result["succeeded"].bool_value());
if (parent->progress)
{
printf("Image %s created\n", image_name.c_str());
}
state = 100;
}
void create_snapshot()
{
if (state == 2)
goto resume_2;
else if (state == 3)
goto resume_3;
else if (state == 4)
goto resume_4;
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name+"@"+new_snap)
{
fprintf(stderr, "Snapshot %s@%s already exists\n", image_name.c_str(), new_snap.c_str());
exit(1);
}
}
do
{
// In addition to next_id, get: size, old_id, old_pool_id, new_parent, cfg_mod_rev, idx_mod_rev
resume_2:
resume_3:
get_image_details();
if (parent->waiting > 0)
return;
if (!old_id)
{
fprintf(stderr, "Image %s does not exist\n", image_name.c_str());
exit(1);
}
attempt_create();
state = 4;
resume_4:
if (parent->waiting > 0)
return;
if (!result["succeeded"].bool_value() &&
result["responses"][0]["response_range"]["kvs"].array_items().size() > 0)
{
fprintf(stderr, "Snapshot %s@%s already exists\n", image_name.c_str(), new_snap.c_str());
exit(1);
}
} while (!result["succeeded"].bool_value());
if (parent->progress)
{
printf("Snapshot %s@%s created\n", image_name.c_str(), new_snap.c_str());
}
state = 100;
}
json11::Json::object get_next_id()
{
return json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/index/maxid/"+std::to_string(new_pool_id)
) },
} },
};
}
void extract_next_id(json11::Json response)
{
new_id = 1;
max_id_mod_rev = 0;
if (response["response_range"]["kvs"].array_items().size() > 0)
{
auto kv = parent->cli->st_cli.parse_etcd_kv(response["response_range"]["kvs"][0]);
new_id = 1+INODE_NO_POOL(kv.value.uint64_value());
max_id_mod_rev = kv.mod_revision;
}
auto ino_it = parent->cli->st_cli.inode_config.lower_bound(INODE_WITH_POOL(new_pool_id, 0));
if (ino_it != parent->cli->st_cli.inode_config.begin())
{
ino_it--;
if (INODE_POOL(ino_it->first) == new_pool_id && new_id < 1+INODE_NO_POOL(ino_it->first))
new_id = 1+INODE_NO_POOL(ino_it->first);
}
}
void get_image_details()
{
if (state == 2)
goto resume_2;
else if (state == 3)
goto resume_3;
etcd_txn(json11::Json::object { { "success", json11::Json::array {
get_next_id(),
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/index/image/"+image_name
) },
} },
},
} } });
state = 2;
resume_2:
if (parent->waiting > 0)
return;
extract_next_id(result["responses"][0]);
old_id = 0;
old_pool_id = 0;
cfg_mod_rev = idx_mod_rev = 0;
if (result["responses"][1]["response_range"]["kvs"].array_items().size() == 0)
{
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name)
{
old_id = INODE_NO_POOL(ic.first);
old_pool_id = INODE_POOL(ic.first);
size = ic.second.size;
new_parent_id = ic.second.parent_id;
cfg_mod_rev = ic.second.mod_revision;
break;
}
}
}
else
{
// FIXME: Parse kvs in etcd_state_client automatically
{
auto kv = parent->cli->st_cli.parse_etcd_kv(result["responses"][1]["response_range"]["kvs"][0]);
old_id = INODE_NO_POOL(kv.value["id"].uint64_value());
old_pool_id = (pool_id_t)kv.value["pool_id"].uint64_value();
idx_mod_rev = kv.mod_revision;
if (!old_id || !old_pool_id || old_pool_id >= POOL_ID_MAX)
{
fprintf(stderr, "Invalid pool or inode ID in etcd key %s\n", kv.key.c_str());
exit(1);
}
}
etcd_txn(json11::Json::object {
{ "success", json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/config/inode/"+
std::to_string(old_pool_id)+"/"+std::to_string(old_id)
) },
} },
},
} },
});
state = 3;
resume_3:
if (parent->waiting > 0)
return;
{
auto kv = parent->cli->st_cli.parse_etcd_kv(result["responses"][0]["response_range"]["kvs"][0]);
size = kv.value["size"].uint64_value();
new_parent_id = kv.value["parent_id"].uint64_value();
uint64_t parent_pool_id = kv.value["parent_pool_id"].uint64_value();
if (new_parent_id)
{
new_parent_id = INODE_WITH_POOL(parent_pool_id ? parent_pool_id : old_pool_id, new_parent_id);
}
cfg_mod_rev = kv.mod_revision;
}
}
}
void attempt_create()
{
inode_config_t new_cfg = {
.num = INODE_WITH_POOL(new_pool_id, new_id),
.name = image_name,
.size = size,
.parent_id = (new_snap != "" ? INODE_WITH_POOL(old_pool_id, old_id) : new_parent_id),
.readonly = false,
};
json11::Json::array checks = json11::Json::array {
json11::Json::object {
{ "target", "VERSION" },
{ "version", 0 },
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/config/inode/"+
std::to_string(new_pool_id)+"/"+std::to_string(new_id)
) },
},
json11::Json::object {
{ "target", "VERSION" },
{ "version", 0 },
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/index/image/"+image_name+
(new_snap != "" ? "@"+new_snap : "")
) },
},
json11::Json::object {
{ "target", "MOD" },
{ "mod_revision", max_id_mod_rev },
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/maxid/"+std::to_string(new_pool_id)) },
},
};
json11::Json::array success = json11::Json::array {
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/config/inode/"+
std::to_string(new_pool_id)+"/"+std::to_string(new_id)
) },
{ "value", base64_encode(
json11::Json(parent->cli->st_cli.serialize_inode_cfg(&new_cfg)).dump()
) },
} },
},
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+image_name) },
{ "value", base64_encode(json11::Json(json11::Json::object{
{ "id", new_id },
{ "pool_id", (uint64_t)new_pool_id },
}).dump()) },
} },
},
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/index/maxid/"+
std::to_string(new_pool_id)
) },
{ "value", base64_encode(std::to_string(new_id)) }
} },
},
};
json11::Json::array failure = json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/index/image/"+
image_name+(new_snap != "" ? "@"+new_snap : "")
) },
} },
},
};
if (new_snap != "")
{
inode_config_t snap_cfg = {
.num = INODE_WITH_POOL(old_pool_id, old_id),
.name = image_name+"@"+new_snap,
.size = size,
.parent_id = new_parent_id,
.readonly = true,
};
checks.push_back(json11::Json::object {
{ "target", "MOD" },
{ "mod_revision", cfg_mod_rev },
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/config/inode/"+
std::to_string(old_pool_id)+"/"+std::to_string(old_id)
) },
});
checks.push_back(json11::Json::object {
{ "target", "MOD" },
{ "mod_revision", idx_mod_rev },
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+image_name) }
});
success.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/config/inode/"+
std::to_string(old_pool_id)+"/"+std::to_string(old_id)
) },
{ "value", base64_encode(
json11::Json(parent->cli->st_cli.serialize_inode_cfg(&snap_cfg)).dump()
) },
} },
});
success.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+image_name+"@"+new_snap) },
{ "value", base64_encode(json11::Json(json11::Json::object{
{ "id", old_id },
{ "pool_id", (uint64_t)old_pool_id },
}).dump()) },
} },
});
};
etcd_txn(json11::Json::object {
{ "compare", checks },
{ "success", success },
{ "failure", failure },
});
}
void etcd_txn(json11::Json txn)
{
parent->waiting++;
parent->cli->st_cli.etcd_txn(txn, ETCD_SLOW_TIMEOUT, [this](std::string err, json11::Json res)
{
parent->waiting--;
if (err != "")
{
fprintf(stderr, "Error reading from etcd: %s\n", err.c_str());
exit(1);
}
this->result = res;
parent->ringloop->wakeup();
});
}
};
std::function<bool(void)> cli_tool_t::start_create(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
auto image_creator = new image_creator_t();
image_creator->parent = this;
image_creator->image_name = cmd.size() > 1 ? cmd[1].string_value() : "";
image_creator->new_pool_id = cfg["pool"].uint64_value();
image_creator->new_pool_name = cfg["pool"].string_value();
if (cfg["snapshot"].string_value() != "")
{
image_creator->new_snap = cfg["snapshot"].string_value();
}
else if (cmd[0] == "snap-create")
{
int p = image_creator->image_name.find('@');
if (p == std::string::npos || p == image_creator->image_name.length()-1)
{
fprintf(stderr, "Please specify new snapshot name after @\n");
exit(1);
}
image_creator->new_snap = image_creator->image_name.substr(p + 1);
image_creator->image_name = image_creator->image_name.substr(0, p);
}
image_creator->new_parent = cfg["parent"].string_value();
if (cfg["size"].string_value() != "")
{
std::string size_str = cfg["size"].string_value();
uint64_t mul = 1;
char type_char = tolower(size_str[size_str.length()-1]);
if (type_char == 'k' || type_char == 'm' || type_char == 'g' || type_char == 't')
{
if (type_char == 'k')
mul = 1l<<10;
else if (type_char == 'm')
mul = 1l<<20;
else if (type_char == 'g')
mul = 1l<<30;
else /*if (type_char == 't')*/
mul = 1l<<40;
size_str = size_str.substr(0, size_str.length()-1);
}
uint64_t size = json11::Json(size_str).uint64_value() * mul;
if (size == 0)
{
fprintf(stderr, "Invalid syntax for size: %s\n", cfg["size"].string_value().c_str());
exit(1);
}
if (size % 4096)
{
fprintf(stderr, "Image size should be a multiple of 4096\n");
exit(1);
}
image_creator->size = size;
if (image_creator->new_snap != "")
{
fprintf(stderr, "--size can't be specified for snapshots\n");
exit(1);
}
}
if (image_creator->image_name == "")
{
fprintf(stderr, "Image name is missing\n");
exit(1);
}
if (image_creator->image_name.find('@') != std::string::npos)
{
fprintf(stderr, "Image name can't contain @ character\n");
exit(1);
}
return [image_creator]()
{
image_creator->loop();
if (image_creator->is_done())
{
delete image_creator;
return true;
}
return false;
};
}

View File

@@ -1,124 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "cli.h"
#include "cluster_client.h"
// Flatten a layer: merge all parents into a layer and break the connection completely
struct snap_flattener_t
{
cli_tool_t *parent;
// target to flatten
std::string target_name;
// writers are stopped, we can safely change writable layers
bool writers_stopped = false;
// use CAS writes (0 = never, 1 = auto, 2 = always)
int use_cas = 1;
// interval between fsyncs
int fsync_interval = 128;
std::string top_parent_name;
inode_t target_id = 0;
int state = 0;
std::function<bool(void)> merger_cb;
void get_merge_parents()
{
// Get all parents of target
inode_config_t *target_cfg = parent->get_inode_cfg(target_name);
target_id = target_cfg->num;
std::vector<inode_t> chain_list;
inode_config_t *cur = target_cfg;
chain_list.push_back(cur->num);
while (cur->parent_id != 0 && cur->parent_id != target_cfg->num)
{
auto it = parent->cli->st_cli.inode_config.find(cur->parent_id);
if (it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Parent inode of layer %s (id %ld) not found\n", cur->name.c_str(), cur->parent_id);
exit(1);
}
cur = &it->second;
chain_list.push_back(cur->num);
}
if (cur->parent_id != 0)
{
fprintf(stderr, "Layer %s has a loop in parents\n", target_name.c_str());
exit(1);
}
top_parent_name = cur->name;
}
bool is_done()
{
return state == 5;
}
void loop()
{
if (state == 1)
goto resume_1;
else if (state == 2)
goto resume_2;
else if (state == 3)
goto resume_3;
// Get parent layers
get_merge_parents();
// Start merger
merger_cb = parent->start_merge(json11::Json::object {
{ "command", json11::Json::array{ "merge-data", top_parent_name, target_name } },
{ "target", target_name },
{ "delete-source", false },
{ "cas", use_cas },
{ "fsync-interval", fsync_interval },
});
// Wait for it
resume_1:
while (!merger_cb())
{
state = 1;
return;
}
merger_cb = NULL;
// Change parent
parent->change_parent(target_id, 0);
// Wait for it to complete
state = 2;
resume_2:
if (parent->waiting > 0)
return;
state = 3;
resume_3:
// Done
return;
}
};
std::function<bool(void)> cli_tool_t::start_flatten(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
auto flattener = new snap_flattener_t();
flattener->parent = this;
flattener->target_name = cmd.size() > 1 ? cmd[1].string_value() : "";
if (flattener->target_name == "")
{
fprintf(stderr, "Layer to flatten argument is missing\n");
exit(1);
}
flattener->fsync_interval = cfg["fsync-interval"].uint64_value();
if (!flattener->fsync_interval)
flattener->fsync_interval = 128;
if (!cfg["cas"].is_null())
flattener->use_cas = cfg["cas"].uint64_value() ? 2 : 0;
return [flattener]()
{
flattener->loop();
if (flattener->is_done())
{
delete flattener;
return true;
}
return false;
};
}

View File

@@ -1,522 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include <algorithm>
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"
#define MIN(a, b) ((a) < (b) ? (b) : (a))
std::string print_table(json11::Json items, json11::Json header, bool use_esc);
std::string format_size(uint64_t size);
std::string format_lat(uint64_t lat);
std::string format_q(double depth);
// List existing images
//
// Again, you can just look into etcd, but this console tool incapsulates it
struct image_lister_t
{
cli_tool_t *parent;
pool_id_t list_pool_id = 0;
std::string list_pool_name;
std::string sort_field;
std::set<std::string> only_names;
bool reverse = false;
int max_count = 0;
bool show_stats = false, show_delete = false;
int state = 0;
std::map<inode_t, json11::Json::object> stats;
json11::Json space_info;
bool is_done()
{
return state == 100;
}
void get_list()
{
if (list_pool_name != "")
{
for (auto & ic: parent->cli->st_cli.pool_config)
{
if (ic.second.name == list_pool_name)
{
list_pool_id = ic.first;
break;
}
}
if (!list_pool_id)
{
fprintf(stderr, "Pool %s does not exist\n", list_pool_name.c_str());
exit(1);
}
}
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (list_pool_id && INODE_POOL(ic.second.num) != list_pool_id)
{
continue;
}
auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(ic.second.num));
auto item = json11::Json::object {
{ "name", ic.second.name },
{ "size", ic.second.size },
{ "used_size", 0 },
{ "readonly", ic.second.readonly },
{ "pool_id", (uint64_t)INODE_POOL(ic.second.num) },
{ "pool_name", pool_cfg.name },
{ "inode_num", INODE_NO_POOL(ic.second.num) },
{ "inode_id", ic.second.num },
};
if (ic.second.parent_id)
{
auto p_it = parent->cli->st_cli.inode_config.find(ic.second.parent_id);
item["parent_name"] = p_it != parent->cli->st_cli.inode_config.end()
? p_it->second.name : "";
item["parent_pool_id"] = (uint64_t)INODE_POOL(ic.second.parent_id);
item["parent_inode_num"] = INODE_NO_POOL(ic.second.parent_id);
}
stats[ic.second.num] = item;
}
}
void get_stats()
{
if (state == 1)
goto resume_1;
// Space statistics
// inode/stats/<pool>/<inode>::raw_used divided by pool/stats/<pool>::pg_real_size
// multiplied by 1 or number of data drives
parent->waiting++;
parent->cli->st_cli.etcd_txn(json11::Json::object {
{ "success", json11::Json::array {
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pool/stats"+
(list_pool_id ? "/"+std::to_string(list_pool_id) : "")+"/"
) },
{ "range_end", base64_encode(
parent->cli->st_cli.etcd_prefix+"/pool/stats"+
(list_pool_id ? "/"+std::to_string(list_pool_id) : "")+"0"
) },
} },
},
json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+"/inode/stats"+
(list_pool_id ? "/"+std::to_string(list_pool_id) : "")+"/"
) },
{ "range_end", base64_encode(
parent->cli->st_cli.etcd_prefix+"/inode/stats"+
(list_pool_id ? "/"+std::to_string(list_pool_id) : "")+"0"
) },
} },
},
} },
}, ETCD_SLOW_TIMEOUT, [this](std::string err, json11::Json res)
{
parent->waiting--;
if (err != "")
{
fprintf(stderr, "Error reading from etcd: %s\n", err.c_str());
exit(1);
}
space_info = res;
parent->ringloop->wakeup();
});
state = 1;
resume_1:
if (parent->waiting > 0)
return;
std::map<pool_id_t, uint64_t> pool_pg_real_size;
for (auto & kv_item: space_info["responses"][0]["response_range"]["kvs"].array_items())
{
auto kv = parent->cli->st_cli.parse_etcd_kv(kv_item);
// pool ID
pool_id_t pool_id;
char null_byte = 0;
sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(), "/pool/stats/%u%c", &pool_id, &null_byte);
if (!pool_id || pool_id >= POOL_ID_MAX || null_byte != 0)
{
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
continue;
}
// pg_real_size
pool_pg_real_size[pool_id] = kv.value["pg_real_size"].uint64_value();
}
for (auto & kv_item: space_info["responses"][1]["response_range"]["kvs"].array_items())
{
auto kv = parent->cli->st_cli.parse_etcd_kv(kv_item);
// pool ID & inode number
pool_id_t pool_id;
inode_t only_inode_num;
char null_byte = 0;
sscanf(kv.key.substr(parent->cli->st_cli.etcd_prefix.length()).c_str(),
"/inode/stats/%u/%lu%c", &pool_id, &only_inode_num, &null_byte);
if (!pool_id || pool_id >= POOL_ID_MAX || INODE_POOL(only_inode_num) != 0 || null_byte != 0)
{
fprintf(stderr, "Invalid key in etcd: %s\n", kv.key.c_str());
continue;
}
inode_t inode_num = INODE_WITH_POOL(pool_id, only_inode_num);
uint64_t used_size = kv.value["raw_used"].uint64_value();
// save stats
auto pool_it = parent->cli->st_cli.pool_config.find(pool_id);
if (pool_it != parent->cli->st_cli.pool_config.end())
{
auto & pool_cfg = pool_it->second;
used_size = used_size / pool_pg_real_size[pool_id]
* (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
}
auto stat_it = stats.find(inode_num);
if (stat_it == stats.end())
{
stats[inode_num] = json11::Json::object {
{ "name", "Pool:"+std::to_string(pool_id)+",ID:"+std::to_string(only_inode_num) },
{ "size", 0 },
{ "readonly", false },
{ "pool_id", (uint64_t)INODE_POOL(inode_num) },
{ "pool_name", pool_it == parent->cli->st_cli.pool_config.end()
? (pool_it->second.name == "" ? "<Unnamed>" : pool_it->second.name) : "?" },
{ "inode_num", INODE_NO_POOL(inode_num) },
{ "inode_id", inode_num },
};
stat_it = stats.find(inode_num);
}
stat_it->second["used_size"] = used_size;
stat_it->second["read_iops"] = kv.value["read"]["iops"];
stat_it->second["read_bps"] = kv.value["read"]["bps"];
stat_it->second["read_lat"] = kv.value["read"]["lat"];
stat_it->second["read_queue"] = kv.value["read"]["iops"].number_value() * kv.value["read"]["lat"].number_value() / 1000000;
stat_it->second["write_iops"] = kv.value["write"]["iops"];
stat_it->second["write_bps"] = kv.value["write"]["bps"];
stat_it->second["write_lat"] = kv.value["write"]["lat"];
stat_it->second["write_queue"] = kv.value["write"]["iops"].number_value() * kv.value["write"]["lat"].number_value() / 1000000;
stat_it->second["delete_iops"] = kv.value["delete"]["iops"];
stat_it->second["delete_bps"] = kv.value["delete"]["bps"];
stat_it->second["delete_lat"] = kv.value["delete"]["lat"];
stat_it->second["delete_queue"] = kv.value["delete"]["iops"].number_value() * kv.value["delete"]["lat"].number_value() / 1000000;
}
}
json11::Json::array to_list()
{
json11::Json::array list;
for (auto & kv: stats)
{
if (!only_names.size() || only_names.find(kv.second["name"].string_value()) != only_names.end())
{
list.push_back(kv.second);
}
}
if (sort_field == "name" || sort_field == "pool_name")
{
std::sort(list.begin(), list.end(), [this](json11::Json a, json11::Json b)
{
auto av = a[sort_field].as_string();
auto bv = b[sort_field].as_string();
return reverse ? av > bv : av < bv;
});
}
else
{
std::sort(list.begin(), list.end(), [this](json11::Json a, json11::Json b)
{
auto av = a[sort_field].number_value();
auto bv = b[sort_field].number_value();
return reverse ? av > bv : av < bv;
});
}
if (max_count > 0 && list.size() > max_count)
{
list.resize(max_count);
}
return list;
}
void loop()
{
if (state == 1)
goto resume_1;
get_list();
if (show_stats)
{
resume_1:
get_stats();
if (parent->waiting > 0)
return;
}
if (parent->json_output)
{
// JSON output
printf("%s\n", json11::Json(to_list()).dump().c_str());
state = 100;
return;
}
// Table output: name, size_fmt, [used_size_fmt], ro, parent_name
json11::Json::array cols;
cols.push_back(json11::Json::object{
{ "key", "name" },
{ "title", "NAME" },
});
if (!list_pool_id)
{
cols.push_back(json11::Json::object{
{ "key", "pool_name" },
{ "title", "POOL" },
});
}
cols.push_back(json11::Json::object{
{ "key", "size_fmt" },
{ "title", "SIZE" },
});
if (show_stats)
{
cols.push_back(json11::Json::object{
{ "key", "used_size_fmt" },
{ "title", "USED" },
});
cols.push_back(json11::Json::object{
{ "key", "read_bw" },
{ "title", "READ" },
});
cols.push_back(json11::Json::object{
{ "key", "read_iops" },
{ "title", "IOPS" },
});
cols.push_back(json11::Json::object{
{ "key", "read_q" },
{ "title", "QUEUE" },
});
cols.push_back(json11::Json::object{
{ "key", "read_lat_f" },
{ "title", "LAT" },
});
cols.push_back(json11::Json::object{
{ "key", "write_bw" },
{ "title", "WRITE" },
});
cols.push_back(json11::Json::object{
{ "key", "write_iops" },
{ "title", "IOPS" },
});
cols.push_back(json11::Json::object{
{ "key", "write_q" },
{ "title", "QUEUE" },
});
cols.push_back(json11::Json::object{
{ "key", "write_lat_f" },
{ "title", "LAT" },
});
if (show_delete)
{
cols.push_back(json11::Json::object{
{ "key", "delete_bw" },
{ "title", "DEL" },
});
cols.push_back(json11::Json::object{
{ "key", "delete_iops" },
{ "title", "IOPS" },
});
cols.push_back(json11::Json::object{
{ "key", "delete_q" },
{ "title", "QUEUE" },
});
cols.push_back(json11::Json::object{
{ "key", "delete_lat_f" },
{ "title", "LAT" },
});
}
}
cols.push_back(json11::Json::object{
{ "key", "ro" },
{ "title", "FLAGS" },
{ "right", true },
});
cols.push_back(json11::Json::object{
{ "key", "parent_name" },
{ "title", "PARENT" },
});
json11::Json::array list;
for (auto & kv: stats)
{
if (show_stats)
{
kv.second["used_size_fmt"] = format_size(kv.second["used_size"].uint64_value());
kv.second["read_bw"] = format_size(kv.second["read_bps"].uint64_value())+"/s";
kv.second["write_bw"] = format_size(kv.second["write_bps"].uint64_value())+"/s";
kv.second["delete_bw"] = format_size(kv.second["delete_bps"].uint64_value())+"/s";
kv.second["read_lat_f"] = format_lat(kv.second["read_lat"].uint64_value());
kv.second["write_lat_f"] = format_lat(kv.second["write_lat"].uint64_value());
kv.second["delete_lat_f"] = format_lat(kv.second["delete_lat"].uint64_value());
kv.second["read_q"] = format_q(kv.second["read_queue"].number_value());
kv.second["write_q"] = format_q(kv.second["write_queue"].number_value());
kv.second["delete_q"] = format_q(kv.second["delete_queue"].number_value());
}
kv.second["size_fmt"] = format_size(kv.second["size"].uint64_value());
kv.second["ro"] = kv.second["readonly"].bool_value() ? "RO" : "-";
}
printf("%s", print_table(to_list(), cols, parent->color).c_str());
state = 100;
}
};
std::string print_table(json11::Json items, json11::Json header, bool use_esc)
{
std::vector<int> sizes;
for (int i = 0; i < header.array_items().size(); i++)
{
sizes.push_back(header[i]["title"].string_value().length());
}
for (auto & item: items.array_items())
{
for (int i = 0; i < header.array_items().size(); i++)
{
int l = item[header[i]["key"].string_value()].as_string().length();
sizes[i] = sizes[i] < l ? l : sizes[i];
}
}
std::string str = use_esc ? "\033[1m" : "";
for (int i = 0; i < header.array_items().size(); i++)
{
if (i > 0)
{
// Separator
str += " ";
}
int pad = sizes[i]-header[i]["title"].string_value().length();
if (header[i]["right"].bool_value())
{
// Align right
for (int j = 0; j < pad; j++)
str += ' ';
str += header[i]["title"].string_value();
}
else
{
// Align left
str += header[i]["title"].string_value();
for (int j = 0; j < pad; j++)
str += ' ';
}
}
if (use_esc)
str += "\033[0m";
str += "\n";
for (auto & item: items.array_items())
{
for (int i = 0; i < header.array_items().size(); i++)
{
if (i > 0)
{
// Separator
str += " ";
}
int pad = sizes[i] - item[header[i]["key"].string_value()].as_string().length();
if (header[i]["right"].bool_value())
{
// Align right
for (int j = 0; j < pad; j++)
str += ' ';
str += item[header[i]["key"].string_value()].as_string();
}
else
{
// Align left
str += item[header[i]["key"].string_value()].as_string();
for (int j = 0; j < pad; j++)
str += ' ';
}
}
str += "\n";
}
return str;
}
static uint64_t size_thresh[] = { 1024l*1024*1024*1024, 1024l*1024*1024, 1024l*1024, 1024, 0 };
static const char *size_unit = "TGMKB";
std::string format_size(uint64_t size)
{
char buf[256];
for (int i = 0; i < sizeof(size_thresh)/sizeof(size_thresh[0]); i++)
{
if (size >= size_thresh[i] || i >= sizeof(size_thresh)/sizeof(size_thresh[0])-1)
{
double value = size_thresh[i] ? (double)size/size_thresh[i] : size;
int l = snprintf(buf, sizeof(buf), "%.1f", value);
assert(l < sizeof(buf)-2);
if (buf[l-1] == '0')
l -= 2;
buf[l] = ' ';
buf[l+1] = size_unit[i];
buf[l+2] = 0;
break;
}
}
return std::string(buf);
}
std::string format_lat(uint64_t lat)
{
char buf[256];
int l = 0;
if (lat < 100)
l = snprintf(buf, sizeof(buf), "%lu us", lat);
else if (lat < 500000)
l = snprintf(buf, sizeof(buf), "%.2f ms", (double)lat/1000);
else
l = snprintf(buf, sizeof(buf), "%.2f s", (double)lat/1000000);
assert(l < sizeof(buf));
return std::string(buf);
}
std::string format_q(double depth)
{
char buf[256];
int l = snprintf(buf, sizeof(buf), "%.2f", depth);
assert(l < sizeof(buf));
if (buf[l-1] == '0')
l--;
if (buf[l-1] == '0')
l -= 2;
buf[l] = 0;
return std::string(buf);
}
std::function<bool(void)> cli_tool_t::start_ls(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
auto lister = new image_lister_t();
lister->parent = this;
lister->list_pool_id = cfg["pool"].uint64_value();
lister->list_pool_name = lister->list_pool_id ? "" : cfg["pool"].as_string();
lister->show_stats = cfg["long"].bool_value();
lister->show_delete = cfg["del"].bool_value();
lister->sort_field = cfg["sort"].string_value();
lister->reverse = cfg["reverse"].bool_value();
lister->max_count = cfg["count"].uint64_value();
for (int i = 0; i < cmd.size(); i++)
{
lister->only_names.insert(cmd[i].string_value());
}
return [lister]()
{
lister->loop();
if (lister->is_done())
{
delete lister;
return true;
}
return false;
};
}

View File

@@ -1,583 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "cli.h"
#include "cluster_client.h"
#include "cpp-btree/safe_btree_set.h"
struct snap_rw_op_t
{
uint64_t offset = 0;
void *buf = NULL;
cluster_op_t op;
int todo = 0;
uint32_t start = 0, end = 0;
};
// Layer merge is the base for multiple operations:
// 1) Delete snapshot "up" = merge child layer into the parent layer, remove the child
// and rename the parent to the child
// 2) Delete snapshot "down" = merge parent layer into the child layer and remove the parent
// 3) Flatten image = merge parent layers into the child layer and break the connection
struct snap_merger_t
{
cli_tool_t *parent;
// -- CONFIGURATION --
// merge from..to into target (target may be one of from..to)
std::string from_name, to_name, target_name;
// inode=>rank (bigger rank means child layers)
std::map<inode_t,int> sources;
// delete merged source inode data during merge
bool delete_source = false;
// use CAS writes (0 = never, 1 = auto, 2 = always)
int use_cas = 1;
// don't necessarily delete source data, but perform checks as if we were to do it
bool check_delete_source = false;
// interval between fsyncs
int fsync_interval = 128;
// -- STATE --
inode_t target;
int target_rank;
bool inside_continue = false;
int state = 0;
int lists_todo = 0;
uint64_t target_block_size = 0;
btree::safe_btree_set<uint64_t> merge_offsets;
btree::safe_btree_set<uint64_t>::iterator oit;
std::map<inode_t, std::vector<uint64_t>> layer_lists;
std::map<inode_t, uint64_t> layer_block_size;
std::map<inode_t, uint64_t> layer_list_pos;
int in_flight = 0;
uint64_t last_fsync_offset = 0;
uint64_t last_written_offset = 0;
int deleted_unsynced = 0;
uint64_t processed = 0, to_process = 0;
void start_merge()
{
check_delete_source = delete_source || check_delete_source;
inode_config_t *from_cfg = parent->get_inode_cfg(from_name);
inode_config_t *to_cfg = parent->get_inode_cfg(to_name);
inode_config_t *target_cfg = target_name == "" ? from_cfg : parent->get_inode_cfg(target_name);
if (to_cfg->num == from_cfg->num)
{
fprintf(stderr, "Only one layer specified, nothing to merge\n");
exit(1);
}
// Check that to_cfg is actually a child of from_cfg and target_cfg is somewhere between them
std::vector<inode_t> chain_list;
inode_config_t *cur = to_cfg;
chain_list.push_back(cur->num);
layer_block_size[cur->num] = get_block_size(cur->num);
while (cur->parent_id != from_cfg->num &&
cur->parent_id != to_cfg->num &&
cur->parent_id != 0)
{
auto it = parent->cli->st_cli.inode_config.find(cur->parent_id);
if (it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Parent inode of layer %s (id %ld) not found\n", cur->name.c_str(), cur->parent_id);
exit(1);
}
cur = &it->second;
chain_list.push_back(cur->num);
layer_block_size[cur->num] = get_block_size(cur->num);
}
if (cur->parent_id != from_cfg->num)
{
fprintf(stderr, "Layer %s is not a child of %s\n", to_name.c_str(), from_name.c_str());
exit(1);
}
chain_list.push_back(from_cfg->num);
layer_block_size[from_cfg->num] = get_block_size(from_cfg->num);
int i = chain_list.size()-1;
for (inode_t item: chain_list)
{
sources[item] = i--;
}
if (sources.find(target_cfg->num) == sources.end())
{
fprintf(stderr, "Layer %s is not between %s and %s\n", target_name.c_str(), to_name.c_str(), from_name.c_str());
exit(1);
}
target = target_cfg->num;
target_rank = sources.at(target);
int to_rank = sources.at(to_cfg->num);
bool to_has_children = false;
// Check that there are no other inodes dependent on altered layers
//
// 1) everything between <target> and <to> except <to> is not allowed
// to have children other than <to> if <to> is a child of <target>:
//
// <target> - <layer 3> - <to>
// \- <layer 4> <--------X--------- NOT ALLOWED
//
// 2) everything between <from> and <target>, except <target>, is not allowed
// to have children other than <target> if sources are to be deleted after merging:
//
// <from> - <layer 1> - <target> - <to>
// \- <layer 2> <---------X-------- NOT ALLOWED
for (auto & ic: parent->cli->st_cli.inode_config)
{
auto it = sources.find(ic.second.num);
if (it == sources.end() && ic.second.parent_id != 0)
{
it = sources.find(ic.second.parent_id);
if (it != sources.end())
{
int parent_rank = it->second;
if (parent_rank < to_rank && (parent_rank >= target_rank || check_delete_source))
{
fprintf(
stderr, "Layers at or above %s, but below %s are not allowed"
" to have other children, but %s is a child of %s\n",
(check_delete_source ? from_name.c_str() : target_name.c_str()),
to_name.c_str(), ic.second.name.c_str(),
parent->cli->st_cli.inode_config.at(ic.second.parent_id).name.c_str()
);
exit(1);
}
if (parent_rank >= to_rank)
{
to_has_children = true;
}
}
}
}
if ((target_rank < to_rank || to_has_children) && use_cas == 1)
{
// <to> has children itself, no need for CAS
use_cas = 0;
}
sources.erase(target);
printf(
"Merging %ld layer(s) into target %s%s (inode %lu in pool %u)\n",
sources.size(), target_cfg->name.c_str(),
use_cas ? " online (with CAS)" : "", INODE_NO_POOL(target), INODE_POOL(target)
);
target_block_size = get_block_size(target);
}
uint64_t get_block_size(inode_t inode)
{
auto & pool_cfg = parent->cli->st_cli.pool_config.at(INODE_POOL(inode));
uint64_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
return parent->cli->get_bs_block_size() * pg_data_size;
}
void continue_merge_reent()
{
if (!inside_continue)
{
inside_continue = true;
continue_merge();
inside_continue = false;
}
}
bool is_done()
{
return state == 6;
}
void continue_merge()
{
if (state == 1)
goto resume_1;
else if (state == 2)
goto resume_2;
else if (state == 3)
goto resume_3;
else if (state == 4)
goto resume_4;
else if (state == 5)
goto resume_5;
else if (state == 6)
goto resume_6;
// Get parents and so on
start_merge();
// First list lower layers
list_layers(true);
state = 1;
resume_1:
while (lists_todo > 0)
{
// Wait for lists
return;
}
if (merge_offsets.size() > 0)
{
state = 2;
oit = merge_offsets.begin();
processed = 0;
to_process = merge_offsets.size();
resume_2:
// Then remove blocks already filled in target by issuing zero-length reads and checking bitmaps
while (in_flight < parent->iodepth*parent->parallel_osds && oit != merge_offsets.end())
{
in_flight++;
check_if_full(*oit);
oit++;
processed++;
if (parent->progress && !(processed % 128))
{
printf("\rFiltering target blocks: %lu/%lu", processed, to_process);
}
}
if (in_flight > 0 || oit != merge_offsets.end())
{
// Wait until reads finish
return;
}
if (parent->progress)
{
printf("\r%lu full blocks of target filtered out\n", to_process-merge_offsets.size());
}
}
state = 3;
resume_3:
// Then list upper layers
list_layers(false);
state = 4;
resume_4:
while (lists_todo > 0)
{
// Wait for lists
return;
}
state = 5;
processed = 0;
to_process = merge_offsets.size();
oit = merge_offsets.begin();
resume_5:
// Now read, overwrite and optionally delete offsets one by one
while (in_flight < parent->iodepth*parent->parallel_osds && oit != merge_offsets.end())
{
in_flight++;
read_and_write(*oit);
oit++;
processed++;
if (parent->progress && !(processed % 128))
{
printf("\rOverwriting blocks: %lu/%lu", processed, to_process);
}
}
if (in_flight > 0 || oit != merge_offsets.end())
{
// Wait until overwrites finish
return;
}
if (parent->progress)
{
printf("\rOverwriting blocks: %lu/%lu\n", to_process, to_process);
}
// Done
printf("Done, layers from %s to %s merged into %s\n", from_name.c_str(), to_name.c_str(), target_name.c_str());
state = 6;
resume_6:
return;
}
void list_layers(bool lower)
{
for (auto & sp: sources)
{
inode_t src = sp.first;
if (lower ? (sp.second < target_rank) : (sp.second > target_rank))
{
lists_todo++;
inode_list_t* lst = parent->cli->list_inode_start(src, [this, src](
inode_list_t *lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)
{
uint64_t layer_block = layer_block_size.at(src);
for (object_id obj: objects)
{
merge_offsets.insert(obj.stripe - obj.stripe % target_block_size);
for (int i = target_block_size; i < layer_block; i += target_block_size)
{
merge_offsets.insert(obj.stripe - obj.stripe % target_block_size + i);
}
}
if (delete_source)
{
// Also store individual lists
auto & layer_list = layer_lists[src];
int pos = layer_list.size();
layer_list.resize(pos + objects.size());
for (object_id obj: objects)
{
layer_list[pos++] = obj.stripe;
}
}
if (status & INODE_LIST_DONE)
{
auto & name = parent->cli->st_cli.inode_config.at(src).name;
printf("Got listing of layer %s (inode %lu in pool %u)\n", name.c_str(), INODE_NO_POOL(src), INODE_POOL(src));
if (delete_source)
{
// Sort the inode listing
std::sort(layer_lists[src].begin(), layer_lists[src].end());
}
lists_todo--;
continue_merge_reent();
}
else
{
parent->cli->list_inode_next(lst, 1);
}
});
parent->cli->list_inode_next(lst, parent->parallel_osds);
}
}
}
// Check if <offset> is fully written in <target> and remove it from merge_offsets if so
void check_if_full(uint64_t offset)
{
cluster_op_t *op = new cluster_op_t;
op->opcode = OSD_OP_READ_BITMAP;
op->inode = target;
op->offset = offset;
op->len = 0;
op->callback = [this](cluster_op_t *op)
{
if (op->retval < 0)
{
fprintf(stderr, "error reading target bitmap at offset %lx: %s\n", op->offset, strerror(-op->retval));
}
else
{
uint64_t bitmap_bytes = target_block_size/parent->cli->get_bs_bitmap_granularity()/8;
int i;
for (i = 0; i < bitmap_bytes; i++)
{
if (((uint8_t*)op->bitmap_buf)[i] != 0xff)
{
break;
}
}
if (i == bitmap_bytes)
{
// full
merge_offsets.erase(op->offset);
}
}
delete op;
in_flight--;
continue_merge_reent();
};
parent->cli->execute(op);
}
// Read <offset> from <to>, write it to <target> and optionally delete it
// from all layers except <target> after fsync'ing
void read_and_write(uint64_t offset)
{
snap_rw_op_t *rwo = new snap_rw_op_t;
// Initialize counter to 1 to later allow write_subop() to return immediately
// (even though it shouldn't really do that)
rwo->todo = 1;
rwo->buf = malloc(target_block_size);
rwo->offset = offset;
rwo_read(rwo);
}
void rwo_read(snap_rw_op_t *rwo)
{
cluster_op_t *op = &rwo->op;
op->opcode = OSD_OP_READ;
op->inode = target;
op->offset = rwo->offset;
op->len = target_block_size;
op->iov.push_back(rwo->buf, target_block_size);
op->callback = [this, rwo](cluster_op_t *op)
{
if (op->retval != op->len)
{
fprintf(stderr, "error reading target at offset %lx: %s\n", op->offset, strerror(-op->retval));
exit(1);
}
next_write(rwo);
};
parent->cli->execute(op);
}
void next_write(snap_rw_op_t *rwo)
{
// Write each non-empty range using an individual operation
// FIXME: Allow to use single write with "holes" (OSDs don't allow it yet)
uint32_t gran = parent->cli->get_bs_bitmap_granularity();
uint64_t bitmap_size = target_block_size / gran;
while (rwo->end < bitmap_size)
{
auto bit = ((*(uint8_t*)(rwo->op.bitmap_buf + (rwo->end >> 3))) & (1 << (rwo->end & 0x7)));
if (!bit)
{
if (rwo->end > rwo->start)
{
// write start->end
rwo->todo++;
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
rwo->start = rwo->end;
if (use_cas)
{
// Submit one by one if using CAS writes
return;
}
}
rwo->start = rwo->end = rwo->end+1;
}
else
{
rwo->end++;
}
}
if (rwo->end > rwo->start)
{
// write start->end
rwo->todo++;
write_subop(rwo, rwo->start*gran, rwo->end*gran, use_cas ? 1+rwo->op.version : 0);
rwo->start = rwo->end;
if (use_cas)
{
return;
}
}
rwo->todo--;
// Just in case, if everything is done
autofree_op(rwo);
}
void write_subop(snap_rw_op_t *rwo, uint32_t start, uint32_t end, uint64_t version)
{
cluster_op_t *subop = new cluster_op_t;
subop->opcode = OSD_OP_WRITE;
subop->inode = target;
subop->offset = rwo->offset+start;
subop->len = end-start;
subop->version = version;
subop->flags = OSD_OP_IGNORE_READONLY;
subop->iov.push_back(rwo->buf+start, end-start);
subop->callback = [this, rwo](cluster_op_t *subop)
{
rwo->todo--;
if (subop->retval != subop->len)
{
if (use_cas && subop->retval == -EINTR)
{
// CAS failure - reread and repeat optimistically
rwo->start = subop->offset - rwo->offset;
rwo_read(rwo);
delete subop;
return;
}
fprintf(stderr, "error writing target at offset %lx: %s\n", subop->offset, strerror(-subop->retval));
exit(1);
}
// Increment CAS version
rwo->op.version++;
if (use_cas)
next_write(rwo);
else
autofree_op(rwo);
delete subop;
};
parent->cli->execute(subop);
}
void delete_offset(inode_t inode_num, uint64_t offset)
{
cluster_op_t *subop = new cluster_op_t;
subop->opcode = OSD_OP_DELETE;
subop->inode = inode_num;
subop->offset = offset;
subop->len = 0;
subop->flags = OSD_OP_IGNORE_READONLY;
subop->callback = [this](cluster_op_t *subop)
{
if (subop->retval != 0)
{
fprintf(stderr, "error deleting from layer 0x%lx at offset %lx: %s", subop->inode, subop->offset, strerror(-subop->retval));
}
delete subop;
};
parent->cli->execute(subop);
}
void autofree_op(snap_rw_op_t *rwo)
{
if (!rwo->todo)
{
if (last_written_offset < rwo->op.offset+target_block_size)
{
last_written_offset = rwo->op.offset+target_block_size;
}
if (delete_source)
{
deleted_unsynced++;
if (deleted_unsynced >= fsync_interval)
{
uint64_t from = last_fsync_offset, to = last_written_offset;
cluster_op_t *subop = new cluster_op_t;
subop->opcode = OSD_OP_SYNC;
subop->callback = [this, from, to](cluster_op_t *subop)
{
delete subop;
// We can now delete source data between <from> and <to>
// But to do this we have to keep all object lists in memory :-(
for (auto & lp: layer_list_pos)
{
auto & layer_list = layer_lists.at(lp.first);
uint64_t layer_block = layer_block_size.at(lp.first);
int cur_pos = lp.second;
while (cur_pos < layer_list.size() && layer_list[cur_pos]+layer_block < to)
{
delete_offset(lp.first, layer_list[cur_pos]);
cur_pos++;
}
lp.second = cur_pos;
}
};
parent->cli->execute(subop);
}
}
free(rwo->buf);
delete rwo;
in_flight--;
continue_merge_reent();
}
}
};
std::function<bool(void)> cli_tool_t::start_merge(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
auto merger = new snap_merger_t();
merger->parent = this;
merger->from_name = cmd.size() > 1 ? cmd[1].string_value() : "";
merger->to_name = cmd.size() > 2 ? cmd[2].string_value() : "";
merger->target_name = cfg["target"].string_value();
if (merger->from_name == "" || merger->to_name == "")
{
fprintf(stderr, "Beginning or end of the merge sequence is missing\n");
exit(1);
}
merger->delete_source = cfg["delete-source"].string_value() != "";
merger->fsync_interval = cfg["fsync-interval"].uint64_value();
if (!merger->fsync_interval)
merger->fsync_interval = 128;
if (!cfg["cas"].is_null())
merger->use_cas = cfg["cas"].uint64_value() ? 2 : 0;
return [merger]()
{
merger->continue_merge_reent();
if (merger->is_done())
{
delete merger;
return true;
}
return false;
};
}

View File

@@ -1,182 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"
// Resize image (purging extra data on shrink) or change its readonly status
struct image_changer_t
{
cli_tool_t *parent;
std::string image_name;
uint64_t new_size = 0;
bool set_readonly = false, set_readwrite = false, force = false;
// interval between fsyncs
int fsync_interval = 128;
uint64_t inode_num = 0;
inode_config_t cfg;
std::string cur_cfg_key;
bool has_children = false;
int state = 0;
std::function<bool(void)> cb;
bool is_done()
{
return state == 100;
}
void loop()
{
if (state == 1)
goto resume_1;
else if (state == 2)
goto resume_2;
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.name == image_name)
{
inode_num = ic.first;
cfg = ic.second;
break;
}
}
if (!inode_num)
{
fprintf(stderr, "Image %s does not exist\n", image_name.c_str());
exit(1);
}
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (ic.second.parent_id == inode_num)
{
has_children = true;
break;
}
}
if (new_size != 0)
{
if (cfg.size >= new_size)
{
// Check confirmation if trimming an image with children
if (has_children && !force)
{
fprintf(stderr, "Image %s has children. Refusing to shrink it without --force", image_name.c_str());
exit(1);
}
// Shrink the image first
cb = parent->start_rm(json11::Json::object {
{ "inode", INODE_NO_POOL(inode_num) },
{ "pool", (uint64_t)INODE_POOL(inode_num) },
{ "fsync-interval", fsync_interval },
{ "min-offset", new_size },
});
resume_1:
while (!cb())
{
state = 1;
return;
}
cb = NULL;
}
cfg.size = new_size;
}
if (set_readonly)
{
cfg.readonly = true;
}
if (set_readwrite)
{
cfg.readonly = false;
// Check confirmation if trimming an image with children
if (!force)
{
fprintf(stderr, "Image %s has children. Refusing to make it read-write without --force", image_name.c_str());
exit(1);
}
}
cur_cfg_key = base64_encode(parent->cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(inode_num))+
"/"+std::to_string(INODE_NO_POOL(inode_num)));
parent->waiting++;
parent->cli->st_cli.etcd_txn(json11::Json::object {
{ "compare", json11::Json::array {
json11::Json::object {
{ "target", "MOD" },
{ "key", cur_cfg_key },
{ "result", "LESS" },
{ "mod_revision", cfg.mod_revision+1 },
},
} },
{ "success", json11::Json::array {
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", cur_cfg_key },
{ "value", base64_encode(json11::Json(
parent->cli->st_cli.serialize_inode_cfg(&cfg)
).dump()) },
} }
},
} },
}, ETCD_SLOW_TIMEOUT, [this](std::string err, json11::Json res)
{
if (err != "")
{
fprintf(stderr, "Error changing %s: %s\n", image_name.c_str(), err.c_str());
exit(1);
}
if (!res["succeeded"].bool_value())
{
fprintf(stderr, "Image %s was modified by someone else, please repeat your request\n", image_name.c_str());
exit(1);
}
parent->waiting--;
parent->ringloop->wakeup();
});
state = 2;
resume_2:
if (parent->waiting > 0)
return;
printf("Image %s changed\n", image_name.c_str());
state = 100;
}
};
std::function<bool(void)> cli_tool_t::start_modify(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
auto changer = new image_changer_t();
changer->parent = this;
changer->image_name = cmd.size() > 1 ? cmd[1].string_value() : "";
if (changer->image_name == "")
{
fprintf(stderr, "Image name is missing\n");
exit(1);
}
changer->new_size = cfg["size"].uint64_value();
if (changer->new_size != 0 && (changer->new_size % 4096))
{
fprintf(stderr, "Image size should be a multiple of 4096\n");
exit(1);
}
changer->force = cfg["force"].bool_value();
changer->set_readonly = cfg["readonly"].bool_value();
changer->set_readwrite = cfg["readwrite"].bool_value();
changer->fsync_interval = cfg["fsync-interval"].uint64_value();
if (!changer->fsync_interval)
changer->fsync_interval = 128;
// FIXME Check that the image doesn't have children when shrinking
return [changer]()
{
changer->loop();
if (changer->is_done())
{
delete changer;
return true;
}
return false;
};
}

View File

@@ -1,213 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "cli.h"
#include "cluster_client.h"
#define RM_LISTING 1
#define RM_REMOVING 2
#define RM_END 3
struct rm_pg_t
{
pg_num_t pg_num;
osd_num_t rm_osd_num;
std::set<object_id> objects;
std::set<object_id>::iterator obj_pos;
uint64_t obj_count = 0, obj_done = 0;
int state = 0;
int in_flight = 0;
};
struct rm_inode_t
{
uint64_t inode = 0;
pool_id_t pool_id = 0;
uint64_t min_offset = 0;
cli_tool_t *parent = NULL;
inode_list_t *lister = NULL;
std::vector<rm_pg_t*> lists;
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
uint64_t pgs_to_list = 0;
bool lists_done = false;
int state = 0;
void start_delete()
{
lister = parent->cli->list_inode_start(inode, [this](inode_list_t *lst,
std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)
{
rm_pg_t *rm = new rm_pg_t((rm_pg_t){
.pg_num = pg_num,
.rm_osd_num = primary_osd,
.objects = objects,
.obj_count = objects.size(),
.obj_done = 0,
});
if (min_offset == 0)
{
total_count += objects.size();
}
else
{
for (object_id oid: objects)
{
if (oid.stripe >= min_offset)
{
total_count++;
}
}
}
rm->obj_pos = rm->objects.begin();
lists.push_back(rm);
if (parent->list_first)
{
parent->cli->list_inode_next(lister, 1);
}
if (status & INODE_LIST_DONE)
{
lists_done = true;
}
pgs_to_list--;
continue_delete();
});
if (!lister)
{
fprintf(stderr, "Failed to list inode %lu from pool %u objects\n", INODE_NO_POOL(inode), INODE_POOL(inode));
exit(1);
}
pgs_to_list = parent->cli->list_pg_count(lister);
parent->cli->list_inode_next(lister, parent->parallel_osds);
}
void send_ops(rm_pg_t *cur_list)
{
if (parent->cli->msgr.osd_peer_fds.find(cur_list->rm_osd_num) ==
parent->cli->msgr.osd_peer_fds.end())
{
// Initiate connection
parent->cli->msgr.connect_peer(cur_list->rm_osd_num, parent->cli->st_cli.peer_states[cur_list->rm_osd_num]);
return;
}
while (cur_list->in_flight < parent->iodepth && cur_list->obj_pos != cur_list->objects.end())
{
if (cur_list->obj_pos->stripe >= min_offset)
{
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
op->peer_fd = parent->cli->msgr.osd_peer_fds[cur_list->rm_osd_num];
op->req = (osd_any_op_t){
.rw = {
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,
.id = parent->cli->next_op_id(),
.opcode = OSD_OP_DELETE,
},
.inode = cur_list->obj_pos->inode,
.offset = cur_list->obj_pos->stripe,
.len = 0,
},
};
op->callback = [this, cur_list](osd_op_t *op)
{
cur_list->in_flight--;
if (op->reply.hdr.retval < 0)
{
fprintf(stderr, "Failed to remove object %lx:%lx from PG %u (OSD %lu) (retval=%ld)\n",
op->req.rw.inode, op->req.rw.offset,
cur_list->pg_num, cur_list->rm_osd_num, op->reply.hdr.retval);
}
delete op;
cur_list->obj_done++;
total_done++;
continue_delete();
};
cur_list->in_flight++;
parent->cli->msgr.outbox_push(op);
}
cur_list->obj_pos++;
}
}
void continue_delete()
{
if (parent->list_first && !lists_done)
{
return;
}
for (int i = 0; i < lists.size(); i++)
{
if (!lists[i]->in_flight && lists[i]->obj_pos == lists[i]->objects.end())
{
delete lists[i];
lists.erase(lists.begin()+i, lists.begin()+i+1);
i--;
if (!lists_done)
{
parent->cli->list_inode_next(lister, 1);
}
}
else
{
send_ops(lists[i]);
}
}
if (parent->progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
{
printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
total_prev_pct = total_done*1000/total_count;
}
if (lists_done && !lists.size())
{
printf("Done, inode %lu in pool %u data removed\n", INODE_NO_POOL(inode), pool_id);
state = 2;
}
}
bool loop()
{
if (state == 0)
{
start_delete();
state = 1;
}
else if (state == 1)
{
continue_delete();
}
else if (state == 2)
{
return true;
}
return false;
}
};
std::function<bool(void)> cli_tool_t::start_rm(json11::Json cfg)
{
auto remover = new rm_inode_t();
remover->parent = this;
remover->inode = cfg["inode"].uint64_value();
remover->pool_id = cfg["pool"].uint64_value();
if (remover->pool_id)
{
remover->inode = (remover->inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)remover->pool_id) << (64-POOL_ID_BITS));
}
remover->pool_id = INODE_POOL(remover->inode);
if (!remover->pool_id)
{
fprintf(stderr, "pool is missing\n");
exit(1);
}
remover->min_offset = cfg["min-offset"].uint64_value();
return [remover]()
{
if (remover->loop())
{
delete remover;
return true;
}
return false;
};
}

View File

@@ -1,565 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
#include "cli.h"
#include "cluster_client.h"
#include "base64.h"
// Remove layer(s): similar to merge, but alters metadata and processes multiple merge targets
//
// Exactly one child of the requested layers may be merged using the "inverted" workflow,
// where we merge it "down" into one of the "to-be-removed" layers and then rename the
// "to-be-removed" layer to the child. It may be done either if all writers are stopped
// before trying to delete layers (which is signaled by --writers-stopped) or if that child
// is a read-only layer (snapshot) itself.
//
// This "inverted" workflow trades copying data of one of the deleted layers for copying
// data of one child of the chain which is also a child of the "traded" layer. So we
// choose the (parent,child) pair which has the largest difference between "parent" and
// "child" inode sizes.
//
// All other children of the chain are processed by iterating though them, merging removed
// parents into them and rebasing them to the last layer which isn't a member of the removed
// chain.
//
// Example:
//
// <parent> - <from> - <layer 2> - <to> - <child 1>
// \ \ \- <child 2>
// \ \- <child 3>
// \-<child 4>
//
// 1) Find optimal pair for the "reverse" scenario
// Imagine that it's (<layer 2>, <child 1>) in this example
// 2) Process all children except <child 1>:
// - Merge <from>..<to> to <child 2>
// - Set <child 2> parent to <parent>
// - Repeat for others
// 3) Process <child 1>:
// - Merge <from>..<child 1> to <layer 2>
// - Set <layer 2> parent to <parent>
// - Rename <layer 2> to <child 1>
// 4) Delete other layers of the chain (<from>, <to>)
struct snap_remover_t
{
cli_tool_t *parent;
// remove from..to
std::string from_name, to_name;
// writers are stopped, we can safely change writable layers
bool writers_stopped = false;
// use CAS writes (0 = never, 1 = auto, 2 = always)
int use_cas = 1;
// interval between fsyncs
int fsync_interval = 128;
std::map<inode_t,int> sources;
std::map<inode_t,uint64_t> inode_used;
std::vector<inode_t> merge_children;
std::vector<inode_t> chain_list;
std::map<inode_t,int> inverse_candidates;
inode_t inverse_parent = 0, inverse_child = 0;
inode_t new_parent = 0;
int state = 0;
int current_child = 0;
std::function<bool(void)> cb;
bool is_done()
{
return state == 9;
}
void loop()
{
if (state == 1)
goto resume_1;
else if (state == 2)
goto resume_2;
else if (state == 3)
goto resume_3;
else if (state == 4)
goto resume_4;
else if (state == 5)
goto resume_5;
else if (state == 6)
goto resume_6;
else if (state == 7)
goto resume_7;
else if (state == 8)
goto resume_8;
else if (state == 9)
goto resume_9;
// Get children to merge
get_merge_children();
// Try to select an inode for the "inverse" optimized scenario
// Read statistics from etcd to do it
read_stats();
state = 1;
resume_1:
if (parent->waiting > 0)
return;
choose_inverse_candidate();
// Merge children one by one, except our "inverse" child
for (current_child = 0; current_child < merge_children.size(); current_child++)
{
if (merge_children[current_child] == inverse_child)
continue;
start_merge_child(merge_children[current_child], merge_children[current_child]);
resume_2:
while (!cb())
{
state = 2;
return;
}
cb = NULL;
parent->change_parent(merge_children[current_child], new_parent);
state = 3;
resume_3:
if (parent->waiting > 0)
return;
}
// Merge our "inverse" child into our "inverse" parent
if (inverse_child != 0)
{
start_merge_child(inverse_child, inverse_parent);
resume_4:
while (!cb())
{
state = 4;
return;
}
cb = NULL;
// Delete "inverse" child data
start_delete_source(inverse_child);
resume_5:
while (!cb())
{
state = 5;
return;
}
cb = NULL;
// Delete "inverse" child metadata, rename parent over it,
// and also change parent links of the previous "inverse" child
rename_inverse_parent();
state = 6;
resume_6:
if (parent->waiting > 0)
return;
}
// Delete parents, except the "inverse" one
for (current_child = 0; current_child < chain_list.size(); current_child++)
{
if (chain_list[current_child] == inverse_parent)
continue;
start_delete_source(chain_list[current_child]);
resume_7:
while (!cb())
{
state = 7;
return;
}
cb = NULL;
delete_inode_config(chain_list[current_child]);
state = 8;
resume_8:
if (parent->waiting > 0)
return;
}
state = 9;
resume_9:
// Done
return;
}
void get_merge_children()
{
// Get all children of from..to
inode_config_t *from_cfg = parent->get_inode_cfg(from_name);
inode_config_t *to_cfg = parent->get_inode_cfg(to_name);
// Check that to_cfg is actually a child of from_cfg
// FIXME de-copypaste the following piece of code with snap_merger_t
inode_config_t *cur = to_cfg;
chain_list.push_back(cur->num);
while (cur->num != from_cfg->num && cur->parent_id != 0)
{
auto it = parent->cli->st_cli.inode_config.find(cur->parent_id);
if (it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Parent inode of layer %s (id %ld) not found\n", cur->name.c_str(), cur->parent_id);
exit(1);
}
cur = &it->second;
chain_list.push_back(cur->num);
}
if (cur->num != from_cfg->num)
{
fprintf(stderr, "Layer %s is not a child of %s\n", to_name.c_str(), from_name.c_str());
exit(1);
}
new_parent = from_cfg->parent_id;
// Calculate ranks
int i = chain_list.size()-1;
for (inode_t item: chain_list)
{
sources[item] = i--;
}
for (auto & ic: parent->cli->st_cli.inode_config)
{
if (!ic.second.parent_id)
{
continue;
}
auto it = sources.find(ic.second.parent_id);
if (it != sources.end() && sources.find(ic.second.num) == sources.end())
{
merge_children.push_back(ic.second.num);
if (ic.second.readonly || writers_stopped)
{
inverse_candidates[ic.second.num] = it->second;
}
}
}
}
void read_stats()
{
if (inverse_candidates.size() == 0)
{
return;
}
json11::Json::array reads;
for (auto cp: inverse_candidates)
{
inode_t inode = cp.first;
reads.push_back(json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+
"/inode/stats/"+std::to_string(INODE_POOL(inode))+
"/"+std::to_string(INODE_NO_POOL(inode))
) },
} }
});
}
for (auto cp: sources)
{
inode_t inode = cp.first;
reads.push_back(json11::Json::object {
{ "request_range", json11::Json::object {
{ "key", base64_encode(
parent->cli->st_cli.etcd_prefix+
"/inode/stats/"+std::to_string(INODE_POOL(inode))+
"/"+std::to_string(INODE_NO_POOL(inode))
) },
} }
});
}
parent->waiting++;
parent->cli->st_cli.etcd_txn(json11::Json::object {
{ "success", reads },
}, ETCD_SLOW_TIMEOUT, [this](std::string err, json11::Json data)
{
parent->waiting--;
if (err != "")
{
fprintf(stderr, "Error reading layer statistics from etcd: %s\n", err.c_str());
exit(1);
}
for (auto inode_result: data["responses"].array_items())
{
auto kv = parent->cli->st_cli.parse_etcd_kv(inode_result["kvs"][0]);
pool_id_t pool_id = 0;
inode_t inode = 0;
char null_byte = 0;
sscanf(kv.key.c_str() + parent->cli->st_cli.etcd_prefix.length()+13, "%u/%lu%c", &pool_id, &inode, &null_byte);
if (!inode || null_byte != 0)
{
fprintf(stderr, "Bad key returned from etcd: %s\n", kv.key.c_str());
exit(1);
}
auto pool_cfg_it = parent->cli->st_cli.pool_config.find(pool_id);
if (pool_cfg_it == parent->cli->st_cli.pool_config.end())
{
fprintf(stderr, "Pool %u does not exist\n", pool_id);
exit(1);
}
inode = INODE_WITH_POOL(pool_id, inode);
auto & pool_cfg = pool_cfg_it->second;
uint64_t used_bytes = kv.value["raw_used"].uint64_value() / pool_cfg.pg_size;
if (pool_cfg.scheme != POOL_SCHEME_REPLICATED)
{
used_bytes *= (pool_cfg.pg_size - pool_cfg.parity_chunks);
}
inode_used[inode] = used_bytes;
}
parent->ringloop->wakeup();
});
}
void choose_inverse_candidate()
{
uint64_t max_diff = 0;
for (auto cp: inverse_candidates)
{
inode_t child = cp.first;
uint64_t child_used = inode_used[child];
int rank = cp.second;
for (int i = chain_list.size()-rank; i < chain_list.size(); i++)
{
inode_t parent = chain_list[i];
uint64_t parent_used = inode_used[parent];
if (parent_used > child_used && (!max_diff || max_diff < (parent_used-child_used)))
{
max_diff = (parent_used-child_used);
inverse_parent = parent;
inverse_child = child;
}
}
}
}
void rename_inverse_parent()
{
auto child_it = parent->cli->st_cli.inode_config.find(inverse_child);
if (child_it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode %ld disappeared\n", inverse_child);
exit(1);
}
auto target_it = parent->cli->st_cli.inode_config.find(inverse_parent);
if (target_it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode %ld disappeared\n", inverse_parent);
exit(1);
}
inode_config_t *child_cfg = &child_it->second;
inode_config_t *target_cfg = &target_it->second;
std::string child_name = child_cfg->name;
std::string target_name = target_cfg->name;
std::string child_cfg_key = base64_encode(
parent->cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(inverse_child))+
"/"+std::to_string(INODE_NO_POOL(inverse_child))
);
std::string target_cfg_key = base64_encode(
parent->cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(inverse_parent))+
"/"+std::to_string(INODE_NO_POOL(inverse_parent))
);
// Fill new configuration
inode_config_t new_cfg = *child_cfg;
new_cfg.num = target_cfg->num;
new_cfg.parent_id = new_parent;
json11::Json::array cmp = json11::Json::array {
json11::Json::object {
{ "target", "MOD" },
{ "key", child_cfg_key },
{ "result", "LESS" },
{ "mod_revision", child_cfg->mod_revision+1 },
},
json11::Json::object {
{ "target", "MOD" },
{ "key", target_cfg_key },
{ "result", "LESS" },
{ "mod_revision", target_cfg->mod_revision+1 },
},
};
json11::Json::array txn = json11::Json::array {
json11::Json::object {
{ "request_delete_range", json11::Json::object {
{ "key", child_cfg_key },
} },
},
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", target_cfg_key },
{ "value", base64_encode(json11::Json(parent->cli->st_cli.serialize_inode_cfg(&new_cfg)).dump()) },
} },
},
json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+child_cfg->name) },
{ "value", base64_encode(json11::Json({
{ "id", INODE_NO_POOL(inverse_parent) },
{ "pool_id", (uint64_t)INODE_POOL(inverse_parent) },
}).dump()) },
} },
},
};
// Reparent children of inverse_child
for (auto & cp: parent->cli->st_cli.inode_config)
{
if (cp.second.parent_id == child_cfg->num)
{
auto cp_cfg = cp.second;
cp_cfg.parent_id = inverse_parent;
auto cp_key = base64_encode(
parent->cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(cp.second.num))+
"/"+std::to_string(INODE_NO_POOL(cp.second.num))
);
cmp.push_back(json11::Json::object {
{ "target", "MOD" },
{ "key", cp_key },
{ "result", "LESS" },
{ "mod_revision", cp.second.mod_revision+1 },
});
txn.push_back(json11::Json::object {
{ "request_put", json11::Json::object {
{ "key", cp_key },
{ "value", base64_encode(json11::Json(parent->cli->st_cli.serialize_inode_cfg(&cp_cfg)).dump()) },
} },
});
}
}
parent->waiting++;
parent->cli->st_cli.etcd_txn(json11::Json::object {
{ "compare", cmp },
{ "success", txn },
}, ETCD_SLOW_TIMEOUT, [this, target_name, child_name](std::string err, json11::Json res)
{
parent->waiting--;
if (err != "")
{
fprintf(stderr, "Error renaming %s to %s: %s\n", target_name.c_str(), child_name.c_str(), err.c_str());
exit(1);
}
if (!res["succeeded"].bool_value())
{
fprintf(
stderr, "Parent (%s), child (%s), or one of its children"
" configuration was modified during rename\n", target_name.c_str(), child_name.c_str()
);
exit(1);
}
printf("Layer %s renamed to %s\n", target_name.c_str(), child_name.c_str());
parent->ringloop->wakeup();
});
}
void delete_inode_config(inode_t cur)
{
auto cur_cfg_it = parent->cli->st_cli.inode_config.find(cur);
if (cur_cfg_it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode 0x%lx disappeared\n", cur);
exit(1);
}
inode_config_t *cur_cfg = &cur_cfg_it->second;
std::string cur_name = cur_cfg->name;
std::string cur_cfg_key = base64_encode(
parent->cli->st_cli.etcd_prefix+
"/config/inode/"+std::to_string(INODE_POOL(cur))+
"/"+std::to_string(INODE_NO_POOL(cur))
);
parent->waiting++;
parent->cli->st_cli.etcd_txn(json11::Json::object {
{ "compare", json11::Json::array {
json11::Json::object {
{ "target", "MOD" },
{ "key", cur_cfg_key },
{ "result", "LESS" },
{ "mod_revision", cur_cfg->mod_revision+1 },
},
} },
{ "success", json11::Json::array {
json11::Json::object {
{ "request_delete_range", json11::Json::object {
{ "key", cur_cfg_key },
} },
{ "request_delete_range", json11::Json::object {
{ "key", base64_encode(parent->cli->st_cli.etcd_prefix+"/index/image/"+cur_name) },
} },
},
} },
}, ETCD_SLOW_TIMEOUT, [this, cur_name](std::string err, json11::Json res)
{
parent->waiting--;
if (err != "")
{
fprintf(stderr, "Error deleting %s: %s\n", cur_name.c_str(), err.c_str());
exit(1);
}
if (!res["succeeded"].bool_value())
{
fprintf(stderr, "Layer %s configuration was modified during deletion\n", cur_name.c_str());
exit(1);
}
printf("Layer %s deleted\n", cur_name.c_str());
parent->ringloop->wakeup();
});
}
void start_merge_child(inode_t child_inode, inode_t target_inode)
{
auto child_it = parent->cli->st_cli.inode_config.find(child_inode);
if (child_it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode %ld disappeared\n", child_inode);
exit(1);
}
auto target_it = parent->cli->st_cli.inode_config.find(target_inode);
if (target_it == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode %ld disappeared\n", target_inode);
exit(1);
}
cb = parent->start_merge(json11::Json::object {
{ "command", json11::Json::array{ "merge-data", from_name, child_it->second.name } },
{ "target", target_it->second.name },
{ "delete-source", false },
{ "cas", use_cas },
{ "fsync-interval", fsync_interval },
});
}
void start_delete_source(inode_t inode)
{
auto source = parent->cli->st_cli.inode_config.find(inode);
if (source == parent->cli->st_cli.inode_config.end())
{
fprintf(stderr, "Inode %ld disappeared\n", inode);
exit(1);
}
cb = parent->start_rm(json11::Json::object {
{ "inode", inode },
{ "pool", (uint64_t)INODE_POOL(inode) },
{ "fsync-interval", fsync_interval },
});
}
};
std::function<bool(void)> cli_tool_t::start_snap_rm(json11::Json cfg)
{
json11::Json::array cmd = cfg["command"].array_items();
auto snap_remover = new snap_remover_t();
snap_remover->parent = this;
snap_remover->from_name = cmd.size() > 1 ? cmd[1].string_value() : "";
snap_remover->to_name = cmd.size() > 2 ? cmd[2].string_value() : "";
if (snap_remover->from_name == "")
{
fprintf(stderr, "Layer to remove argument is missing\n");
exit(1);
}
if (snap_remover->to_name == "")
{
snap_remover->to_name = snap_remover->from_name;
}
snap_remover->fsync_interval = cfg["fsync-interval"].uint64_value();
if (!snap_remover->fsync_interval)
snap_remover->fsync_interval = 128;
if (!cfg["cas"].is_null())
snap_remover->use_cas = cfg["cas"].uint64_value() ? 2 : 0;
if (!cfg["writers_stopped"].is_null())
snap_remover->writers_stopped = true;
return [snap_remover]()
{
snap_remover->loop();
if (snap_remover->is_done())
{
delete snap_remover;
return true;
}
return false;
};
}

View File

@@ -12,7 +12,7 @@
#define CACHE_DIRTY 1
#define CACHE_FLUSHING 2
#define CACHE_REPEATING 3
#define OP_FLUSH_BUFFER 0x02
#define OP_FLUSH_BUFFER 2
cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd, json11::Json & config)
{
@@ -31,7 +31,6 @@ cluster_client_t::cluster_client_t(ring_loop_t *ringloop, timerfd_manager_t *tfd
{
// peer_osd just connected
continue_ops();
continue_lists();
}
else if (dirty_buffers.size())
{
@@ -140,7 +139,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
if (!op->prev_wait && pgs_loaded)
continue_sync(op);
}
else /* if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) */
else
{
for (auto prev = op->prev; prev; prev = prev->prev)
{
@@ -148,7 +147,7 @@ void cluster_client_t::calc_wait(cluster_op_t *op)
{
op->prev_wait++;
}
else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ || prev->opcode == OSD_OP_READ_BITMAP)
else if (prev->opcode == OSD_OP_WRITE || prev->opcode == OSD_OP_READ)
{
// Flushes are always in the beginning
break;
@@ -168,7 +167,7 @@ void cluster_client_t::inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *n
auto n2 = next->next;
if (next->opcode == OSD_OP_SYNC ||
next->opcode == OSD_OP_WRITE && (flags & OP_FLUSH_BUFFER) && !(next->flags & OP_FLUSH_BUFFER) ||
(next->opcode == OSD_OP_READ || next->opcode == OSD_OP_READ_BITMAP) && (flags & OP_FLUSH_BUFFER))
next->opcode == OSD_OP_READ && (flags & OP_FLUSH_BUFFER))
{
next->prev_wait += inc;
if (!next->prev_wait)
@@ -358,7 +357,7 @@ void cluster_client_t::on_change_hook(std::map<std::string, etcd_kv_t> & changes
// And now they have to be resliced!
for (auto op = op_queue_head; op; op = op->next)
{
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP) &&
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_READ) &&
INODE_POOL(op->cur_inode) == pool_item.first)
{
op->needs_reslice = true;
@@ -418,8 +417,7 @@ void cluster_client_t::on_ready(std::function<void(void)> fn)
*/
void cluster_client_t::execute(cluster_op_t *op)
{
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ &&
op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_WRITE)
if (op->opcode != OSD_OP_SYNC && op->opcode != OSD_OP_READ && op->opcode != OSD_OP_WRITE)
{
op->retval = -EINVAL;
std::function<void(cluster_op_t*)>(op->callback)(op);
@@ -559,7 +557,7 @@ void cluster_client_t::flush_buffer(const object_id & oid, cluster_buffer_t *wr)
{
wr->state = CACHE_REPEATING;
cluster_op_t *op = new cluster_op_t;
op->flags = OSD_OP_IGNORE_READONLY|OP_FLUSH_BUFFER;
op->flags = OP_FLUSH_BUFFER;
op->opcode = OSD_OP_WRITE;
op->cur_inode = op->inode = oid.inode;
op->offset = oid.stripe;
@@ -596,8 +594,7 @@ int cluster_client_t::continue_rw(cluster_op_t *op)
else if (op->state == 3)
goto resume_3;
resume_0:
if ((op->opcode == OSD_OP_READ || op->opcode == OSD_OP_WRITE) && !op->len ||
op->offset % bs_bitmap_granularity || op->len % bs_bitmap_granularity)
if (!op->len || op->offset % bs_bitmap_granularity || op->len % bs_bitmap_granularity)
{
op->retval = -EINVAL;
erase_op(op);
@@ -618,19 +615,16 @@ resume_0:
return 0;
}
}
if (op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE)
if (op->opcode == OSD_OP_WRITE)
{
if (!(op->flags & OSD_OP_IGNORE_READONLY))
auto ino_it = st_cli.inode_config.find(op->inode);
if (ino_it != st_cli.inode_config.end() && ino_it->second.readonly)
{
auto ino_it = st_cli.inode_config.find(op->inode);
if (ino_it != st_cli.inode_config.end() && ino_it->second.readonly)
{
op->retval = -EINVAL;
erase_op(op);
return 1;
}
op->retval = -EINVAL;
erase_op(op);
return 1;
}
if (op->opcode == OSD_OP_WRITE && !immediate_commit && !(op->flags & OP_FLUSH_BUFFER))
if (!immediate_commit && !(op->flags & OP_FLUSH_BUFFER))
{
copy_write(op, dirty_buffers);
}
@@ -639,7 +633,7 @@ resume_1:
// Slice the operation into parts
slice_rw(op);
op->needs_reslice = false;
if ((op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE) && op->version && op->parts.size() > 1)
if (op->opcode == OSD_OP_WRITE && op->version && op->parts.size() > 1)
{
// Atomic writes to multiple stripes are unsupported
op->retval = -EINVAL;
@@ -799,13 +793,13 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
uint32_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
uint64_t pg_block_size = bs_block_size * pg_data_size;
uint64_t first_stripe = (op->offset / pg_block_size) * pg_block_size;
uint64_t last_stripe = op->len > 0 ? ((op->offset + op->len - 1) / pg_block_size) * pg_block_size : first_stripe;
uint64_t last_stripe = ((op->offset + op->len + pg_block_size - 1) / pg_block_size - 1) * pg_block_size;
op->retval = 0;
op->parts.resize((last_stripe - first_stripe) / pg_block_size + 1);
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
if (op->opcode == OSD_OP_READ)
{
// Allocate memory for the bitmap
unsigned object_bitmap_size = (((op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : op->len) / bs_bitmap_granularity + 7) / 8);
unsigned object_bitmap_size = ((op->len / bs_bitmap_granularity + 7) / 8);
object_bitmap_size = (object_bitmap_size < 8 ? 8 : object_bitmap_size);
unsigned bitmap_mem = object_bitmap_size + (bs_bitmap_size * pg_data_size) * op->parts.size();
if (op->bitmap_buf_size < bitmap_mem)
@@ -869,13 +863,13 @@ void cluster_client_t::slice_rw(cluster_op_t *op)
if (end == begin)
op->done_count++;
}
else if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
else
{
add_iov(end-begin, false, op, iov_idx, iov_pos, op->parts[i].iov, NULL, 0);
}
op->parts[i].parent = op;
op->parts[i].offset = begin;
op->parts[i].len = op->opcode == OSD_OP_READ_BITMAP || op->opcode == OSD_OP_DELETE ? 0 : (uint32_t)(end - begin);
op->parts[i].len = (uint32_t)(end - begin);
op->parts[i].pg_num = pg_num;
op->parts[i].osd_num = 0;
op->parts[i].flags = 0;
@@ -889,7 +883,7 @@ bool cluster_client_t::affects_osd(uint64_t inode, uint64_t offset, uint64_t len
uint32_t pg_data_size = (pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks);
uint64_t pg_block_size = bs_block_size * pg_data_size;
uint64_t first_stripe = (offset / pg_block_size) * pg_block_size;
uint64_t last_stripe = len > 0 ? ((offset + len - 1) / pg_block_size) * pg_block_size : first_stripe;
uint64_t last_stripe = ((offset + len + pg_block_size - 1) / pg_block_size - 1) * pg_block_size;
for (uint64_t stripe = first_stripe; stripe <= last_stripe; stripe += pg_block_size)
{
pg_num_t pg_num = (stripe/pool_cfg.pg_stripe_size) % pool_cfg.real_pg_count + 1; // like map_to_pg()
@@ -922,12 +916,9 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
pool_cfg.scheme == POOL_SCHEME_REPLICATED ? 1 : pool_cfg.pg_size-pool_cfg.parity_chunks
);
uint64_t meta_rev = 0;
if (op->opcode != OSD_OP_READ_BITMAP && op->opcode != OSD_OP_DELETE)
{
auto ino_it = st_cli.inode_config.find(op->inode);
if (ino_it != st_cli.inode_config.end())
meta_rev = ino_it->second.mod_revision;
}
auto ino_it = st_cli.inode_config.find(op->inode);
if (ino_it != st_cli.inode_config.end())
meta_rev = ino_it->second.mod_revision;
part->op = (osd_op_t){
.op_type = OSD_OP_OUT,
.peer_fd = peer_fd,
@@ -935,16 +926,16 @@ bool cluster_client_t::try_send(cluster_op_t *op, int i)
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,
.id = op_id++,
.opcode = op->opcode == OSD_OP_READ_BITMAP ? OSD_OP_READ : op->opcode,
.opcode = op->opcode,
},
.inode = op->cur_inode,
.offset = part->offset,
.len = part->len,
.meta_revision = meta_rev,
.version = op->opcode == OSD_OP_WRITE || op->opcode == OSD_OP_DELETE ? op->version : 0,
.version = op->opcode == OSD_OP_WRITE ? op->version : 0,
} },
.bitmap = (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? op->part_bitmaps + pg_bitmap_size*i : NULL),
.bitmap_len = (unsigned)(op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP ? pg_bitmap_size : 0),
.bitmap = op->opcode == OSD_OP_WRITE ? NULL : op->part_bitmaps + pg_bitmap_size*i,
.bitmap_len = (unsigned)(op->opcode == OSD_OP_WRITE ? 0 : pg_bitmap_size),
.callback = [this, part](osd_op_t *op_part)
{
handle_op_part(part);
@@ -1126,7 +1117,7 @@ void cluster_client_t::handle_op_part(cluster_op_part_t *part)
dirty_osds.insert(part->osd_num);
part->flags |= PART_DONE;
op->done_count++;
if (op->opcode == OSD_OP_READ || op->opcode == OSD_OP_READ_BITMAP)
if (op->opcode == OSD_OP_READ)
{
copy_part_bitmap(op, part);
op->version = op->parts.size() == 1 ? part->op.reply.rw.version : 0;
@@ -1150,7 +1141,7 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
);
uint32_t object_offset = (part->op.req.rw.offset - op->offset) / bs_bitmap_granularity;
uint32_t part_offset = (part->op.req.rw.offset % pg_block_size) / bs_bitmap_granularity;
uint32_t part_len = (op->opcode == OSD_OP_READ_BITMAP ? pg_block_size : part->op.req.rw.len) / bs_bitmap_granularity;
uint32_t part_len = part->op.req.rw.len / bs_bitmap_granularity;
if (!(object_offset & 0x7) && !(part_offset & 0x7) && (part_len >= 8))
{
// Copy bytes
@@ -1170,8 +1161,3 @@ void cluster_client_t::copy_part_bitmap(cluster_op_t *op, cluster_op_part_t *par
part_len--;
}
}
uint64_t cluster_client_t::next_op_id()
{
return op_id++;
}

View File

@@ -10,11 +10,6 @@
#define MAX_BLOCK_SIZE 128*1024*1024
#define DEFAULT_CLIENT_MAX_DIRTY_BYTES 32*1024*1024
#define DEFAULT_CLIENT_MAX_DIRTY_OPS 1024
#define INODE_LIST_DONE 1
#define INODE_LIST_HAS_UNSTABLE 2
#define OSD_OP_READ_BITMAP OSD_OP_SEC_READ_BMP
#define OSD_OP_IGNORE_READONLY 0x08
struct cluster_op_t;
@@ -32,22 +27,19 @@ struct cluster_op_part_t
struct cluster_op_t
{
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC, OSD_OP_DELETE, OSD_OP_READ_BITMAP
uint64_t opcode; // OSD_OP_READ, OSD_OP_WRITE, OSD_OP_SYNC
uint64_t inode;
uint64_t offset;
uint64_t len;
// for reads and writes within a single object (stripe),
// reads can return current version and writes can use "CAS" semantics
uint64_t version = 0;
// now only OSD_OP_IGNORE_READONLY is supported
uint64_t flags = 0;
int retval;
osd_op_buf_list_t iov;
// READ and READ_BITMAP return the bitmap here
void *bitmap_buf = NULL;
std::function<void(cluster_op_t*)> callback;
~cluster_op_t();
protected:
uint64_t flags = 0;
int state = 0;
uint64_t cur_inode; // for snapshot reads
void *buf = NULL;
@@ -56,7 +48,7 @@ protected:
bool up_wait = false;
int inflight_count = 0, done_count = 0;
std::vector<cluster_op_part_t> parts;
void *part_bitmaps = NULL;
void *bitmap_buf = NULL, *part_bitmaps = NULL;
unsigned bitmap_buf_size = 0;
cluster_op_t *prev = NULL, *next = NULL;
int prev_wait = 0;
@@ -70,9 +62,6 @@ struct cluster_buffer_t
int state;
};
struct inode_list_t;
struct inode_list_osd_t;
// FIXME: Split into public and private interfaces
class cluster_client_t
{
@@ -104,7 +93,6 @@ class cluster_client_t
bool pgs_loaded = false;
ring_consumer_t consumer;
std::vector<std::function<void(void)>> on_ready_hooks;
std::vector<inode_list_t*> lists;
int continuing_ops = 0;
public:
@@ -120,14 +108,6 @@ public:
static void copy_write(cluster_op_t *op, std::map<object_id, cluster_buffer_t> & dirty_buffers);
void continue_ops(bool up_retry = false);
inode_list_t *list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback);
int list_pg_count(inode_list_t *lst);
void list_inode_next(inode_list_t *lst, int next_pgs);
inline uint32_t get_bs_bitmap_granularity() { return bs_bitmap_granularity; }
inline uint64_t get_bs_block_size() { return bs_block_size; }
uint64_t next_op_id();
protected:
bool affects_osd(uint64_t inode, uint64_t offset, uint64_t len, osd_num_t osd);
void flush_buffer(const object_id & oid, cluster_buffer_t *wr);
@@ -145,7 +125,4 @@ protected:
void erase_op(cluster_op_t *op);
void calc_wait(cluster_op_t *op);
void inc_wait(uint64_t opcode, uint64_t flags, cluster_op_t *next, int inc);
void continue_lists();
void continue_listing(inode_list_t *lst);
void send_list(inode_list_osd_t *cur_list);
};

View File

@@ -1,285 +0,0 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 or GNU GPL-2.0+ (see README.md for details)
#include <algorithm>
#include "pg_states.h"
#include "cluster_client.h"
struct inode_list_t;
struct inode_list_pg_t;
struct inode_list_osd_t
{
inode_list_pg_t *pg = NULL;
osd_num_t osd_num = 0;
bool sent = false;
};
struct inode_list_pg_t
{
inode_list_t *lst = NULL;
int pos = 0;
pg_num_t pg_num;
osd_num_t cur_primary;
bool has_unstable = false;
int sent = 0;
int done = 0;
std::vector<inode_list_osd_t> list_osds;
std::set<object_id> objects;
};
struct inode_list_t
{
cluster_client_t *cli = NULL;
pool_id_t pool_id = 0;
inode_t inode = 0;
int done_pgs = 0;
int want = 0;
std::vector<inode_list_pg_t*> pgs;
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback;
};
inode_list_t* cluster_client_t::list_inode_start(inode_t inode,
std::function<void(inode_list_t* lst, std::set<object_id>&& objects, pg_num_t pg_num, osd_num_t primary_osd, int status)> callback)
{
int skipped_pgs = 0;
pool_id_t pool_id = INODE_POOL(inode);
if (!pool_id || st_cli.pool_config.find(pool_id) == st_cli.pool_config.end())
{
if (log_level > 0)
{
fprintf(stderr, "Pool %u does not exist\n", pool_id);
}
return NULL;
}
inode_list_t *lst = new inode_list_t();
lst->cli = this;
lst->pool_id = pool_id;
lst->inode = inode;
lst->callback = callback;
auto pool_cfg = st_cli.pool_config[pool_id];
for (auto & pg_item: pool_cfg.pg_config)
{
auto & pg = pg_item.second;
if (pg.pause || !pg.cur_primary || !(pg.cur_state & PG_ACTIVE))
{
skipped_pgs++;
if (log_level > 0)
{
fprintf(stderr, "PG %u is inactive, skipping\n", pg_item.first);
}
continue;
}
inode_list_pg_t *r = new inode_list_pg_t();
r->lst = lst;
r->pg_num = pg_item.first;
r->cur_primary = pg.cur_primary;
if (pg.cur_state != PG_ACTIVE)
{
// Not clean
std::set<osd_num_t> all_peers;
for (osd_num_t pg_osd: pg.target_set)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
for (osd_num_t pg_osd: pg.all_peers)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
for (auto & hist_item: pg.target_history)
{
for (auto pg_osd: hist_item)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
}
for (osd_num_t peer_osd: all_peers)
{
r->list_osds.push_back((inode_list_osd_t){
.pg = r,
.osd_num = peer_osd,
.sent = false,
});
}
}
else
{
// Clean
r->list_osds.push_back((inode_list_osd_t){
.pg = r,
.osd_num = pg.cur_primary,
.sent = false,
});
}
lst->pgs.push_back(r);
}
std::sort(lst->pgs.begin(), lst->pgs.end(), [](inode_list_pg_t *a, inode_list_pg_t *b)
{
return a->cur_primary < b->cur_primary ? true : false;
});
for (int i = 0; i < lst->pgs.size(); i++)
{
lst->pgs[i]->pos = i;
}
lists.push_back(lst);
return lst;
}
int cluster_client_t::list_pg_count(inode_list_t *lst)
{
return lst->pgs.size();
}
void cluster_client_t::list_inode_next(inode_list_t *lst, int next_pgs)
{
if (next_pgs >= 0)
{
lst->want += next_pgs;
}
continue_listing(lst);
}
void cluster_client_t::continue_listing(inode_list_t *lst)
{
if (lst->done_pgs >= lst->pgs.size())
{
// All done
for (int i = 0; i < lists.size(); i++)
{
if (lists[i] == lst)
{
lists.erase(lists.begin()+i, lists.begin()+i+1);
break;
}
}
delete lst;
return;
}
if (lst->want <= 0)
{
return;
}
for (int i = 0; i < lst->pgs.size(); i++)
{
if (lst->pgs[i] && lst->pgs[i]->sent < lst->pgs[i]->list_osds.size())
{
for (int j = 0; j < lst->pgs[i]->list_osds.size(); j++)
{
send_list(&lst->pgs[i]->list_osds[j]);
if (lst->want <= 0)
{
break;
}
}
}
}
}
void cluster_client_t::send_list(inode_list_osd_t *cur_list)
{
if (cur_list->sent)
{
return;
}
if (msgr.osd_peer_fds.find(cur_list->osd_num) == msgr.osd_peer_fds.end())
{
// Initiate connection
msgr.connect_peer(cur_list->osd_num, st_cli.peer_states[cur_list->osd_num]);
return;
}
auto & pool_cfg = st_cli.pool_config[cur_list->pg->lst->pool_id];
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
op->peer_fd = msgr.osd_peer_fds[cur_list->osd_num];
op->req = (osd_any_op_t){
.sec_list = {
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,
.id = op_id++,
.opcode = OSD_OP_SEC_LIST,
},
.list_pg = cur_list->pg->pg_num,
.pg_count = (pg_num_t)pool_cfg.real_pg_count,
.pg_stripe_size = pool_cfg.pg_stripe_size,
.min_inode = cur_list->pg->lst->inode,
.max_inode = cur_list->pg->lst->inode,
},
};
op->callback = [this, cur_list](osd_op_t *op)
{
if (op->reply.hdr.retval < 0)
{
fprintf(stderr, "Failed to get PG %u/%u object list from OSD %lu (retval=%ld), skipping\n",
cur_list->pg->lst->pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval);
}
else
{
if (op->reply.sec_list.stable_count < op->reply.hdr.retval)
{
// Unstable objects, if present, mean that someone still writes into the inode. Warn the user about it.
cur_list->pg->has_unstable = true;
fprintf(
stderr, "[PG %u/%u] Inode still has %lu unstable object versions out of total %lu - is it still open?\n",
cur_list->pg->lst->pool_id, cur_list->pg->pg_num, op->reply.hdr.retval - op->reply.sec_list.stable_count,
op->reply.hdr.retval
);
}
if (log_level > 0)
{
fprintf(
stderr, "[PG %u/%u] Got inode object list from OSD %lu: %ld object versions\n",
cur_list->pg->lst->pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval
);
}
for (uint64_t i = 0; i < op->reply.hdr.retval; i++)
{
object_id oid = ((obj_ver_id*)op->buf)[i].oid;
oid.stripe = oid.stripe & ~STRIPE_MASK;
cur_list->pg->objects.insert(oid);
}
}
delete op;
auto lst = cur_list->pg->lst;
auto pg = cur_list->pg;
pg->done++;
if (pg->done >= pg->list_osds.size())
{
int status = 0;
lst->done_pgs++;
if (lst->done_pgs >= lst->pgs.size())
{
status |= INODE_LIST_DONE;
}
if (pg->has_unstable)
{
status |= INODE_LIST_HAS_UNSTABLE;
}
lst->callback(lst, std::move(pg->objects), pg->pg_num, pg->cur_primary, status);
lst->pgs[pg->pos] = NULL;
delete pg;
}
continue_listing(lst);
};
msgr.outbox_push(op);
cur_list->sent = true;
cur_list->pg->sent++;
cur_list->pg->lst->want--;
}
void cluster_client_t::continue_lists()
{
for (auto lst: lists)
{
continue_listing(lst);
}
}

View File

@@ -765,22 +765,3 @@ void etcd_state_client_t::close_watch(inode_watch_t* watch)
}
delete watch;
}
json11::Json::object etcd_state_client_t::serialize_inode_cfg(inode_config_t *cfg)
{
json11::Json::object new_cfg = json11::Json::object {
{ "name", cfg->name },
{ "size", cfg->size },
};
if (cfg->parent_id)
{
if (INODE_POOL(cfg->num) != INODE_POOL(cfg->parent_id))
new_cfg["parent_pool"] = (uint64_t)INODE_POOL(cfg->parent_id);
new_cfg["parent_id"] = (uint64_t)INODE_NO_POOL(cfg->parent_id);
}
if (cfg->readonly)
{
new_cfg["readonly"] = true;
}
return new_cfg;
}

View File

@@ -99,7 +99,6 @@ public:
std::function<void(pool_id_t, pg_num_t)> on_change_pg_history_hook;
std::function<void(osd_num_t)> on_change_osd_state_hook;
json11::Json::object serialize_inode_cfg(inode_config_t *cfg);
etcd_kv_t parse_etcd_kv(const json11::Json & kv_json);
void etcd_call(std::string api, json11::Json payload, int timeout, std::function<void(std::string, json11::Json)> callback);
void etcd_txn(json11::Json txn, int timeout, std::function<void(std::string, json11::Json)> callback);

View File

@@ -17,12 +17,11 @@ void osd_messenger_t::init()
{
rdma_context = msgr_rdma_context_t::create(
rdma_device != "" ? rdma_device.c_str() : NULL,
rdma_port_num, rdma_gid_index, rdma_mtu, log_level
rdma_port_num, rdma_gid_index, rdma_mtu
);
if (!rdma_context)
{
if (log_level > 0)
fprintf(stderr, "[OSD %lu] Couldn't initialize RDMA, proceeding with TCP only\n", osd_num);
fprintf(stderr, "[OSD %lu] Couldn't initialize RDMA, proceeding with TCP only\n", osd_num);
}
else
{
@@ -118,7 +117,7 @@ osd_messenger_t::~osd_messenger_t()
}
while (clients.size() > 0)
{
stop_client(clients.begin()->first, true, true);
stop_client(clients.begin()->first, true);
}
#ifdef WITH_RDMA
if (rdma_context)

View File

@@ -156,7 +156,7 @@ public:
void init();
void parse_config(const json11::Json & config);
void connect_peer(uint64_t osd_num, json11::Json peer_state);
void stop_client(int peer_fd, bool force = false, bool force_delete = false);
void stop_client(int peer_fd, bool force = false);
void outbox_push(osd_op_t *cur_op);
std::function<void(osd_op_t*)> exec_op;
std::function<void(osd_num_t)> repeer_pgs;

View File

@@ -15,7 +15,7 @@ osd_messenger_t::~osd_messenger_t()
{
while (clients.size() > 0)
{
stop_client(clients.begin()->first, true, true);
stop_client(clients.begin()->first, true);
}
}

View File

@@ -46,19 +46,15 @@ msgr_rdma_connection_t::~msgr_rdma_connection_t()
ctx->used_max_cqe -= max_send+max_recv;
if (qp)
ibv_destroy_qp(qp);
if (recv_buffers.size())
for (auto b: recv_buffers)
free(b);
}
msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, int log_level)
msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu)
{
int res;
ibv_device **dev_list = NULL;
msgr_rdma_context_t *ctx = new msgr_rdma_context_t();
ctx->mtu = mtu;
srand48(time(NULL));
dev_list = ibv_get_device_list(NULL);
if (!dev_list)
{
@@ -70,8 +66,7 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t
ctx->dev = *dev_list;
if (!ctx->dev)
{
if (log_level > 0)
fprintf(stderr, "No RDMA devices found\n");
fprintf(stderr, "No RDMA devices found\n");
goto cleanup;
}
}
@@ -482,11 +477,7 @@ void osd_messenger_t::handle_rdma_events()
if (!is_send)
{
cl->rdma_conn->cur_recv--;
if (!handle_read_buffer(cl, cl->rdma_conn->recv_buffers[0], wc[i].byte_len))
{
// handle_read_buffer may stop the client
continue;
}
handle_read_buffer(cl, cl->rdma_conn->recv_buffers[0], wc[i].byte_len);
free(cl->rdma_conn->recv_buffers[0]);
cl->rdma_conn->recv_buffers.erase(cl->rdma_conn->recv_buffers.begin(), cl->rdma_conn->recv_buffers.begin()+1);
try_recv_rdma(cl);

View File

@@ -35,7 +35,7 @@ struct msgr_rdma_context_t
int max_cqe = 0;
int used_max_cqe = 0;
static msgr_rdma_context_t *create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, int log_level);
static msgr_rdma_context_t *create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu);
~msgr_rdma_context_t();
};

View File

@@ -41,7 +41,7 @@ void osd_messenger_t::cancel_op(osd_op_t *op)
}
}
void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
void osd_messenger_t::stop_client(int peer_fd, bool force)
{
assert(peer_fd != 0);
auto it = clients.find(peer_fd);
@@ -136,7 +136,7 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete)
clients.erase(it);
}
cl->refs--;
if (cl->refs <= 0 || force_delete)
if (cl->refs <= 0)
{
delete cl;
}

View File

@@ -100,7 +100,7 @@ public:
}
unmap(cfg["dev_num"].uint64_value());
}
else if (cfg["command"] == "ls" || cfg["command"] == "list" || cfg["command"] == "list-mapped")
else if (cfg["command"] == "list" || cfg["command"] == "list-mapped")
{
auto mapped = list_mapped();
print_mapped(mapped, !cfg["json"].is_null());
@@ -119,7 +119,7 @@ public:
"USAGE:\n"
" %s map [--etcd_address <etcd_address>] (--image <image> | --pool <pool> --inode <inode> --size <size in bytes>)\n"
" %s unmap /dev/nbd0\n"
" %s ls [--json]\n",
" %s list [--json]\n",
exe_name, exe_name, exe_name
);
exit(0);
@@ -402,7 +402,7 @@ public:
printf("%s\n", dev.first.c_str());
for (auto & k: dev.second.object_items())
{
printf("%s: %s\n", k.first.c_str(), k.second.as_string().c_str());
printf("%s: %s\n", k.first.c_str(), k.second.string_value().c_str());
}
printf("\n");
}

View File

@@ -7,8 +7,6 @@
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include "blockstore_impl.h"
#include "osd_primary.h"
#include "osd.h"
#include "http_client.h"
@@ -45,12 +43,6 @@ osd_t::osd_t(const json11::Json & config, ring_loop_t *ringloop)
// FIXME: Create Blockstore from on-disk superblock config and check it against the OSD cluster config
auto bs_cfg = json_to_bs(this->config);
this->bs = new blockstore_t(bs_cfg, ringloop, tfd);
{
// Autosync based on the number of unstable writes to prevent stalls due to insufficient journal space
uint64_t max_autosync = bs->get_journal_size() / bs->get_block_size() / 2;
if (autosync_writes > max_autosync)
autosync_writes = max_autosync;
}
this->tfd->set_timer(print_stats_interval*1000, true, [this](int timer_id)
{
@@ -110,7 +102,7 @@ void osd_t::parse_config(const json11::Json & config)
log_level = config["log_level"].uint64_value();
etcd_report_interval = config["etcd_report_interval"].uint64_value();
if (etcd_report_interval <= 0)
etcd_report_interval = 5;
etcd_report_interval = 30;
readonly = config["readonly"] == "true" || config["readonly"] == "1" || config["readonly"] == "yes";
run_primary = config["run_primary"] != "false" && config["run_primary"] != "0" && config["run_primary"] != "no";
no_rebalance = config["no_rebalance"] == "true" || config["no_rebalance"] == "1" || config["no_rebalance"] == "yes";
@@ -129,11 +121,6 @@ void osd_t::parse_config(const json11::Json & config)
if (autosync_interval > MAX_AUTOSYNC_INTERVAL)
autosync_interval = DEFAULT_AUTOSYNC_INTERVAL;
}
if (!config["autosync_writes"].is_null())
{
// Allow to set it to 0
autosync_writes = config["autosync_writes"].uint64_value();
}
if (!config["client_queue_depth"].is_null())
{
client_queue_depth = config["client_queue_depth"].uint64_value();
@@ -378,7 +365,6 @@ void osd_t::print_stats()
void osd_t::print_slow()
{
bool has_slow = false;
char alloc[1024];
timespec now;
clock_gettime(CLOCK_REALTIME, &now);
@@ -440,32 +426,9 @@ void osd_t::print_slow()
{
bufprintf(" inode=%lx offset=%lx len=%x", op->req.rw.inode, op->req.rw.offset, op->req.rw.len);
}
if (op->req.hdr.opcode == OSD_OP_SEC_READ || op->req.hdr.opcode == OSD_OP_SEC_WRITE ||
op->req.hdr.opcode == OSD_OP_SEC_WRITE_STABLE || op->req.hdr.opcode == OSD_OP_SEC_DELETE ||
op->req.hdr.opcode == OSD_OP_SEC_SYNC || op->req.hdr.opcode == OSD_OP_SEC_LIST ||
op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK ||
op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
{
bufprintf(" state=%d", PRIV(op->bs_op)->op_state);
int wait_for = PRIV(op->bs_op)->wait_for;
if (wait_for)
{
bufprintf(" wait=%d (detail=%lu)", wait_for, PRIV(op->bs_op)->wait_detail);
}
}
else if (op->req.hdr.opcode == OSD_OP_READ || op->req.hdr.opcode == OSD_OP_WRITE ||
op->req.hdr.opcode == OSD_OP_SYNC || op->req.hdr.opcode == OSD_OP_DELETE)
{
bufprintf(" state=%d", !op->op_data ? -1 : op->op_data->st);
}
#undef bufprintf
printf("%s\n", alloc);
has_slow = true;
}
}
}
if (has_slow)
{
bs->dump_diagnostics();
}
}

View File

@@ -35,7 +35,6 @@
#define MAX_AUTOSYNC_INTERVAL 3600
#define DEFAULT_AUTOSYNC_INTERVAL 5
#define DEFAULT_AUTOSYNC_WRITES 128
#define MAX_RECOVERY_QUEUE 2048
#define DEFAULT_RECOVERY_QUEUE 4
#define DEFAULT_RECOVERY_BATCH 16
@@ -94,7 +93,7 @@ class osd_t
// config
json11::Json::object config;
int etcd_report_interval = 5;
int etcd_report_interval = 30;
bool readonly = false;
osd_num_t osd_num = 1; // OSD numbers start with 1
@@ -109,8 +108,7 @@ class osd_t
int print_stats_interval = 3;
int slow_log_interval = 10;
int immediate_commit = IMMEDIATE_NONE;
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // "emergency" sync every 5 seconds
int autosync_writes = DEFAULT_AUTOSYNC_WRITES;
int autosync_interval = DEFAULT_AUTOSYNC_INTERVAL; // sync every 5 seconds
int recovery_queue_depth = DEFAULT_RECOVERY_QUEUE;
int recovery_sync_batch = DEFAULT_RECOVERY_BATCH;
int log_level = 0;
@@ -142,7 +140,6 @@ class osd_t
osd_op_t *autosync_op = NULL;
// Unstable writes
uint64_t unstable_write_count = 0;
std::map<osd_object_id_t, uint64_t> unstable_writes;
std::deque<osd_op_t*> syncs_in_progress;

View File

@@ -615,7 +615,7 @@ void osd_t::apply_pg_config()
}
if (currently_taken)
{
if (pg_it->second.state & (PG_ACTIVE | PG_INCOMPLETE | PG_PEERING | PG_REPEERING | PG_PEERED))
if (pg_it->second.state & (PG_ACTIVE | PG_INCOMPLETE | PG_PEERING | PG_REPEERING))
{
if (pg_it->second.target_set == pg_cfg.target_set)
{
@@ -703,19 +703,13 @@ void osd_t::apply_pg_config()
this->pg_config_applied = all_applied;
}
struct reporting_pg_t
{
pool_pg_num_t pool_pg_num;
bool history_changed;
};
void osd_t::report_pg_states()
{
if (etcd_reporting_pg_state || !this->pg_state_dirty.size() || !st_cli.etcd_addresses.size())
{
return;
}
std::vector<reporting_pg_t> reporting_pgs;
std::vector<std::pair<pool_pg_num_t,bool>> reporting_pgs;
json11::Json::array checks;
json11::Json::array success;
json11::Json::array failure;
@@ -727,7 +721,7 @@ void osd_t::report_pg_states()
continue;
}
auto & pg = pg_it->second;
reporting_pgs.push_back((reporting_pg_t){ *it, pg.history_changed });
reporting_pgs.push_back({ *it, pg.history_changed });
std::string state_key_base64 = base64_encode(st_cli.etcd_prefix+"/pg/state/"+std::to_string(pg.pool_id)+"/"+std::to_string(pg.pg_num));
bool pg_state_exists = false;
if (pg.state != PG_STARTING)
@@ -833,10 +827,10 @@ void osd_t::report_pg_states()
// One of PG state updates failed, put dirty flags back
for (auto pp: reporting_pgs)
{
this->pg_state_dirty.insert(pp.pool_pg_num);
if (pp.history_changed)
this->pg_state_dirty.insert(pp.first);
if (pp.second)
{
auto pg_it = this->pgs.find(pp.pool_pg_num);
auto pg_it = this->pgs.find(pp.first);
if (pg_it != this->pgs.end())
{
pg_it->second.history_changed = true;
@@ -876,27 +870,17 @@ void osd_t::report_pg_states()
// Success. We'll get our changes back via the watcher and react to them
for (auto pp: reporting_pgs)
{
auto pg_it = this->pgs.find(pp.pool_pg_num);
auto pg_it = this->pgs.find(pp.first);
if (pg_it != this->pgs.end() &&
pg_state_dirty.find(pp.pool_pg_num) == pg_state_dirty.end())
pg_it->second.state == PG_OFFLINE &&
pg_state_dirty.find(pp.first) == pg_state_dirty.end())
{
if (pg_it->second.state == PG_OFFLINE)
// Forget offline PGs after reporting their state
if (pg_it->second.scheme == POOL_SCHEME_JERASURE)
{
// Forget offline PGs after reporting their state
// (if the state wasn't changed again)
if (pg_it->second.scheme == POOL_SCHEME_JERASURE)
{
use_jerasure(pg_it->second.pg_size, pg_it->second.pg_data_size, false);
}
this->pgs.erase(pg_it);
}
else if (pg_it->second.state & PG_PEERED)
{
// Activate PG after PG PEERED state is reported along with history
// (if the state wasn't changed again)
pg_it->second.state = pg_it->second.state & ~PG_PEERED | PG_ACTIVE;
report_pg_state(pg_it->second);
use_jerasure(pg_it->second.pg_size, pg_it->second.pg_data_size, false);
}
this->pgs.erase(pg_it);
}
}
// Push other PG state updates, if any

View File

@@ -9,8 +9,6 @@
#define POOL_ID_MAX 0x10000
#define POOL_ID_BITS 16
#define INODE_POOL(inode) (pool_id_t)((inode) >> (64 - POOL_ID_BITS))
#define INODE_NO_POOL(inode) (inode_t)(inode & ((1l << (64-POOL_ID_BITS)) - 1))
#define INODE_WITH_POOL(pool_id, inode) (((inode_t)(pool_id) << (64-POOL_ID_BITS)) | INODE_NO_POOL(inode))
// Pool ID is 16 bits long
typedef uint32_t pool_id_t;

View File

@@ -37,10 +37,6 @@ void osd_t::handle_peers()
still = true;
}
}
else if (p.second.state & PG_PEERED)
{
still = true;
}
}
if (!still)
{
@@ -61,10 +57,6 @@ void osd_t::handle_peers()
}
still = true;
}
else if (p.second.state & PG_PEERED)
{
still = true;
}
}
if (!still)
{
@@ -87,7 +79,7 @@ void osd_t::repeer_pgs(osd_num_t peer_osd)
{
auto & pg = p.second;
bool repeer = false;
if (pg.state & (PG_PEERING | PG_PEERED | PG_ACTIVE | PG_INCOMPLETE))
if (pg.state & (PG_PEERING | PG_ACTIVE | PG_INCOMPLETE))
{
for (osd_num_t pg_osd: pg.all_peers)
{

View File

@@ -86,24 +86,9 @@ void pg_obj_state_check_t::walk()
}
if (pg->pg_cursize < pg->pg_size)
{
// Report PG history and activate
pg->state |= PG_DEGRADED | PG_PEERED;
std::vector<osd_num_t> history_set;
for (auto peer_osd: pg->cur_set)
{
if (peer_osd != 0)
{
history_set.push_back(peer_osd);
}
}
pg->target_history.push_back(history_set);
pg->history_changed = true;
}
else
{
// Just activate
pg->state |= PG_ACTIVE;
pg->state |= PG_DEGRADED;
}
pg->state |= PG_ACTIVE;
if (pg->state == PG_ACTIVE && pg->cur_peers.size() < pg->all_peers.size())
{
pg->state |= PG_LEFT_ON_DEAD;
@@ -445,11 +430,10 @@ void pg_t::calc_object_states(int log_level)
void pg_t::print_state()
{
printf(
"[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num,
"[PG %u/%u] is %s%s%s%s%s%s%s%s%s%s%s%s%s%s (%lu objects)\n", pool_id, pg_num,
(state & PG_STARTING) ? "starting" : "",
(state & PG_OFFLINE) ? "offline" : "",
(state & PG_PEERING) ? "peering" : "",
(state & PG_PEERED) ? "peered" : "",
(state & PG_INCOMPLETE) ? "incomplete" : "",
(state & PG_ACTIVE) ? "active" : "",
(state & PG_REPEERING) ? "repeering" : "",

View File

@@ -198,7 +198,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
{
// Fast happy-path
cur_op->buf = alloc_read_buffer(op_data->stripes, op_data->pg_data_size, 0);
submit_primary_subops(SUBMIT_RMW_READ, op_data->target_ver, pg.cur_set.data(), cur_op);
submit_primary_subops(SUBMIT_READ, op_data->target_ver, pg.cur_set.data(), cur_op);
op_data->st = 1;
}
else
@@ -215,7 +215,7 @@ void osd_t::continue_primary_read(osd_op_t *cur_op)
op_data->scheme = pg.scheme;
op_data->degraded = 1;
cur_op->buf = alloc_read_buffer(op_data->stripes, pg.pg_size, 0);
submit_primary_subops(SUBMIT_RMW_READ, op_data->target_ver, cur_set, cur_op);
submit_primary_subops(SUBMIT_READ, op_data->target_ver, cur_set, cur_op);
op_data->st = 1;
}
}
@@ -353,7 +353,6 @@ resume_3:
if (cur_op->req.rw.version && op_data->fact_ver != (cur_op->req.rw.version-1))
{
cur_op->reply.hdr.retval = -EINTR;
cur_op->reply.rw.version = op_data->fact_ver;
goto continue_others;
}
// Save version override for parallel reads

View File

@@ -5,6 +5,8 @@
void osd_t::autosync()
{
// FIXME Autosync based on the number of unstable writes to prevent
// "journal_sector_buffer_count is too low for this batch" errors
if (immediate_commit != IMMEDIATE_ALL && !autosync_op)
{
autosync_op = new osd_op_t();

View File

@@ -100,7 +100,6 @@ resume_3:
if (cur_op->req.rw.version && op_data->fact_ver != (cur_op->req.rw.version-1))
{
cur_op->reply.hdr.retval = -EINTR;
cur_op->reply.rw.version = op_data->fact_ver;
goto continue_others;
}
if (op_data->scheme == POOL_SCHEME_REPLICATED)
@@ -261,7 +260,6 @@ resume_9:
}
}
cur_op->reply.hdr.retval = cur_op->req.rw.len;
cur_op->reply.rw.version = op_data->fact_ver;
continue_others:
osd_op_t *next_op = NULL;
auto next_it = pg.write_queue.find(op_data->oid);
@@ -274,11 +272,6 @@ continue_others:
}
// finish_op would invalidate next_it if it cleared pg.write_queue, but it doesn't do that :)
finish_op(cur_op, cur_op->reply.hdr.retval);
if (unstable_write_count >= autosync_writes)
{
unstable_write_count = 0;
autosync();
}
if (next_op)
{
// Continue next write to the same object
@@ -358,7 +351,6 @@ resume_7:
else
{
lazy:
unstable_write_count++;
if (op_data->scheme != POOL_SCHEME_REPLICATED)
{
// Remember version as unstable for EC/XOR

View File

@@ -3,12 +3,11 @@
#include "pg_states.h"
const int pg_state_bit_count = 16;
const int pg_state_bit_count = 15;
const int pg_state_bits[16] = {
const int pg_state_bits[15] = {
PG_STARTING,
PG_PEERING,
PG_PEERED,
PG_INCOMPLETE,
PG_ACTIVE,
PG_REPEERING,
@@ -23,10 +22,9 @@ const int pg_state_bits[16] = {
PG_LEFT_ON_DEAD,
};
const char *pg_state_names[16] = {
const char *pg_state_names[15] = {
"starting",
"peering",
"peered",
"incomplete",
"active",
"repeering",

View File

@@ -4,27 +4,23 @@
#pragma once
// Placement group states
// STARTING -> [acquire lock] -> PEERING -> PEERED
// PEERED -> [report history if required!] -> INCOMPLETE|ACTIVE
// ACTIVE -> REPEERING -> PEERING
// ACTIVE -> STOPPING -> OFFLINE -> [release lock]
// STARTING -> [acquire lock] -> PEERING -> INCOMPLETE|ACTIVE -> STOPPING -> OFFLINE -> [release lock]
// Exactly one of these:
#define PG_STARTING (1<<0)
#define PG_PEERING (1<<1)
#define PG_PEERED (1<<2)
#define PG_INCOMPLETE (1<<3)
#define PG_ACTIVE (1<<4)
#define PG_REPEERING (1<<5)
#define PG_STOPPING (1<<6)
#define PG_OFFLINE (1<<7)
#define PG_INCOMPLETE (1<<2)
#define PG_ACTIVE (1<<3)
#define PG_REPEERING (1<<4)
#define PG_STOPPING (1<<5)
#define PG_OFFLINE (1<<6)
// Plus any of these:
#define PG_DEGRADED (1<<8)
#define PG_HAS_INCOMPLETE (1<<9)
#define PG_HAS_DEGRADED (1<<10)
#define PG_HAS_MISPLACED (1<<11)
#define PG_HAS_UNCLEAN (1<<12)
#define PG_HAS_INVALID (1<<13)
#define PG_LEFT_ON_DEAD (1<<14)
#define PG_DEGRADED (1<<7)
#define PG_HAS_INCOMPLETE (1<<8)
#define PG_HAS_DEGRADED (1<<9)
#define PG_HAS_MISPLACED (1<<10)
#define PG_HAS_UNCLEAN (1<<11)
#define PG_HAS_INVALID (1<<12)
#define PG_LEFT_ON_DEAD (1<<13)
// Lower bits that represent object role (EC 0/1/2... or always 0 with replication)
// 12 bits is a safe default that doesn't depend on pg_stripe_size or pg_block_size

View File

@@ -19,10 +19,6 @@
#include "qemu/units.h"
#include "block/qdict.h"
#include "qemu/cutils.h"
#elif QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR >= 10
#include "qemu/cutils.h"
#include "qapi/qmp/qstring.h"
#include "qapi/qmp/qjson.h"
#else
#include "qapi/qmp/qint.h"
#define qdict_put_int(options, name, num_val) qdict_put_obj(options, name, QOBJECT(qint_from_int(num_val)))
@@ -106,7 +102,7 @@ static void qemu_vitastor_unescape(char *src)
}
// vitastor[:key=value]*
// vitastor[:etcd_host=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
// vitastor[:(etcd|etcd_host|etcd_address)=127.0.0.1]:inode=1:pool=1[:rdma_gid_index=3]
// vitastor:config_path=/etc/vitastor/vitastor.conf:image=testimg
static void vitastor_parse_filename(const char *filename, QDict *options, Error **errp)
{
@@ -203,8 +199,12 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
int64_t ret = 0;
qemu_mutex_init(&client->mutex);
client->config_path = g_strdup(qdict_get_try_str(options, "config_path"));
// FIXME: Rename to etcd_address
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_host"));
if (qdict_get_try_str(options, "etcd_address"))
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_address"));
else if (qdict_get_try_str(options, "etcd_host"))
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd_host"));
else if (qdict_get_try_str(options, "etcd"))
client->etcd_host = g_strdup(qdict_get_try_str(options, "etcd"));
client->etcd_prefix = g_strdup(qdict_get_try_str(options, "etcd_prefix"));
client->use_rdma = qdict_get_try_int(options, "use_rdma", -1);
client->rdma_device = g_strdup(qdict_get_try_str(options, "rdma_device"));
@@ -271,6 +271,8 @@ static int vitastor_file_open(BlockDriverState *bs, QDict *options, int flags, E
qdict_del(options, "rdma_device");
qdict_del(options, "config_path");
qdict_del(options, "etcd_host");
qdict_del(options, "etcd_address");
qdict_del(options, "etcd");
qdict_del(options, "etcd_prefix");
qdict_del(options, "image");
qdict_del(options, "inode");
@@ -294,7 +296,7 @@ static void vitastor_close(BlockDriverState *bs)
g_free(client->image);
}
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
#if QEMU_VERSION_MAJOR >= 3
static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
bsz->phys = 4096;
@@ -303,7 +305,6 @@ static int vitastor_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
}
#endif
#if QEMU_VERSION_MAJOR >= 3
static int coroutine_fn vitastor_co_create_opts(
#if QEMU_VERSION_MAJOR >= 4
BlockDriver *drv,
@@ -328,7 +329,6 @@ out:
qobject_unref(options);
return ret;
}
#endif
#if QEMU_VERSION_MAJOR >= 3
static int coroutine_fn vitastor_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -368,18 +368,20 @@ static int64_t vitastor_getlength(BlockDriverState *bs)
return client->size;
}
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
#if QEMU_VERSION_MAJOR >= 3
static void vitastor_refresh_limits(BlockDriverState *bs, Error **errp)
#else
static int vitastor_refresh_limits(BlockDriverState *bs)
#endif
{
#if QEMU_VERSION_MAJOR >= 4
bs->bl.request_alignment = 4096;
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 3
bs->bl.min_mem_alignment = 4096;
#else
bs->request_alignment = 4096;
#endif
bs->bl.opt_mem_alignment = 4096;
#if QEMU_VERSION_MAJOR < 2 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR == 0
#if QEMU_VERSION_MAJOR < 3
return 0;
#endif
}
@@ -404,7 +406,7 @@ static void vitastor_co_generic_bh_cb(void *opaque, long retval)
task->complete = 1;
if (qemu_coroutine_self() != task->co)
{
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 8
#if QEMU_VERSION_MAJOR >= 3
aio_co_wake(task->co);
#else
qemu_coroutine_enter(task->co, NULL);
@@ -488,7 +490,7 @@ static int coroutine_fn vitastor_co_flush(BlockDriverState *bs)
return task.ret;
}
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
#if QEMU_VERSION_MAJOR >= 3
static QemuOptsList vitastor_create_opts = {
.name = "vitastor-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(vitastor_create_opts.head),
@@ -516,6 +518,8 @@ static const char *vitastor_strong_runtime_opts[] = {
"inode",
"pool",
"config_path",
"etcd",
"etcd_address",
"etcd_host",
"etcd_prefix",
@@ -532,7 +536,7 @@ static BlockDriver bdrv_vitastor = {
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = vitastor_get_info,
.bdrv_getlength = vitastor_getlength,
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 2
#if QEMU_VERSION_MAJOR >= 3
.bdrv_probe_blocksizes = vitastor_probe_blocksizes,
#endif
.bdrv_refresh_limits = vitastor_refresh_limits,
@@ -544,7 +548,7 @@ static BlockDriver bdrv_vitastor = {
.bdrv_close = vitastor_close,
// Option list for the create operation
#if QEMU_VERSION_MAJOR >= 3 || QEMU_VERSION_MAJOR == 2 && QEMU_VERSION_MINOR > 0
#if QEMU_VERSION_MAJOR >= 3
.create_opts = &vitastor_create_opts,
#else
.create_options = vitastor_create_opts,

410
src/rm_inode.cpp Normal file
View File

@@ -0,0 +1,410 @@
// Copyright (c) Vitaliy Filippov, 2019+
// License: VNPL-1.1 (see README.md for details)
/**
* Inode removal tool
* May be included into a bigger "command-line management interface" in the future
*/
#include <vector>
#include <algorithm>
#include "epoll_manager.h"
#include "cluster_client.h"
#include "pg_states.h"
#define RM_LISTING 1
#define RM_REMOVING 2
#define RM_END 3
const char *exe_name = NULL;
struct rm_pg_t;
struct rm_pg_osd_t
{
rm_pg_t *pg = NULL;
osd_num_t osd_num;
bool sent = false;
};
struct rm_pg_t
{
pg_num_t pg_num;
osd_num_t rm_osd_num;
std::vector<rm_pg_osd_t> list_osds;
int state = 0;
int to_list;
std::set<object_id> objects;
std::set<object_id>::iterator obj_pos;
uint64_t obj_count = 0, obj_done = 0, obj_prev_done = 0;
int in_flight = 0;
};
class rm_inode_t
{
protected:
uint64_t inode = 0;
pool_id_t pool_id = 0;
uint64_t iodepth = 0, parallel_osds = 0;
ring_loop_t *ringloop = NULL;
epoll_manager_t *epmgr = NULL;
cluster_client_t *cli = NULL;
ring_consumer_t consumer;
std::vector<rm_pg_t*> lists;
uint64_t total_count = 0, total_done = 0, total_prev_pct = 0;
uint64_t pgs_to_list = 0;
bool started = false;
bool progress = true;
bool list_first = false;
int log_level = 0;
public:
static json11::Json::object parse_args(int narg, const char *args[])
{
json11::Json::object cfg;
cfg["progress"] = "1";
for (int i = 1; i < narg; i++)
{
if (!strcmp(args[i], "-h") || !strcmp(args[i], "--help"))
{
help();
}
else if (args[i][0] == '-' && args[i][1] == '-')
{
const char *opt = args[i]+2;
cfg[opt] = !strcmp(opt, "json") || !strcmp(opt, "wait-list") || i == narg-1 ? "1" : args[++i];
}
}
return cfg;
}
static void help()
{
printf(
"Vitastor inode removal tool\n"
"(c) Vitaliy Filippov, 2020 (VNPL-1.1)\n\n"
"USAGE:\n"
" %s [--etcd_address <etcd_address>] --pool <pool> --inode <inode> [--wait-list]\n",
exe_name
);
exit(0);
}
void run(json11::Json cfg)
{
inode = cfg["inode"].uint64_value();
pool_id = cfg["pool"].uint64_value();
if (pool_id)
inode = (inode & ((1l << (64-POOL_ID_BITS)) - 1)) | (((uint64_t)pool_id) << (64-POOL_ID_BITS));
pool_id = INODE_POOL(inode);
if (!pool_id)
{
fprintf(stderr, "pool is missing");
exit(1);
}
iodepth = cfg["iodepth"].uint64_value();
if (!iodepth)
iodepth = 32;
parallel_osds = cfg["parallel_osds"].uint64_value();
if (!parallel_osds)
parallel_osds = 4;
log_level = cfg["log_level"].int64_value();
progress = cfg["progress"].uint64_value() ? true : false;
list_first = cfg["wait-list"].uint64_value() ? true : false;
// Create client
ringloop = new ring_loop_t(512);
epmgr = new epoll_manager_t(ringloop);
cli = new cluster_client_t(ringloop, epmgr->tfd, cfg);
cli->on_ready([this]() { start_delete(); });
// Initialize job
consumer.loop = [this]()
{
if (started)
continue_delete();
ringloop->submit();
};
ringloop->register_consumer(&consumer);
// Loop until it completes
while (1)
{
ringloop->loop();
ringloop->wait();
}
}
void start_delete()
{
if (cli->st_cli.pool_config.find(pool_id) == cli->st_cli.pool_config.end())
{
fprintf(stderr, "Pool %u does not exist\n", pool_id);
exit(1);
}
auto pool_cfg = cli->st_cli.pool_config[pool_id];
for (auto & pg_item: pool_cfg.pg_config)
{
auto & pg = pg_item.second;
if (pg.pause || !pg.cur_primary || !(pg.cur_state & PG_ACTIVE))
{
fprintf(stderr, "PG %u is inactive, skipping\n", pg_item.first);
continue;
}
rm_pg_t *r = new rm_pg_t();
r->pg_num = pg_item.first;
r->rm_osd_num = pg.cur_primary;
r->state = RM_LISTING;
if (pg.cur_state != PG_ACTIVE)
{
std::set<osd_num_t> all_peers;
for (osd_num_t pg_osd: pg.target_set)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
for (osd_num_t pg_osd: pg.all_peers)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
for (auto & hist_item: pg.target_history)
{
for (auto pg_osd: hist_item)
{
if (pg_osd != 0)
{
all_peers.insert(pg_osd);
}
}
}
for (osd_num_t peer_osd: all_peers)
{
r->list_osds.push_back((rm_pg_osd_t){ .pg = r, .osd_num = peer_osd, .sent = false });
}
}
else
{
r->list_osds.push_back((rm_pg_osd_t){ .pg = r, .osd_num = pg.cur_primary, .sent = false });
}
r->to_list = r->list_osds.size();
lists.push_back(r);
}
std::sort(lists.begin(), lists.end(), [](rm_pg_t *a, rm_pg_t *b)
{
return a->rm_osd_num < b->rm_osd_num ? true : false;
});
pgs_to_list = lists.size();
started = true;
continue_delete();
}
void send_list(rm_pg_osd_t *cur_list)
{
if (cur_list->sent)
{
return;
}
if (cli->msgr.osd_peer_fds.find(cur_list->osd_num) ==
cli->msgr.osd_peer_fds.end())
{
// Initiate connection
cli->msgr.connect_peer(cur_list->osd_num, cli->st_cli.peer_states[cur_list->osd_num]);
return;
}
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
op->peer_fd = cli->msgr.osd_peer_fds[cur_list->osd_num];
op->req = (osd_any_op_t){
.sec_list = {
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,
.id = cli->msgr.next_subop_id++,
.opcode = OSD_OP_SEC_LIST,
},
.list_pg = cur_list->pg->pg_num,
.pg_count = (pg_num_t)cli->st_cli.pool_config[pool_id].real_pg_count,
.pg_stripe_size = cli->st_cli.pool_config[pool_id].pg_stripe_size,
.min_inode = inode,
.max_inode = inode,
},
};
op->callback = [this, cur_list](osd_op_t *op)
{
cur_list->pg->to_list--;
if (op->reply.hdr.retval < 0)
{
fprintf(stderr, "Failed to get PG %u/%u object list from OSD %lu (retval=%ld), skipping\n",
pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval);
}
else
{
if (op->reply.sec_list.stable_count < op->reply.hdr.retval)
{
// Unstable objects, if present, mean that someone still writes into the inode. Warn the user about it.
printf(
"[PG %u/%u] Inode still has %lu unstable object versions - is it still open? Not a good idea to delete it.\n",
pool_id, cur_list->pg->pg_num, op->reply.hdr.retval - op->reply.sec_list.stable_count
);
}
if (log_level > 0)
{
printf(
"[PG %u/%u] Got inode object list from OSD %lu: %ld object versions\n",
pool_id, cur_list->pg->pg_num, cur_list->osd_num, op->reply.hdr.retval
);
}
for (uint64_t i = 0; i < op->reply.hdr.retval; i++)
{
object_id oid = ((obj_ver_id*)op->buf)[i].oid;
oid.stripe = oid.stripe & ~STRIPE_MASK;
cur_list->pg->objects.insert(oid);
}
}
delete op;
if (cur_list->pg->to_list <= 0)
{
cur_list->pg->obj_done = cur_list->pg->obj_prev_done = 0;
cur_list->pg->obj_pos = cur_list->pg->objects.begin();
cur_list->pg->obj_count = cur_list->pg->objects.size();
total_count += cur_list->pg->obj_count;
total_prev_pct = 0;
cur_list->pg->state = RM_REMOVING;
pgs_to_list--;
}
continue_delete();
};
cli->msgr.outbox_push(op);
cur_list->sent = true;
}
void send_ops(rm_pg_t *cur_list)
{
if (cli->msgr.osd_peer_fds.find(cur_list->rm_osd_num) ==
cli->msgr.osd_peer_fds.end())
{
// Initiate connection
cli->msgr.connect_peer(cur_list->rm_osd_num, cli->st_cli.peer_states[cur_list->rm_osd_num]);
return;
}
while (cur_list->in_flight < iodepth && cur_list->obj_pos != cur_list->objects.end())
{
osd_op_t *op = new osd_op_t();
op->op_type = OSD_OP_OUT;
op->peer_fd = cli->msgr.osd_peer_fds[cur_list->rm_osd_num];
op->req = (osd_any_op_t){
.rw = {
.header = {
.magic = SECONDARY_OSD_OP_MAGIC,
.id = cli->msgr.next_subop_id++,
.opcode = OSD_OP_DELETE,
},
.inode = cur_list->obj_pos->inode,
.offset = (cur_list->obj_pos->stripe & ~STRIPE_MASK),
.len = 0,
},
};
op->callback = [this, cur_list](osd_op_t *op)
{
cur_list->in_flight--;
if (op->reply.hdr.retval < 0)
{
fprintf(stderr, "Failed to remove object from PG %u (OSD %lu) (retval=%ld)\n",
cur_list->pg_num, cur_list->rm_osd_num, op->reply.hdr.retval);
}
delete op;
cur_list->obj_done++;
total_done++;
continue_delete();
};
cli->msgr.outbox_push(op);
cur_list->obj_pos++;
cur_list->in_flight++;
}
if (!cur_list->in_flight && cur_list->obj_pos == cur_list->objects.end())
{
cur_list->obj_count = 0;
cur_list->obj_done = cur_list->obj_prev_done = 0;
cur_list->state = RM_END;
}
}
void continue_delete()
{
int par_osd = 0;
osd_num_t max_seen_osd = 0;
bool no_del = false;
if (list_first)
{
int i, n = 0;
for (i = 0; i < lists.size(); i++)
{
if (lists[i]->state == RM_LISTING)
{
n++;
}
}
if (n > 0)
{
no_del = true;
}
}
for (int i = 0; i < lists.size(); i++)
{
if (lists[i]->state == RM_END)
{
delete lists[i];
lists.erase(lists.begin()+i, lists.begin()+i+1);
i--;
}
else if (lists[i]->rm_osd_num > max_seen_osd)
{
if (lists[i]->state == RM_LISTING)
{
for (int j = 0; j < lists[i]->list_osds.size(); j++)
{
send_list(&lists[i]->list_osds[j]);
}
}
else if (lists[i]->state == RM_REMOVING)
{
if (no_del)
{
continue;
}
send_ops(lists[i]);
}
par_osd++;
max_seen_osd = lists[i]->rm_osd_num;
if (par_osd >= parallel_osds)
{
break;
}
}
}
if (progress && total_count > 0 && total_done*1000/total_count != total_prev_pct)
{
printf("\rRemoved %lu/%lu objects, %lu more PGs to list...", total_done, total_count, pgs_to_list);
total_prev_pct = total_done*1000/total_count;
}
if (!lists.size())
{
printf("Done, inode %lu in pool %u removed\n", (inode & ((1l << (64-POOL_ID_BITS)) - 1)), pool_id);
exit(0);
}
}
};
int main(int narg, const char *args[])
{
setvbuf(stdout, NULL, _IONBF, 0);
setvbuf(stderr, NULL, _IONBF, 0);
exe_name = args[0];
rm_inode_t *p = new rm_inode_t();
p->run(rm_inode_t::parse_args(narg, args));
return 0;
}

View File

@@ -72,7 +72,7 @@ static void vitastor_c_write_handler(void *opaque)
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
bool use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level)
{
json11::Json cfg_json = vitastor_c_common_config(
config_path, etcd_host, etcd_prefix, use_rdma,

View File

@@ -28,7 +28,7 @@ typedef void QEMUSetFDHandler(void *ctx, int fd, int is_external, IOHandler *fd_
vitastor_c *vitastor_c_create_qemu(QEMUSetFDHandler *aio_set_fd_handler, void *aio_context,
const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
bool use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
vitastor_c *vitastor_c_create_uring(const char *config_path, const char *etcd_host, const char *etcd_prefix,
int use_rdma, const char *rdma_device, int rdma_port_num, int rdma_gid_index, int rdma_mtu, int log_level);
vitastor_c *vitastor_c_create_uring_json(const char **options, int options_len);

View File

@@ -3,16 +3,17 @@
. `dirname $0`/common.sh
OSD_SIZE=${OSD_SIZE:-1024}
PG_COUNT=${PG_COUNT:-1}
PG_SIZE=${PG_SIZE:-3}
OSD_COUNT=${OSD_COUNT:-3}
SCHEME=${SCHEME:-ec}
for i in $(seq 1 $OSD_COUNT); do
dd if=/dev/zero of=./testdata/test_osd$i.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num $i --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd$i.bin 2>/dev/null) &>./testdata/osd$i.log &
eval OSD${i}_PID=$!
done
dd if=/dev/zero of=./testdata/test_osd1.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
dd if=/dev/zero of=./testdata/test_osd2.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
dd if=/dev/zero of=./testdata/test_osd3.bin bs=1024 count=1 seek=$((OSD_SIZE*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
OSD3_PID=$!
cd mon
npm install
@@ -24,20 +25,16 @@ if [ -n "$GLOBAL_CONF" ]; then
$ETCDCTL put /vitastor/config/global "$GLOBAL_CONF"
fi
if [ "$SCHEME" = "replicated" ]; then
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"replicated","pg_size":'$PG_SIZE',"pg_minsize":'$((PG_SIZE-1))',"pg_count":'$PG_COUNT',"failure_domain":"osd"}}'
else
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":'$PG_SIZE',"pg_minsize":'$((PG_SIZE-1))',"parity_chunks":1,"pg_count":'$PG_COUNT',"failure_domain":"osd"}}'
fi
$ETCDCTL put /vitastor/config/pools '{"1":{"name":"testpool","scheme":"xor","pg_size":3,"pg_minsize":2,"parity_chunks":1,"pg_count":1,"failure_domain":"osd"}}'
sleep 2
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and ([ .[0].items["1"][] | select(((.osd_set | select(. != 0) | sort | unique) | length) == '$PG_SIZE') ] | length) == '$PG_COUNT); then
format_error "FAILED: $PG_COUNT PG(s) NOT CONFIGURED"
if ! ($ETCDCTL get /vitastor/config/pgs --print-value-only | jq -s -e '(. | length) != 0 and (.[0].items["1"]["1"].osd_set | sort) == ["1","2","3"]'); then
format_error "FAILED: 1 PG NOT CONFIGURED"
fi
if ! ($ETCDCTL get /vitastor/pg/state/1/ --prefix --print-value-only | jq -s -e '[ .[] | select(.state == ["active"]) ] | length == '$PG_COUNT); then
format_error "FAILED: $PG_COUNT PG(s) NOT UP"
if ! ($ETCDCTL get /vitastor/pg/state/1/1 --print-value-only | jq -s -e '(. | length) != 0 and .[0].state == ["active"]'); then
format_error "FAILED: 1 PG NOT UP"
fi
if ! cmp build/src/block-vitastor.so /usr/lib/x86_64-linux-gnu/qemu/block-vitastor.so; then

View File

@@ -18,19 +18,19 @@ dd if=/dev/zero of=./testdata/test_osd5.bin bs=1024 count=1 seek=$((1024*1024-1)
dd if=/dev/zero of=./testdata/test_osd6.bin bs=1024 count=1 seek=$((1024*1024-1))
dd if=/dev/zero of=./testdata/test_osd7.bin bs=1024 count=1 seek=$((1024*1024-1))
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) &>./testdata/osd1.log &
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd1.bin 2>/dev/null) 2>&1 >>./testdata/osd1.log &
OSD1_PID=$!
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) &>./testdata/osd2.log &
build/src/vitastor-osd --osd_num 2 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) 2>&1 >>./testdata/osd2.log &
OSD2_PID=$!
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) &>./testdata/osd3.log &
build/src/vitastor-osd --osd_num 3 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd3.bin 2>/dev/null) 2>&1 >>./testdata/osd3.log &
OSD3_PID=$!
build/src/vitastor-osd --osd_num 4 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd4.bin 2>/dev/null) &>./testdata/osd4.log &
build/src/vitastor-osd --osd_num 4 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd4.bin 2>/dev/null) 2>&1 >>./testdata/osd4.log &
OSD4_PID=$!
build/src/vitastor-osd --osd_num 5 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd5.bin 2>/dev/null) &>./testdata/osd5.log &
build/src/vitastor-osd --osd_num 5 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd5.bin 2>/dev/null) 2>&1 >>./testdata/osd5.log &
OSD5_PID=$!
build/src/vitastor-osd --osd_num 6 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd6.bin 2>/dev/null) &>./testdata/osd6.log &
build/src/vitastor-osd --osd_num 6 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd6.bin 2>/dev/null) 2>&1 >>./testdata/osd6.log &
OSD6_PID=$!
build/src/vitastor-osd --osd_num 7 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd7.bin 2>/dev/null) &>./testdata/osd7.log &
build/src/vitastor-osd --osd_num 7 --bind_address 127.0.0.1 $NO_SAME --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd7.bin 2>/dev/null) 2>&1 >>./testdata/osd7.log &
OSD7_PID=$!
cd mon

View File

@@ -1,14 +0,0 @@
#!/bin/bash -ex
PG_COUNT=16
. `dirname $0`/run_3osds.sh
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 \
-end_fsync=1 -fsync=1 -rw=write -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -cluster_log_level=10
$ETCDCTL get --prefix '/vitastor/pg/state'
build/src/vitastor-cli rm-data --etcd_address $ETCD_URL --pool 1 --inode 1
format_green OK

View File

@@ -6,19 +6,18 @@
$ETCDCTL put /vitastor/config/inode/1/2 '{"name":"testimg","size":'$((32*1024*1024))'}'
LD_PRELOAD="libasan.so.5 build/src/libfio_vitastor.so" \
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -fsync=1 -rw=write \
-etcd=$ETCD_URL -pool=1 -inode=2 -size=32M -cluster_log_level=10
$ETCDCTL put /vitastor/config/inode/1/2 '{"name":"testimg@0","size":'$((32*1024*1024))'}'
$ETCDCTL put /vitastor/config/inode/1/3 '{"parent_id":2,"name":"testimg","size":'$((32*1024*1024))'}'
# Preload build/src/libfio_vitastor.so so libasan detects all symbols
LD_PRELOAD="libasan.so.5 build/src/libfio_vitastor.so" \
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=32 -buffer_pattern=0xdeadface \
-rw=randwrite -etcd=$ETCD_URL -image=testimg -number_ios=1024
LD_PRELOAD="libasan.so.5 build/src/libfio_vitastor.so" \
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4M -direct=1 -iodepth=1 -rw=read -etcd=$ETCD_URL -pool=1 -inode=3 -size=32M
qemu-img convert -S 4096 -p \
@@ -39,16 +38,4 @@ node mon/merge.js ./testdata/layer0.bin ./testdata/layer1.bin ./testdata/check.b
cmp ./testdata/merged.bin ./testdata/check.bin
# Test merge
$ETCDCTL put /vitastor/config/inode/1/3 '{"parent_id":2,"name":"testimg","size":'$((32*1024*1024))'}'
build/src/vitastor-cli rm --etcd_address $ETCD_URL testimg@0
qemu-img convert -S 4096 -p \
-f raw "vitastor:etcd_host=127.0.0.1\:$ETCD_PORT/v3:image=testimg" \
-O raw ./testdata/merged-by-tool.bin
cmp ./testdata/merged.bin ./testdata/merged-by-tool.bin
format_green OK

View File

@@ -1,32 +0,0 @@
#!/bin/bash -ex
OSD_COUNT=2
PG_SIZE=2
SCHEME=replicated
. `dirname $0`/run_3osds.sh
# Kill OSD 1
kill $OSD1_PID
sleep 2
# Write
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=4k -direct=1 -iodepth=1 -fsync=1 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10 -number_ios=100
# Kill OSD 2, start OSD 1
kill $OSD2_PID
build/src/vitastor-osd --osd_num 1 --bind_address 127.0.0.1 $OSD_ARGS --etcd_address $ETCD_URL $(node mon/simple-offsets.js --format options --device ./testdata/test_osd2.bin 2>/dev/null) >>./testdata/osd2.log 2>&1 &
sleep 2
# Check PG state - it should NOT become active
if ($ETCDCTL get --prefix /vitastor/pg/state/ --print-value-only | grep -q active); then
format_error "FAILED: PG STILL ACTIVE AFTER SPLITBRAIN"
fi
format_green OK

View File

@@ -5,12 +5,6 @@
#LD_PRELOAD=libasan.so.5 \
# fio -thread -name=test -ioengine=build/src/libfio_vitastor_sec.so -bs=4k -fsync=128 `$ETCDCTL get /vitastor/osd/state/1 --print-value-only | jq -r '"-host="+.addresses[0]+" -port="+(.port|tostring)'` -rw=write -size=32M
# Random writes without immediate_commit were stalling OSDs
LD_PRELOAD=libasan.so.5 \
fio -thread -name=test -ioengine=build/src/libfio_vitastor.so -bs=124k -direct=1 -numjobs=16 -iodepth=4 \
-rw=randwrite -etcd=$ETCD_URL -pool=1 -inode=1 -size=128M -runtime=10
# A lot of parallel syncs was crashing the primary OSD at some point
LD_PRELOAD=libasan.so.5 \