Compare commits

..

1 Commits

Author SHA1 Message Date
Vitaliy Filippov 5a8f80159f Add bindiff for tests
Test / test_rebalance_verify_ec (push) Successful in 1m45s Details
Test / test_rebalance_verify_ec_imm (push) Successful in 1m49s Details
Test / test_switch_primary (push) Successful in 33s Details
Test / test_write (push) Successful in 32s Details
Test / test_write_xor (push) Successful in 35s Details
Test / test_resize_auto (push) Successful in 8s Details
Test / test_osd_tags (push) Successful in 10s Details
Test / test_enospc (push) Successful in 11s Details
Test / test_snapshot_pool2 (push) Successful in 15s Details
Test / test_enospc_imm (push) Successful in 9s Details
Test / test_enospc_xor (push) Successful in 14s Details
Test / test_enospc_imm_xor (push) Successful in 13s Details
Test / test_scrub (push) Successful in 15s Details
Test / test_scrub_zero_osd_2 (push) Successful in 14s Details
Test / test_scrub_xor (push) Successful in 14s Details
Test / test_scrub_pg_size_3 (push) Successful in 17s Details
Test / test_scrub_pg_size_6_pg_minsize_4_osd_count_6_ec (push) Successful in 16s Details
Test / test_scrub_ec (push) Successful in 15s Details
Test / test_nfs (push) Successful in 12s Details
Test / test_write_no_same (push) Successful in 10s Details
Test / test_heal_csum_32k (push) Successful in 2m25s Details
Test / test_heal_csum_4k_dj (push) Successful in 2m27s Details
Test / test_heal_pg_size_2 (push) Successful in 2m18s Details
Test / test_heal_ec (push) Successful in 2m17s Details
Test / test_heal_antietcd (push) Successful in 2m17s Details
Test / test_heal_csum_4k_dmj (push) Successful in 2m18s Details
Test / test_resize (push) Successful in 14s Details
Test / test_heal_csum_4k (push) Successful in 2m33s Details
Test / test_heal_csum_32k_dmj (push) Successful in 2m33s Details
Test / test_heal_csum_32k_dj (push) Successful in 2m49s Details
2024-11-15 02:21:37 +03:00
13 changed files with 62 additions and 515 deletions

View File

@ -1,193 +0,0 @@
Index: pve-qemu-kvm-9.0.0/block/meson.build
===================================================================
--- pve-qemu-kvm-9.0.0.orig/block/meson.build
+++ pve-qemu-kvm-9.0.0/block/meson.build
@@ -126,6 +126,7 @@ foreach m : [
[libnfs, 'nfs', files('nfs.c')],
[libssh, 'ssh', files('ssh.c')],
[rbd, 'rbd', files('rbd.c')],
+ [vitastor, 'vitastor', files('vitastor.c')],
]
if m[0].found()
module_ss = ss.source_set()
Index: pve-qemu-kvm-9.0.0/meson.build
===================================================================
--- pve-qemu-kvm-9.0.0.orig/meson.build
+++ pve-qemu-kvm-9.0.0/meson.build
@@ -1452,6 +1452,26 @@ if not get_option('rbd').auto() or have_
endif
endif
+vitastor = not_found
+if not get_option('vitastor').auto() or have_block
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
+ required: get_option('vitastor'))
+ if libvitastor_client.found()
+ if cc.links('''
+ #include <vitastor_c.h>
+ int main(void) {
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ return 0;
+ }''', dependencies: libvitastor_client)
+ vitastor = declare_dependency(dependencies: libvitastor_client)
+ elif get_option('vitastor').enabled()
+ error('could not link libvitastor_client')
+ else
+ warning('could not link libvitastor_client, disabling')
+ endif
+ endif
+endif
+
glusterfs = not_found
glusterfs_ftruncate_has_stat = false
glusterfs_iocb_has_stat = false
@@ -2254,6 +2274,7 @@ endif
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
config_host_data.set('CONFIG_RBD', rbd.found())
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
@@ -4454,6 +4475,7 @@ summary_info += {'fdt support': fd
summary_info += {'libcap-ng support': libcap_ng}
summary_info += {'bpf support': libbpf}
summary_info += {'rbd support': rbd}
+summary_info += {'vitastor support': vitastor}
summary_info += {'smartcard support': cacard}
summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
Index: pve-qemu-kvm-9.0.0/meson_options.txt
===================================================================
--- pve-qemu-kvm-9.0.0.orig/meson_options.txt
+++ pve-qemu-kvm-9.0.0/meson_options.txt
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value :
description: 'lzo compression support')
option('rbd', type : 'feature', value : 'auto',
description: 'Ceph block device driver')
+option('vitastor', type : 'feature', value : 'auto',
+ description: 'Vitastor block device driver')
option('opengl', type : 'feature', value : 'auto',
description: 'OpenGL support')
option('rdma', type : 'feature', value : 'auto',
Index: pve-qemu-kvm-9.0.0/qapi/block-core.json
===================================================================
--- pve-qemu-kvm-9.0.0.orig/qapi/block-core.json
+++ pve-qemu-kvm-9.0.0/qapi/block-core.json
@@ -3481,7 +3481,7 @@
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'pbs',
- 'ssh', 'throttle', 'vdi', 'vhdx',
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
@@ -4591,6 +4591,28 @@
'*server': ['InetSocketAddressBase'] } }
##
+# @BlockdevOptionsVitastor:
+#
+# Driver specific block device options for vitastor
+#
+# @image: Image name
+# @inode: Inode number
+# @pool: Pool ID
+# @size: Desired image size in bytes
+# @config-path: Path to Vitastor configuration
+# @etcd-host: etcd connection address(es)
+# @etcd-prefix: etcd key/value prefix
+##
+{ 'struct': 'BlockdevOptionsVitastor',
+ 'data': { '*inode': 'uint64',
+ '*pool': 'uint64',
+ '*size': 'uint64',
+ '*image': 'str',
+ '*config-path': 'str',
+ '*etcd-host': 'str',
+ '*etcd-prefix': 'str' } }
+
+##
# @ReplicationMode:
#
# An enumeration of replication modes.
@@ -5053,6 +5075,7 @@
'throttle': 'BlockdevOptionsThrottle',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
+ 'vitastor': 'BlockdevOptionsVitastor',
'virtio-blk-vfio-pci':
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
'if': 'CONFIG_BLKIO' },
@@ -5498,6 +5521,20 @@
'*encrypt' : 'RbdEncryptionCreateOptions' } }
##
+# @BlockdevCreateOptionsVitastor:
+#
+# Driver specific image creation options for Vitastor.
+#
+# @location: Where to store the new image file. This location cannot
+# point to a snapshot.
+#
+# @size: Size of the virtual disk in bytes
+##
+{ 'struct': 'BlockdevCreateOptionsVitastor',
+ 'data': { 'location': 'BlockdevOptionsVitastor',
+ 'size': 'size' } }
+
+##
# @BlockdevVmdkSubformat:
#
# Subformat options for VMDK images
@@ -5719,6 +5753,7 @@
'ssh': 'BlockdevCreateOptionsSsh',
'vdi': 'BlockdevCreateOptionsVdi',
'vhdx': 'BlockdevCreateOptionsVhdx',
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
'vmdk': 'BlockdevCreateOptionsVmdk',
'vpc': 'BlockdevCreateOptionsVpc'
} }
Index: pve-qemu-kvm-9.0.0/scripts/ci/org.centos/stream/8/x86_64/configure
===================================================================
--- pve-qemu-kvm-9.0.0.orig/scripts/ci/org.centos/stream/8/x86_64/configure
+++ pve-qemu-kvm-9.0.0/scripts/ci/org.centos/stream/8/x86_64/configure
@@ -30,7 +30,7 @@
--with-suffix="qemu-kvm" \
--firmwarepath=/usr/share/qemu-firmware \
--target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
--audio-drv-list="" \
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
--with-coroutine=ucontext \
@@ -176,6 +176,7 @@
--enable-opengl \
--enable-pie \
--enable-rbd \
+--enable-vitastor \
--enable-rdma \
--enable-seccomp \
--enable-snappy \
Index: pve-qemu-kvm-9.0.0/scripts/meson-buildoptions.sh
===================================================================
--- pve-qemu-kvm-9.0.0.orig/scripts/meson-buildoptions.sh
+++ pve-qemu-kvm-9.0.0/scripts/meson-buildoptions.sh
@@ -168,6 +168,7 @@ meson_options_help() {
printf "%s\n" ' qed qed image format support'
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
printf "%s\n" ' rbd Ceph block device driver'
+ printf "%s\n" ' vitastor Vitastor block device driver'
printf "%s\n" ' rdma Enable RDMA-based migration'
printf "%s\n" ' replication replication support'
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
@@ -445,6 +446,8 @@ _meson_option_parse() {
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
--enable-rbd) printf "%s" -Drbd=enabled ;;
--disable-rbd) printf "%s" -Drbd=disabled ;;
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
--enable-rdma) printf "%s" -Drdma=enabled ;;
--disable-rdma) printf "%s" -Drdma=disabled ;;
--enable-relocatable) printf "%s" -Drelocatable=true ;;

View File

@ -176,7 +176,7 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
exit(1); exit(1);
} }
if (!local_ips.size()) if (!local_ips.size())
local_ips = getifaddr_list(std::vector<std::string>(), true); local_ips = getifaddr_list();
std::string check_addr; std::string check_addr;
int pos = addr.find('/'); int pos = addr.find('/');
int pos2 = addr.find(':'); int pos2 = addr.find(':');

View File

@ -121,7 +121,7 @@ void osd_messenger_t::init()
if (use_rdma) if (use_rdma)
{ {
rdma_context = msgr_rdma_context_t::create( rdma_context = msgr_rdma_context_t::create(
osd_networks, rdma_device != "" ? rdma_device.c_str() : NULL, rdma_device != "" ? rdma_device.c_str() : NULL,
rdma_port_num, rdma_gid_index, rdma_mtu, rdma_odp, log_level rdma_port_num, rdma_gid_index, rdma_mtu, rdma_odp, log_level
); );
if (!rdma_context) if (!rdma_context)
@ -266,8 +266,7 @@ void osd_messenger_t::parse_config(const json11::Json & config)
this->rdma_port_num = (uint8_t)config["rdma_port_num"].uint64_value(); this->rdma_port_num = (uint8_t)config["rdma_port_num"].uint64_value();
if (!this->rdma_port_num) if (!this->rdma_port_num)
this->rdma_port_num = 1; this->rdma_port_num = 1;
if (!config["rdma_gid_index"].is_null()) this->rdma_gid_index = (uint8_t)config["rdma_gid_index"].uint64_value();
this->rdma_gid_index = (uint8_t)config["rdma_gid_index"].uint64_value();
this->rdma_mtu = (uint32_t)config["rdma_mtu"].uint64_value(); this->rdma_mtu = (uint32_t)config["rdma_mtu"].uint64_value();
this->rdma_max_sge = config["rdma_max_sge"].uint64_value(); this->rdma_max_sge = config["rdma_max_sge"].uint64_value();
if (!this->rdma_max_sge) if (!this->rdma_max_sge)
@ -282,15 +281,6 @@ void osd_messenger_t::parse_config(const json11::Json & config)
if (!this->rdma_max_msg || this->rdma_max_msg > 128*1024*1024) if (!this->rdma_max_msg || this->rdma_max_msg > 128*1024*1024)
this->rdma_max_msg = 129*1024; this->rdma_max_msg = 129*1024;
this->rdma_odp = config["rdma_odp"].bool_value(); this->rdma_odp = config["rdma_odp"].bool_value();
std::vector<std::string> mask;
if (config["bind_address"].is_string())
mask.push_back(config["bind_address"].string_value());
else if (config["osd_network"].is_string())
mask.push_back(config["osd_network"].string_value());
else
for (auto v: config["osd_network"].array_items())
mask.push_back(v.string_value());
this->osd_networks = mask;
#endif #endif
if (!osd_num) if (!osd_num)
this->iothread_count = (uint32_t)config["client_iothread_count"].uint64_value(); this->iothread_count = (uint32_t)config["client_iothread_count"].uint64_value();

View File

@ -165,9 +165,8 @@ protected:
#ifdef WITH_RDMA #ifdef WITH_RDMA
bool use_rdma = true; bool use_rdma = true;
std::vector<std::string> osd_networks;
std::string rdma_device; std::string rdma_device;
uint64_t rdma_port_num = 1, rdma_gid_index = -1, rdma_mtu = 0; uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
msgr_rdma_context_t *rdma_context = NULL; msgr_rdma_context_t *rdma_context = NULL;
uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 0; uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 0;
uint64_t rdma_max_msg = 0; uint64_t rdma_max_msg = 0;

View File

@ -3,7 +3,6 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "addr_util.h"
#include "msgr_rdma.h" #include "msgr_rdma.h"
#include "messenger.h" #include "messenger.h"
@ -70,126 +69,7 @@ msgr_rdma_connection_t::~msgr_rdma_connection_t()
send_out_size = 0; send_out_size = 0;
} }
static bool is_ipv4_gid(ibv_gid_entry *gidx) msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level)
{
return (((uint64_t*)gidx->gid.raw)[0] == 0 &&
((uint32_t*)gidx->gid.raw)[2] == 0xffff0000);
}
// Decides whether a GID table entry can be used for one of the given networks.
// Entries that are neither RoCEv1 nor RoCEv2, and entries whose 16 raw bytes are all zero,
// are rejected. A GID recognised by is_ipv4_gid() is compared only with AF_INET networks,
// using its last 4 raw bytes as the address; any other GID is compared only with AF_INET6
// networks, using all 16 raw bytes.
// networks/nnet - array of networks to test against and its length
static bool match_gid(ibv_gid_entry *gidx, addr_mask_t *networks, int nnet)
{
    const bool is_roce = (gidx->gid_type == IBV_GID_TYPE_ROCE_V1 || gidx->gid_type == IBV_GID_TYPE_ROCE_V2);
    const bool is_empty = (((uint64_t*)gidx->gid.raw)[0] == 0 && ((uint64_t*)gidx->gid.raw)[1] == 0);
    if (!is_roce || is_empty)
    {
        return false;
    }
    const bool is_v4 = is_ipv4_gid(gidx);
    for (int n = 0; n < nnet; n++)
    {
        const addr_mask_t & net = networks[n];
        if (is_v4)
        {
            if (net.family == AF_INET && cidr_match(*(in_addr*)(gidx->gid.raw+12), net.ipv4, net.bits))
                return true;
        }
        else if (net.family == AF_INET6 && cidr6_match(*(in6_addr*)gidx->gid.raw, net.ipv6, net.bits))
        {
            return true;
        }
    }
    return false;
}
// Outcome of a device/port/GID search. All indexes stay at -1 (and rocev2 at false)
// until a match is stored.
struct matched_dev
{
    // Index of the device in the scanned device list
    int dev{-1};
    // Port number on that device
    int port{-1};
    // GID table index on that port
    int gid{-1};
    // Whether the stored GID has the RoCEv2 type
    bool rocev2{false};
};
// Reports the automatically selected RDMA device, port and GID on stderr.
// dev       - selected device; only its name is printed
// ib_port   - selected port number
// gid_index - selected GID table index
// gidx      - the GID entry: any type other than RoCEv2 is printed as "ROCEv1"; the address
//             is printed as IPv4 (taken from the last 4 raw bytes) when the first 8 raw bytes
//             are zero and the next 32-bit word equals 0xffff0000, otherwise as IPv6
static void log_rdma_dev_port_gid(ibv_device *dev, int ib_port, int gid_index, ibv_gid_entry & gidx)
{
    bool is4 = ((uint64_t*)gidx.gid.raw)[0] == 0 && ((uint32_t*)gidx.gid.raw)[2] == 0xffff0000;
    char buf[256];
    if (!inet_ntop(is4 ? AF_INET : AF_INET6, is4 ? gidx.gid.raw+12 : gidx.gid.raw, buf, sizeof(buf)))
    {
        // inet_ntop() failed: make sure fprintf() never reads an unfilled buffer
        buf[0] = '?';
        buf[1] = 0;
    }
    fprintf(
        stderr, "Auto-selected RDMA device %s port %d GID %d - ROCEv%d IPv%d %s\n",
        ibv_get_device_name(dev), ib_port, gid_index,
        gidx.gid_type == IBV_GID_TYPE_ROCE_V2 ? 2 : 1, is4 ? 4 : 6, buf
    );
}
// Searches the NULL-terminated device list for a device/port/GID accepted by match_gid()
// for the given networks.
// Every match overwrites the previously stored one until a RoCEv2 match is stored; after
// that the result no longer changes and no further devices are opened.
// A device that cannot be opened is reported and skipped. A device whose attributes, port
// info or GID entry cannot be queried is abandoned with a message and scanning moves on
// to the next one.
// Returns the stored match, or a matched_dev with dev == -1 when nothing matched.
// When a match exists and log_level > 0, it is also printed to stderr.
static matched_dev match_device(ibv_device **dev_list, addr_mask_t *networks, int nnet, int log_level)
{
    matched_dev best;
    ibv_device_attr attr;
    ibv_port_attr portinfo;
    // Zero-initialised so it never holds indeterminate data; only read when best.dev >= 0
    ibv_gid_entry best_gidx = {};
    int res;
    for (int i = 0; dev_list[i]; ++i)
    {
        auto dev = dev_list[i];
        ibv_context *context = ibv_open_device(dev);
        if (!context)
        {
            // ibv_open_device() returns NULL on failure; querying or closing a NULL context would crash
            fprintf(stderr, "Couldn't open RDMA device %s\n", ibv_get_device_name(dev));
            continue;
        }
        if ((res = ibv_query_device(context, &attr)) != 0)
        {
            fprintf(stderr, "Couldn't query RDMA device %s for its features: %s\n", ibv_get_device_name(dev_list[i]), strerror(res));
            goto cleanup;
        }
        for (int j = 1; j <= attr.phys_port_cnt; j++)
        {
            // Try to find a port with matching address
            if ((res = ibv_query_port(context, j, &portinfo)) != 0)
            {
                fprintf(stderr, "Couldn't get RDMA device %s port %d info: %s\n", ibv_get_device_name(dev), j, strerror(res));
                goto cleanup;
            }
            for (int k = 0; k < portinfo.gid_tbl_len; k++)
            {
                ibv_gid_entry gidx;
                if ((res = ibv_query_gid_ex(context, j, k, &gidx, 0)) != 0)
                {
                    if (res != ENODATA)
                    {
                        fprintf(stderr, "Couldn't read RDMA device %s GID index %d: %s\n", ibv_get_device_name(dev), k, strerror(res));
                        goto cleanup;
                    }
                    else
                    {
                        // ENODATA ends the scan of this port's GID table
                        // NOTE(review): presumably entries after the first ENODATA are never populated - confirm
                        break;
                    }
                }
                if (match_gid(&gidx, networks, nnet))
                {
                    // Prefer RoCEv2
                    if (!best.rocev2)
                    {
                        best.dev = i;
                        best.port = j;
                        best.gid = k;
                        best.rocev2 = (gidx.gid_type == IBV_GID_TYPE_ROCE_V2);
                        best_gidx = gidx;
                    }
                }
            }
        }
cleanup:
        ibv_close_device(context);
        if (best.rocev2)
        {
            break;
        }
    }
    if (best.dev >= 0 && log_level > 0)
    {
        log_rdma_dev_port_gid(dev_list[best.dev], best.port, best.gid, best_gidx);
    }
    return best;
}
msgr_rdma_context_t *msgr_rdma_context_t::create(std::vector<std::string> osd_networks, const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level)
{ {
int res; int res;
ibv_device **dev_list = NULL; ibv_device **dev_list = NULL;
@ -200,23 +80,28 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(std::vector<std::string> osd_ne
clock_gettime(CLOCK_REALTIME, &tv); clock_gettime(CLOCK_REALTIME, &tv);
srand48(tv.tv_sec*1000000000 + tv.tv_nsec); srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
dev_list = ibv_get_device_list(NULL); dev_list = ibv_get_device_list(NULL);
if (!dev_list || !*dev_list) if (!dev_list)
{ {
if (errno == -ENOSYS || errno == ENOSYS) if (errno == -ENOSYS || errno == ENOSYS)
{ {
if (log_level > 0) if (log_level > 0)
fprintf(stderr, "No RDMA devices found (RDMA device list returned ENOSYS)\n"); fprintf(stderr, "No RDMA devices found (RDMA device list returned ENOSYS)\n");
} }
else if (!*dev_list)
{
if (log_level > 0)
fprintf(stderr, "No RDMA devices found\n");
}
else else
fprintf(stderr, "Failed to get RDMA device list: %s\n", strerror(errno)); fprintf(stderr, "Failed to get RDMA device list: %s\n", strerror(errno));
goto cleanup; goto cleanup;
} }
if (ib_devname) if (!ib_devname)
{
ctx->dev = *dev_list;
if (!ctx->dev)
{
if (log_level > 0)
fprintf(stderr, "No RDMA devices found\n");
goto cleanup;
}
}
else
{ {
int i; int i;
for (i = 0; dev_list[i]; ++i) for (i = 0; dev_list[i]; ++i)
@ -229,31 +114,6 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(std::vector<std::string> osd_ne
goto cleanup; goto cleanup;
} }
} }
else if (osd_networks.size())
{
std::vector<addr_mask_t> nets;
for (auto & netstr: osd_networks)
{
nets.push_back(cidr_parse(netstr));
}
auto best = match_device(dev_list, nets.data(), nets.size(), log_level);
if (best.dev < 0)
{
if (log_level > 0)
fprintf(stderr, "RDMA device matching osd_network is not found, using first available device\n");
best.dev = 0;
}
else
{
ib_port = best.port;
gid_index = best.gid;
}
ctx->dev = dev_list[best.dev];
}
else
{
ctx->dev = *dev_list;
}
ctx->context = ibv_open_device(ctx->dev); ctx->context = ibv_open_device(ctx->dev);
if (!ctx->context) if (!ctx->context)
@ -263,6 +123,7 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(std::vector<std::string> osd_ne
} }
ctx->ib_port = ib_port; ctx->ib_port = ib_port;
ctx->gid_index = gid_index;
if ((res = ibv_query_port(ctx->context, ib_port, &ctx->portinfo)) != 0) if ((res = ibv_query_port(ctx->context, ib_port, &ctx->portinfo)) != 0)
{ {
fprintf(stderr, "Couldn't get RDMA device %s port %d info: %s\n", ibv_get_device_name(ctx->dev), ib_port, strerror(res)); fprintf(stderr, "Couldn't get RDMA device %s port %d info: %s\n", ibv_get_device_name(ctx->dev), ib_port, strerror(res));
@ -274,47 +135,10 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(std::vector<std::string> osd_ne
fprintf(stderr, "RDMA device %s must have local LID because it's not Ethernet, but LID is zero\n", ibv_get_device_name(ctx->dev)); fprintf(stderr, "RDMA device %s must have local LID because it's not Ethernet, but LID is zero\n", ibv_get_device_name(ctx->dev));
goto cleanup; goto cleanup;
} }
if (ibv_query_gid(ctx->context, ib_port, gid_index, &ctx->my_gid))
if (gid_index != -1)
{ {
ctx->gid_index = gid_index; fprintf(stderr, "Couldn't read RDMA device %s GID index %d\n", ibv_get_device_name(ctx->dev), gid_index);
if (ibv_query_gid_ex(ctx->context, ib_port, gid_index, &ctx->my_gid, 0)) goto cleanup;
{
fprintf(stderr, "Couldn't read RDMA device %s GID index %d\n", ibv_get_device_name(ctx->dev), gid_index);
goto cleanup;
}
}
else
{
// Auto-guess GID
for (int k = 0; k < ctx->portinfo.gid_tbl_len; k++)
{
ibv_gid_entry gidx;
if (ibv_query_gid_ex(ctx->context, ib_port, k, &gidx, 0) != 0)
{
fprintf(stderr, "Couldn't read RDMA device %s GID index %d\n", ibv_get_device_name(ctx->dev), k);
goto cleanup;
}
// Skip empty GID
if (((uint64_t*)gidx.gid.raw)[0] == 0 &&
((uint64_t*)gidx.gid.raw)[1] == 0)
{
continue;
}
// Prefer IPv4 RoCEv2 GID by default
if (gid_index == -1 ||
gidx.gid_type == IBV_GID_TYPE_ROCE_V2 &&
(ctx->my_gid.gid_type != IBV_GID_TYPE_ROCE_V2 || is_ipv4_gid(&gidx)))
{
gid_index = k;
ctx->my_gid = gidx;
}
}
ctx->gid_index = gid_index = (gid_index == -1 ? 0 : gid_index);
if (log_level > 0)
{
log_rdma_dev_port_gid(ctx->dev, ctx->ib_port, ctx->gid_index, ctx->my_gid);
}
} }
ctx->pd = ibv_alloc_pd(ctx->context); ctx->pd = ibv_alloc_pd(ctx->context);
@ -431,7 +255,7 @@ msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx,
} }
conn->addr.lid = ctx->my_lid; conn->addr.lid = ctx->my_lid;
conn->addr.gid = ctx->my_gid.gid; conn->addr.gid = ctx->my_gid;
conn->addr.qpn = conn->qp->qp_num; conn->addr.qpn = conn->qp->qp_num;
conn->addr.psn = lrand48() & 0xffffff; conn->addr.psn = lrand48() & 0xffffff;

View File

@ -31,12 +31,12 @@ struct msgr_rdma_context_t
uint8_t ib_port; uint8_t ib_port;
uint8_t gid_index; uint8_t gid_index;
uint16_t my_lid; uint16_t my_lid;
ibv_gid_entry my_gid; ibv_gid my_gid;
uint32_t mtu; uint32_t mtu;
int max_cqe = 0; int max_cqe = 0;
int used_max_cqe = 0; int used_max_cqe = 0;
static msgr_rdma_context_t *create(std::vector<std::string> osd_networks, const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level); static msgr_rdma_context_t *create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level);
~msgr_rdma_context_t(); ~msgr_rdma_context_t();
}; };

View File

@ -216,7 +216,7 @@ resume_1:
for (uint64_t osd_num: node.child_osds) for (uint64_t osd_num: node.child_osds)
{ {
auto & osd = placement_tree->osds.at(osd_num); auto & osd = placement_tree->osds.at(osd_num);
auto json_osd = json11::Json::object{ fmt_items.push_back(json11::Json::object{
{ "type", "osd" }, { "type", "osd" },
{ "name", osd.num }, { "name", osd.num },
{ "parent", node.name }, { "parent", node.name },
@ -230,16 +230,7 @@ resume_1:
{ "bitmap", (uint64_t)osd.bitmap_granularity }, { "bitmap", (uint64_t)osd.bitmap_granularity },
{ "commit", osd.immediate_commit == IMMEDIATE_NONE ? "none" : (osd.immediate_commit == IMMEDIATE_ALL ? "all" : "small") }, { "commit", osd.immediate_commit == IMMEDIATE_NONE ? "none" : (osd.immediate_commit == IMMEDIATE_ALL ? "all" : "small") },
{ "op_stats", osd_stats[osd_num]["op_stats"] }, { "op_stats", osd_stats[osd_num]["op_stats"] },
}; });
if (osd_stats[osd_num]["slow_ops_primary"].uint64_value() > 0)
{
json_osd["slow_ops_primary"] = osd_stats[osd_num]["slow_ops_primary"];
}
if (osd_stats[osd_num]["slow_ops_secondary"].uint64_value() > 0)
{
json_osd["slow_ops_secondary"] = osd_stats[osd_num]["slow_ops_secondary"];
}
fmt_items.push_back(json_osd);
} }
} }
result.data = fmt_items; result.data = fmt_items;

View File

@ -134,7 +134,6 @@ resume_2:
} }
int osd_count = 0, osd_up = 0; int osd_count = 0, osd_up = 0;
uint64_t total_raw = 0, free_raw = 0, free_down_raw = 0, down_raw = 0; uint64_t total_raw = 0, free_raw = 0, free_down_raw = 0, down_raw = 0;
std::vector<uint64_t> slow_op_primary_osds, slow_op_secondary_osds;
parent->iterate_kvs_1(osd_stats, "/osd/stats/", [&](uint64_t stat_osd_num, json11::Json value) parent->iterate_kvs_1(osd_stats, "/osd/stats/", [&](uint64_t stat_osd_num, json11::Json value)
{ {
osd_count++; osd_count++;
@ -154,14 +153,6 @@ resume_2:
if (peer_it != parent->cli->st_cli.peer_states.end()) if (peer_it != parent->cli->st_cli.peer_states.end())
{ {
osd_up++; osd_up++;
if (value["slow_ops_primary"].uint64_value() > 0)
{
slow_op_primary_osds.push_back(stat_osd_num);
}
if (value["slow_ops_secondary"].uint64_value() > 0)
{
slow_op_secondary_osds.push_back(stat_osd_num);
}
} }
else else
{ {
@ -225,10 +216,6 @@ resume_2:
{ "mon_master", mon_master }, { "mon_master", mon_master },
{ "osd_up", osd_up }, { "osd_up", osd_up },
{ "osd_count", osd_count }, { "osd_count", osd_count },
{ "osds_full", osds_full },
{ "osds_nearfull", osds_nearfull },
{ "osds_primary_slow_ops", slow_op_primary_osds },
{ "osds_secondary_slow_ops", slow_op_secondary_osds },
{ "total_raw", total_raw }, { "total_raw", total_raw },
{ "free_raw", free_raw }, { "free_raw", free_raw },
{ "down_raw", down_raw }, { "down_raw", down_raw },
@ -313,26 +300,6 @@ resume_2:
warning_str += " "+std::to_string(osds_nearfull)+ warning_str += " "+std::to_string(osds_nearfull)+
(osds_nearfull > 1 ? " osds are almost full\n" : " osd is almost full\n"); (osds_nearfull > 1 ? " osds are almost full\n" : " osd is almost full\n");
} }
if (slow_op_primary_osds.size() > 0)
{
warning_str += " "+std::to_string(slow_op_primary_osds.size());
warning_str += (slow_op_primary_osds.size() > 1 ? " osds have" : " osd has");
warning_str += " slow client ops: ";
for (int i = 0; i < slow_op_primary_osds.size(); i++)
{
warning_str += (i > 0 ? ", " : "")+std::to_string(slow_op_primary_osds[i])+"\n";
}
}
if (slow_op_secondary_osds.size() > 0)
{
warning_str += " "+std::to_string(slow_op_secondary_osds.size());
warning_str += (slow_op_secondary_osds.size() > 1 ? " osds have" : " osd has");
warning_str += " slow replication ops: ";
for (int i = 0; i < slow_op_secondary_osds.size(); i++)
{
warning_str += (i > 0 ? ", " : "")+std::to_string(slow_op_secondary_osds[i])+"\n";
}
}
if (warning_str != "") if (warning_str != "")
{ {
warning_str = "\n warning:\n"+warning_str; warning_str = "\n warning:\n"+warning_str;

View File

@ -535,12 +535,10 @@ void osd_t::print_stats()
void osd_t::print_slow() void osd_t::print_slow()
{ {
cur_slow_op_primary = 0; bool has_slow = false;
cur_slow_op_secondary = 0;
char alloc[1024]; char alloc[1024];
timespec now; timespec now;
clock_gettime(CLOCK_REALTIME, &now); clock_gettime(CLOCK_REALTIME, &now);
// FIXME: Also track slow local blockstore ops and recovery/flush/scrub ops
for (auto & kv: msgr.clients) for (auto & kv: msgr.clients)
{ {
for (auto op: kv.second->received_ops) for (auto op: kv.second->received_ops)
@ -610,7 +608,6 @@ void osd_t::print_slow()
op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK || op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK ||
op->req.hdr.opcode == OSD_OP_SEC_READ_BMP) op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
{ {
cur_slow_op_secondary++;
bufprintf(" state=%d", op->bs_op ? PRIV(op->bs_op)->op_state : -1); bufprintf(" state=%d", op->bs_op ? PRIV(op->bs_op)->op_state : -1);
int wait_for = op->bs_op ? PRIV(op->bs_op)->wait_for : 0; int wait_for = op->bs_op ? PRIV(op->bs_op)->wait_for : 0;
if (wait_for) if (wait_for)
@ -621,19 +618,15 @@ void osd_t::print_slow()
else if (op->req.hdr.opcode == OSD_OP_READ || op->req.hdr.opcode == OSD_OP_WRITE || else if (op->req.hdr.opcode == OSD_OP_READ || op->req.hdr.opcode == OSD_OP_WRITE ||
op->req.hdr.opcode == OSD_OP_SYNC || op->req.hdr.opcode == OSD_OP_DELETE) op->req.hdr.opcode == OSD_OP_SYNC || op->req.hdr.opcode == OSD_OP_DELETE)
{ {
cur_slow_op_primary++;
bufprintf(" state=%d", !op->op_data ? -1 : op->op_data->st); bufprintf(" state=%d", !op->op_data ? -1 : op->op_data->st);
} }
else
{
cur_slow_op_primary++;
}
#undef bufprintf #undef bufprintf
printf("%s\n", alloc); printf("%s\n", alloc);
has_slow = true;
} }
} }
} }
if ((cur_slow_op_primary+cur_slow_op_secondary) > 0 && bs) if (has_slow && bs)
{ {
bs->dump_diagnostics(); bs->dump_diagnostics();
} }

View File

@ -151,8 +151,6 @@ class osd_t
bool etcd_reporting_pg_state = false; bool etcd_reporting_pg_state = false;
bool etcd_reporting_stats = false; bool etcd_reporting_stats = false;
int print_stats_timer_id = -1, slow_log_timer_id = -1; int print_stats_timer_id = -1, slow_log_timer_id = -1;
uint64_t cur_slow_op_primary = 0;
uint64_t cur_slow_op_secondary = 0;
// peers and PGs // peers and PGs

View File

@ -201,14 +201,6 @@ json11::Json osd_t::get_statistics()
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none"); st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
st["host"] = self_state["host"]; st["host"] = self_state["host"];
st["version"] = VITASTOR_VERSION; st["version"] = VITASTOR_VERSION;
if (cur_slow_op_primary > 0)
{
st["slow_ops_primary"] = cur_slow_op_primary;
}
if (cur_slow_op_secondary > 0)
{
st["slow_ops_secondary"] = cur_slow_op_secondary;
}
json11::Json::object op_stats, subop_stats; json11::Json::object op_stats, subop_stats;
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++) for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
{ {

View File

@ -65,7 +65,7 @@ std::string addr_to_string(const sockaddr_storage &addr)
return std::string(peer_str)+":"+std::to_string(port); return std::string(peer_str)+":"+std::to_string(port);
} }
bool cidr_match(const in_addr &addr, const in_addr &net, uint8_t bits) static bool cidr_match(const in_addr &addr, const in_addr &net, uint8_t bits)
{ {
if (bits == 0) if (bits == 0)
{ {
@ -75,7 +75,7 @@ bool cidr_match(const in_addr &addr, const in_addr &net, uint8_t bits)
return !((addr.s_addr ^ net.s_addr) & htonl(0xFFFFFFFFu << (32 - bits))); return !((addr.s_addr ^ net.s_addr) & htonl(0xFFFFFFFFu << (32 - bits)));
} }
bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits) static bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits)
{ {
const uint32_t *a = address.s6_addr32; const uint32_t *a = address.s6_addr32;
const uint32_t *n = network.s6_addr32; const uint32_t *n = network.s6_addr32;
@ -93,49 +93,47 @@ bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits)
return true; return true;
} }
addr_mask_t cidr_parse(std::string mask) struct addr_mask_t
{ {
unsigned bits = 255; sa_family_t family;
int p = mask.find('/');
if (p != std::string::npos)
{
char null_byte = 0;
if (sscanf(mask.c_str()+p+1, "%u%c", &bits, &null_byte) != 1 || bits > 128)
throw std::runtime_error("Invalid IP address mask: " + mask);
mask = mask.substr(0, p);
}
in_addr ipv4; in_addr ipv4;
in6_addr ipv6; in6_addr ipv6;
if (inet_pton(AF_INET, mask.c_str(), &ipv4) == 1) uint8_t bits;
{ };
if (bits == 255)
bits = 32;
if (bits > 32)
throw std::runtime_error("Invalid IP address mask: " + mask);
return (addr_mask_t){ .family = AF_INET, .ipv4 = ipv4, .bits = (uint8_t)(bits ? bits : 32) };
}
else if (inet_pton(AF_INET6, mask.c_str(), &ipv6) == 1)
{
if (bits == 255)
bits = 128;
return (addr_mask_t){ .family = AF_INET6, .ipv6 = ipv6, .bits = (uint8_t)bits };
}
else
{
throw std::runtime_error("Invalid IP address mask: " + mask);
}
}
std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg, bool include_v6) std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg, bool include_v6)
{ {
std::vector<addr_mask_t> masks; std::vector<addr_mask_t> masks;
for (auto mask: mask_cfg) for (auto mask: mask_cfg)
{ {
masks.push_back(cidr_parse(mask)); unsigned bits = 0;
if (masks[masks.size()-1].family == AF_INET6) int p = mask.find('/');
if (p != std::string::npos)
{ {
// Auto-enable IPv6 addresses char null_byte = 0;
include_v6 = true; if (sscanf(mask.c_str()+p+1, "%u%c", &bits, &null_byte) != 1 || bits > 128)
{
throw std::runtime_error((include_v6 ? "Invalid IPv4 address mask: " : "Invalid IP address mask: ") + mask);
}
mask = mask.substr(0, p);
}
in_addr ipv4;
in6_addr ipv6;
if (inet_pton(AF_INET, mask.c_str(), &ipv4) == 1)
{
if (bits > 32)
{
throw std::runtime_error((include_v6 ? "Invalid IPv4 address mask: " : "Invalid IP address mask: ") + mask);
}
masks.push_back((addr_mask_t){ .family = AF_INET, .ipv4 = ipv4, .bits = (uint8_t)bits });
}
else if (include_v6 && inet_pton(AF_INET6, mask.c_str(), &ipv6) == 1)
{
masks.push_back((addr_mask_t){ .family = AF_INET6, .ipv6 = ipv6, .bits = (uint8_t)bits });
}
else
{
throw std::runtime_error((include_v6 ? "Invalid IPv4 address mask: " : "Invalid IP address mask: ") + mask);
} }
} }
std::set<std::string> addresses; std::set<std::string> addresses;

View File

@ -1,22 +1,10 @@
#pragma once #pragma once
#include <netinet/in.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <string> #include <string>
#include <vector> #include <vector>
// An IP network: an address plus a prefix length.
// Kept as a plain aggregate because other code builds it with designated initializers
// (.family, then .ipv4 or .ipv6, then .bits), so the member order must not change.
struct addr_mask_t
{
// AF_INET or AF_INET6; tells which of ipv4/ipv6 below is filled in
sa_family_t family;
// Network address when family is AF_INET
in_addr ipv4;
// Network address when family is AF_INET6
in6_addr ipv6;
// Prefix length in bits
uint8_t bits;
};
bool string_to_addr(std::string str, bool parse_port, int default_port, struct sockaddr_storage *addr); bool string_to_addr(std::string str, bool parse_port, int default_port, struct sockaddr_storage *addr);
std::string addr_to_string(const sockaddr_storage &addr); std::string addr_to_string(const sockaddr_storage &addr);
addr_mask_t cidr_parse(std::string mask);
bool cidr_match(const in_addr &address, const in_addr &network, uint8_t bits);
bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits);
std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg = std::vector<std::string>(), bool include_v6 = false); std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg = std::vector<std::string>(), bool include_v6 = false);
int create_and_bind_socket(std::string bind_address, int bind_port, int listen_backlog, int *listening_port); int create_and_bind_socket(std::string bind_address, int bind_port, int listen_backlog, int *listening_port);