Compare commits
4 Commits
5a8f80159f
...
1c78df4fba
Author | SHA1 | Date |
---|---|---|
Vitaliy Filippov | 1c78df4fba | |
Vitaliy Filippov | 7de38250ad | |
Vitaliy Filippov | 9c59d30e83 | |
Vitaliy Filippov | 5db02cdf6e |
|
@ -0,0 +1,193 @@
|
||||||
|
Index: pve-qemu-kvm-9.0.0/block/meson.build
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-9.0.0.orig/block/meson.build
|
||||||
|
+++ pve-qemu-kvm-9.0.0/block/meson.build
|
||||||
|
@@ -126,6 +126,7 @@ foreach m : [
|
||||||
|
[libnfs, 'nfs', files('nfs.c')],
|
||||||
|
[libssh, 'ssh', files('ssh.c')],
|
||||||
|
[rbd, 'rbd', files('rbd.c')],
|
||||||
|
+ [vitastor, 'vitastor', files('vitastor.c')],
|
||||||
|
]
|
||||||
|
if m[0].found()
|
||||||
|
module_ss = ss.source_set()
|
||||||
|
Index: pve-qemu-kvm-9.0.0/meson.build
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-9.0.0.orig/meson.build
|
||||||
|
+++ pve-qemu-kvm-9.0.0/meson.build
|
||||||
|
@@ -1452,6 +1452,26 @@ if not get_option('rbd').auto() or have_
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
+vitastor = not_found
|
||||||
|
+if not get_option('vitastor').auto() or have_block
|
||||||
|
+ libvitastor_client = cc.find_library('vitastor_client', has_headers: ['vitastor_c.h'],
|
||||||
|
+ required: get_option('vitastor'))
|
||||||
|
+ if libvitastor_client.found()
|
||||||
|
+ if cc.links('''
|
||||||
|
+ #include <vitastor_c.h>
|
||||||
|
+ int main(void) {
|
||||||
|
+ vitastor_c_create_qemu(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
|
+ return 0;
|
||||||
|
+ }''', dependencies: libvitastor_client)
|
||||||
|
+ vitastor = declare_dependency(dependencies: libvitastor_client)
|
||||||
|
+ elif get_option('vitastor').enabled()
|
||||||
|
+ error('could not link libvitastor_client')
|
||||||
|
+ else
|
||||||
|
+ warning('could not link libvitastor_client, disabling')
|
||||||
|
+ endif
|
||||||
|
+ endif
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
glusterfs = not_found
|
||||||
|
glusterfs_ftruncate_has_stat = false
|
||||||
|
glusterfs_iocb_has_stat = false
|
||||||
|
@@ -2254,6 +2274,7 @@ endif
|
||||||
|
config_host_data.set('CONFIG_OPENGL', opengl.found())
|
||||||
|
config_host_data.set('CONFIG_PLUGIN', get_option('plugins'))
|
||||||
|
config_host_data.set('CONFIG_RBD', rbd.found())
|
||||||
|
+config_host_data.set('CONFIG_VITASTOR', vitastor.found())
|
||||||
|
config_host_data.set('CONFIG_RDMA', rdma.found())
|
||||||
|
config_host_data.set('CONFIG_RELOCATABLE', get_option('relocatable'))
|
||||||
|
config_host_data.set('CONFIG_SAFESTACK', get_option('safe_stack'))
|
||||||
|
@@ -4454,6 +4475,7 @@ summary_info += {'fdt support': fd
|
||||||
|
summary_info += {'libcap-ng support': libcap_ng}
|
||||||
|
summary_info += {'bpf support': libbpf}
|
||||||
|
summary_info += {'rbd support': rbd}
|
||||||
|
+summary_info += {'vitastor support': vitastor}
|
||||||
|
summary_info += {'smartcard support': cacard}
|
||||||
|
summary_info += {'U2F support': u2f}
|
||||||
|
summary_info += {'libusb': libusb}
|
||||||
|
Index: pve-qemu-kvm-9.0.0/meson_options.txt
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-9.0.0.orig/meson_options.txt
|
||||||
|
+++ pve-qemu-kvm-9.0.0/meson_options.txt
|
||||||
|
@@ -194,6 +194,8 @@ option('lzo', type : 'feature', value :
|
||||||
|
description: 'lzo compression support')
|
||||||
|
option('rbd', type : 'feature', value : 'auto',
|
||||||
|
description: 'Ceph block device driver')
|
||||||
|
+option('vitastor', type : 'feature', value : 'auto',
|
||||||
|
+ description: 'Vitastor block device driver')
|
||||||
|
option('opengl', type : 'feature', value : 'auto',
|
||||||
|
description: 'OpenGL support')
|
||||||
|
option('rdma', type : 'feature', value : 'auto',
|
||||||
|
Index: pve-qemu-kvm-9.0.0/qapi/block-core.json
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-9.0.0.orig/qapi/block-core.json
|
||||||
|
+++ pve-qemu-kvm-9.0.0/qapi/block-core.json
|
||||||
|
@@ -3481,7 +3481,7 @@
|
||||||
|
'raw', 'rbd',
|
||||||
|
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
|
||||||
|
'pbs',
|
||||||
|
- 'ssh', 'throttle', 'vdi', 'vhdx',
|
||||||
|
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vitastor',
|
||||||
|
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
|
||||||
|
{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -4591,6 +4591,28 @@
|
||||||
|
'*server': ['InetSocketAddressBase'] } }
|
||||||
|
|
||||||
|
##
|
||||||
|
+# @BlockdevOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific block device options for vitastor
|
||||||
|
+#
|
||||||
|
+# @image: Image name
|
||||||
|
+# @inode: Inode number
|
||||||
|
+# @pool: Pool ID
|
||||||
|
+# @size: Desired image size in bytes
|
||||||
|
+# @config-path: Path to Vitastor configuration
|
||||||
|
+# @etcd-host: etcd connection address(es)
|
||||||
|
+# @etcd-prefix: etcd key/value prefix
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'data': { '*inode': 'uint64',
|
||||||
|
+ '*pool': 'uint64',
|
||||||
|
+ '*size': 'uint64',
|
||||||
|
+ '*image': 'str',
|
||||||
|
+ '*config-path': 'str',
|
||||||
|
+ '*etcd-host': 'str',
|
||||||
|
+ '*etcd-prefix': 'str' } }
|
||||||
|
+
|
||||||
|
+##
|
||||||
|
# @ReplicationMode:
|
||||||
|
#
|
||||||
|
# An enumeration of replication modes.
|
||||||
|
@@ -5053,6 +5075,7 @@
|
||||||
|
'throttle': 'BlockdevOptionsThrottle',
|
||||||
|
'vdi': 'BlockdevOptionsGenericFormat',
|
||||||
|
'vhdx': 'BlockdevOptionsGenericFormat',
|
||||||
|
+ 'vitastor': 'BlockdevOptionsVitastor',
|
||||||
|
'virtio-blk-vfio-pci':
|
||||||
|
{ 'type': 'BlockdevOptionsVirtioBlkVfioPci',
|
||||||
|
'if': 'CONFIG_BLKIO' },
|
||||||
|
@@ -5498,6 +5521,20 @@
|
||||||
|
'*encrypt' : 'RbdEncryptionCreateOptions' } }
|
||||||
|
|
||||||
|
##
|
||||||
|
+# @BlockdevCreateOptionsVitastor:
|
||||||
|
+#
|
||||||
|
+# Driver specific image creation options for Vitastor.
|
||||||
|
+#
|
||||||
|
+# @location: Where to store the new image file. This location cannot
|
||||||
|
+# point to a snapshot.
|
||||||
|
+#
|
||||||
|
+# @size: Size of the virtual disk in bytes
|
||||||
|
+##
|
||||||
|
+{ 'struct': 'BlockdevCreateOptionsVitastor',
|
||||||
|
+ 'data': { 'location': 'BlockdevOptionsVitastor',
|
||||||
|
+ 'size': 'size' } }
|
||||||
|
+
|
||||||
|
+##
|
||||||
|
# @BlockdevVmdkSubformat:
|
||||||
|
#
|
||||||
|
# Subformat options for VMDK images
|
||||||
|
@@ -5719,6 +5753,7 @@
|
||||||
|
'ssh': 'BlockdevCreateOptionsSsh',
|
||||||
|
'vdi': 'BlockdevCreateOptionsVdi',
|
||||||
|
'vhdx': 'BlockdevCreateOptionsVhdx',
|
||||||
|
+ 'vitastor': 'BlockdevCreateOptionsVitastor',
|
||||||
|
'vmdk': 'BlockdevCreateOptionsVmdk',
|
||||||
|
'vpc': 'BlockdevCreateOptionsVpc'
|
||||||
|
} }
|
||||||
|
Index: pve-qemu-kvm-9.0.0/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-9.0.0.orig/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
+++ pve-qemu-kvm-9.0.0/scripts/ci/org.centos/stream/8/x86_64/configure
|
||||||
|
@@ -30,7 +30,7 @@
|
||||||
|
--with-suffix="qemu-kvm" \
|
||||||
|
--firmwarepath=/usr/share/qemu-firmware \
|
||||||
|
--target-list="x86_64-softmmu" \
|
||||||
|
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
+--block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,vitastor,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
|
||||||
|
--audio-drv-list="" \
|
||||||
|
--block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
|
||||||
|
--with-coroutine=ucontext \
|
||||||
|
@@ -176,6 +176,7 @@
|
||||||
|
--enable-opengl \
|
||||||
|
--enable-pie \
|
||||||
|
--enable-rbd \
|
||||||
|
+--enable-vitastor \
|
||||||
|
--enable-rdma \
|
||||||
|
--enable-seccomp \
|
||||||
|
--enable-snappy \
|
||||||
|
Index: pve-qemu-kvm-9.0.0/scripts/meson-buildoptions.sh
|
||||||
|
===================================================================
|
||||||
|
--- pve-qemu-kvm-9.0.0.orig/scripts/meson-buildoptions.sh
|
||||||
|
+++ pve-qemu-kvm-9.0.0/scripts/meson-buildoptions.sh
|
||||||
|
@@ -168,6 +168,7 @@ meson_options_help() {
|
||||||
|
printf "%s\n" ' qed qed image format support'
|
||||||
|
printf "%s\n" ' qga-vss build QGA VSS support (broken with MinGW)'
|
||||||
|
printf "%s\n" ' rbd Ceph block device driver'
|
||||||
|
+ printf "%s\n" ' vitastor Vitastor block device driver'
|
||||||
|
printf "%s\n" ' rdma Enable RDMA-based migration'
|
||||||
|
printf "%s\n" ' replication replication support'
|
||||||
|
printf "%s\n" ' rutabaga-gfx rutabaga_gfx support'
|
||||||
|
@@ -445,6 +446,8 @@ _meson_option_parse() {
|
||||||
|
--disable-qom-cast-debug) printf "%s" -Dqom_cast_debug=false ;;
|
||||||
|
--enable-rbd) printf "%s" -Drbd=enabled ;;
|
||||||
|
--disable-rbd) printf "%s" -Drbd=disabled ;;
|
||||||
|
+ --enable-vitastor) printf "%s" -Dvitastor=enabled ;;
|
||||||
|
+ --disable-vitastor) printf "%s" -Dvitastor=disabled ;;
|
||||||
|
--enable-rdma) printf "%s" -Drdma=enabled ;;
|
||||||
|
--disable-rdma) printf "%s" -Drdma=disabled ;;
|
||||||
|
--enable-relocatable) printf "%s" -Drelocatable=true ;;
|
|
@ -176,7 +176,7 @@ void etcd_state_client_t::add_etcd_url(std::string addr)
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if (!local_ips.size())
|
if (!local_ips.size())
|
||||||
local_ips = getifaddr_list();
|
local_ips = getifaddr_list(std::vector<std::string>(), true);
|
||||||
std::string check_addr;
|
std::string check_addr;
|
||||||
int pos = addr.find('/');
|
int pos = addr.find('/');
|
||||||
int pos2 = addr.find(':');
|
int pos2 = addr.find(':');
|
||||||
|
|
|
@ -121,7 +121,7 @@ void osd_messenger_t::init()
|
||||||
if (use_rdma)
|
if (use_rdma)
|
||||||
{
|
{
|
||||||
rdma_context = msgr_rdma_context_t::create(
|
rdma_context = msgr_rdma_context_t::create(
|
||||||
rdma_device != "" ? rdma_device.c_str() : NULL,
|
osd_networks, rdma_device != "" ? rdma_device.c_str() : NULL,
|
||||||
rdma_port_num, rdma_gid_index, rdma_mtu, rdma_odp, log_level
|
rdma_port_num, rdma_gid_index, rdma_mtu, rdma_odp, log_level
|
||||||
);
|
);
|
||||||
if (!rdma_context)
|
if (!rdma_context)
|
||||||
|
@ -266,6 +266,7 @@ void osd_messenger_t::parse_config(const json11::Json & config)
|
||||||
this->rdma_port_num = (uint8_t)config["rdma_port_num"].uint64_value();
|
this->rdma_port_num = (uint8_t)config["rdma_port_num"].uint64_value();
|
||||||
if (!this->rdma_port_num)
|
if (!this->rdma_port_num)
|
||||||
this->rdma_port_num = 1;
|
this->rdma_port_num = 1;
|
||||||
|
if (!config["rdma_gid_index"].is_null())
|
||||||
this->rdma_gid_index = (uint8_t)config["rdma_gid_index"].uint64_value();
|
this->rdma_gid_index = (uint8_t)config["rdma_gid_index"].uint64_value();
|
||||||
this->rdma_mtu = (uint32_t)config["rdma_mtu"].uint64_value();
|
this->rdma_mtu = (uint32_t)config["rdma_mtu"].uint64_value();
|
||||||
this->rdma_max_sge = config["rdma_max_sge"].uint64_value();
|
this->rdma_max_sge = config["rdma_max_sge"].uint64_value();
|
||||||
|
@ -281,6 +282,15 @@ void osd_messenger_t::parse_config(const json11::Json & config)
|
||||||
if (!this->rdma_max_msg || this->rdma_max_msg > 128*1024*1024)
|
if (!this->rdma_max_msg || this->rdma_max_msg > 128*1024*1024)
|
||||||
this->rdma_max_msg = 129*1024;
|
this->rdma_max_msg = 129*1024;
|
||||||
this->rdma_odp = config["rdma_odp"].bool_value();
|
this->rdma_odp = config["rdma_odp"].bool_value();
|
||||||
|
std::vector<std::string> mask;
|
||||||
|
if (config["bind_address"].is_string())
|
||||||
|
mask.push_back(config["bind_address"].string_value());
|
||||||
|
else if (config["osd_network"].is_string())
|
||||||
|
mask.push_back(config["osd_network"].string_value());
|
||||||
|
else
|
||||||
|
for (auto v: config["osd_network"].array_items())
|
||||||
|
mask.push_back(v.string_value());
|
||||||
|
this->osd_networks = mask;
|
||||||
#endif
|
#endif
|
||||||
if (!osd_num)
|
if (!osd_num)
|
||||||
this->iothread_count = (uint32_t)config["client_iothread_count"].uint64_value();
|
this->iothread_count = (uint32_t)config["client_iothread_count"].uint64_value();
|
||||||
|
|
|
@ -165,8 +165,9 @@ protected:
|
||||||
|
|
||||||
#ifdef WITH_RDMA
|
#ifdef WITH_RDMA
|
||||||
bool use_rdma = true;
|
bool use_rdma = true;
|
||||||
|
std::vector<std::string> osd_networks;
|
||||||
std::string rdma_device;
|
std::string rdma_device;
|
||||||
uint64_t rdma_port_num = 1, rdma_gid_index = 0, rdma_mtu = 0;
|
uint64_t rdma_port_num = 1, rdma_gid_index = -1, rdma_mtu = 0;
|
||||||
msgr_rdma_context_t *rdma_context = NULL;
|
msgr_rdma_context_t *rdma_context = NULL;
|
||||||
uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 0;
|
uint64_t rdma_max_sge = 0, rdma_max_send = 0, rdma_max_recv = 0;
|
||||||
uint64_t rdma_max_msg = 0;
|
uint64_t rdma_max_msg = 0;
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include "addr_util.h"
|
||||||
#include "msgr_rdma.h"
|
#include "msgr_rdma.h"
|
||||||
#include "messenger.h"
|
#include "messenger.h"
|
||||||
|
|
||||||
|
@ -69,7 +70,126 @@ msgr_rdma_connection_t::~msgr_rdma_connection_t()
|
||||||
send_out_size = 0;
|
send_out_size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level)
|
// Returns true when the GID holds an IPv4-mapped IPv6 address (::ffff:a.b.c.d),
// i.e. bytes 0..9 are zero and bytes 10..11 are 0xff.
// The bytes are compared one by one, so the result does not depend on the host
// byte order and the raw byte array is never read through wider pointer types.
static bool is_ipv4_gid(ibv_gid_entry *gidx)
{
    const uint8_t *raw = gidx->gid.raw;
    for (int i = 0; i < 10; i++)
    {
        if (raw[i] != 0)
        {
            return false;
        }
    }
    return raw[10] == 0xff && raw[11] == 0xff;
}
|
||||||
|
|
||||||
|
// Checks whether a GID table entry belongs to one of the <nnet> networks.
// Entries that are neither RoCEv1 nor RoCEv2, and entries whose GID is all
// zeroes, never match. IPv4-mapped GIDs are compared only against AF_INET
// networks (using the last 4 GID bytes), all other GIDs only against
// AF_INET6 networks (using the whole 16-byte GID).
static bool match_gid(ibv_gid_entry *gidx, addr_mask_t *networks, int nnet)
{
    const bool is_roce = (gidx->gid_type == IBV_GID_TYPE_ROCE_V1 ||
        gidx->gid_type == IBV_GID_TYPE_ROCE_V2);
    const uint64_t *halves = (uint64_t*)gidx->gid.raw;
    const bool is_empty = (halves[0] == 0 && halves[1] == 0);
    if (!is_roce || is_empty)
    {
        return false;
    }
    const bool v4 = is_ipv4_gid(gidx);
    for (int n = 0; n < nnet; n++)
    {
        addr_mask_t & net = networks[n];
        bool hit;
        if (v4)
        {
            hit = net.family == AF_INET &&
                cidr_match(*(in_addr*)(gidx->gid.raw+12), net.ipv4, net.bits);
        }
        else
        {
            hit = net.family == AF_INET6 &&
                cidr6_match(*(in6_addr*)gidx->gid.raw, net.ipv6, net.bits);
        }
        if (hit)
        {
            return true;
        }
    }
    return false;
}
|
||||||
|
|
||||||
|
// Result of searching the RDMA device list for a GID in one of the
// configured networks. All indexes stay at -1 until something matches.
struct matched_dev
{
    // Index in the device list, port number and GID table index of the match
    int dev = -1, port = -1, gid = -1;
    // true when the matched GID entry is of RoCEv2 type
    bool rocev2 = false;
};
|
||||||
|
|
||||||
|
// Prints the auto-selected RDMA device name, port, GID index, RoCE version
// (2 for RoCEv2 entries, 1 otherwise) and the GID formatted as an IPv4 or
// IPv6 address to stderr.
static void log_rdma_dev_port_gid(ibv_device *dev, int ib_port, int gid_index, ibv_gid_entry & gidx)
{
    // Use the shared IPv4-mapped check instead of a second inline copy of it
    const bool is4 = is_ipv4_gid(&gidx);
    // IPv4-mapped GIDs keep the IPv4 address in the last 4 bytes
    const void *addr = is4 ? gidx.gid.raw+12 : gidx.gid.raw;
    char buf[256] = "";
    if (!inet_ntop(is4 ? AF_INET : AF_INET6, addr, buf, sizeof(buf)))
    {
        // Never print unspecified buffer contents if the conversion fails
        buf[0] = 0;
    }
    fprintf(
        stderr, "Auto-selected RDMA device %s port %d GID %d - ROCEv%d IPv%d %s\n",
        ibv_get_device_name(dev), ib_port, gid_index,
        gidx.gid_type == IBV_GID_TYPE_ROCE_V2 ? 2 : 1, is4 ? 4 : 6, buf
    );
}
|
||||||
|
|
||||||
|
// Scans all ports and GID table entries of every device in the
// NULL-terminated <dev_list> and returns the first GID that matches one of
// the <nnet> networks, preferring a RoCEv2 entry: a non-RoCEv2 match is
// replaced by later matches until a RoCEv2 one is found, and the scan stops
// after the device where a RoCEv2 match was found.
// Returns dev == -1 when nothing matched. The choice is logged to stderr
// when log_level > 0.
// A device that can't be opened or queried is reported and skipped.
static matched_dev match_device(ibv_device **dev_list, addr_mask_t *networks, int nnet, int log_level)
{
    matched_dev best;
    ibv_device_attr attr;
    ibv_port_attr portinfo;
    ibv_gid_entry best_gidx = {};
    int res;
    for (int i = 0; dev_list[i]; ++i)
    {
        auto dev = dev_list[i];
        ibv_context *context = ibv_open_device(dev);
        if (!context)
        {
            // ibv_open_device() returns NULL on failure; such a context
            // must not be passed to ibv_query_device()/ibv_close_device()
            fprintf(stderr, "Couldn't open RDMA device %s\n", ibv_get_device_name(dev));
            continue;
        }
        if ((res = ibv_query_device(context, &attr)) != 0)
        {
            fprintf(stderr, "Couldn't query RDMA device %s for its features: %s\n", ibv_get_device_name(dev_list[i]), strerror(res));
            goto cleanup;
        }
        for (int j = 1; j <= attr.phys_port_cnt; j++)
        {
            // Try to find a port with matching address
            if ((res = ibv_query_port(context, j, &portinfo)) != 0)
            {
                fprintf(stderr, "Couldn't get RDMA device %s port %d info: %s\n", ibv_get_device_name(dev), j, strerror(res));
                goto cleanup;
            }
            for (int k = 0; k < portinfo.gid_tbl_len; k++)
            {
                ibv_gid_entry gidx;
                if ((res = ibv_query_gid_ex(context, j, k, &gidx, 0)) != 0)
                {
                    if (res != ENODATA)
                    {
                        fprintf(stderr, "Couldn't read RDMA device %s GID index %d: %s\n", ibv_get_device_name(dev), k, strerror(res));
                        goto cleanup;
                    }
                    // ENODATA only means that this index is a hole in the
                    // GID table; valid entries may still follow it
                    continue;
                }
                // Prefer RoCEv2: keep replacing the match until it is RoCEv2
                if (!best.rocev2 && match_gid(&gidx, networks, nnet))
                {
                    best.dev = i;
                    best.port = j;
                    best.gid = k;
                    best.rocev2 = (gidx.gid_type == IBV_GID_TYPE_ROCE_V2);
                    best_gidx = gidx;
                }
            }
        }
cleanup:
        ibv_close_device(context);
        if (best.rocev2)
        {
            break;
        }
    }
    if (best.dev >= 0 && log_level > 0)
    {
        log_rdma_dev_port_gid(dev_list[best.dev], best.port, best.gid, best_gidx);
    }
    return best;
}
|
||||||
|
|
||||||
|
msgr_rdma_context_t *msgr_rdma_context_t::create(std::vector<std::string> osd_networks, const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level)
|
||||||
{
|
{
|
||||||
int res;
|
int res;
|
||||||
ibv_device **dev_list = NULL;
|
ibv_device **dev_list = NULL;
|
||||||
|
@ -80,28 +200,23 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t
|
||||||
clock_gettime(CLOCK_REALTIME, &tv);
|
clock_gettime(CLOCK_REALTIME, &tv);
|
||||||
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
srand48(tv.tv_sec*1000000000 + tv.tv_nsec);
|
||||||
dev_list = ibv_get_device_list(NULL);
|
dev_list = ibv_get_device_list(NULL);
|
||||||
if (!dev_list)
|
if (!dev_list || !*dev_list)
|
||||||
{
|
{
|
||||||
if (errno == -ENOSYS || errno == ENOSYS)
|
if (errno == -ENOSYS || errno == ENOSYS)
|
||||||
{
|
{
|
||||||
if (log_level > 0)
|
if (log_level > 0)
|
||||||
fprintf(stderr, "No RDMA devices found (RDMA device list returned ENOSYS)\n");
|
fprintf(stderr, "No RDMA devices found (RDMA device list returned ENOSYS)\n");
|
||||||
}
|
}
|
||||||
|
else if (!*dev_list)
|
||||||
|
{
|
||||||
|
if (log_level > 0)
|
||||||
|
fprintf(stderr, "No RDMA devices found\n");
|
||||||
|
}
|
||||||
else
|
else
|
||||||
fprintf(stderr, "Failed to get RDMA device list: %s\n", strerror(errno));
|
fprintf(stderr, "Failed to get RDMA device list: %s\n", strerror(errno));
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (!ib_devname)
|
if (ib_devname)
|
||||||
{
|
|
||||||
ctx->dev = *dev_list;
|
|
||||||
if (!ctx->dev)
|
|
||||||
{
|
|
||||||
if (log_level > 0)
|
|
||||||
fprintf(stderr, "No RDMA devices found\n");
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; dev_list[i]; ++i)
|
for (i = 0; dev_list[i]; ++i)
|
||||||
|
@ -114,6 +229,31 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (osd_networks.size())
|
||||||
|
{
|
||||||
|
std::vector<addr_mask_t> nets;
|
||||||
|
for (auto & netstr: osd_networks)
|
||||||
|
{
|
||||||
|
nets.push_back(cidr_parse(netstr));
|
||||||
|
}
|
||||||
|
auto best = match_device(dev_list, nets.data(), nets.size(), log_level);
|
||||||
|
if (best.dev < 0)
|
||||||
|
{
|
||||||
|
if (log_level > 0)
|
||||||
|
fprintf(stderr, "RDMA device matching osd_network is not found, using first available device\n");
|
||||||
|
best.dev = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ib_port = best.port;
|
||||||
|
gid_index = best.gid;
|
||||||
|
}
|
||||||
|
ctx->dev = dev_list[best.dev];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ctx->dev = *dev_list;
|
||||||
|
}
|
||||||
|
|
||||||
ctx->context = ibv_open_device(ctx->dev);
|
ctx->context = ibv_open_device(ctx->dev);
|
||||||
if (!ctx->context)
|
if (!ctx->context)
|
||||||
|
@ -123,7 +263,6 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->ib_port = ib_port;
|
ctx->ib_port = ib_port;
|
||||||
ctx->gid_index = gid_index;
|
|
||||||
if ((res = ibv_query_port(ctx->context, ib_port, &ctx->portinfo)) != 0)
|
if ((res = ibv_query_port(ctx->context, ib_port, &ctx->portinfo)) != 0)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Couldn't get RDMA device %s port %d info: %s\n", ibv_get_device_name(ctx->dev), ib_port, strerror(res));
|
fprintf(stderr, "Couldn't get RDMA device %s port %d info: %s\n", ibv_get_device_name(ctx->dev), ib_port, strerror(res));
|
||||||
|
@ -135,11 +274,48 @@ msgr_rdma_context_t *msgr_rdma_context_t::create(const char *ib_devname, uint8_t
|
||||||
fprintf(stderr, "RDMA device %s must have local LID because it's not Ethernet, but LID is zero\n", ibv_get_device_name(ctx->dev));
|
fprintf(stderr, "RDMA device %s must have local LID because it's not Ethernet, but LID is zero\n", ibv_get_device_name(ctx->dev));
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (ibv_query_gid(ctx->context, ib_port, gid_index, &ctx->my_gid))
|
|
||||||
|
if (gid_index != -1)
|
||||||
|
{
|
||||||
|
ctx->gid_index = gid_index;
|
||||||
|
if (ibv_query_gid_ex(ctx->context, ib_port, gid_index, &ctx->my_gid, 0))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Couldn't read RDMA device %s GID index %d\n", ibv_get_device_name(ctx->dev), gid_index);
|
fprintf(stderr, "Couldn't read RDMA device %s GID index %d\n", ibv_get_device_name(ctx->dev), gid_index);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Auto-guess GID
|
||||||
|
for (int k = 0; k < ctx->portinfo.gid_tbl_len; k++)
|
||||||
|
{
|
||||||
|
ibv_gid_entry gidx;
|
||||||
|
if (ibv_query_gid_ex(ctx->context, ib_port, k, &gidx, 0) != 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Couldn't read RDMA device %s GID index %d\n", ibv_get_device_name(ctx->dev), k);
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
// Skip empty GID
|
||||||
|
if (((uint64_t*)gidx.gid.raw)[0] == 0 &&
|
||||||
|
((uint64_t*)gidx.gid.raw)[1] == 0)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Prefer IPv4 RoCEv2 GID by default
|
||||||
|
if (gid_index == -1 ||
|
||||||
|
gidx.gid_type == IBV_GID_TYPE_ROCE_V2 &&
|
||||||
|
(ctx->my_gid.gid_type != IBV_GID_TYPE_ROCE_V2 || is_ipv4_gid(&gidx)))
|
||||||
|
{
|
||||||
|
gid_index = k;
|
||||||
|
ctx->my_gid = gidx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ctx->gid_index = gid_index = (gid_index == -1 ? 0 : gid_index);
|
||||||
|
if (log_level > 0)
|
||||||
|
{
|
||||||
|
log_rdma_dev_port_gid(ctx->dev, ctx->ib_port, ctx->gid_index, ctx->my_gid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ctx->pd = ibv_alloc_pd(ctx->context);
|
ctx->pd = ibv_alloc_pd(ctx->context);
|
||||||
if (!ctx->pd)
|
if (!ctx->pd)
|
||||||
|
@ -255,7 +431,7 @@ msgr_rdma_connection_t *msgr_rdma_connection_t::create(msgr_rdma_context_t *ctx,
|
||||||
}
|
}
|
||||||
|
|
||||||
conn->addr.lid = ctx->my_lid;
|
conn->addr.lid = ctx->my_lid;
|
||||||
conn->addr.gid = ctx->my_gid;
|
conn->addr.gid = ctx->my_gid.gid;
|
||||||
conn->addr.qpn = conn->qp->qp_num;
|
conn->addr.qpn = conn->qp->qp_num;
|
||||||
conn->addr.psn = lrand48() & 0xffffff;
|
conn->addr.psn = lrand48() & 0xffffff;
|
||||||
|
|
||||||
|
|
|
@ -31,12 +31,12 @@ struct msgr_rdma_context_t
|
||||||
uint8_t ib_port;
|
uint8_t ib_port;
|
||||||
uint8_t gid_index;
|
uint8_t gid_index;
|
||||||
uint16_t my_lid;
|
uint16_t my_lid;
|
||||||
ibv_gid my_gid;
|
ibv_gid_entry my_gid;
|
||||||
uint32_t mtu;
|
uint32_t mtu;
|
||||||
int max_cqe = 0;
|
int max_cqe = 0;
|
||||||
int used_max_cqe = 0;
|
int used_max_cqe = 0;
|
||||||
|
|
||||||
static msgr_rdma_context_t *create(const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level);
|
static msgr_rdma_context_t *create(std::vector<std::string> osd_networks, const char *ib_devname, uint8_t ib_port, uint8_t gid_index, uint32_t mtu, bool odp, int log_level);
|
||||||
~msgr_rdma_context_t();
|
~msgr_rdma_context_t();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -216,7 +216,7 @@ resume_1:
|
||||||
for (uint64_t osd_num: node.child_osds)
|
for (uint64_t osd_num: node.child_osds)
|
||||||
{
|
{
|
||||||
auto & osd = placement_tree->osds.at(osd_num);
|
auto & osd = placement_tree->osds.at(osd_num);
|
||||||
fmt_items.push_back(json11::Json::object{
|
auto json_osd = json11::Json::object{
|
||||||
{ "type", "osd" },
|
{ "type", "osd" },
|
||||||
{ "name", osd.num },
|
{ "name", osd.num },
|
||||||
{ "parent", node.name },
|
{ "parent", node.name },
|
||||||
|
@ -230,7 +230,16 @@ resume_1:
|
||||||
{ "bitmap", (uint64_t)osd.bitmap_granularity },
|
{ "bitmap", (uint64_t)osd.bitmap_granularity },
|
||||||
{ "commit", osd.immediate_commit == IMMEDIATE_NONE ? "none" : (osd.immediate_commit == IMMEDIATE_ALL ? "all" : "small") },
|
{ "commit", osd.immediate_commit == IMMEDIATE_NONE ? "none" : (osd.immediate_commit == IMMEDIATE_ALL ? "all" : "small") },
|
||||||
{ "op_stats", osd_stats[osd_num]["op_stats"] },
|
{ "op_stats", osd_stats[osd_num]["op_stats"] },
|
||||||
});
|
};
|
||||||
|
if (osd_stats[osd_num]["slow_ops_primary"].uint64_value() > 0)
|
||||||
|
{
|
||||||
|
json_osd["slow_ops_primary"] = osd_stats[osd_num]["slow_ops_primary"];
|
||||||
|
}
|
||||||
|
if (osd_stats[osd_num]["slow_ops_secondary"].uint64_value() > 0)
|
||||||
|
{
|
||||||
|
json_osd["slow_ops_secondary"] = osd_stats[osd_num]["slow_ops_secondary"];
|
||||||
|
}
|
||||||
|
fmt_items.push_back(json_osd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result.data = fmt_items;
|
result.data = fmt_items;
|
||||||
|
|
|
@ -134,6 +134,7 @@ resume_2:
|
||||||
}
|
}
|
||||||
int osd_count = 0, osd_up = 0;
|
int osd_count = 0, osd_up = 0;
|
||||||
uint64_t total_raw = 0, free_raw = 0, free_down_raw = 0, down_raw = 0;
|
uint64_t total_raw = 0, free_raw = 0, free_down_raw = 0, down_raw = 0;
|
||||||
|
std::vector<uint64_t> slow_op_primary_osds, slow_op_secondary_osds;
|
||||||
parent->iterate_kvs_1(osd_stats, "/osd/stats/", [&](uint64_t stat_osd_num, json11::Json value)
|
parent->iterate_kvs_1(osd_stats, "/osd/stats/", [&](uint64_t stat_osd_num, json11::Json value)
|
||||||
{
|
{
|
||||||
osd_count++;
|
osd_count++;
|
||||||
|
@ -153,6 +154,14 @@ resume_2:
|
||||||
if (peer_it != parent->cli->st_cli.peer_states.end())
|
if (peer_it != parent->cli->st_cli.peer_states.end())
|
||||||
{
|
{
|
||||||
osd_up++;
|
osd_up++;
|
||||||
|
if (value["slow_ops_primary"].uint64_value() > 0)
|
||||||
|
{
|
||||||
|
slow_op_primary_osds.push_back(stat_osd_num);
|
||||||
|
}
|
||||||
|
if (value["slow_ops_secondary"].uint64_value() > 0)
|
||||||
|
{
|
||||||
|
slow_op_secondary_osds.push_back(stat_osd_num);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -216,6 +225,10 @@ resume_2:
|
||||||
{ "mon_master", mon_master },
|
{ "mon_master", mon_master },
|
||||||
{ "osd_up", osd_up },
|
{ "osd_up", osd_up },
|
||||||
{ "osd_count", osd_count },
|
{ "osd_count", osd_count },
|
||||||
|
{ "osds_full", osds_full },
|
||||||
|
{ "osds_nearfull", osds_nearfull },
|
||||||
|
{ "osds_primary_slow_ops", slow_op_primary_osds },
|
||||||
|
{ "osds_secondary_slow_ops", slow_op_secondary_osds },
|
||||||
{ "total_raw", total_raw },
|
{ "total_raw", total_raw },
|
||||||
{ "free_raw", free_raw },
|
{ "free_raw", free_raw },
|
||||||
{ "down_raw", down_raw },
|
{ "down_raw", down_raw },
|
||||||
|
@ -300,6 +313,26 @@ resume_2:
|
||||||
warning_str += " "+std::to_string(osds_nearfull)+
|
warning_str += " "+std::to_string(osds_nearfull)+
|
||||||
(osds_nearfull > 1 ? " osds are almost full\n" : " osd is almost full\n");
|
(osds_nearfull > 1 ? " osds are almost full\n" : " osd is almost full\n");
|
||||||
}
|
}
|
||||||
|
if (slow_op_primary_osds.size() > 0)
|
||||||
|
{
|
||||||
|
warning_str += " "+std::to_string(slow_op_primary_osds.size());
|
||||||
|
warning_str += (slow_op_primary_osds.size() > 1 ? " osds have" : " osd has");
|
||||||
|
warning_str += " slow client ops: ";
|
||||||
|
for (int i = 0; i < slow_op_primary_osds.size(); i++)
|
||||||
|
{
|
||||||
|
warning_str += (i > 0 ? ", " : "")+std::to_string(slow_op_primary_osds[i])+"\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (slow_op_secondary_osds.size() > 0)
|
||||||
|
{
|
||||||
|
warning_str += " "+std::to_string(slow_op_secondary_osds.size());
|
||||||
|
warning_str += (slow_op_secondary_osds.size() > 1 ? " osds have" : " osd has");
|
||||||
|
warning_str += " slow replication ops: ";
|
||||||
|
for (int i = 0; i < slow_op_secondary_osds.size(); i++)
|
||||||
|
{
|
||||||
|
warning_str += (i > 0 ? ", " : "")+std::to_string(slow_op_secondary_osds[i])+"\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
if (warning_str != "")
|
if (warning_str != "")
|
||||||
{
|
{
|
||||||
warning_str = "\n warning:\n"+warning_str;
|
warning_str = "\n warning:\n"+warning_str;
|
||||||
|
|
|
@ -535,10 +535,12 @@ void osd_t::print_stats()
|
||||||
|
|
||||||
void osd_t::print_slow()
|
void osd_t::print_slow()
|
||||||
{
|
{
|
||||||
bool has_slow = false;
|
cur_slow_op_primary = 0;
|
||||||
|
cur_slow_op_secondary = 0;
|
||||||
char alloc[1024];
|
char alloc[1024];
|
||||||
timespec now;
|
timespec now;
|
||||||
clock_gettime(CLOCK_REALTIME, &now);
|
clock_gettime(CLOCK_REALTIME, &now);
|
||||||
|
// FIXME: Also track slow local blockstore ops and recovery/flush/scrub ops
|
||||||
for (auto & kv: msgr.clients)
|
for (auto & kv: msgr.clients)
|
||||||
{
|
{
|
||||||
for (auto op: kv.second->received_ops)
|
for (auto op: kv.second->received_ops)
|
||||||
|
@ -608,6 +610,7 @@ void osd_t::print_slow()
|
||||||
op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK ||
|
op->req.hdr.opcode == OSD_OP_SEC_STABILIZE || op->req.hdr.opcode == OSD_OP_SEC_ROLLBACK ||
|
||||||
op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
|
op->req.hdr.opcode == OSD_OP_SEC_READ_BMP)
|
||||||
{
|
{
|
||||||
|
cur_slow_op_secondary++;
|
||||||
bufprintf(" state=%d", op->bs_op ? PRIV(op->bs_op)->op_state : -1);
|
bufprintf(" state=%d", op->bs_op ? PRIV(op->bs_op)->op_state : -1);
|
||||||
int wait_for = op->bs_op ? PRIV(op->bs_op)->wait_for : 0;
|
int wait_for = op->bs_op ? PRIV(op->bs_op)->wait_for : 0;
|
||||||
if (wait_for)
|
if (wait_for)
|
||||||
|
@ -618,15 +621,19 @@ void osd_t::print_slow()
|
||||||
else if (op->req.hdr.opcode == OSD_OP_READ || op->req.hdr.opcode == OSD_OP_WRITE ||
|
else if (op->req.hdr.opcode == OSD_OP_READ || op->req.hdr.opcode == OSD_OP_WRITE ||
|
||||||
op->req.hdr.opcode == OSD_OP_SYNC || op->req.hdr.opcode == OSD_OP_DELETE)
|
op->req.hdr.opcode == OSD_OP_SYNC || op->req.hdr.opcode == OSD_OP_DELETE)
|
||||||
{
|
{
|
||||||
|
cur_slow_op_primary++;
|
||||||
bufprintf(" state=%d", !op->op_data ? -1 : op->op_data->st);
|
bufprintf(" state=%d", !op->op_data ? -1 : op->op_data->st);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cur_slow_op_primary++;
|
||||||
|
}
|
||||||
#undef bufprintf
|
#undef bufprintf
|
||||||
printf("%s\n", alloc);
|
printf("%s\n", alloc);
|
||||||
has_slow = true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (has_slow && bs)
|
if ((cur_slow_op_primary+cur_slow_op_secondary) > 0 && bs)
|
||||||
{
|
{
|
||||||
bs->dump_diagnostics();
|
bs->dump_diagnostics();
|
||||||
}
|
}
|
||||||
|
|
|
@ -151,6 +151,8 @@ class osd_t
|
||||||
bool etcd_reporting_pg_state = false;
|
bool etcd_reporting_pg_state = false;
|
||||||
bool etcd_reporting_stats = false;
|
bool etcd_reporting_stats = false;
|
||||||
int print_stats_timer_id = -1, slow_log_timer_id = -1;
|
int print_stats_timer_id = -1, slow_log_timer_id = -1;
|
||||||
|
uint64_t cur_slow_op_primary = 0;
|
||||||
|
uint64_t cur_slow_op_secondary = 0;
|
||||||
|
|
||||||
// peers and PGs
|
// peers and PGs
|
||||||
|
|
||||||
|
|
|
@ -201,6 +201,14 @@ json11::Json osd_t::get_statistics()
|
||||||
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
|
st["immediate_commit"] = immediate_commit == IMMEDIATE_ALL ? "all" : (immediate_commit == IMMEDIATE_SMALL ? "small" : "none");
|
||||||
st["host"] = self_state["host"];
|
st["host"] = self_state["host"];
|
||||||
st["version"] = VITASTOR_VERSION;
|
st["version"] = VITASTOR_VERSION;
|
||||||
|
if (cur_slow_op_primary > 0)
|
||||||
|
{
|
||||||
|
st["slow_ops_primary"] = cur_slow_op_primary;
|
||||||
|
}
|
||||||
|
if (cur_slow_op_secondary > 0)
|
||||||
|
{
|
||||||
|
st["slow_ops_secondary"] = cur_slow_op_secondary;
|
||||||
|
}
|
||||||
json11::Json::object op_stats, subop_stats;
|
json11::Json::object op_stats, subop_stats;
|
||||||
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
for (int i = OSD_OP_MIN; i <= OSD_OP_MAX; i++)
|
||||||
{
|
{
|
||||||
|
|
|
@ -12,6 +12,11 @@ target_link_libraries(stub_bench tcmalloc_minimal)
|
||||||
add_executable(osd_test osd_test.cpp ../util/rw_blocking.cpp ../util/addr_util.cpp)
|
add_executable(osd_test osd_test.cpp ../util/rw_blocking.cpp ../util/addr_util.cpp)
|
||||||
target_link_libraries(osd_test tcmalloc_minimal)
|
target_link_libraries(osd_test tcmalloc_minimal)
|
||||||
|
|
||||||
|
# bindiff
|
||||||
|
add_executable(bindiff
|
||||||
|
bindiff.c
|
||||||
|
)
|
||||||
|
|
||||||
# stub_uring_osd
|
# stub_uring_osd
|
||||||
add_executable(stub_uring_osd
|
add_executable(stub_uring_osd
|
||||||
stub_uring_osd.cpp
|
stub_uring_osd.cpp
|
||||||
|
|
|
@ -0,0 +1,177 @@
|
||||||
|
// Copyright (c) Vitaliy Filippov, 2004+
|
||||||
|
// License: VNPL-1.1 (see README.md for details)
|
||||||
|
|
||||||
|
#ifndef _LARGEFILE64_SOURCE
|
||||||
|
#define _LARGEFILE64_SOURCE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#define BUFSIZE 0x100000
|
||||||
|
|
||||||
|
// Return the size in bytes of the file behind descriptor <fd> as reported by fstat().
// If fstat() fails, a message is written to stderr and 0 is returned.
// A negative st_size is also reported as 0.
uint64_t filelength(int fd)
{
    struct stat info;
    int rc = fstat(fd, &info);
    if (rc >= 0)
    {
        // Clamp negative sizes to zero before converting to an unsigned type
        return info.st_size < 0 ? 0 : (uint64_t)info.st_size;
    }
    fprintf(stderr, "fstat failed: %s\n", strerror(errno));
    return 0;
}
|
||||||
|
|
||||||
|
// Read up to <remaining> bytes from <fd> into <read_buf>, repeating short reads
// until the buffer is full or end of file is reached.
// Returns the number of bytes stored; it is smaller than <remaining> only when
// EOF was hit first. EINTR, EAGAIN and EPIPE are retried; any other read error
// is reported with perror() and terminates the process with exit(1).
size_t read_blocking(int fd, void *read_buf, size_t remaining)
{
    size_t done = 0;
    while (done < remaining)
    {
        ssize_t r = read(fd, read_buf, remaining-done);
        if (r == 0)
        {
            // EOF. It must be detected by the return value: read() does not
            // reset errno on success, so errno may hold a stale value here.
            return done;
        }
        if (r < 0)
        {
            if (errno != EINTR && errno != EAGAIN && errno != EPIPE)
            {
                perror("read");
                exit(1);
            }
            // Transient error, try again
            continue;
        }
        done += (size_t)r;
        read_buf = (uint8_t*)read_buf + r;
    }
    return done;
}
|
||||||
|
|
||||||
|
// Write exactly <remaining> bytes from <write_buf> to <fd>, repeating partial
// writes until everything is written. Returns the number of bytes written.
// EINTR, EAGAIN and EPIPE are retried; any other write error is reported with
// perror() and terminates the process with exit(1).
// NOTE(review): EPIPE is retried like a transient error - confirm this is intended.
size_t write_blocking(int fd, void *write_buf, size_t remaining)
{
    const uint8_t *src = (const uint8_t*)write_buf;
    size_t written = 0;
    while (written < remaining)
    {
        ssize_t n = write(fd, src + written, remaining - written);
        if (n >= 0)
        {
            written += (size_t)n;
            continue;
        }
        int retriable = (errno == EINTR || errno == EAGAIN || errno == EPIPE);
        if (!retriable)
        {
            perror("write");
            exit(1);
        }
    }
    return written;
}
|
||||||
|
|
||||||
|
int main(int narg, char *args[])
|
||||||
|
{
|
||||||
|
int fd1 = -1, fd2 = -1;
|
||||||
|
uint8_t *buf1 = NULL, *buf2 = NULL;
|
||||||
|
uint64_t addr = 0, l1 = 0, l2 = 0, l = 0, diffl = 0;
|
||||||
|
size_t buf1_len = 0, buf2_len = 0, i = 0, j = 0, dl = 0;
|
||||||
|
int argoff = 0;
|
||||||
|
int nosource = 0;
|
||||||
|
fprintf(stderr, "VMX HexDiff v2.1\nLicense: GPLv3.0+, (c) 2005+, Vitaliy Filippov\n");
|
||||||
|
argoff = 1;
|
||||||
|
if (narg > argoff && strcmp(args[argoff], "-n") == 0)
|
||||||
|
{
|
||||||
|
nosource = 1;
|
||||||
|
argoff++;
|
||||||
|
}
|
||||||
|
if (narg < argoff+2)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "USAGE: bindiff [-n] <file1> <file2>\n"
|
||||||
|
"This will create hex patch file1->file2 and write it to stdout.\n"
|
||||||
|
"[-n] = do not write file1 data in patch, only file2.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fd1 = open(args[argoff], O_RDONLY);
|
||||||
|
if (fd1 < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Couldn't open %s: %s\n", args[argoff], strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fd2 = open(args[argoff+1], O_RDONLY);
|
||||||
|
if (fd2 < 0)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Couldn't open %s: %s\n", args[argoff+1], strerror(errno));
|
||||||
|
close(fd1);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
l1 = filelength(fd1);
|
||||||
|
l2 = filelength(fd2);
|
||||||
|
if (l1 < l2)
|
||||||
|
l = l1;
|
||||||
|
else
|
||||||
|
l = l2;
|
||||||
|
addr = diffl = 0;
|
||||||
|
buf1 = malloc(BUFSIZE+1);
|
||||||
|
buf2 = malloc(BUFSIZE+1);
|
||||||
|
while ((buf1_len = read_blocking(fd1, buf1, BUFSIZE)) > 0 && (buf2_len = read_blocking(fd2, buf2, BUFSIZE)) > 0)
|
||||||
|
{
|
||||||
|
buf1[buf1_len] = buf2[buf2_len] = 0;
|
||||||
|
for (dl = 0, i = 0; i <= buf1_len && i <= buf2_len; i++, addr++)
|
||||||
|
{
|
||||||
|
if (buf1[i] != buf2[i])
|
||||||
|
{
|
||||||
|
dl++;
|
||||||
|
}
|
||||||
|
else if (dl)
|
||||||
|
{
|
||||||
|
printf("%08jX: ", addr-dl);
|
||||||
|
if (!nosource)
|
||||||
|
{
|
||||||
|
for (j = i-dl; j < i; j++)
|
||||||
|
printf("%02X", buf1[j]);
|
||||||
|
printf(" ");
|
||||||
|
}
|
||||||
|
for (j = i-dl; j < i; j++)
|
||||||
|
printf("%02X", buf2[j]);
|
||||||
|
printf("\n");
|
||||||
|
diffl += dl;
|
||||||
|
dl = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
addr--;
|
||||||
|
}
|
||||||
|
if (l1 < l2)
|
||||||
|
{
|
||||||
|
printf("%08zX: ", i);
|
||||||
|
while ((buf2_len = read_blocking(fd2, buf2, BUFSIZE)) > 0)
|
||||||
|
{
|
||||||
|
for (j = 0; j < buf2_len; j++, i++)
|
||||||
|
printf("%02X", buf2[j]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
else if (l1 > l2)
|
||||||
|
{
|
||||||
|
printf("SIZE %08zX\n", l2);
|
||||||
|
}
|
||||||
|
if (diffl != 0 || l1 != l2)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Difference in %zu of %zu common bytes\n", diffl, l);
|
||||||
|
if (l1 != l2)
|
||||||
|
fprintf(stderr, "Length difference!\nFile \"%s\": %zu\nFile \"%s\": %zu\n", args [1], l1, args [2], l2);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Files are equal\n");
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -65,7 +65,7 @@ std::string addr_to_string(const sockaddr_storage &addr)
|
||||||
return std::string(peer_str)+":"+std::to_string(port);
|
return std::string(peer_str)+":"+std::to_string(port);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool cidr_match(const in_addr &addr, const in_addr &net, uint8_t bits)
|
bool cidr_match(const in_addr &addr, const in_addr &net, uint8_t bits)
|
||||||
{
|
{
|
||||||
if (bits == 0)
|
if (bits == 0)
|
||||||
{
|
{
|
||||||
|
@ -75,7 +75,7 @@ static bool cidr_match(const in_addr &addr, const in_addr &net, uint8_t bits)
|
||||||
return !((addr.s_addr ^ net.s_addr) & htonl(0xFFFFFFFFu << (32 - bits)));
|
return !((addr.s_addr ^ net.s_addr) & htonl(0xFFFFFFFFu << (32 - bits)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits)
|
bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits)
|
||||||
{
|
{
|
||||||
const uint32_t *a = address.s6_addr32;
|
const uint32_t *a = address.s6_addr32;
|
||||||
const uint32_t *n = network.s6_addr32;
|
const uint32_t *n = network.s6_addr32;
|
||||||
|
@ -93,47 +93,49 @@ static bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct addr_mask_t
|
addr_mask_t cidr_parse(std::string mask)
|
||||||
{
|
{
|
||||||
sa_family_t family;
|
unsigned bits = 255;
|
||||||
in_addr ipv4;
|
|
||||||
in6_addr ipv6;
|
|
||||||
uint8_t bits;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg, bool include_v6)
|
|
||||||
{
|
|
||||||
std::vector<addr_mask_t> masks;
|
|
||||||
for (auto mask: mask_cfg)
|
|
||||||
{
|
|
||||||
unsigned bits = 0;
|
|
||||||
int p = mask.find('/');
|
int p = mask.find('/');
|
||||||
if (p != std::string::npos)
|
if (p != std::string::npos)
|
||||||
{
|
{
|
||||||
char null_byte = 0;
|
char null_byte = 0;
|
||||||
if (sscanf(mask.c_str()+p+1, "%u%c", &bits, &null_byte) != 1 || bits > 128)
|
if (sscanf(mask.c_str()+p+1, "%u%c", &bits, &null_byte) != 1 || bits > 128)
|
||||||
{
|
throw std::runtime_error("Invalid IP address mask: " + mask);
|
||||||
throw std::runtime_error((include_v6 ? "Invalid IPv4 address mask: " : "Invalid IP address mask: ") + mask);
|
|
||||||
}
|
|
||||||
mask = mask.substr(0, p);
|
mask = mask.substr(0, p);
|
||||||
}
|
}
|
||||||
in_addr ipv4;
|
in_addr ipv4;
|
||||||
in6_addr ipv6;
|
in6_addr ipv6;
|
||||||
if (inet_pton(AF_INET, mask.c_str(), &ipv4) == 1)
|
if (inet_pton(AF_INET, mask.c_str(), &ipv4) == 1)
|
||||||
{
|
{
|
||||||
|
if (bits == 255)
|
||||||
|
bits = 32;
|
||||||
if (bits > 32)
|
if (bits > 32)
|
||||||
{
|
throw std::runtime_error("Invalid IP address mask: " + mask);
|
||||||
throw std::runtime_error((include_v6 ? "Invalid IPv4 address mask: " : "Invalid IP address mask: ") + mask);
|
return (addr_mask_t){ .family = AF_INET, .ipv4 = ipv4, .bits = (uint8_t)(bits ? bits : 32) };
|
||||||
}
|
}
|
||||||
masks.push_back((addr_mask_t){ .family = AF_INET, .ipv4 = ipv4, .bits = (uint8_t)bits });
|
else if (inet_pton(AF_INET6, mask.c_str(), &ipv6) == 1)
|
||||||
}
|
|
||||||
else if (include_v6 && inet_pton(AF_INET6, mask.c_str(), &ipv6) == 1)
|
|
||||||
{
|
{
|
||||||
masks.push_back((addr_mask_t){ .family = AF_INET6, .ipv6 = ipv6, .bits = (uint8_t)bits });
|
if (bits == 255)
|
||||||
|
bits = 128;
|
||||||
|
return (addr_mask_t){ .family = AF_INET6, .ipv6 = ipv6, .bits = (uint8_t)bits };
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw std::runtime_error((include_v6 ? "Invalid IPv4 address mask: " : "Invalid IP address mask: ") + mask);
|
throw std::runtime_error("Invalid IP address mask: " + mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg, bool include_v6)
|
||||||
|
{
|
||||||
|
std::vector<addr_mask_t> masks;
|
||||||
|
for (auto mask: mask_cfg)
|
||||||
|
{
|
||||||
|
masks.push_back(cidr_parse(mask));
|
||||||
|
if (masks[masks.size()-1].family == AF_INET6)
|
||||||
|
{
|
||||||
|
// Auto-enable IPv6 addresses
|
||||||
|
include_v6 = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::set<std::string> addresses;
|
std::set<std::string> addresses;
|
||||||
|
|
|
@ -1,10 +1,22 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <netinet/in.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
struct addr_mask_t
|
||||||
|
{
|
||||||
|
sa_family_t family;
|
||||||
|
in_addr ipv4;
|
||||||
|
in6_addr ipv6;
|
||||||
|
uint8_t bits;
|
||||||
|
};
|
||||||
|
|
||||||
bool string_to_addr(std::string str, bool parse_port, int default_port, struct sockaddr_storage *addr);
|
bool string_to_addr(std::string str, bool parse_port, int default_port, struct sockaddr_storage *addr);
|
||||||
std::string addr_to_string(const sockaddr_storage &addr);
|
std::string addr_to_string(const sockaddr_storage &addr);
|
||||||
|
addr_mask_t cidr_parse(std::string mask);
|
||||||
|
bool cidr_match(const in_addr &address, const in_addr &network, uint8_t bits);
|
||||||
|
bool cidr6_match(const in6_addr &address, const in6_addr &network, uint8_t bits);
|
||||||
std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg = std::vector<std::string>(), bool include_v6 = false);
|
std::vector<std::string> getifaddr_list(std::vector<std::string> mask_cfg = std::vector<std::string>(), bool include_v6 = false);
|
||||||
int create_and_bind_socket(std::string bind_address, int bind_port, int listen_backlog, int *listening_port);
|
int create_and_bind_socket(std::string bind_address, int bind_port, int listen_backlog, int *listening_port);
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
|
|
||||||
#include "rw_blocking.h"
|
#include "rw_blocking.h"
|
||||||
|
|
||||||
int read_blocking(int fd, void *read_buf, size_t remaining)
|
size_t read_blocking(int fd, void *read_buf, size_t remaining)
|
||||||
{
|
{
|
||||||
size_t done = 0;
|
size_t done = 0;
|
||||||
while (done < remaining)
|
while (done < remaining)
|
||||||
|
@ -30,13 +30,13 @@ int read_blocking(int fd, void *read_buf, size_t remaining)
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
done += r;
|
done += (size_t)r;
|
||||||
read_buf = (uint8_t*)read_buf + r;
|
read_buf = (uint8_t*)read_buf + r;
|
||||||
}
|
}
|
||||||
return done;
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
int write_blocking(int fd, void *write_buf, size_t remaining)
|
size_t write_blocking(int fd, void *write_buf, size_t remaining)
|
||||||
{
|
{
|
||||||
size_t done = 0;
|
size_t done = 0;
|
||||||
while (done < remaining)
|
while (done < remaining)
|
||||||
|
@ -51,7 +51,7 @@ int write_blocking(int fd, void *write_buf, size_t remaining)
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
done += r;
|
done += (size_t)r;
|
||||||
write_buf = (uint8_t*)write_buf + r;
|
write_buf = (uint8_t*)write_buf + r;
|
||||||
}
|
}
|
||||||
return done;
|
return done;
|
||||||
|
|
|
@ -6,8 +6,8 @@
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/uio.h>
|
#include <sys/uio.h>
|
||||||
|
|
||||||
int read_blocking(int fd, void *read_buf, size_t remaining);
|
size_t read_blocking(int fd, void *read_buf, size_t remaining);
|
||||||
int write_blocking(int fd, void *write_buf, size_t remaining);
|
size_t write_blocking(int fd, void *write_buf, size_t remaining);
|
||||||
int readv_blocking(int fd, iovec *iov, int iovcnt);
|
int readv_blocking(int fd, iovec *iov, int iovcnt);
|
||||||
int writev_blocking(int fd, iovec *iov, int iovcnt);
|
int writev_blocking(int fd, iovec *iov, int iovcnt);
|
||||||
int sendv_blocking(int fd, iovec *iov, int iovcnt, int flags);
|
int sendv_blocking(int fd, iovec *iov, int iovcnt, int flags);
|
||||||
|
|
|
@ -60,6 +60,7 @@ qemu-img convert -S 4096 -p \
|
||||||
-O raw ./testdata/bin/read.bin
|
-O raw ./testdata/bin/read.bin
|
||||||
|
|
||||||
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
|
if ! diff -q ./testdata/bin/read.bin ./testdata/bin/mirror.bin; then
|
||||||
|
build/src/test/bindiff ./testdata/bin/read.bin ./testdata/bin/mirror.bin
|
||||||
format_error Data lost during self-heal
|
format_error Data lost during self-heal
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue