-----BEGIN PGP SIGNATURE-----

iQEzBAABCAAdFiEEIV1G9IJGaJ7HfzVi7wSWWzmNYhEFAmXwPUAACgkQ7wSWWzmN
 YhFnIwgAgctDniJwlRxXB01eVlzXz7IulHnpSby07XEJxENSpGB8ufaeE4eK5gJy
 NVK6C2+1EU2vRxm4oIdcvtN4C4/jtRbYYjiSTx7eE4FmSkqshSnR5XCV72LDqG3i
 WbzInjMvYfysmcMXLfrWgxOnVew9WqEzlpEWlc7FfNKnkzBVf+JDztfqCUx0XM7H
 qefw4ImjqQw993QxJpipXC7aEGUyouB0RIBB71FkCa9ihlh9x7W68evbOI/jTn5q
 HWuStgS02sKHjRFliMbdbMY77FNUz4Yroo/GKSvGt64atxkQSJqPNAV+/9n18LNy
 QAH5eK6cXFPOIAaYpADU5kHDVVAFiw==
 =iBdx
 -----END PGP SIGNATURE-----

Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCAAdFiEEIV1G9IJGaJ7HfzVi7wSWWzmNYhEFAmXwPUAACgkQ7wSWWzmN
# YhFnIwgAgctDniJwlRxXB01eVlzXz7IulHnpSby07XEJxENSpGB8ufaeE4eK5gJy
# NVK6C2+1EU2vRxm4oIdcvtN4C4/jtRbYYjiSTx7eE4FmSkqshSnR5XCV72LDqG3i
# WbzInjMvYfysmcMXLfrWgxOnVew9WqEzlpEWlc7FfNKnkzBVf+JDztfqCUx0XM7H
# qefw4ImjqQw993QxJpipXC7aEGUyouB0RIBB71FkCa9ihlh9x7W68evbOI/jTn5q
# HWuStgS02sKHjRFliMbdbMY77FNUz4Yroo/GKSvGt64atxkQSJqPNAV+/9n18LNy
# QAH5eK6cXFPOIAaYpADU5kHDVVAFiw==
# =iBdx
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 12 Mar 2024 11:32:16 GMT
# gpg:                using RSA key 215D46F48246689EC77F3562EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F  3562 EF04 965B 398D 6211

* tag 'net-pull-request' of https://github.com/jasowang/qemu:
  ebpf: Updated eBPF program and skeleton.
  qmp: Added new command to retrieve eBPF blob.
  virtio-net: Added property to load eBPF RSS with fds.
  ebpf: Added eBPF initialization by fds.
  ebpf: Added eBPF map update through mmap.
  Avoid unaligned fetch in ladr_match()
  e1000e: fix link state on resume
  igb: fix link state on resume

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
master
Peter Maydell 2024-03-12 13:42:57 +00:00
commit 35ac6831d9
20 changed files with 1058 additions and 809 deletions

69
ebpf/ebpf.c Normal file
View File

@ -0,0 +1,69 @@
/*
* QEMU eBPF binary declaration routine.
*
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Andrew Melnychenko <andrew@daynix.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "qemu/queue.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-ebpf.h"
#include "ebpf/ebpf.h"
/*
 * One registered eBPF ELF blob: an integer id together with a
 * pointer/length pair, chained into a singly-linked list through 'node'.
 */
typedef struct ElfBinaryDataEntry {
int id; /* key compared against by ebpf_find_binary_by_id() */
const void *data; /* blob pointer exactly as given at registration (not copied) */
size_t datalen; /* length that accompanies 'data' */
QSLIST_ENTRY(ElfBinaryDataEntry) node;
} ElfBinaryDataEntry;
/* File-local list of every blob registered so far; starts out empty. */
static QSLIST_HEAD(, ElfBinaryDataEntry) ebpf_elf_obj_list =
QSLIST_HEAD_INITIALIZER();
void ebpf_register_binary_data(int id, const void *data, size_t datalen)
{
struct ElfBinaryDataEntry *dataentry = NULL;
dataentry = g_new0(struct ElfBinaryDataEntry, 1);
dataentry->data = data;
dataentry->datalen = datalen;
dataentry->id = id;
QSLIST_INSERT_HEAD(&ebpf_elf_obj_list, dataentry, node);
}
/*
 * Look up a registered eBPF blob by @id.
 *
 * On a match, the blob length is written to *@sz and the blob pointer is
 * returned.  When no entry carries @id, @errp is set and NULL is returned;
 * *@sz is left untouched in that case.
 */
const void *ebpf_find_binary_by_id(int id, size_t *sz, Error **errp)
{
    ElfBinaryDataEntry *entry;

    QSLIST_FOREACH(entry, &ebpf_elf_obj_list, node) {
        if (entry->id != id) {
            continue;
        }

        *sz = entry->datalen;
        return entry->data;
    }

    error_setg(errp, "can't find eBPF object with id: %d", id);
    return NULL;
}
/*
 * QMP handler: return the eBPF blob registered under @id, base64-encoded.
 *
 * Returns a newly allocated EbpfObject whose 'object' member holds the
 * encoded blob, or NULL (with @errp set by the lookup) when no blob is
 * registered for @id.
 */
EbpfObject *qmp_request_ebpf(EbpfProgramID id, Error **errp)
{
    size_t blob_len = 0;
    const void *blob = ebpf_find_binary_by_id(id, &blob_len, errp);
    EbpfObject *result;

    if (blob == NULL) {
        return NULL;
    }

    result = g_new0(EbpfObject, 1);
    result->object = g_base64_encode(blob, blob_len);

    return result;
}

29
ebpf/ebpf.h Normal file
View File

@ -0,0 +1,29 @@
/*
* QEMU eBPF binary declaration routine.
*
* Developed by Daynix Computing LTD (http://www.daynix.com)
*
* Authors:
* Andrew Melnychenko <andrew@daynix.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef EBPF_H
#define EBPF_H
/*
 * Register an eBPF blob (pointer 'data', length 'datalen') under 'id' so
 * that ebpf_find_binary_by_id() can return it later.
 */
void ebpf_register_binary_data(int id, const void *data,
size_t datalen);
/*
 * Return the blob registered under 'id' and store its length in *sz.
 * Returns NULL and sets 'errp' when nothing is registered for 'id'.
 */
const void *ebpf_find_binary_by_id(int id, size_t *sz,
struct Error **errp);
/*
 * Define a constructor function named ebpf_binary_init_<fn> that calls
 * fn(&datalen) to obtain a blob pointer and its length, then registers the
 * pair under 'id' with ebpf_register_binary_data().
 *
 * 'fn' must be a plain identifier because it is token-pasted into the
 * generated function's name.  The function carries
 * __attribute__((constructor)), so no explicit call is needed.
 */
#define ebpf_binary_init(id, fn) \
static void __attribute__((constructor)) ebpf_binary_init_ ## fn(void) \
{ \
size_t datalen = 0; \
const void *data = fn(&datalen); \
ebpf_register_binary_data(id, data, datalen); \
}
#endif /* EBPF_H */

View File

@ -28,6 +28,12 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
return false;
}
/*
 * Variant of ebpf_rss_load_fds() that ignores all of its arguments and
 * always reports failure.
 * NOTE(review): presumably the stub compiled when libbpf is unavailable --
 * confirm against the build files.
 */
bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
int config_fd, int toeplitz_fd, int table_fd)
{
return false;
}
bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
uint16_t *indirections_table, uint8_t *toeplitz_key)
{

View File

@ -13,6 +13,8 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "qapi/qapi-types-misc.h"
#include "qapi/qapi-commands-ebpf.h"
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
@ -21,38 +23,97 @@
#include "ebpf/ebpf_rss.h"
#include "ebpf/rss.bpf.skeleton.h"
#include "trace.h"
#include "ebpf/ebpf.h"
/*
 * Put @ctx into the "nothing loaded" state: no skeleton object, every file
 * descriptor set to -1 and every mapping pointer set to NULL.
 * A NULL @ctx is accepted and ignored.
 */
void ebpf_rss_init(struct EBPFRSSContext *ctx)
{
    if (ctx == NULL) {
        return;
    }

    ctx->obj = NULL;

    /* file descriptors */
    ctx->program_fd = -1;
    ctx->map_configuration = -1;
    ctx->map_toeplitz_key = -1;
    ctx->map_indirections_table = -1;

    /* mmap()ed views of the maps */
    ctx->mmap_configuration = NULL;
    ctx->mmap_toeplitz_key = NULL;
    ctx->mmap_indirections_table = NULL;
}
/*
 * Report whether @ctx currently holds a loaded eBPF RSS program.
 *
 * A context counts as loaded when it either owns a skeleton object
 * (ctx->obj) or carries a program file descriptor other than -1, which is
 * how ebpf_rss_load_fds() marks a context.  A NULL @ctx is never loaded.
 *
 * The earlier obj-only return statement is dropped: it preceded this one
 * and made the program_fd test unreachable.
 */
bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx)
{
    return ctx != NULL && (ctx->obj != NULL || ctx->program_fd != -1);
}
/*
 * Map the configuration, Toeplitz-key and indirection-table maps of @ctx
 * into memory, one host page each, read/write and shared.
 *
 * Returns true when all three mappings succeed.  On any failure the
 * mappings already established are released and all three mmap_* pointers
 * are reset to NULL, so a failed call never leaves MAP_FAILED behind in
 * the context.  Returns false without touching anything when @ctx is not
 * loaded.
 */
static bool ebpf_rss_mmap(struct EBPFRSSContext *ctx)
{
    if (!ebpf_rss_is_loaded(ctx)) {
        return false;
    }

    ctx->mmap_configuration = mmap(NULL, qemu_real_host_page_size(),
                                   PROT_READ | PROT_WRITE, MAP_SHARED,
                                   ctx->map_configuration, 0);
    if (ctx->mmap_configuration == MAP_FAILED) {
        goto configuration_fail;
    }

    ctx->mmap_toeplitz_key = mmap(NULL, qemu_real_host_page_size(),
                                  PROT_READ | PROT_WRITE, MAP_SHARED,
                                  ctx->map_toeplitz_key, 0);
    if (ctx->mmap_toeplitz_key == MAP_FAILED) {
        goto toeplitz_fail;
    }

    ctx->mmap_indirections_table = mmap(NULL, qemu_real_host_page_size(),
                                        PROT_READ | PROT_WRITE, MAP_SHARED,
                                        ctx->map_indirections_table, 0);
    if (ctx->mmap_indirections_table == MAP_FAILED) {
        goto indirection_fail;
    }

    return true;

indirection_fail:
    munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
toeplitz_fail:
    munmap(ctx->mmap_configuration, qemu_real_host_page_size());
configuration_fail:
    /* Never leave MAP_FAILED (or a stale address) in the context. */
    ctx->mmap_configuration = NULL;
    ctx->mmap_toeplitz_key = NULL;
    ctx->mmap_indirections_table = NULL;
    return false;
}
/*
 * Release the three one-page map views of @ctx and clear the pointers.
 * Does nothing when @ctx is not loaded.
 */
static void ebpf_rss_munmap(struct EBPFRSSContext *ctx)
{
    if (ebpf_rss_is_loaded(ctx)) {
        munmap(ctx->mmap_indirections_table, qemu_real_host_page_size());
        ctx->mmap_indirections_table = NULL;

        munmap(ctx->mmap_toeplitz_key, qemu_real_host_page_size());
        ctx->mmap_toeplitz_key = NULL;

        munmap(ctx->mmap_configuration, qemu_real_host_page_size());
        ctx->mmap_configuration = NULL;
    }
}
bool ebpf_rss_load(struct EBPFRSSContext *ctx)
{
struct rss_bpf *rss_bpf_ctx;
if (ctx == NULL) {
if (ebpf_rss_is_loaded(ctx)) {
return false;
}
rss_bpf_ctx = rss_bpf__open();
if (rss_bpf_ctx == NULL) {
trace_ebpf_error("eBPF RSS", "can not open eBPF RSS object");
goto error;
}
bpf_program__set_type(rss_bpf_ctx->progs.tun_rss_steering_prog, BPF_PROG_TYPE_SOCKET_FILTER);
if (rss_bpf__load(rss_bpf_ctx)) {
trace_ebpf_error("eBPF RSS", "can not load RSS program");
goto error;
}
@ -66,26 +127,57 @@ bool ebpf_rss_load(struct EBPFRSSContext *ctx)
ctx->map_toeplitz_key = bpf_map__fd(
rss_bpf_ctx->maps.tap_rss_map_toeplitz_key);
if (!ebpf_rss_mmap(ctx)) {
goto error;
}
return true;
error:
rss_bpf__destroy(rss_bpf_ctx);
ctx->obj = NULL;
ctx->program_fd = -1;
ctx->map_configuration = -1;
ctx->map_toeplitz_key = -1;
ctx->map_indirections_table = -1;
return false;
}
/*
 * Adopt an already-loaded eBPF RSS program described by file descriptors.
 *
 * @program_fd, @config_fd, @toeplitz_fd and @table_fd are stored in @ctx
 * and the three maps are mmap()ed.  Returns true on success.
 *
 * Returns false, leaving @ctx in the unloaded state, when @ctx is NULL or
 * already loaded, when any descriptor is negative, or when mapping fails.
 * The descriptors are not closed here on failure.
 */
bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
                       int config_fd, int toeplitz_fd, int table_fd)
{
    /*
     * ebpf_rss_is_loaded() reports false for a NULL context, so NULL has
     * to be rejected explicitly before ctx is dereferenced below.
     */
    if (ctx == NULL || ebpf_rss_is_loaded(ctx)) {
        return false;
    }

    if (program_fd < 0 || config_fd < 0 || toeplitz_fd < 0 || table_fd < 0) {
        return false;
    }

    ctx->program_fd = program_fd;
    ctx->map_configuration = config_fd;
    ctx->map_toeplitz_key = toeplitz_fd;
    ctx->map_indirections_table = table_fd;

    if (!ebpf_rss_mmap(ctx)) {
        ctx->program_fd = -1;
        ctx->map_configuration = -1;
        ctx->map_toeplitz_key = -1;
        ctx->map_indirections_table = -1;
        return false;
    }

    return true;
}
/*
 * Publish @config to the eBPF program by copying it into the mmap()ed
 * configuration map.
 *
 * The map is written through the mapping only.  The former
 * bpf_map_update_elem() call is dropped: it duplicated the write and
 * aborted the update whenever the call itself failed.
 *
 * Returns false when @ctx is not loaded, true otherwise.
 */
static bool ebpf_rss_set_config(struct EBPFRSSContext *ctx,
                                struct EBPFRSSConfig *config)
{
    if (!ebpf_rss_is_loaded(ctx)) {
        return false;
    }

    memcpy(ctx->mmap_configuration, config, sizeof(*config));
    return true;
}
@ -93,27 +185,19 @@ static bool ebpf_rss_set_indirections_table(struct EBPFRSSContext *ctx,
uint16_t *indirections_table,
size_t len)
{
uint32_t i = 0;
if (!ebpf_rss_is_loaded(ctx) || indirections_table == NULL ||
len > VIRTIO_NET_RSS_MAX_TABLE_LEN) {
return false;
}
for (; i < len; ++i) {
if (bpf_map_update_elem(ctx->map_indirections_table, &i,
indirections_table + i, 0) < 0) {
return false;
}
}
memcpy(ctx->mmap_indirections_table, indirections_table,
sizeof(*indirections_table) * len);
return true;
}
static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
uint8_t *toeplitz_key)
{
uint32_t map_key = 0;
/* prepare toeplitz key */
uint8_t toe[VIRTIO_NET_RSS_MAX_KEY_SIZE] = {};
@ -123,10 +207,7 @@ static bool ebpf_rss_set_toepliz_key(struct EBPFRSSContext *ctx,
memcpy(toe, toeplitz_key, VIRTIO_NET_RSS_MAX_KEY_SIZE);
*(uint32_t *)toe = ntohl(*(uint32_t *)toe);
if (bpf_map_update_elem(ctx->map_toeplitz_key, &map_key, toe,
0) < 0) {
return false;
}
memcpy(ctx->mmap_toeplitz_key, toe, VIRTIO_NET_RSS_MAX_KEY_SIZE);
return true;
}
@ -160,6 +241,22 @@ void ebpf_rss_unload(struct EBPFRSSContext *ctx)
return;
}
rss_bpf__destroy(ctx->obj);
ebpf_rss_munmap(ctx);
if (ctx->obj) {
rss_bpf__destroy(ctx->obj);
} else {
close(ctx->program_fd);
close(ctx->map_configuration);
close(ctx->map_toeplitz_key);
close(ctx->map_indirections_table);
}
ctx->obj = NULL;
ctx->program_fd = -1;
ctx->map_configuration = -1;
ctx->map_toeplitz_key = -1;
ctx->map_indirections_table = -1;
}
ebpf_binary_init(EBPF_PROGRAMID_RSS, rss_bpf__elf_bytes)

View File

@ -14,12 +14,19 @@
#ifndef QEMU_EBPF_RSS_H
#define QEMU_EBPF_RSS_H
#define EBPF_RSS_MAX_FDS 4
/*
 * State of one eBPF RSS program instance.  ebpf_rss_init() resets it to
 * obj == NULL, every descriptor == -1 and every mapping == NULL.
 */
struct EBPFRSSContext {
void *obj; /* skeleton object handed to rss_bpf__destroy(); NULL if none */
int program_fd; /* eBPF program descriptor, -1 when unset */
int map_configuration; /* descriptor of the configuration map */
int map_toeplitz_key; /* descriptor of the Toeplitz key map */
int map_indirections_table; /* descriptor of the indirection table map */
/* mapped eBPF maps for direct access to omit bpf_map_update_elem() */
void *mmap_configuration;
void *mmap_toeplitz_key;
void *mmap_indirections_table;
};
struct EBPFRSSConfig {
@ -36,6 +43,9 @@ bool ebpf_rss_is_loaded(struct EBPFRSSContext *ctx);
bool ebpf_rss_load(struct EBPFRSSContext *ctx);
bool ebpf_rss_load_fds(struct EBPFRSSContext *ctx, int program_fd,
int config_fd, int toeplitz_fd, int table_fd);
bool ebpf_rss_set_all(struct EBPFRSSContext *ctx, struct EBPFRSSConfig *config,
uint16_t *indirections_table, uint8_t *toeplitz_key);

View File

@ -1 +1 @@
system_ss.add(when: libbpf, if_true: files('ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))
common_ss.add(when: libbpf, if_true: files('ebpf.c', 'ebpf_rss.c'), if_false: files('ebpf_rss-stub.c'))

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
#include "trace/trace-ebpf.h"

View File

@ -123,14 +123,6 @@ e1000e_intmgr_timer_resume(E1000IntrDelayTimer *timer)
}
}
/* Delete the underlying QEMU timer of @timer, but only if it is running. */
static void
e1000e_intmgr_timer_pause(E1000IntrDelayTimer *timer)
{
    if (!timer->running) {
        return;
    }

    timer_del(timer->timer);
}
static inline void
e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer)
{
@ -398,24 +390,6 @@ e1000e_intrmgr_resume(E1000ECore *core)
}
}
/*
 * Pause every interrupt-delay timer of @core: the six fixed timers first
 * (radv, rdtr, raid, tidv, tadv, itr), then each per-vector eitr timer.
 */
static void
e1000e_intrmgr_pause(E1000ECore *core)
{
    E1000IntrDelayTimer *const fixed_timers[] = {
        &core->radv, &core->rdtr, &core->raid,
        &core->tidv, &core->tadv, &core->itr,
    };
    const int n_fixed = sizeof(fixed_timers) / sizeof(fixed_timers[0]);
    int idx;

    for (idx = 0; idx < n_fixed; idx++) {
        e1000e_intmgr_timer_pause(fixed_timers[idx]);
    }

    for (idx = 0; idx < E1000E_MSIX_VEC_NUM; idx++) {
        e1000e_intmgr_timer_pause(&core->eitr[idx]);
    }
}
static void
e1000e_intrmgr_reset(E1000ECore *core)
{
@ -3334,12 +3308,6 @@ e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size)
return 0;
}
/* Stop the autonegotiation timer of @core by deleting it with timer_del(). */
static inline void
e1000e_autoneg_pause(E1000ECore *core)
{
timer_del(core->autoneg_timer);
}
static void
e1000e_autoneg_resume(E1000ECore *core)
{
@ -3351,22 +3319,6 @@ e1000e_autoneg_resume(E1000ECore *core)
}
}
/*
 * VM run-state callback.  @opaque is the E1000ECore.
 *
 * When the VM stops, the autonegotiation timer and then the interrupt
 * manager timers are paused; when it runs, the interrupt manager and then
 * autonegotiation are resumed.  @state is not consulted.
 */
static void
e1000e_vm_state_change(void *opaque, bool running, RunState state)
{
    E1000ECore *core = opaque;

    if (!running) {
        trace_e1000e_vm_state_stopped();
        e1000e_autoneg_pause(core);
        e1000e_intrmgr_pause(core);
        return;
    }

    trace_e1000e_vm_state_running();
    e1000e_intrmgr_resume(core);
    e1000e_autoneg_resume(core);
}
void
e1000e_core_pci_realize(E1000ECore *core,
const uint16_t *eeprom_templ,
@ -3379,9 +3331,6 @@ e1000e_core_pci_realize(E1000ECore *core,
e1000e_autoneg_timer, core);
e1000e_intrmgr_pci_realize(core);
core->vmstate =
qemu_add_vm_change_state_handler(e1000e_vm_state_change, core);
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS);
}
@ -3405,8 +3354,6 @@ e1000e_core_pci_uninit(E1000ECore *core)
e1000e_intrmgr_pci_unint(core);
qemu_del_vm_change_state_handler(core->vmstate);
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
net_tx_pkt_uninit(core->tx[i].tx_pkt);
}
@ -3576,5 +3523,12 @@ e1000e_core_post_load(E1000ECore *core)
*/
nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0;
/*
* we need to restart intrmgr timers, as an older version of
* QEMU can have stopped them before migration
*/
e1000e_intrmgr_resume(core);
e1000e_autoneg_resume(core);
return 0;
}

View File

@ -98,8 +98,6 @@ struct E1000Core {
E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM];
VMChangeStateEntry *vmstate;
uint32_t itr_guest_value;
uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM];

View File

@ -160,14 +160,6 @@ igb_intmgr_timer_resume(IGBIntrDelayTimer *timer)
}
}
/* Delete the underlying QEMU timer of @timer, but only if it is running. */
static void
igb_intmgr_timer_pause(IGBIntrDelayTimer *timer)
{
    if (!timer->running) {
        return;
    }

    timer_del(timer->timer);
}
static void
igb_intrmgr_on_msix_throttling_timer(void *opaque)
{
@ -212,16 +204,6 @@ igb_intrmgr_resume(IGBCore *core)
}
}
/* Pause each of the IGB_INTR_NUM eitr timers of @core, in index order. */
static void
igb_intrmgr_pause(IGBCore *core)
{
    int vec = 0;

    while (vec < IGB_INTR_NUM) {
        igb_intmgr_timer_pause(&core->eitr[vec]);
        vec++;
    }
}
static void
igb_intrmgr_reset(IGBCore *core)
{
@ -4290,12 +4272,6 @@ igb_core_read(IGBCore *core, hwaddr addr, unsigned size)
return 0;
}
/* Stop the autonegotiation timer of @core by deleting it with timer_del(). */
static inline void
igb_autoneg_pause(IGBCore *core)
{
timer_del(core->autoneg_timer);
}
static void
igb_autoneg_resume(IGBCore *core)
{
@ -4307,22 +4283,6 @@ igb_autoneg_resume(IGBCore *core)
}
}
/*
 * VM run-state callback.  @opaque is the IGBCore.
 *
 * When the VM stops, the autonegotiation timer and then the interrupt
 * manager timers are paused; when it runs, the interrupt manager and then
 * autonegotiation are resumed.  The e1000e trace points are used for both
 * transitions.  @state is not consulted.
 */
static void
igb_vm_state_change(void *opaque, bool running, RunState state)
{
    IGBCore *core = opaque;

    if (!running) {
        trace_e1000e_vm_state_stopped();
        igb_autoneg_pause(core);
        igb_intrmgr_pause(core);
        return;
    }

    trace_e1000e_vm_state_running();
    igb_intrmgr_resume(core);
    igb_autoneg_resume(core);
}
void
igb_core_pci_realize(IGBCore *core,
const uint16_t *eeprom_templ,
@ -4335,8 +4295,6 @@ igb_core_pci_realize(IGBCore *core,
igb_autoneg_timer, core);
igb_intrmgr_pci_realize(core);
core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
for (i = 0; i < IGB_NUM_QUEUES; i++) {
net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS);
}
@ -4360,8 +4318,6 @@ igb_core_pci_uninit(IGBCore *core)
igb_intrmgr_pci_unint(core);
qemu_del_vm_change_state_handler(core->vmstate);
for (i = 0; i < IGB_NUM_QUEUES; i++) {
net_tx_pkt_uninit(core->tx[i].tx_pkt);
}
@ -4586,5 +4542,12 @@ igb_core_post_load(IGBCore *core)
*/
nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0;
/*
* we need to restart intrmgr timers, as an older version of
* QEMU can have stopped them before migration
*/
igb_intrmgr_resume(core);
igb_autoneg_resume(core);
return 0;
}

View File

@ -90,8 +90,6 @@ struct IGBCore {
IGBIntrDelayTimer eitr[IGB_INTR_NUM];
VMChangeStateEntry *vmstate;
uint32_t eitr_guest_value[IGB_INTR_NUM];
uint8_t permanent_mac[ETH_ALEN];

View File

@ -632,7 +632,7 @@ static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size)
{
struct qemu_ether_header *hdr = (void *)buf;
if ((*(hdr->ether_dhost)&0x01) &&
((uint64_t *)&s->csr[8])[0] != 0LL) {
(s->csr[8] | s->csr[9] | s->csr[10] | s->csr[11]) != 0) {
uint8_t ladr[8] = {
s->csr[8] & 0xff, s->csr[8] >> 8,
s->csr[9] & 0xff, s->csr[9] >> 8,

View File

@ -42,6 +42,7 @@
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "monitor/monitor.h"
#include "hw/pci/pci_device.h"
#include "net_rx_pkt.h"
#include "hw/virtio/vhost.h"
@ -1328,14 +1329,53 @@ static void virtio_net_detach_epbf_rss(VirtIONet *n)
virtio_net_attach_ebpf_to_backend(n->nic, -1);
}
static bool virtio_net_load_ebpf(VirtIONet *n)
static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp)
{
if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
/* backend doesn't support steering ebpf */
return false;
int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1};
int ret = true;
int i = 0;
ERRP_GUARD();
if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) {
error_setg(errp,
"Expected %d file descriptors but got %d",
EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds);
return false;
}
for (i = 0; i < n->nr_ebpf_rss_fds; i++) {
fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp);
if (*errp) {
ret = false;
goto exit;
}
}
return ebpf_rss_load(&n->ebpf_rss);
ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]);
exit:
if (!ret || *errp) {
for (i = 0; i < n->nr_ebpf_rss_fds && fds[i] != -1; i++) {
close(fds[i]);
}
}
return ret;
}
/*
 * Load the eBPF RSS program for @n.
 *
 * Nothing is loaded when the backend rejects the steering-eBPF probe.
 * Otherwise, if the "ebpf-rss-fds" property supplied descriptors, loading
 * from them is attempted first; if that is not possible the built-in
 * program is loaded with ebpf_rss_load().
 *
 * Returns true when a program ended up loaded by either route (the
 * previous version returned false after a successful fd-based load).
 * When the fd route fails but the fallback succeeds, the fd error is
 * reported as a warning and cleared, so the caller does not see a
 * failure in @errp for a device that does have a working program.  If
 * both routes fail, the fd error, if any, is left in @errp.
 */
static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp)
{
    ERRP_GUARD();

    if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) {
        /* backend doesn't support steering ebpf */
        return false;
    }

    if (n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n, errp)) {
        return true;
    }

    if (!ebpf_rss_load(&n->ebpf_rss)) {
        return false;
    }

    if (*errp) {
        /* The fallback worked: downgrade the fd failure to a warning. */
        warn_report_err(*errp);
        *errp = NULL;
    }
    return true;
}
static void virtio_net_unload_ebpf(VirtIONet *n)
@ -3768,7 +3808,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
net_rx_pkt_init(&n->rx_pkt);
if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) {
virtio_net_load_ebpf(n);
virtio_net_load_ebpf(n, errp);
}
}
@ -3930,6 +3970,8 @@ static Property virtio_net_properties[] = {
VIRTIO_NET_F_RSS, false),
DEFINE_PROP_BIT64("hash", VirtIONet, host_features,
VIRTIO_NET_F_HASH_REPORT, false),
DEFINE_PROP_ARRAY("ebpf-rss-fds", VirtIONet, nr_ebpf_rss_fds,
ebpf_rss_fds, qdev_prop_string, char*),
DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features,
VIRTIO_NET_F_RSC_EXT, false),
DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout,

View File

@ -225,6 +225,8 @@ struct VirtIONet {
VirtioNetRssData rss_data;
struct NetRxPkt *rx_pkt;
struct EBPFRSSContext ebpf_rss;
uint32_t nr_ebpf_rss_fds;
char **ebpf_rss_fds;
};
size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,

View File

@ -2006,19 +2006,23 @@ elif get_option('vduse_blk_export').disabled()
endif
# libbpf
libbpf = dependency('libbpf', required: get_option('bpf'), method: 'pkg-config')
bpf_version = '1.1.0'
libbpf = dependency('libbpf', version: '>=' + bpf_version, required: get_option('bpf'), method: 'pkg-config')
if libbpf.found() and not cc.links('''
#include <bpf/libbpf.h>
#include <linux/bpf.h>
int main(void)
{
// check flag availability
int flag = BPF_F_MMAPABLE;
bpf_object__destroy_skeleton(NULL);
return 0;
}''', dependencies: libbpf)
libbpf = not_found
if get_option('bpf').enabled()
error('libbpf skeleton test failed')
error('libbpf skeleton/mmaping test failed')
else
warning('libbpf skeleton test failed, disabling')
warning('libbpf skeleton/mmaping test failed, disabling')
endif
endif

66
qapi/ebpf.json Normal file
View File

@ -0,0 +1,66 @@
# -*- Mode: Python -*-
# vim: filetype=python
#
# This work is licensed under the terms of the GNU GPL, version 2 or later.
# See the COPYING file in the top-level directory.
##
# = eBPF Objects
#
# An eBPF object is an ELF binary that contains the eBPF
# program and the eBPF map description (BTF). Overall, an eBPF
# object should contain the program and enough metadata
# to create/load eBPF with libbpf. Because the eBPF maps and program
# must match the QEMU build that uses them, an eBPF object cannot
# be used with a different QEMU build.
#
# Currently, there is a possible eBPF for receive-side scaling (RSS).
#
##
##
# @EbpfObject:
#
# An eBPF ELF object.
#
# @object: the eBPF object encoded in base64
#
# Since: 9.0
##
{ 'struct': 'EbpfObject',
'data': {'object': 'str'},
'if': 'CONFIG_EBPF' }
##
# @EbpfProgramID:
#
# The eBPF programs that can be gotten with request-ebpf.
#
# @rss: Receive-side scaling, a technology that steers traffic
# between queues by calculating a hash. Users may set up the
# indirection table and the hash/packet type configuration. Used
# with virtio-net.
#
# Since: 9.0
##
{ 'enum': 'EbpfProgramID',
'if': 'CONFIG_EBPF',
'data': [ { 'name': 'rss' } ] }
##
# @request-ebpf:
#
# Retrieve an eBPF object that can be loaded with libbpf. Management
# applications (e.g. libvirt) may load it and pass file descriptors to
# QEMU, so they can run QEMU without BPF capabilities.
#
# @id: The ID of the program to return.
#
# Returns: eBPF object encoded in base64.
#
# Since: 9.0
##
{ 'command': 'request-ebpf',
'data': { 'id': 'EbpfProgramID' },
'returns': 'EbpfObject',
'if': 'CONFIG_EBPF' }

View File

@ -33,6 +33,7 @@ qapi_all_modules = [
'crypto',
'cxl',
'dump',
'ebpf',
'error',
'introspect',
'job',

View File

@ -53,6 +53,7 @@
{ 'include': 'char.json' }
{ 'include': 'dump.json' }
{ 'include': 'net.json' }
{ 'include': 'ebpf.json' }
{ 'include': 'rdma.json' }
{ 'include': 'rocker.json' }
{ 'include': 'tpm.json' }

View File

@ -81,6 +81,7 @@ struct {
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(struct rss_config_t));
__uint(max_entries, 1);
__uint(map_flags, BPF_F_MMAPABLE);
} tap_rss_map_configurations SEC(".maps");
struct {
@ -88,6 +89,7 @@ struct {
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(struct toeplitz_key_data_t));
__uint(max_entries, 1);
__uint(map_flags, BPF_F_MMAPABLE);
} tap_rss_map_toeplitz_key SEC(".maps");
struct {
@ -95,6 +97,7 @@ struct {
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u16));
__uint(max_entries, INDIRECTION_TABLE_SIZE);
__uint(map_flags, BPF_F_MMAPABLE);
} tap_rss_map_indirection_table SEC(".maps");
static inline void net_rx_rss_add_chunk(__u8 *rss_input, size_t *bytes_written,
@ -317,7 +320,7 @@ static inline int parse_packet(struct __sk_buff *skb,
info->in_src = ip.saddr;
info->in_dst = ip.daddr;
info->is_fragmented = !!ip.frag_off;
info->is_fragmented = !!(bpf_ntohs(ip.frag_off) & (0x2000 | 0x1fff));
l4_protocol = ip.protocol;
l4_offset = ip.ihl * 4;
@ -528,7 +531,7 @@ static inline __u32 calculate_rss_hash(struct __sk_buff *skb,
return result;
}
SEC("tun_rss_steering")
SEC("socket")
int tun_rss_steering_prog(struct __sk_buff *skb)
{