mirror of https://github.com/proxmox/mirror_qemu
net: enable IFF_VNET_HDR on tap fds if available
For now, we just add an empty header before writing and strip the header after reading. We really only want IFF_VNET_HDR when virtio_net is using it, but it would significantly complicate matters to try and do that. There should be little or no performance impact with always adding headers.

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

master
parent
38c75be313
commit
8e0f8e5bf8
83
net.c
83
net.c
|
@ -1260,14 +1260,20 @@ void do_info_usernet(Monitor *mon)
|
||||||
|
|
||||||
#if !defined(_WIN32)
|
#if !defined(_WIN32)
|
||||||
|
|
||||||
|
/* Maximum GSO packet size (64k) plus plenty of room for
|
||||||
|
* the ethernet and virtio_net headers
|
||||||
|
*/
|
||||||
|
#define TAP_BUFSIZE (4096 + 65536)
|
||||||
|
|
||||||
typedef struct TAPState {
|
typedef struct TAPState {
|
||||||
VLANClientState *vc;
|
VLANClientState *vc;
|
||||||
int fd;
|
int fd;
|
||||||
char down_script[1024];
|
char down_script[1024];
|
||||||
char down_script_arg[128];
|
char down_script_arg[128];
|
||||||
uint8_t buf[4096];
|
uint8_t buf[TAP_BUFSIZE];
|
||||||
unsigned int read_poll : 1;
|
unsigned int read_poll : 1;
|
||||||
unsigned int write_poll : 1;
|
unsigned int write_poll : 1;
|
||||||
|
unsigned int has_vnet_hdr : 1;
|
||||||
} TAPState;
|
} TAPState;
|
||||||
|
|
||||||
static int launch_script(const char *setup_script, const char *ifname, int fd);
|
static int launch_script(const char *setup_script, const char *ifname, int fd);
|
||||||
|
@ -1326,15 +1332,33 @@ static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
|
||||||
int iovcnt)
|
int iovcnt)
|
||||||
{
|
{
|
||||||
TAPState *s = vc->opaque;
|
TAPState *s = vc->opaque;
|
||||||
|
const struct iovec *iovp = iov;
|
||||||
|
struct iovec iov_copy[iovcnt + 1];
|
||||||
|
struct virtio_net_hdr hdr = { 0, };
|
||||||
|
|
||||||
return tap_write_packet(s, iov, iovcnt);
|
if (s->has_vnet_hdr) {
|
||||||
|
iov_copy[0].iov_base = &hdr;
|
||||||
|
iov_copy[0].iov_len = sizeof(hdr);
|
||||||
|
memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
|
||||||
|
iovp = iov_copy;
|
||||||
|
iovcnt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return tap_write_packet(s, iovp, iovcnt);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
|
static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
|
||||||
{
|
{
|
||||||
TAPState *s = vc->opaque;
|
TAPState *s = vc->opaque;
|
||||||
struct iovec iov[1];
|
struct iovec iov[2];
|
||||||
int iovcnt = 0;
|
int iovcnt = 0;
|
||||||
|
struct virtio_net_hdr hdr = { 0, };
|
||||||
|
|
||||||
|
if (s->has_vnet_hdr) {
|
||||||
|
iov[iovcnt].iov_base = &hdr;
|
||||||
|
iov[iovcnt].iov_len = sizeof(hdr);
|
||||||
|
iovcnt++;
|
||||||
|
}
|
||||||
|
|
||||||
iov[iovcnt].iov_base = (char *)buf;
|
iov[iovcnt].iov_base = (char *)buf;
|
||||||
iov[iovcnt].iov_len = size;
|
iov[iovcnt].iov_len = size;
|
||||||
|
@ -1380,12 +1404,19 @@ static void tap_send(void *opaque)
|
||||||
int size;
|
int size;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
uint8_t *buf = s->buf;
|
||||||
|
|
||||||
size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
|
size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
|
||||||
if (size <= 0) {
|
if (size <= 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
size = qemu_send_packet_async(s->vc, s->buf, size, tap_send_completed);
|
if (s->has_vnet_hdr) {
|
||||||
|
buf += sizeof(struct virtio_net_hdr);
|
||||||
|
size -= sizeof(struct virtio_net_hdr);
|
||||||
|
}
|
||||||
|
|
||||||
|
size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed);
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
tap_read_poll(s, 0);
|
tap_read_poll(s, 0);
|
||||||
}
|
}
|
||||||
|
@ -1415,6 +1446,18 @@ static int tap_set_sndbuf(TAPState *s, QemuOpts *opts)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int tap_probe_vnet_hdr(int fd)
|
||||||
|
{
|
||||||
|
struct ifreq ifr;
|
||||||
|
|
||||||
|
if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
|
||||||
|
qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ifr.ifr_flags & IFF_VNET_HDR;
|
||||||
|
}
|
||||||
|
|
||||||
static void tap_cleanup(VLANClientState *vc)
|
static void tap_cleanup(VLANClientState *vc)
|
||||||
{
|
{
|
||||||
TAPState *s = vc->opaque;
|
TAPState *s = vc->opaque;
|
||||||
|
@ -1435,12 +1478,14 @@ static void tap_cleanup(VLANClientState *vc)
|
||||||
static TAPState *net_tap_fd_init(VLANState *vlan,
|
static TAPState *net_tap_fd_init(VLANState *vlan,
|
||||||
const char *model,
|
const char *model,
|
||||||
const char *name,
|
const char *name,
|
||||||
int fd)
|
int fd,
|
||||||
|
int vnet_hdr)
|
||||||
{
|
{
|
||||||
TAPState *s;
|
TAPState *s;
|
||||||
|
|
||||||
s = qemu_mallocz(sizeof(TAPState));
|
s = qemu_mallocz(sizeof(TAPState));
|
||||||
s->fd = fd;
|
s->fd = fd;
|
||||||
|
s->has_vnet_hdr = vnet_hdr != 0;
|
||||||
s->vc = qemu_new_vlan_client(vlan, NULL, model, name, NULL,
|
s->vc = qemu_new_vlan_client(vlan, NULL, model, name, NULL,
|
||||||
tap_receive, tap_receive_iov,
|
tap_receive, tap_receive_iov,
|
||||||
tap_cleanup, s);
|
tap_cleanup, s);
|
||||||
|
@ -1450,7 +1495,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan,
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined (CONFIG_BSD) || defined (__FreeBSD_kernel__)
|
#if defined (CONFIG_BSD) || defined (__FreeBSD_kernel__)
|
||||||
static int tap_open(char *ifname, int ifname_size)
|
static int tap_open(char *ifname, int ifname_size, int *vnet_hdr)
|
||||||
{
|
{
|
||||||
int fd;
|
int fd;
|
||||||
char *dev;
|
char *dev;
|
||||||
|
@ -1592,7 +1637,7 @@ static int tap_alloc(char *dev, size_t dev_size)
|
||||||
return tap_fd;
|
return tap_fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tap_open(char *ifname, int ifname_size)
|
static int tap_open(char *ifname, int ifname_size, int *vnet_hdr)
|
||||||
{
|
{
|
||||||
char dev[10]="";
|
char dev[10]="";
|
||||||
int fd;
|
int fd;
|
||||||
|
@ -1605,13 +1650,13 @@ static int tap_open(char *ifname, int ifname_size)
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
#elif defined (_AIX)
|
#elif defined (_AIX)
|
||||||
static int tap_open(char *ifname, int ifname_size)
|
static int tap_open(char *ifname, int ifname_size, int *vnet_hdr)
|
||||||
{
|
{
|
||||||
fprintf (stderr, "no tap on AIX\n");
|
fprintf (stderr, "no tap on AIX\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static int tap_open(char *ifname, int ifname_size)
|
static int tap_open(char *ifname, int ifname_size, int *vnet_hdr)
|
||||||
{
|
{
|
||||||
struct ifreq ifr;
|
struct ifreq ifr;
|
||||||
int fd, ret;
|
int fd, ret;
|
||||||
|
@ -1623,6 +1668,17 @@ static int tap_open(char *ifname, int ifname_size)
|
||||||
}
|
}
|
||||||
memset(&ifr, 0, sizeof(ifr));
|
memset(&ifr, 0, sizeof(ifr));
|
||||||
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
|
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
|
||||||
|
|
||||||
|
{
|
||||||
|
unsigned int features;
|
||||||
|
|
||||||
|
if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
|
||||||
|
features & IFF_VNET_HDR) {
|
||||||
|
*vnet_hdr = 1;
|
||||||
|
ifr.ifr_flags |= IFF_VNET_HDR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ifname[0] != '\0')
|
if (ifname[0] != '\0')
|
||||||
pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
|
pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
|
||||||
else
|
else
|
||||||
|
@ -1688,14 +1744,15 @@ static TAPState *net_tap_init(VLANState *vlan, const char *model,
|
||||||
const char *setup_script, const char *down_script)
|
const char *setup_script, const char *down_script)
|
||||||
{
|
{
|
||||||
TAPState *s;
|
TAPState *s;
|
||||||
int fd;
|
int fd, vnet_hdr;
|
||||||
char ifname[128];
|
char ifname[128];
|
||||||
|
|
||||||
if (ifname1 != NULL)
|
if (ifname1 != NULL)
|
||||||
pstrcpy(ifname, sizeof(ifname), ifname1);
|
pstrcpy(ifname, sizeof(ifname), ifname1);
|
||||||
else
|
else
|
||||||
ifname[0] = '\0';
|
ifname[0] = '\0';
|
||||||
TFR(fd = tap_open(ifname, sizeof(ifname)));
|
vnet_hdr = 0;
|
||||||
|
TFR(fd = tap_open(ifname, sizeof(ifname), &vnet_hdr));
|
||||||
if (fd < 0)
|
if (fd < 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
|
@ -1705,7 +1762,7 @@ static TAPState *net_tap_init(VLANState *vlan, const char *model,
|
||||||
launch_script(setup_script, ifname, fd)) {
|
launch_script(setup_script, ifname, fd)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
s = net_tap_fd_init(vlan, model, name, fd);
|
s = net_tap_fd_init(vlan, model, name, fd, vnet_hdr);
|
||||||
snprintf(s->vc->info_str, sizeof(s->vc->info_str),
|
snprintf(s->vc->info_str, sizeof(s->vc->info_str),
|
||||||
"ifname=%s,script=%s,downscript=%s",
|
"ifname=%s,script=%s,downscript=%s",
|
||||||
ifname, setup_script, down_script);
|
ifname, setup_script, down_script);
|
||||||
|
@ -2659,7 +2716,7 @@ static int net_init_tap(QemuOpts *opts,
|
||||||
|
|
||||||
fcntl(fd, F_SETFL, O_NONBLOCK);
|
fcntl(fd, F_SETFL, O_NONBLOCK);
|
||||||
|
|
||||||
s = net_tap_fd_init(vlan, "tap", name, fd);
|
s = net_tap_fd_init(vlan, "tap", name, fd, tap_probe_vnet_hdr(fd));
|
||||||
if (!s) {
|
if (!s) {
|
||||||
close(fd);
|
close(fd);
|
||||||
}
|
}
|
||||||
|
|
14
tap-linux.h
14
tap-linux.h
|
@ -16,14 +16,28 @@
|
||||||
#ifndef QEMU_TAP_H
|
#ifndef QEMU_TAP_H
|
||||||
#define QEMU_TAP_H
|
#define QEMU_TAP_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
#include <linux/ioctl.h>
|
#include <linux/ioctl.h>
|
||||||
|
|
||||||
/* Ioctl defines */
|
/* Ioctl defines */
|
||||||
#define TUNSETIFF _IOW('T', 202, int)
|
#define TUNSETIFF _IOW('T', 202, int)
|
||||||
|
#define TUNGETFEATURES _IOR('T', 207, unsigned int)
|
||||||
|
#define TUNGETIFF _IOR('T', 210, unsigned int)
|
||||||
#define TUNSETSNDBUF _IOW('T', 212, int)
|
#define TUNSETSNDBUF _IOW('T', 212, int)
|
||||||
|
|
||||||
/* TUNSETIFF ifr flags */
|
/* TUNSETIFF ifr flags */
|
||||||
#define IFF_TAP 0x0002
|
#define IFF_TAP 0x0002
|
||||||
#define IFF_NO_PI 0x1000
|
#define IFF_NO_PI 0x1000
|
||||||
|
#define IFF_VNET_HDR 0x4000
|
||||||
|
|
||||||
|
struct virtio_net_hdr
|
||||||
|
{
|
||||||
|
uint8_t flags;
|
||||||
|
uint8_t gso_type;
|
||||||
|
uint16_t hdr_len;
|
||||||
|
uint16_t gso_size;
|
||||||
|
uint16_t csum_start;
|
||||||
|
uint16_t csum_offset;
|
||||||
|
};
|
||||||
|
|
||||||
#endif /* QEMU_TAP_H */
|
#endif /* QEMU_TAP_H */
|
||||||
|
|
Loading…
Reference in New Issue