- More updates in preparation for the soon-to-be-released support for Docker containers.

master
Asim Jamshed 2018-09-19 00:53:13 +00:00
parent 611cc05d25
commit 2ac34695d0
11 changed files with 387 additions and 47 deletions

View File

@ -85,7 +85,8 @@ epwget: epwget.o ${MTCP_FLD}/lib/libmtcp.a
$(HIDE) ${CC} $< ${LIBS} ${UTIL_OBJ} -o $@
clean:
rm -f *~ *.o ${TARGETS} log_*
$(MSG) " CLEAN $(TARGETS)"
$(HIDE) rm -f *~ *.o ${TARGETS} log_*
distclean: clean
rm -rf Makefile

View File

@ -16,17 +16,27 @@ DPDK_INC=${RTE_SDK}/${RTE_TARGET}/include
DPDK_LIB=${RTE_SDK}/${RTE_TARGET}/lib
appname=dpdk_iface_main
#-------------------------------------------------------------------------#
# Build verbosity switch, exported so that sub-makes see the same settings.
# With V unset or empty, recipes print only their short $(MSG) summary and
# $(HIDE) silences the real command. With V set to anything (e.g. `make V=1`)
# the real commands are echoed and the summaries are disabled.
ifeq ($V,) # no echo
# quiet mode: MSG is a silenced echo, HIDE is the '@' no-echo prefix
export MSG=@echo
export HIDE=
else
# verbose mode: MSG turns the summary into a silenced shell comment (a no-op)
export MSG=@\#
export HIDE=
endif
#-------------------------------------------------------------------------#
# Default target: build the userspace helper, then the kernel module through
# the running kernel's kbuild tree. $(MAKE) is used instead of a literal
# `make` so that -j, -n and the jobserver propagate into the sub-make.
all: dpdk_iface.c $(appname) $(appname).c
	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules
$(appname): $(appname).c
$(CC) $(CFLAGS) $(appname).c -g -o $(appname) -g \
$(MSG) " CC $<"
$(HIDE) $(CC) -g $(CFLAGS) $(appname).c -g -o $(appname) -g \
-I${DPDK_INC} -include $(DPDK_INC)/rte_config.h \
-L$(DPDK_LIB) ${DPDK_MACHINE_LDFLAGS} -lpthread
# Remove the kernel-module build products via kbuild, then the local
# leftovers. $(MAKE) keeps -j/-n/jobserver handling intact in the sub-make;
# the local removal runs once, behind $(HIDE), so quiet mode stays quiet.
clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
	$(MSG) " CLEAN $(appname)"
	$(HIDE) rm -rf *~ *.o *.ko dpdk_iface_main
run: all
sudo ./$(appname)

View File

@ -123,6 +123,7 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct net_device *netdev;
struct stats_struct ss;
struct net_adapter *adapter = NULL;
struct PciDevice pd;
switch (cmd) {
case SEND_STATS:
@ -134,8 +135,11 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ret = update_stats(&ss);
break;
case CREATE_IFACE:
ret = copy_from_user(&pd,
(PciDevice __user *)arg,
sizeof(PciDevice));
ret = copy_from_user(mac_addr,
(unsigned char __user *)arg,
(unsigned char __user *)pd.ports_eth_addr,
ETH_ALEN);
if (!ret) {
/* first check whether the entry does not exist */
@ -174,6 +178,18 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
THIS_MODULE->name, adapter->bd_number);
/* reset nstats */
memset(&adapter->nstats, 0, sizeof(struct net_device_stats));
/* set 'fake' pci address */
memcpy(&adapter->pa, &pd.pa, sizeof(struct PciAddress));
ret = copy_to_user((unsigned char __user *)arg,
netdev->name,
IFNAMSIZ);
if (!ret) {
printk(KERN_INFO "%s: Copying %s name to userspace\n",
THIS_MODULE->name, netdev->name);
} else {
printk(KERN_INFO "%s: Interface %s copy to user failed!\n",
THIS_MODULE->name, netdev->name);
}
ret = 0;
}
}
@ -181,7 +197,31 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case CLEAR_IFACE:
clear_all_netdevices();
break;
case FETCH_PCI_ADDRESS:
ret = copy_from_user(&pd,
(PciDevice __user *)arg,
sizeof(PciDevice));
if (!ret) {
read_lock(&dev_base_lock);
netdev = first_net_device(&init_net);
while (netdev) {
if (strcmp(netdev->name, pd.ifname) == 0) {
read_unlock(&dev_base_lock);
printk(KERN_INFO "%s: Passing PCI info of %s to user\n",
THIS_MODULE->name, pd.ifname);
adapter = netdev_priv(netdev);
ret = copy_to_user(&((PciDevice __user *)arg)->pa,
&adapter->pa,
sizeof(struct PciAddress));
return -ret;
}
netdev = next_net_device(netdev);
}
read_unlock(&dev_base_lock);
ret = -1;
}
break;
default:
ret = -ENOTTY;
break;
@ -216,7 +256,9 @@ iface_pci_init_module(void)
THIS_MODULE->name);
return ret;
}
printk(KERN_INFO "%s: Loaded\n",
THIS_MODULE->name);
return 0;
}
/*--------------------------------------------------------------------------*/

View File

@ -15,6 +15,7 @@ struct net_adapter {
u16 bd_number;
bool netdev_registered;
struct net_device_stats nstats;
struct PciAddress pa;
};
/*--------------------------------------------------------------------------*/
/**

View File

@ -1,6 +1,9 @@
#ifndef __DPDK_IFACE_COMMON_H__
#define __DPDK_IFACE_COMMON_H__
/*--------------------------------------------------------------------------*/
/* for IFNAMSIZ (size of PciDevice.ifname) */
#include <linux/if.h>
/*--------------------------------------------------------------------------*/
/* major number */
#define MAJOR_NO 511 //1110
/* dev name */
@ -10,8 +13,31 @@
#define SEND_STATS 0
#define CREATE_IFACE 1
#define CLEAR_IFACE 4
#define FETCH_PCI_ADDRESS 5
/* max qid */
#define MAX_QID 128
#ifndef MAX_DEVICES
#define MAX_DEVICES 128
#endif
#define PCI_DOM "%04hX"
#define PCI_BUS "%02hhX"
#define PCI_DEVICE "%02hhX"
#define PCI_FUNC "%01hhX"
#define PCI_LENGTH 13
/*--------------------------------------------------------------------------*/
/**
 * PCI address of a device, split into the four fields that the
 * PCI_DOM / PCI_BUS / PCI_DEVICE / PCI_FUNC format macros read and print.
 */
typedef struct PciAddress {
uint16_t domain; /* matched by PCI_DOM (4 hex digits) */
uint8_t bus; /* matched by PCI_BUS (2 hex digits) */
uint8_t device; /* matched by PCI_DEVICE (2 hex digits) */
uint8_t function; /* matched by PCI_FUNC (1 hex digit) */
} PciAddress;
/*--------------------------------------------------------------------------*/
/**
 * Argument block passed to the dpdk-iface driver through ioctl().
 * The anonymous union is interpreted according to the request:
 *  - CREATE_IFACE: the caller sets ports_eth_addr; the driver reads
 *    ETH_ALEN bytes from it and writes the new interface name back over
 *    the start of this struct (i.e. into ifname).
 *  - FETCH_PCI_ADDRESS: the caller sets ifname; the driver fills in pa.
 */
typedef struct PciDevice {
union {
uint8_t *ports_eth_addr; /* user-space pointer to the port's MAC bytes */
char ifname[IFNAMSIZ]; /* interface name (input on fetch, output on create) */
};
PciAddress pa; /* PCI address stored on create, returned on fetch */
} PciDevice __attribute__((aligned(64)));
/*--------------------------------------------------------------------------*/
#endif /* __DPDK_IFACE_COMMON_H__ */

View File

@ -1,36 +1,173 @@
#include <rte_ethdev.h>
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <dirent.h>
#include <rte_version.h>
#include <rte_ethdev.h>
#include "dpdk_iface_common.h"
/*--------------------------------------------------------------------------*/
/* sysfs directories of the kernel drivers whose bound devices are probed */
#define SYSFS_PCI_DRIVER_PATH "/sys/bus/pci/drivers/"
#define SYSFS_PCI_IGB_UIO SYSFS_PCI_DRIVER_PATH"igb_uio"
#define SYSFS_PCI_VFIO_PCI SYSFS_PCI_DRIVER_PATH"vfio-pci"
#define SYSFS_PCI_UIOPCIGEN SYSFS_PCI_DRIVER_PATH"uio_pci_generic"
/*
 * Capacity of the EAL argument vector: two entries ("-w", "<pci-address>")
 * per possible port, plus the fixed leading arguments.
 * The whole expansion is parenthesised so the macro stays correct when it
 * is used inside a larger expression (e.g. RTE_ARGC_MAX * 2).
 */
#define RTE_ARGC_MAX ((RTE_MAX_ETHPORTS << 1) + 7)
/*--------------------------------------------------------------------------*/
/**
 * Per-port record kept by this tool: the ioctl argument block, the DPDK
 * device information and the port's MAC address.
 */
typedef struct {
PciDevice pd; /* ioctl argument; pd.pa holds the PCI address parsed from sysfs */
struct rte_eth_dev_info dev_details; /* driver_name is checked before a port is registered */
struct ether_addr ports_eth_addr; /* filled in by rte_eth_macaddr_get() */
} DevInfo;
/* one slot per possible DPDK port; sorted by PCI address once probing is done */
static DevInfo di[RTE_MAX_ETHPORTS];
/*--------------------------------------------------------------------------*/
/**
 * scandir() filter: accept only symbolic links whose name contains a ':'.
 * This is a deliberately loose heuristic for "looks like a PCI address";
 * the name is validated later when it is actually parsed.
 *
 * @return 1 to keep the entry, 0 to drop it
 */
int
IsPciEnt(const struct dirent *entry)
{
	/* anything that is not a symlink is rejected outright */
	if (entry->d_type != DT_LNK)
		return 0;

	return (strchr(entry->d_name, ':') != NULL) ? 1 : 0;
}
/*--------------------------------------------------------------------------*/
/**
 * qsort() comparator for DevInfo records: orders them by PCI address,
 * comparing domain first, then bus, then device, then function, all as
 * plain unsigned numbers (so hexadecimal names sort correctly).
 *
 * @return -1, 0 or 1 as elem1 sorts before, equal to, or after elem2
 */
int
localversionsort(const void *elem1, const void *elem2)
{
	const DevInfo *lhs = (const DevInfo *)elem1;
	const DevInfo *rhs = (const DevInfo *)elem2;
	uint64_t lkey, rkey;

	/*
	 * Pack the four fields most-significant-first; a single integer
	 * comparison of the keys is then the same as the field-by-field one.
	 */
	lkey = ((uint64_t)lhs->pd.pa.domain << 24) |
	       ((uint64_t)lhs->pd.pa.bus << 16) |
	       ((uint64_t)lhs->pd.pa.device << 8) |
	       (uint64_t)lhs->pd.pa.function;
	rkey = ((uint64_t)rhs->pd.pa.domain << 24) |
	       ((uint64_t)rhs->pd.pa.bus << 16) |
	       ((uint64_t)rhs->pd.pa.device << 8) |
	       (uint64_t)rhs->pd.pa.function;

	return (lkey > rkey) - (lkey < rkey);
}
/*--------------------------------------------------------------------------*/
int
probe_all_rte_devices(char **argv, int *argc)
{
struct dirent **dirlist;
int pci_index, total_files, i, j;
/* reset pci_index */
pci_index = 0;
for (j = 0; j < 3; j++) {
switch (j) {
case 0:
/* scan igb_uio first */
total_files = scandir(SYSFS_PCI_IGB_UIO, &dirlist,
IsPciEnt, versionsort);
break;
case 1:
/* scan vfio_pci next */
total_files = scandir(SYSFS_PCI_VFIO_PCI, &dirlist,
IsPciEnt, versionsort);
break;
case 2:
/* finally scan uio_pci_generic */
total_files = scandir(SYSFS_PCI_UIOPCIGEN, &dirlist,
IsPciEnt, versionsort);
break;
default:
fprintf(stderr, "Control can never come here!\n");
goto panic_err;
}
for (i = 0; i < total_files; i++, pci_index++) {
argv[*argc] = strdup("-w");
argv[*argc + 1] = strdup(dirlist[i]->d_name);
if (argv[*argc] == NULL ||
argv[*argc + 1] == NULL)
goto alloc_err;
*argc += 2;
if (sscanf(dirlist[i]->d_name, PCI_DOM":"PCI_BUS":"
PCI_DEVICE"."PCI_FUNC,
&di[pci_index].pd.pa.domain,
&di[pci_index].pd.pa.bus,
&di[pci_index].pd.pa.device,
&di[pci_index].pd.pa.function) != 4)
goto sscanf_err;
free(dirlist[i]);
}
free(dirlist);
}
/* now sort all recorded entries */
qsort(di, pci_index, sizeof(DevInfo), localversionsort);
return pci_index;
sscanf_err:
fprintf(stderr, "Unable to retrieve pci address!\n");
exit(EXIT_FAILURE);
alloc_err:
fprintf(stderr, "Can't allocate memory for argv items!\n");
exit(EXIT_FAILURE);
panic_err:
fprintf(stderr, "Could not open the directory!\n");
exit(EXIT_FAILURE);
}
/*--------------------------------------------------------------------------*/
int
main(int argc, char **argv)
{
int ret, fd, num_devices;
int ret, fd, num_devices, i;
dev_t dev;
char *cpumaskbuf = "0x1";
char *mem_channels = "4";
char *rte_argv[] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto",
""
char *rte_argv[RTE_ARGC_MAX] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto"
};
const int rte_argc = 6;
typedef struct {
struct ether_addr ports_eth_addr;
struct rte_eth_dev_info dev_details;
} dev_info;
dev_info di[RTE_MAX_ETHPORTS];
int rte_argc = 6;
ret = probe_all_rte_devices(rte_argv, &rte_argc);
for (i = 0; i < ret; i++) {
fprintf(stderr, "Pci Address: %04hX:%02hhX:%02hhX.%01hhX\n",
di[i].pd.pa.domain,
di[i].pd.pa.bus,
di[i].pd.pa.device,
di[i].pd.pa.function);
}
if (geteuid()) {
fprintf(stderr, "[CAUTION] Run the app as root!\n");
exit(EXIT_FAILURE);
@ -86,6 +223,7 @@ main(int argc, char **argv)
}
for (ret = 0; ret < num_devices; ret++) {
di[ret].pd.ports_eth_addr = &di[ret].ports_eth_addr.addr_bytes[0];
/* get mac addr entries of detected dpdk ports */
rte_eth_macaddr_get(ret, &di[ret].ports_eth_addr);
/* check port capabailties/info */
@ -104,23 +242,55 @@ main(int argc, char **argv)
/* clear all previous entries */
fprintf(stderr, "Clearing previous entries\n");
ioctl(fd, CLEAR_IFACE, di[0].ports_eth_addr.addr_bytes);
ret = ioctl(fd, CLEAR_IFACE, di[0].ports_eth_addr.addr_bytes);
if (ret == -1) {
fprintf(stderr, "ioctl call failed!\n");
return EXIT_FAILURE;
}
/* register the newly detected dpdk ports */
for (ret = 0; ret < num_devices; ret++) {
if (strcmp(di[ret].dev_details.driver_name, "net_mlx4") &&
strcmp(di[ret].dev_details.driver_name, "net_mlx5")) {
fprintf(stderr, "Registering port %d (%02X:%02X:%02X:%02X:%02X:%02X) to mTCP stack\n",
fprintf(stderr, "Registering port %d (%02X:%02X:%02X:%02X:%02X:%02X) to mTCP stack",
ret,
di[ret].ports_eth_addr.addr_bytes[0], di[ret].ports_eth_addr.addr_bytes[1],
di[ret].ports_eth_addr.addr_bytes[2], di[ret].ports_eth_addr.addr_bytes[3],
di[ret].ports_eth_addr.addr_bytes[4], di[ret].ports_eth_addr.addr_bytes[5]);
ioctl(fd, 1, di[ret].ports_eth_addr.addr_bytes);
di[ret].ports_eth_addr.addr_bytes[0],
di[ret].ports_eth_addr.addr_bytes[1],
di[ret].ports_eth_addr.addr_bytes[2],
di[ret].ports_eth_addr.addr_bytes[3],
di[ret].ports_eth_addr.addr_bytes[4],
di[ret].ports_eth_addr.addr_bytes[5]);
di[ret].pd.ports_eth_addr = di[ret].ports_eth_addr.addr_bytes;
if (ioctl(fd, CREATE_IFACE, &di[ret].pd) == -1) {
fprintf(stderr, "ioctl call failed!\n");
}
fprintf(stderr, " (%s).\n",
di[ret].pd.ifname);
}
}
/* close the fd */
close(fd);
#if 0
/*
* XXX: It seems that there is a bug in the RTE SDK.
* The dynamically allocated rte_argv params are left
* as dangling pointers. Freeing them causes program
* to crash.
*/
/* free up all resources */
for (; rte_argc >= 6; rte_argc--) {
if (rte_argv[rte_argc] != NULL) {
fprintf(stderr, "Cleaning up rte_argv[%d]: %s (%p)\n",
rte_argc, rte_argv[rte_argc], rte_argv[rte_argc]);
free(rte_argv[rte_argc]);
rte_argv[rte_argc] = NULL;
}
}
#endif
return EXIT_SUCCESS;
}
/*--------------------------------------------------------------------------*/

View File

@ -120,12 +120,16 @@ $(MTCP_HDR):
cp $(INC_DIR)/$@ $(MTCP_HDR_DIR)/$@
clean: clean-library
rm -f *.o *~ core
rm -f .*.d
$(MSG) " CLEAN *.o's"
$(HIDE) rm -f *.o *~ core
$(MSG) " CLEAN *.d's"
$(HIDE) rm -f .*.d
clean-library:
rm -f $(MTCP_LIB_DIR)/*
rm -f $(MTCP_HDR_DIR)/*
$(MSG) " CLEAN *.a"
$(HIDE) rm -f $(MTCP_LIB_DIR)/*
$(MSG) " CLEAN *.h"
$(HIDE) rm -f $(MTCP_HDR_DIR)/*
distclean: clean
rm -f Makefile

View File

@ -32,6 +32,8 @@
/* for ip defragging */
#include <rte_ip_frag.h>
#endif
/* for ioctl funcs */
#include <dpdk_iface_common.h>
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE MAX_CPUS
@ -182,7 +184,6 @@ struct dpdk_private_context {
} __rte_cache_aligned;
#ifdef ENABLE_STATS_IOCTL
#define DEV_NAME "/dev/dpdk-iface"
/**
* stats struct passed on from user space to the driver
*/
@ -263,9 +264,9 @@ dpdk_init_handle(struct mtcp_thread_context *ctxt)
#endif /* !IP_DEFRAG */
#ifdef ENABLE_STATS_IOCTL
dpc->fd = open(DEV_NAME, O_RDWR);
dpc->fd = open(DEV_PATH, O_RDWR);
if (dpc->fd == -1) {
TRACE_ERROR("Can't open " DEV_NAME " for context->cpu: %d! "
TRACE_ERROR("Can't open " DEV_PATH " for context->cpu: %d! "
"Are you using mlx4/mlx5 driver?\n",
ctxt->cpu);
}
@ -337,7 +338,8 @@ dpdk_send_pkts(struct mtcp_thread_context *ctxt, int ifidx)
ss.qid = ctxt->cpu;
ss.dev = portid;
/* pass the info now */
ioctl(dpc->fd, 0, &ss);
if (ioctl(dpc->fd, SEND_STATS, &ss) == -1)
TRACE_ERROR("Can't update iface stats!\n");
dpc->cur_ts = mtcp->cur_ts;
if (ctxt->cpu == 0)
rte_eth_stats_reset(portid);
@ -723,6 +725,7 @@ dpdk_load_module(void)
/* retrieve current flow control settings per port */
memset(&fc_conf, 0, sizeof(fc_conf));
#ifndef CONTAINERIZED_SUPPORT
ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
if (ret != 0)
rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
@ -733,6 +736,7 @@ dpdk_load_module(void)
if (ret != 0)
rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
ret);
#endif
#ifdef DEBUG
printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",

View File

@ -0,0 +1 @@
../../../dpdk-iface-kmod/dpdk_iface_common.h

View File

@ -13,8 +13,10 @@
/* for ioctl */
#include <sys/ioctl.h>
#ifndef DISABLE_DPDK
/*
 * Capacity of the EAL argument vector: two entries per possible port plus
 * the fixed arguments. Parenthesised as a whole so the macro expands
 * correctly inside larger expressions.
 */
#define RTE_ARGC_MAX ((RTE_MAX_ETHPORTS << 1) + 9)
/* for dpdk ethernet functions (get mac addresses) */
#include <rte_ethdev.h>
#include <dpdk_iface_common.h>
#endif
/* for TRACE_* */
#include "debug.h"
@ -85,6 +87,60 @@ GetNumQueues()
}
#endif /* !PSIO */
/*----------------------------------------------------------------------------*/
#ifndef DISABLE_DPDK
static void
probe_all_rte_devices(char **argv, int *argc, char *dev_name_list)
{
PciDevice pd;
int fd;
static char end[] = "";
static const char delim[] = " \t";
static char *dev_tokenizer;
char *dev_token, *saveptr;
dev_tokenizer = strdup(dev_name_list);
if (dev_tokenizer == NULL) {
TRACE_ERROR("Can't allocate memory for dev_tokenizer!\n");
exit(EXIT_FAILURE);
}
fd = open(DEV_PATH, O_RDONLY);
if (fd == -1) {
TRACE_ERROR("Error opening dpdk-face!\n");
exit(EXIT_FAILURE);
}
dev_token = strtok_r(dev_tokenizer, delim, &saveptr);
while (dev_token != NULL) {
strcpy(pd.ifname, dev_token);
if (ioctl(fd, FETCH_PCI_ADDRESS, &pd) == -1) {
TRACE_DBG("Could not find pci info on dpdk "
"device: %s. Is it a dpdk-attached "
"interface?\n", dev_token);
goto loop_over;
}
argv[*argc] = strdup("-w");
argv[*argc + 1] = calloc(PCI_LENGTH, 1);
if (argv[*argc] == NULL ||
argv[*argc + 1] == NULL) {
TRACE_ERROR("Memory allocation error!\n");
exit(EXIT_FAILURE);
}
sprintf(argv[*argc + 1], PCI_DOM":"PCI_BUS":"
PCI_DEVICE"."PCI_FUNC,
pd.pa.domain, pd.pa.bus, pd.pa.device,
pd.pa.function);
*argc += 2;
loop_over:
dev_token = strtok_r(NULL, delim, &saveptr);
}
/* add the terminating "" sequence */
argv[*argc] = end;
close(fd);
free(dev_tokenizer);
}
#endif /* !DISABLE_DPDK */
/*----------------------------------------------------------------------------*/
int
SetInterfaceInfo(char* dev_name_list)
{
@ -203,17 +259,24 @@ SetInterfaceInfo(char* dev_name_list)
exit(EXIT_FAILURE);
}
sprintf(mem_channels, "%d", CONFIG.num_mem_ch);
/* initialize the rte env first, what a waste of implementation effort! */
char *argv[] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto",
""
/* initialize the rte env first, what a waste of implementation effort! */
#ifdef CONTAINERIZED_SUPPORT
int argc = 8;
#else
int argc = 6;
#endif
char *argv[RTE_ARGC_MAX] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
#ifdef CONTAINERIZED_SUPPORT
"--socket-mem",
"1024",
#endif
"--proc-type=auto"
};
const int argc = 6;
probe_all_rte_devices(argv, &argc, dev_name_list);
/*
* re-set getopt extern variable optind.
@ -314,7 +377,24 @@ SetInterfaceInfo(char* dev_name_list)
} while (iter_if != NULL);
freeifaddrs(ifap);
#if 0
/*
* XXX: It seems that there is a bug in the RTE SDK.
* The dynamically allocated rte_argv params are left
* as dangling pointers. Freeing them causes program
* to crash.
*/
/* free up all resources */
for (; rte_argc >= 9; rte_argc--) {
if (rte_argv[rte_argc] != NULL) {
fprintf(stderr, "Cleaning up rte_argv[%d]: %s (%p)\n",
rte_argc, rte_argv[rte_argc], rte_argv[rte_argc]);
free(rte_argv[rte_argc]);
rte_argv[rte_argc] = NULL;
}
}
#endif
/* check if process is primary or secondary */
CONFIG.multi_process_is_master = (eal_proc_type_detect() == RTE_PROC_PRIMARY) ?
1 : 0;

View File

@ -36,7 +36,8 @@ $(OBJS): %.o: %.c Makefile
$(HIDE) ${CC} ${GCC_OPT} ${CFLAGS} $<
# Delete editor backups and object files. The status line says CLEAN (it
# used to say CC, copied from the compile rule) and the removal runs once,
# behind $(HIDE), so quiet mode stays quiet.
clean:
	$(MSG) " CLEAN *.o"
	$(HIDE) rm -rf *~ *.o
distclean: clean
rm -f Makefile