- Add the new dpdk interface kernel driver.

master
Asim Jamshed 2018-07-30 13:40:39 -04:00
parent 1c32cd9a5c
commit 64025a5361
5 changed files with 593 additions and 0 deletions

33
dpdk-iface-kmod/Makefile Normal file
View File

@ -0,0 +1,33 @@
#-------------------------------------------------------------------------#
# Builds the dpdk_iface kernel module (through the running kernel's kbuild
# tree) and the user-space helper dpdk_iface_main.
# RTE_SDK and RTE_TARGET must point at a built DPDK tree.
#-------------------------------------------------------------------------#
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif
ifeq ($(RTE_TARGET),)
$(error "Please define RTE_TARGET environment variable")
endif
#-------------------------------------------------------------------------#
include $(RTE_SDK)/mk/rte.vars.mk
CC=gcc
obj-m=dpdk_iface.o
# Expanded by make (not by the $(shell ...) sub-shell) so the path also
# resolves when RTE_SDK/RTE_TARGET are given on the make command line.
DPDK_MACHINE_LINKER_FLAGS=$(RTE_SDK)/$(RTE_TARGET)/lib/ldflags.txt
DPDK_MACHINE_LDFLAGS=$(shell cat ${DPDK_MACHINE_LINKER_FLAGS})
DPDK_INC=${RTE_SDK}/${RTE_TARGET}/include
DPDK_LIB=${RTE_SDK}/${RTE_TARGET}/lib
appname=dpdk_iface_main
#-------------------------------------------------------------------------#
.PHONY: all clean run
# $(MAKE) (rather than a literal "make") keeps -j/jobserver and command-line
# flags working in the recursive kbuild invocation.
all: dpdk_iface.c $(appname) $(appname).c
	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) modules
$(appname): $(appname).c
	$(CC) $(CFLAGS) $(appname).c -g -o $(appname) \
		-I${DPDK_INC} -include $(DPDK_INC)/rte_config.h \
		-L$(DPDK_LIB) ${DPDK_MACHINE_LDFLAGS} -lpthread
clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
	rm -rf *~ *.o *.ko $(appname)
run: all
	sudo ./$(appname)
#-------------------------------------------------------------------------#

View File

@ -0,0 +1,236 @@
/*-
* GPL LICENSE SUMMARY
*
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
* The full GNU General Public License is included in this distribution
* in the file called LICENSE.GPL.
*
* Contact Information:
* Intel Corporation
*/
/*--------------------------------------------------------------------------*/
#include <linux/device.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/if_ether.h>
#include <linux/etherdevice.h>
#include "dpdk_iface.h"
/*--------------------------------------------------------------------------*/
/*
 * Per-device, per-queue counter tables.
 *   sarrays[dev][qid]     - most recent snapshot stored by update_stats()
 *   old_sarrays[dev][qid] - totals folded in by update_stats() when the
 *                           incoming counters are seen to go backwards
 * Objects with static storage duration start out zeroed, so no explicit
 * initializer is needed.
 */
struct stats_struct sarrays[MAX_DEVICES][MAX_QID];
struct stats_struct old_sarrays[MAX_DEVICES][MAX_QID];
/*--------------------------------------------------------------------------*/
static int
update_stats(struct stats_struct *stats)
{
uint8_t qid = stats->qid;
uint8_t device = stats->dev;
struct stats_struct *old_sarray = &old_sarrays[device][qid];
struct stats_struct *sarray = &sarrays[device][qid];
if (unlikely(sarrays[device][qid].rx_bytes > stats->rx_bytes ||
sarrays[device][qid].tx_bytes > stats->tx_bytes)) {
/* mTCP app restarted?? */
old_sarray->rx_bytes += sarray->rx_bytes;
old_sarray->rx_pkts += sarray->rx_pkts;
old_sarray->tx_bytes += sarray->tx_bytes;
old_sarray->tx_pkts += sarray->tx_pkts;
old_sarray->rmiss += sarray->rmiss;
old_sarray->rerr += sarray->rerr;
old_sarray->terr += sarray->terr;
}
sarray->rx_bytes = stats->rx_bytes;
sarray->rx_pkts = stats->rx_pkts;
sarray->tx_bytes = stats->tx_bytes;
sarray->tx_pkts = stats->tx_pkts;
sarray->rmiss = stats->rmiss;
sarray->rerr = stats->rerr;
sarray->terr = stats->terr;
#if 0
printk(KERN_ALERT "%s: Dev: %d, Qid: %d, RXP: %llu, "
"RXB: %llu, TXP: %llu, TXB: %llu\n",
device, qid,
THIS_MODULE->name,
(long long unsigned int)sarray->rx_pkts,
(long long unsigned int)sarray->rx_bytes,
(long long unsigned int)sarray->tx_pkts,
(long long unsigned int)sarray->tx_bytes);
#endif
return 0;
}
/*--------------------------------------------------------------------------*/
static void
clear_all_netdevices(void)
{
struct net_device *netdev, *dpdk_netdev;
uint8_t freed;
do {
dpdk_netdev = NULL;
freed = 0;
write_lock(&dev_base_lock);
netdev = first_net_device(&init_net);
while (netdev) {
if (strncmp(netdev->name, IFACE_PREFIX,
strlen(IFACE_PREFIX)) == 0) {
dpdk_netdev = netdev;
break;
}
netdev = next_net_device(netdev);
}
write_unlock(&dev_base_lock);
if (dpdk_netdev) {
unregister_netdev(dpdk_netdev);
free_netdev(dpdk_netdev);
freed = 1;
}
} while (freed);
}
/*--------------------------------------------------------------------------*/
/**
 * igb_net_open - open handler of the control character device.
 * @inode: unused
 * @filp:  unused
 *
 * No per-open state is kept.
 *
 * Return: always 0.
 */
int
igb_net_open(struct inode *inode, struct file *filp)
{
	(void)inode;
	(void)filp;

	return 0;
}
/*--------------------------------------------------------------------------*/
/**
 * igb_net_release - release handler of the control character device.
 * @inode: unused
 * @filp:  unused
 *
 * Nothing was set up in igb_net_open(), so nothing is undone here.
 *
 * Return: always 0.
 */
int
igb_net_release(struct inode *inode, struct file *filp)
{
	(void)inode;
	(void)filp;

	return 0;
}
/*--------------------------------------------------------------------------*/
long
igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int ret = 0;
unsigned char mac_addr[ETH_ALEN];
struct net_device *netdev;
struct stats_struct ss;
struct net_adapter *adapter = NULL;
switch (cmd) {
case SEND_STATS:
ret = copy_from_user(&ss,
(struct stats_struct __user *)arg,
sizeof(struct stats_struct));
if (ret)
return -EFAULT;
ret = update_stats(&ss);
break;
case CREATE_IFACE:
ret = copy_from_user(mac_addr,
(unsigned char __user *)arg,
ETH_ALEN);
if (!ret) {
/* first check whether the entry does not exist */
read_lock(&dev_base_lock);
netdev = first_net_device(&init_net);
while (netdev) {
if (memcmp(netdev->dev_addr, mac_addr, ETH_ALEN) == 0) {
read_unlock(&dev_base_lock);
printk(KERN_ERR "%s: port already registered!\n", THIS_MODULE->name);
return -EINVAL;
}
netdev = next_net_device(netdev);
}
read_unlock(&dev_base_lock);
/* initialize the corresponding netdev */
netdev = alloc_etherdev(sizeof(struct net_adapter));
if (!netdev) {
ret = -ENOMEM;
} else {
SET_NETDEV_DEV(netdev, NULL);
adapter = netdev_priv(netdev);
adapter->netdev = netdev;
netdev_assign_netdev_ops(netdev);
memcpy(netdev->dev_addr, mac_addr, ETH_ALEN);
strcpy(netdev->name, IFACE_PREFIX"%d");
ret = register_netdev(netdev);
if (ret)
goto fail_ioremap;
adapter->netdev_registered = true;
if ((ret=sscanf(netdev->name, IFACE_PREFIX"%hu", &adapter->bd_number)) <= 0)
goto fail_bdnumber;
printk(KERN_INFO "%s: ifindex picked: %hu\n",
THIS_MODULE->name, adapter->bd_number);
/* reset nstats */
memset(&adapter->nstats, 0, sizeof(struct net_device_stats));
ret = 0;
}
}
break;
case CLEAR_IFACE:
clear_all_netdevices();
break;
default:
ret = -ENOTTY;
break;
}
return ret;
fail_bdnumber:
unregister_netdev(netdev);
fail_ioremap:
free_netdev(netdev);
return ret;
}
/*--------------------------------------------------------------------------*/
static struct file_operations igb_net_fops = {
.open = igb_net_open,
.release = igb_net_release,
.unlocked_ioctl = igb_net_ioctl,
};
/*--------------------------------------------------------------------------*/
/**
 * iface_pci_init_module - module entry point.
 *
 * Registers the control character device DEV_NAME under major MAJOR_NO.
 *
 * Return: 0 on success, the negative value from register_chrdev() otherwise.
 */
static int __init
iface_pci_init_module(void)
{
	int err = register_chrdev(MAJOR_NO, DEV_NAME, &igb_net_fops);

	if (err >= 0)
		return 0;

	printk(KERN_ERR "%s: register_chrdev failed\n", THIS_MODULE->name);
	return err;
}
/*--------------------------------------------------------------------------*/
/**
 * iface_pci_exit_module - module exit point.
 *
 * Teardown runs in the reverse order of setup: the character device is
 * unregistered first so it can no longer be opened to issue CREATE_IFACE
 * while the remaining interfaces are being removed.
 */
static void __exit
iface_pci_exit_module(void)
{
	unregister_chrdev(MAJOR_NO, DEV_NAME);
	clear_all_netdevices();
}
/*--------------------------------------------------------------------------*/
module_init(iface_pci_init_module);
module_exit(iface_pci_exit_module);
MODULE_DESCRIPTION("Interface driver for DPDK devices");
/*
 * Matches the GPL version 2 notice at the top of this file. "BSD" is not
 * one of the license idents the module loader recognises, so it would
 * mark the kernel as tainted on load.
 */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("mtcp@list.ndsl.kaist.edu");
/*--------------------------------------------------------------------------*/

View File

@ -0,0 +1,181 @@
#ifndef __DPDK_IFACE_H__
#define __DPDK_IFACE_H__
/*--------------------------------------------------------------------------*/
#include <linux/netdevice.h>
#include "dpdk_iface_common.h"
/*--------------------------------------------------------------------------*/
#define IFACE_PREFIX "dpdk"
/*--------------------------------------------------------------------------*/
/**
* net adapter private struct
*
* One instance lives in the private area of every net_device created by the
* CREATE_IFACE ioctl (allocated via alloc_etherdev(), fetched with
* netdev_priv()).
*/
struct net_adapter {
/* back-pointer to the net_device that embeds this struct */
struct net_device *netdev;
/* NOTE(review): no visible code writes or reads this field - confirm use */
unsigned char mac_addr[ETH_ALEN];
/* numeric suffix of the interface name; first index into sarrays[][] */
u16 bd_number;
/* set to true once register_netdev() has succeeded */
bool netdev_registered;
/* buffer filled in and returned by netdev_stats() */
struct net_device_stats nstats;
};
/*--------------------------------------------------------------------------*/
/**
* stats struct passed on from user space to the driver
*
* Copied in whole by the SEND_STATS ioctl, so the field order and sizes must
* match what the user-space sender writes.
*/
struct stats_struct {
/* counters for one (dev, qid) pair; stored as-is by update_stats() */
uint64_t tx_bytes;
uint64_t tx_pkts;
uint64_t rx_bytes;
uint64_t rx_pkts;
/* summed into rx_missed_errors by netdev_stats() */
uint64_t rmiss;
/* summed into rx_frame_errors by netdev_stats() */
uint64_t rerr;
/* summed into tx_errors by netdev_stats() */
uint64_t terr;
/* second index into sarrays[][] (table holds MAX_QID entries per device) */
uint8_t qid;
/* first index into sarrays[][] (table holds MAX_DEVICES devices) */
uint8_t dev;
};
/*--------------------------------------------------------------------------*/
/*
 * sarray declaration
 * sarrays[dev][qid] holds the latest counters received through SEND_STATS;
 * old_sarrays[dev][qid] holds what update_stats() accumulated before the
 * counters were last seen to decrease. netdev_stats() reports their sum.
 */
extern struct stats_struct sarrays[MAX_DEVICES][MAX_QID];
extern struct stats_struct old_sarrays[MAX_DEVICES][MAX_QID];
/*----------------------------------------------------------------------------*/
/**
 * netdev_open - no-op callback that always reports success.
 * @netdev: unused
 *
 * Besides ndo_open, the ops table also installs this function as ndo_stop
 * and ndo_validate_addr.
 *
 * Return: always 0.
 */
static int
netdev_open(struct net_device *netdev)
{
	return 0;
}
/*----------------------------------------------------------------------------*/
/**
* dummy function for retrieving net stats
*/
static struct net_device_stats *
netdev_stats(struct net_device *netdev)
{
struct net_adapter *adapter;
struct stats_struct *old_sarray = NULL;
struct stats_struct *sarray = NULL;
int i, ifdx;
adapter = netdev_priv(netdev);
ifdx = adapter->bd_number;
if (ifdx >= MAX_DEVICES)
printk(KERN_ERR "ifindex value: %d is greater than MAX_DEVICES!\n",
ifdx);
adapter->nstats.rx_packets = adapter->nstats.tx_packets = 0;
adapter->nstats.rx_bytes = adapter->nstats.tx_bytes = 0;
for (i = 0; i < MAX_QID; i++) {
sarray = &sarrays[ifdx][i];
old_sarray = &old_sarrays[ifdx][i];
adapter->nstats.rx_packets += sarray->rx_pkts + old_sarray->rx_pkts;
adapter->nstats.rx_bytes += sarray->rx_bytes + old_sarray->rx_bytes;
adapter->nstats.tx_packets += sarray->tx_pkts + old_sarray->tx_pkts;
adapter->nstats.tx_bytes += sarray->tx_bytes + old_sarray->tx_bytes;
adapter->nstats.rx_missed_errors += sarray->rmiss + old_sarray->rmiss;
adapter->nstats.rx_frame_errors += sarray->rerr + old_sarray->rerr;
adapter->nstats.tx_errors += sarray->terr + old_sarray->terr;
}
#if 0
printk(KERN_INFO "ifdx: %d, rxp: %llu, rxb: %llu, txp: %llu, txb: %llu\n",
ifdx,
(long long unsigned int)adapter->nstats.rx_packets,
(long long unsigned int)adapter->nstats.rx_bytes,
(long long unsigned int)adapter->nstats.tx_packets,
(long long unsigned int)adapter->nstats.tx_bytes);
#endif
return &adapter->nstats;
}
/*----------------------------------------------------------------------------*/
/**
 * netdev_set_features - ndo_set_features callback that changes nothing.
 * @netdev:   unused
 * @features: unused
 *
 * Return: always 0.
 */
static int
netdev_set_features(struct net_device *netdev, netdev_features_t features)
{
	return 0;
}
/*----------------------------------------------------------------------------*/
/**
 * netdev_fix_features - ndo_fix_features callback.
 * @netdev:   unused
 * @features: unused; the requested set is not consulted
 *
 * Return: always 0, i.e. an empty feature set.
 */
static netdev_features_t
netdev_fix_features(struct net_device *netdev, netdev_features_t features)
{
	return 0;
}
/*----------------------------------------------------------------------------*/
/**
 * netdev_no_ret - placeholder for callbacks that return nothing.
 * @netdev: unused
 *
 * Installed in the ops table wherever a void handler taking only the
 * device is needed; it does nothing.
 */
static void
netdev_no_ret(struct net_device *netdev)
{
}
/*----------------------------------------------------------------------------*/
/**
* dummy tx function
*/
static int
netdev_xmit(struct sk_buff *skb, struct net_device *netdev) {
(void)netdev;
(void)skb;
return 0;
}
/*----------------------------------------------------------------------------*/
/**
* A naive net_device_ops struct to get the interface visible to the OS
*/
static const struct net_device_ops netdev_ops = {
.ndo_open = netdev_open,
.ndo_stop = netdev_open,
.ndo_start_xmit = netdev_xmit,
.ndo_set_rx_mode = netdev_no_ret,
.ndo_validate_addr = netdev_open,
.ndo_set_mac_address = NULL,
.ndo_change_mtu = NULL,
.ndo_tx_timeout = netdev_no_ret,
.ndo_vlan_rx_add_vid = NULL,
.ndo_vlan_rx_kill_vid = NULL,
.ndo_do_ioctl = NULL,
.ndo_set_vf_mac = NULL,
.ndo_set_vf_vlan = NULL,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(3, 15, 0)
.ndo_set_vf_tx_rate = NULL,
#else
.ndo_set_vf_rate = NULL,
#endif
.ndo_set_vf_spoofchk = NULL,
.ndo_get_vf_config = NULL,
.ndo_get_stats = netdev_stats,
.ndo_setup_tc = NULL,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = netdev_no_ret,
.ndo_netpoll_setup = NULL,
.ndo_netpoll_cleanup = NULL,
#endif
.ndo_set_features = netdev_set_features,
.ndo_fix_features = netdev_fix_features,
.ndo_fdb_add = NULL,
};
/*----------------------------------------------------------------------------*/
/**
 * netdev_assign_netdev_ops - attach this driver's ops table to a device.
 * @dev: net device that is about to be registered
 *
 * static inline because the definition lives in a header: an external
 * definition here would be emitted by every file that includes it and
 * clash at link time.
 */
static inline void
netdev_assign_netdev_ops(struct net_device *dev)
{
	dev->netdev_ops = &netdev_ops;
}
/*----------------------------------------------------------------------------*/
#endif /* __DPDK_IFACE_H__ */

View File

@ -0,0 +1,17 @@
#ifndef __DPDK_IFACE_COMMON_H__
#define __DPDK_IFACE_COMMON_H__
/*--------------------------------------------------------------------------*/
/*
 * Definitions shared by the kernel module and the user-space helper.
 */
/*
 * major number
 * Kept below 512: recent kernels refuse to register a character device
 * whose requested major is at or above CHRDEV_MAJOR_MAX (512), which made
 * the previous value (1110) fail at module load.
 */
#define MAJOR_NO 511
/* dev name */
#define DEV_NAME "dpdk-iface"
/* the space keeps the two tokens separate for C++11 and later compilers */
#define DEV_PATH "/dev/" DEV_NAME
/*
 * ioctl#
 * Raw command values; they are part of the user/kernel ABI and must stay
 * in sync on both sides.
 */
#define SEND_STATS 0
#define CREATE_IFACE 1
#define CLEAR_IFACE 4
/* dimensions of the per-device / per-queue stats tables */
#define MAX_QID 128
#define MAX_DEVICES 128
/*--------------------------------------------------------------------------*/
#endif /* __DPDK_IFACE_COMMON_H__ */

View File

@ -0,0 +1,126 @@
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <unistd.h>
#include <rte_ethdev.h>
#include <rte_version.h>
#include "dpdk_iface_common.h"
/*--------------------------------------------------------------------------*/
int
main(int argc, char **argv)
{
int ret, fd, num_devices;
dev_t dev;
char *cpumaskbuf = "0x1";
char *mem_channels = "4";
char *rte_argv[] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto",
""
};
const int rte_argc = 6;
typedef struct {
struct ether_addr ports_eth_addr;
struct rte_eth_dev_info dev_details;
} dev_info;
dev_info di[RTE_MAX_ETHPORTS];
if (geteuid()) {
fprintf(stderr, "[CAUTION] Run the app as root!\n");
exit(EXIT_FAILURE);
}
/* remove previously created dpdk-iface device node file */
fprintf(stderr, "Removing existing device node entry...");
ret = remove(DEV_PATH);
fprintf(stderr, (ret == 0) ? "\033[32m done. \033[0m \n" :
"\033[32m not present. \033[0m \n");
/* create dpdk-iface device node entry */
dev = makedev(MAJOR_NO, 0);
ret = mknod(DEV_PATH, S_IFCHR | O_RDWR, dev);
if (ret == 0)
fprintf(stderr, "Creating device node entry...");
else {
fprintf(stderr, "Failed to create device node entry\n");
return EXIT_FAILURE;
}
fprintf(stderr, "\033[32m done. \033[0m \n");
/* setting permissions on the device node entry */
ret = chmod(DEV_PATH,
S_IRGRP | S_IROTH | S_IRUSR |
S_IWGRP | S_IWOTH | S_IWUSR);
if (ret == 0)
fprintf(stderr, "Setting permissions on the device node entry...");
else {
fprintf(stderr, "Failed to set permissions on the device node entry\n");
return EXIT_FAILURE;
}
fprintf(stderr, "\033[32m done. \033[0m \n");
#if RTE_VERSION < RTE_VERSION_NUM(17, 05, 0, 16)
rte_set_log_level(RTE_LOG_EMERG);
#else
rte_log_set_global_level(RTE_LOG_EMERG);
#endif
fprintf(stderr, "Scanning the system for dpdk-compatible devices...");
/* initialize the rte env first */
ret = rte_eal_init(rte_argc, rte_argv);
/* get total count of detected ethernet ports */
num_devices = rte_eth_dev_count();
if (num_devices == 0) {
fprintf(stderr, "No Ethernet port detected!\n");
exit(EXIT_FAILURE);
}
for (ret = 0; ret < num_devices; ret++) {
/* get mac addr entries of detected dpdk ports */
rte_eth_macaddr_get(ret, &di[ret].ports_eth_addr);
/* check port capabailties/info */
rte_eth_dev_info_get(ret, &di[ret].dev_details);
}
fprintf(stderr, "\033[32m done. \033[0m \n");
/* open the device node first */
fd = open(DEV_PATH, O_RDWR);
if (fd == -1) {
fprintf(stderr, "Failed to open %s for port detection!\n",
DEV_PATH);
exit(EXIT_FAILURE);
}
/* clear all previous entries */
fprintf(stderr, "Clearing previous entries\n");
ioctl(fd, CLEAR_IFACE, di[0].ports_eth_addr.addr_bytes);
/* register the newly detected dpdk ports */
for (ret = 0; ret < num_devices; ret++) {
if (strcmp(di[ret].dev_details.driver_name, "net_mlx4") &&
strcmp(di[ret].dev_details.driver_name, "net_mlx5")) {
fprintf(stderr, "Registering port %d (%02X:%02X:%02X:%02X:%02X:%02X) to mTCP stack\n",
ret,
di[ret].ports_eth_addr.addr_bytes[0], di[ret].ports_eth_addr.addr_bytes[1],
di[ret].ports_eth_addr.addr_bytes[2], di[ret].ports_eth_addr.addr_bytes[3],
di[ret].ports_eth_addr.addr_bytes[4], di[ret].ports_eth_addr.addr_bytes[5]);
ioctl(fd, 1, di[ret].ports_eth_addr.addr_bytes);
}
}
/* close the fd */
close(fd);
return EXIT_SUCCESS;
}
/*--------------------------------------------------------------------------*/