Merge with upstream

master
koolzz 2018-12-03 05:18:10 -06:00
commit 7267d098fb
37 changed files with 968 additions and 379 deletions

View File

@ -8,6 +8,9 @@ sed -i -e 's/O_TO_EXE_STR =/\$(shell if [ \! -d \${RTE_SDK}\/\${RTE_TARGET}\/lib
cd dpdk/
make install T=x86_64-native-linuxapp-gcc
cd ..
cd dpdk-iface-kmod
make
cd ..
autoreconf -ivf
./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET
make

14
.gitignore vendored
View File

@ -1,3 +1,15 @@
# mtcp-specific files
Makefile
autom4te.cache/
config.h
config.log
config.status
dpdk-iface-kmod/dpdk_iface_main
io_engine/lib/Makefile
mtcp/src/Makefile
util/Makefile
stamp-h1
# Prerequisites
*.d
@ -49,4 +61,4 @@
modules.order
Module.symvers
Mkfile.old
dkms.conf
dkms.conf

1
README
View File

@ -285,6 +285,7 @@ We tested the DPDK version (polling driver) with Linux-3.13.0 kernel.
2. Intel-82599 ixgbe (Max-queue-limit: 16)
3. Intel-I350 igb (Max-queue-limit: 08)
4. Intel-X710 i40e (Max-queue-limit: ~)
5. Intel-X722 i40e (Max-queue-limit: ~)
========================================================================
FREQUENTLY ASKED QUESTIONS

365
README.md
View File

@ -1,24 +1,24 @@
[![Build Status](https://travis-ci.org/eunyoung14/mtcp.svg?branch=master)](https://travis-ci.org/eunyoung14/mtcp)
[![Build Status](https://scan.coverity.com/projects/11896/badge.svg)](https://scan.coverity.com/projects/eunyoung14-mtcp)
# README #
# README
mTCP is a highly scalable user-level TCP stack for multicore systems.
mTCP source code is distributed under the Modified BSD License. For
more detail, please refer to the LICENSE. The license term of io_engine
driver and ported applications may differ from mTCP's.
### PREREQUISITE ###
## Prerequisites
We require the following libraries to run mTCP.
- ``libps`` (PacketShader I/O engine library) OR ``libdpdk`` (Intel's DPDK package*) or ``netmap`` driver
- ``libnuma``
- ``libpthread``
- ``librt``
- ``libgmp`` (for DPDK/ONVM driver)
- `libdpdk` (Intel's DPDK package*) or `libps` (PacketShader I/O engine library) or `netmap` driver
- `libnuma`
- `libpthread`
- `librt`
- `libgmp` (for DPDK/ONVM driver)
Compiling PSIO/DPDK/NETMAP/ONVM driver requires kernel headers.
- For Debian/Ubuntu, try ``apt-get install linux-headers-$(uname -r)``
- For Debian/Ubuntu, try ``apt-get install linux-headers-$(uname -r)``
We have modified the dpdk package to export net_device stat data
(for Intel-based Ethernet adapters only) to the OS. To achieve this, we have
@ -27,7 +27,7 @@ created a new LKM dpdk-iface-kmow. We also modified
process of mTCP applications. We recommend using our package for DPDK
installation.
### INCLUDED DIRECTORIES ###
## Included directories
mtcp: mtcp source code directory
- mtcp/src: source code
@ -54,140 +54,154 @@ util: useful source code for applications
config: sample mTCP configuration files (may not be necessary)
### INSTALL GUIDES ###
## Install guides
mTCP can be prepared in three ways.
mTCP can be prepared in four ways.
***PSIO VERSION***
### ***DPDK VERSION***
1. make in io_engine/driver:
```# make```
- check ps_ixgbe.ko
- please note that psio only runs on linux-2.6.x kernels
(linux-2.6.32 ~ linux-2.6.38)
2. install the driver:
```# ./install.py <# cores> <# cores>```
- refer to http://shader.kaist.edu/packetshader/io_engine/
- you may need to change the ip address in install.py:46
3. Setup mtcp library:
```bash
# ./configure --with-psio-lib=<$path_to_ioengine>
## e.g. ./configure --with-psio-lib=`echo $PWD`/io_engine
# make
```
- By default, mTCP assumes that there are 16 CPUs in your system.
You can set the CPU limit, e.g. on a 8-core system, by using the following command:
```bash
# ./configure --with-psio-lib=`echo $PWD`/io_engine CFLAGS="-DMAX_CPUS=8"
```
Please note that your NIC should support RSS queues equal to the MAX_CPUS value
(since mTCP expects a one-to-one RSS queue to CPU binding).
- In case `./configure' script prints an error, run the
following command; and then re-do step-3 (configure again):
1. Download DPDK submodule.
```bash
# autoreconf -ivf
git submodule init
git submodule update
```
- check libmtcp.a in mtcp/lib
- check header files in mtcp/include
- check example binary files in apps/example
2. Setup DPDK.
4. Check the configurations in apps/example
- epserver.conf for server-side configuration
- epwget.conf for client-side configuration
- you may write your own configuration file for your application
```bash
./setup_mtcp_dpdk_env.sh [<path to $RTE_SDK>]
```
5. Run the applications!
- Press [14] to compile x86_64-native-linuxapp-gcc version
- Press [17] to install the driver
- Press [21] to setup 2048 2MB hugepages
- Press [23] to register the Ethernet ports
- Press [34] to quit the tool
- Only those devices will work with DPDK drivers that are listed
on this page: http://dpdk.org/doc/nics. Please make sure that your
NIC is compatible before moving on to the next step.
***DPDK VERSION***
- We use `dpdk/` submodule as our DPDK driver. FYI, you can pass a different
dpdk source directory as command line argument.
1. Set up DPDK first.
```bash
# bash setup_mtcp_dpdk_env.sh [<path to $RTE_SDK>]
```
Press [14] to compile x86_64-native-linuxapp-gcc version
Press [17] to install the driver
Press [21] to setup 2048 2MB hugepages
Press [23] to register the Ethernet ports
Press [34] to quit the tool
- Only those devices will work with DPDK drivers that are listed
on this page: http://dpdk.org/doc/nics. Please make sure that your
NIC is compatible before moving on to the next step.
- We use dpdk-18.02/ as our DPDK driver. FYI, you can pass a different
dpdk source directory as command line argument.
2. Next bring the dpdk-registered interfaces up, and then set RTE_SDK
3. Next bring the dpdk-registered interfaces up, and then set RTE_SDK
and RTE_TARGET environment variables.
```bash
# sudo ifconfig dpdk0 x.x.x.x netmask 255.255.255.0 up
# export RTE_SDK=`echo $PWD`/dpdk
# export RTE_TARGET=x86_64-native-linuxapp-gcc
sudo ifconfig dpdk0 x.x.x.x netmask 255.255.255.0 up
export RTE_SDK=`echo $PWD`/dpdk
export RTE_TARGET=x86_64-native-linuxapp-gcc
```
3. Setup mtcp library:
```bash
# ./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET
# make
4. Setup mtcp library:
```bash
./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET
make
```
- By default, mTCP assumes that there are 16 CPUs in your system.
You can set the CPU limit, e.g. on a 32-core system, by using the following command:
```bash
# ./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET CFLAGS="-DMAX_CPUS=32"
```
- By default, mTCP assumes that there are 16 CPUs in your system.
You can set the CPU limit, e.g. on a 32-core system, by using the following command:
```bash
./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET CFLAGS="-DMAX_CPUS=32"
```
Please note that your NIC should support RSS queues equal to the MAX_CPUS value
(since mTCP expects a one-to-one RSS queue to CPU binding).
- In case `./configure' script prints an error, run the
following command; and then re-do step-4 (configure again):
```# autoreconf -ivf```
- checksum offloading in the NIC is now ENABLED (by default)!!!
- this only works for dpdk at the moment
- use ```./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --disable-hwcsum``` to disable checksum offloading.
- check libmtcp.a in mtcp/lib
- check header files in mtcp/include
- check example binary files in apps/example
- In case `./configure` script prints an error, run the
following command; and then re-do step-4 (configure again):
```bash
autoreconf -ivf
```
- checksum offloading in the NIC is now ENABLED (by default)!!!
- this only works for dpdk at the moment
- use ```./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --disable-hwcsum``` to disable checksum offloading.
- check `libmtcp.a` in `mtcp/lib`
- check header files in `mtcp/include`
- check example binary files in `apps/example`
4. Check the configurations in apps/example
- epserver.conf for server-side configuration
- epwget.conf for client-side configuration
5. Check the configurations in `apps/example`
- `epserver.conf` for server-side configuration
- `epwget.conf` for client-side configuration
- you may write your own configuration file for your application
6. Run the applications!
7. You can revert back all your changes by running the following script.
```bash
./setup_linux_env.sh [<path to $RTE_SDK>]
```
- Press [29] to unbind the Ethernet ports
- Press [30] to remove igb_uio.ko driver
- Press [33] to remove hugepage mappings
- Press [34] to quit the tool
### ***PSIO VERSION***
1. make in io_engine/driver:
```bash
make
```
- check ps_ixgbe.ko
- please note that psio only runs on linux-2.6.x kernels
(linux-2.6.32 ~ linux-2.6.38)
2. install the driver:
```bash
./install.py <# cores> <# cores>
```
- refer to http://shader.kaist.edu/packetshader/io_engine/
- you may need to change the ip address in install.py:46
3. Setup mtcp library:
```bash
./configure --with-psio-lib=<$path_to_ioengine>
# e.g. ./configure --with-psio-lib=`echo $PWD`/io_engine
make
```
- By default, mTCP assumes that there are 16 CPUs in your system.
You can set the CPU limit, e.g. on a 8-core system, by using the following command:
```bash
./configure --with-psio-lib=`echo $PWD`/io_engine CFLAGS="-DMAX_CPUS=8"
```
Please note that your NIC should support RSS queues equal to the MAX_CPUS value
(since mTCP expects a one-to-one RSS queue to CPU binding).
- In case `./configure` script prints an error, run the
following command; and then re-do step-3 (configure again):
```bash
autoreconf -ivf
```
- check `libmtcp.a` in `mtcp/lib`
- check header files in `mtcp/include`
- check example binary files in `apps/example`
4. Check the configurations in `apps/example`
- `epserver.conf` for server-side configuration
- `epwget.conf` for client-side configuration
- you may write your own configuration file for your application
5. Run the applications!
6. You can revert back all your changes by running the following script.
```bash
# bash setup_linux_env.sh [<path to $RTE_SDK]]
```
Press [29] to unbind the Ethernet ports
Press [30] to remove igb_uio.ko driver
Press [33] to remove hugepage mappings
Press [34] to quit the tool
***ONVM VERSION***
### ***ONVM VERSION***
***NEW***: Now you can run mTCP applications (server + client) locally.
A local setup is useful when only 1 machine is available for the experiment.
@ -201,88 +215,93 @@ ONVM basics are explained in https://github.com/sdnfv/openNetVM.
2. Next bring the dpdk-registered interfaces up. This can be setup using:
```# sudo ifconfig dpdk0 x.x.x.x netmask 255.255.255.0 up```
```bash
sudo ifconfig dpdk0 x.x.x.x netmask 255.255.255.0 up
```
3. Setup mtcp library
```bash
# ./configure --with-dpdk-lib=$<path_to_dpdk> --with-onvm-lib=$<path_to_onvm_lib>
# e.g. ./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --with-onvm-lib=`echo $ONVM_HOME`/onvm
# make
./configure --with-dpdk-lib=$<path_to_dpdk> --with-onvm-lib=$<path_to_onvm_lib>
# e.g. ./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --with-onvm-lib=`echo $ONVM_HOME`/onvm
make
```
- By default, mTCP assumes that there are 16 CPUs in your system.
- By default, mTCP assumes that there are 16 CPUs in your system.
You can set the CPU limit, e.g. on a 32-core system, by using the following command:
```bash
# ./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --with-onvm-lib=$<path_to_onvm_lib> CFLAGS="-DMAX_CPUS=32"
```
```bash
./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --with-onvm-lib=$<path_to_onvm_lib> CFLAGS="-DMAX_CPUS=32"
```
Please note that your NIC should support RSS queues equal to the MAX_CPUS value
(since mTCP expects a one-to-one RSS queue to CPU binding).
- In case `./configure' script prints an error, run the
- In case `./configure` script prints an error, run the
following command; and then re-do step-4 (configure again):
```# autoreconf -ivf```
- checksum offloading in the NIC is now ENABLED (by default)!!!
- this only works for dpdk at the moment
- use ```./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --with-onvm-lib=$<path_to_onvm_lib> --disable-hwcsum``` to disable checksum offloading.
- check libmtcp.a in mtcp/lib
- check header files in mtcp/include
- check example binary files in apps/example
```bash
autoreconf -ivf
```
5. Check the configurations in apps/example
- epserver.conf for server-side configuration
- epwget.conf for client-side configuration
- checksum offloading in the NIC is now ENABLED (by default)!!!
- this only works for dpdk at the moment
- use ```./configure --with-dpdk-lib=$RTE_SDK/$RTE_TARGET --with-onvm-lib=$<path_to_onvm_lib> --disable-hwcsum``` to disable checksum offloading.
- check `libmtcp.a` in `mtcp/lib`
- check header files in `mtcp/include`
- check example binary files in `apps/example`
5. Check the configurations in `apps/example`
- `epserver.conf` for server-side configuration
- `epwget.conf` for client-side configuration
- you may write your own configuration file for your application
6. Run the applications!
7. You can revert back all your changes by running the following script.
```bash
# bash setup_linux_env.sh
```
```bash
./setup_linux_env.sh
```
Press [29] to unbind the Ethernet ports
Press [30] to remove igb_uio.ko driver
Press [33] to remove hugepage mappings
Press [34] to quit the tool
- Press [29] to unbind the Ethernet ports
- Press [30] to remove igb_uio.ko driver
- Press [33] to remove hugepage mappings
- Press [34] to quit the tool
**Notes**
Once you have started onvm_mgr, sometimes an mTCP application may fail to get launched due
to an error resembling the one mentioned below:
(```EAL: FATAL: Cannot init memory```, or
``` Cannot mmap memory for rte_config at [0x7ffff7fb6000], got [0x7ffff7e74000] - please use '--base-virtaddr' option```, or
```EAL: Cannot mmap device resource file /sys/bus/pci/devices/0000:06:00.0/resource3 to address: 0x7ffff7ff1000```)
- ```EAL: FATAL: Cannot init memory```
- ``` Cannot mmap memory for rte_config at [0x7ffff7fb6000], got [0x7ffff7e74000] - please use '--base-virtaddr' option```
- ```EAL: Cannot mmap device resource file /sys/bus/pci/devices/0000:06:00.0/resource3 to address: 0x7ffff7ff1000```
To prevent this, use the base virtual address parameter to run the ONVM manager, e.g.:
```
```bash
cd openNetVM/onvm
./go.sh 1,2,3 1 -s stdout -v 0x7f000000000
```
***NETMAP VERSION***
### ***NETMAP VERSION***
See README.netmap for details.
***TESTED ENVIRONMENTS***
## Tested environments
mTCP runs on Linux-based operating systems (2.6.x for PSIO) with generic
x86_64 CPUs, but to help evaluation, we provide our tested environments
as follows.
Intel Xeon E5-2690 octacore CPU @ 2.90 GHz 32 GB of RAM (4 memory channels)
10 GbE NIC with Intel 82599 chipset (specifically Intel X520-DA2)
Debian 6.0.7 (Linux 2.6.32-5-amd64)
Intel Xeon E5-2690 octacore CPU @ 2.90 GHz 32 GB of RAM (4 memory channels)
10 GbE NIC with Intel 82599 chipset (specifically Intel X520-DA2)
Debian 6.0.7 (Linux 2.6.32-5-amd64)
Intel Core i7-3770 quadcore CPU @ 3.40 GHz 16 GB of RAM (2 memory channels)
10 GbE NIC with Intel 82599 chipset (specifically Intel X520-DA2)
Ubuntu 10.04 (Linux 2.6.32-47)
Intel Core i7-3770 quadcore CPU @ 3.40 GHz 16 GB of RAM (2 memory channels)
10 GbE NIC with Intel 82599 chipset (specifically Intel X520-DA2)
Ubuntu 10.04 (Linux 2.6.32-47)
Event-driven PacketShader I/O engine (extended io_engine-0.2)
@ -290,7 +309,7 @@ Event-driven PacketShader I/O engine (extended io_engine-0.2)
We tested the DPDK version (polling driver) with Linux-3.13.0 kernel.
***NOTES***
## Notes
1. mTCP currently runs with fixed memory pools. That means, the size of
TCP receive and send buffers are fixed at the startup and does not
@ -310,29 +329,30 @@ We tested the DPDK version (polling driver) with Linux-3.13.0 kernel.
5. mTCP has been tested with the following Ethernet adapters:
1. Intel-82598 ixgbe (Max-queue-limit: 16)
2. Intel-82599 ixgbe (Max-queue-limit: 16)
3. Intel-I350 igb (Max-queue-limit: 08)
4. Intel-X710 i40e (Max-queue-limit: ~)
1. Intel-82598 ixgbe (Max-queue-limit: 16)
2. Intel-82599 ixgbe (Max-queue-limit: 16)
3. Intel-I350 igb (Max-queue-limit: 08)
4. Intel-X710 i40e (Max-queue-limit: ~)
5. Intel-X722 i40e (Max-queue-limit: ~)
***FREQUENTLY ASKED QUESTIONS***
## Frequently asked questions
1. How can I quit the application?
- Use ^C to gracefully shutdown the application. Two consecutive
- Use ^C to gracefully shutdown the application. Two consecutive
^C (separated by 1 sec) will force quit.
2. My application keeps printing "No route to 0.0.0.0"
- Try to turn off your network-manager for xge*. The network manager
- Try to turn off your network-manager for xge*. The network manager
can override the IP configuration set by install.py in PSIO driver.
3. Can I statically set the routing or arp table?
- Yes, mTCP allows static route and arp configuration. Go to the
- Yes, mTCP allows static route and arp configuration. Go to the
config directory and see sample_route.conf or sample_arp.conf.
Copy and adapt it to your condition and link (ln -s) the config
directory to the application directory. mTCP will find
config/route.conf and config/arp.conf for static configuration.
***CAUTION***
## Caution
1. Do not remove I/O driver (```ps_ixgbe/igb_uio```) while running mTCP
applications. The application will panic!
@ -340,7 +360,12 @@ We tested the DPDK version (polling driver) with Linux-3.13.0 kernel.
2. Use the ps_ixgbe/dpdk driver contained in this package, not the one
from some other place (e.g., from io_engine github).
Contact: mtcp-user at list.ndsl.kaist.edu
April 2, 2015.
EunYoung Jeong <notav at ndsl.kaist.edu>
M. Asim Jamshed <ajamshed at ndsl.kaist.edu>
## Contacts
The GitHub issue board is the preferred way to report bugs and ask questions about mTCP.
***CONTACTS FOR THE AUTHORS***
User mailing list <mtcp-user at list.ndsl.kaist.edu>
EunYoung Jeong <notav at ndsl.kaist.edu>
M. Asim Jamshed <ajamshed at ndsl.kaist.edu>

4
apps/example/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
Makefile
epserver
epwget
log_*

View File

@ -87,7 +87,8 @@ epwget: epwget.o ${MTCP_FLD}/lib/libmtcp.a
$(HIDE) ${CC} $< ${LIBS} ${UTIL_OBJ} -o $@
clean:
rm -f *~ *.o ${TARGETS} log_*
$(MSG) " CLEAN $(TARGETS)"
$(HIDE) rm -f *~ *.o ${TARGETS} log_*
distclean: clean
rm -rf Makefile

View File

@ -27,9 +27,8 @@ multiprocess = 1
#port = xge1
#------ DPDK ports -------#
port = dpdk0
#port = dpdk1
#port = dpdk0 dpdk1
#port = dpdk0:0
#port = dpdk0:1
# Maximum concurrency per core (default = 10000)
#max_concurrency = 10000
@ -60,8 +59,6 @@ tcp_timewait = 0
#stat_print = xge1
#------ DPDK ports -------#
stat_print = dpdk0
#stat_print = dpdk0:0
#stat_print = dpdk0:1
#stat_print = dpdk1
#######################################################

View File

@ -16,6 +16,9 @@ io = dpdk
# following line is uncommented.
#num_cores = 8
# Core mask
#core_mask = 0000000F0
# Number of memory channels per processor socket (dpdk-only)
num_mem_ch = 4
@ -46,9 +49,8 @@ num_mem_ch = 4
#port = xge1
#------ DPDK ports -------#
port = dpdk0
#port = dpdk1
#port = dpdk0 dpdk1
#port = dpdk0:0
#port = dpdk0:1
# Maximum concurrency per core (default = 10000)
#max_concurrency = 10000
@ -79,8 +81,6 @@ tcp_timewait = 0
#stat_print = xge1
#------ DPDK ports -------#
stat_print = dpdk0
#stat_print = dpdk0:0
#stat_print = dpdk0:1
#stat_print = dpdk1
#######################################################

View File

@ -26,9 +26,8 @@ multiprocess = 1
#port = xge1
#------ DPDK ports -------#
port = dpdk0
#port = dpdk1
#port = dpdk0 dpdk1
#port = dpdk0:0
#port = dpdk0:1
# Maximum concurrency per core (default = 10000)
#max_concurrency = 10000
@ -59,8 +58,6 @@ tcp_timewait = 0
#stat_print = xge1
#------ DPDK ports -------#
stat_print = dpdk0
#stat_print = dpdk0:0
#stat_print = dpdk0:1
#stat_print = dpdk1
#######################################################

View File

@ -77,7 +77,7 @@ static int flows[MAX_CPUS];
static int flowcnt = 0;
static int concurrency;
static int max_fds;
static int response_size = 0;
static uint64_t response_size = 0;
/*----------------------------------------------------------------------------*/
struct wget_stat
{
@ -292,10 +292,10 @@ DownloadComplete(thread_context_t ctx, int sockid, struct wget_vars *wv)
ctx->stat.completes++;
if (response_size == 0) {
response_size = wv->recv;
fprintf(stderr, "Response size set to %d\n", response_size);
fprintf(stderr, "Response size set to %lu\n", response_size);
} else {
if (wv->recv != response_size) {
fprintf(stderr, "Response size mismatch! mine: %ld, theirs: %d\n",
fprintf(stderr, "Response size mismatch! mine: %lu, theirs: %lu\n",
wv->recv, response_size);
}
}

View File

@ -34,16 +34,14 @@ num_mem_ch = 4
#onvm_serv = 1
#--------------------------#
# Used port (please adjust accordingly)
#------ PSIO ports -------#
#port = xge0 xge1
#port = xge1
#------ DPDK ports -------#
port = dpdk0
#port = dpdk1
#port = dpdk0 dpdk1
#port = dpdk0:0
#port = dpdk0:1
# Enable multi-process support
#multiprocess = 1

View File

@ -16,6 +16,9 @@ io = dpdk
# following line is uncommented.
#num_cores = 8
# Core mask
#core_mask = 00000FFF0
# Number of memory channels per processor socket (dpdk-only)
num_mem_ch = 4
@ -46,22 +49,21 @@ num_mem_ch = 4
#port = xge1
#------ DPDK ports -------#
port = dpdk0
#port = dpdk1
#port = dpdk0 dpdk1
#port = dpdk0:0
#port = dpdk0:1
# Maximum concurrency per core (default = 10000)
max_concurrency = 8192
#max_concurrency = 8192
# Maximum number of socket buffers per core (default = 10000)
# Set this to small value if there are many idle connections
max_num_buffers = 8192
#max_num_buffers = 8192
# Receive buffer size of sockets; if not set: rcvbuf = sndbuf
rcvbuf = 8192
#rcvbuf = 8192
# Send buffer size of sockets; if not set: sndbuf = rcvbuf
sndbuf = 8192
#sndbuf = 8192
# if sndbuf & rcvbuf not set: sndbuf = rcvbuf = 8192
@ -79,8 +81,6 @@ tcp_timewait = 0
#stat_print = xge1
#------ DPDK ports -------#
stat_print = dpdk0
#stat_print = dpdk0:0
#stat_print = dpdk0:1
#stat_print = dpdk1
#######################################################

View File

@ -1595,6 +1595,7 @@ main(int argc, char **argv) {
*/
mtcp_getconf(&mcfg);
mcfg.num_cores = cpus;
mcfg.max_concurrency = mcfg.max_num_buffers = srv->max_conns;
mtcp_setconf(&mcfg);
/* initialize the mtcp context */
if (mtcp_init("mtcp.conf")) {
@ -1602,10 +1603,6 @@ main(int argc, char **argv) {
goto clean_up;
}
mtcp_getconf(&mcfg);
mcfg.max_concurrency = mcfg.max_num_buffers = srv_states[0]->max_conns;
mtcp_setconf(&mcfg);
/* register SIGINT signal handler */
mtcp_register_signal(SIGINT, signal_handler);
#endif
@ -1619,6 +1616,7 @@ main(int argc, char **argv) {
start_server, (void *)srv_states[i])) {
goto clean_up;
}
sleep(1);
}
/*

View File

@ -38,9 +38,8 @@ num_mem_ch = 4
#port = xge1
#------ DPDK ports -------#
port = dpdk0
#port = dpdk0 dpdk1
#port = dpdk0
#port = dpdk1
#port = dpdk0 dpdk1
# Maximum concurrency per core (default = 10000)
#max_concurrency = 10000

View File

@ -16,17 +16,27 @@ DPDK_INC=${RTE_SDK}/${RTE_TARGET}/include
DPDK_LIB=${RTE_SDK}/${RTE_TARGET}/lib
appname=dpdk_iface_main
#-------------------------------------------------------------------------#
ifeq ($V,) # no echo
export MSG=@echo
export HIDE=@
else
export MSG=@\#
export HIDE=
endif
#-------------------------------------------------------------------------#
# Default target: build the user-space helper ($(appname)) and then the
# out-of-tree kernel module against the running kernel's build tree.
# $(MAKE) (rather than a literal `make`) keeps -j/jobserver and -n working
# in the sub-make; $(CURDIR) is the directory make is actually running in,
# whereas $(PWD) is only inherited from the environment and is stale when
# this Makefile is entered through `make -C`.
all: dpdk_iface.c $(appname) $(appname).c
	$(MAKE) -C /lib/modules/$(shell uname -r)/build/ M=$(CURDIR) modules
$(appname): $(appname).c
$(CC) $(CFLAGS) $(appname).c -g -o $(appname) -g \
$(MSG) " CC $<"
$(HIDE) $(CC) -g $(CFLAGS) $(appname).c -g -o $(appname) -g \
-I${DPDK_INC} -include $(DPDK_INC)/rte_config.h \
-L$(DPDK_LIB) ${DPDK_MACHINE_LDFLAGS} -lpthread
clean:
make -C /lib/modules/$(shell uname -r)/build/ M=$(PWD) clean
rm -rf *~ *.o *.ko dpdk_iface_main
$(MSG) " CLEAN $(appname)"
$(HIDE) rm -rf *~ *.o *.ko dpdk_iface_main
run: all
sudo ./$(appname)

View File

@ -33,6 +33,7 @@
/*--------------------------------------------------------------------------*/
struct stats_struct sarrays[MAX_DEVICES][MAX_QID] = {{{0, 0, 0, 0, 0, 0, 0, 0, 0}}};
struct stats_struct old_sarrays[MAX_DEVICES][MAX_QID] = {{{0, 0, 0, 0, 0, 0, 0, 0, 0}}};
static int major_no = -1;
/*--------------------------------------------------------------------------*/
static int
update_stats(struct stats_struct *stats)
@ -123,6 +124,7 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
struct net_device *netdev;
struct stats_struct ss;
struct net_adapter *adapter = NULL;
struct PciDevice pd;
switch (cmd) {
case SEND_STATS:
@ -134,8 +136,11 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ret = update_stats(&ss);
break;
case CREATE_IFACE:
ret = copy_from_user(&pd,
(PciDevice __user *)arg,
sizeof(PciDevice));
ret = copy_from_user(mac_addr,
(unsigned char __user *)arg,
(unsigned char __user *)pd.ports_eth_addr,
ETH_ALEN);
if (!ret) {
/* first check whether the entry does not exist */
@ -174,14 +179,55 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
THIS_MODULE->name, adapter->bd_number);
/* reset nstats */
memset(&adapter->nstats, 0, sizeof(struct net_device_stats));
ret = 0;
/* set 'fake' pci address */
memcpy(&adapter->pa, &pd.pa, sizeof(struct PciAddress));
ret = copy_to_user((unsigned char __user *)arg,
netdev->name,
IFNAMSIZ);
if (ret) {
printk(KERN_INFO "%s: Interface %s copy to user failed!\n",
THIS_MODULE->name, netdev->name);
ret = -1;
goto fail_pciaddr;
}
/* set numa locality */
adapter->numa_socket = pd.numa_socket;
}
}
break;
case CLEAR_IFACE:
clear_all_netdevices();
break;
case FETCH_PCI_ADDRESS:
ret = copy_from_user(&pd,
(PciDevice __user *)arg,
sizeof(PciDevice));
if (!ret) {
read_lock(&dev_base_lock);
netdev = first_net_device(&init_net);
while (netdev) {
if (strcmp(netdev->name, pd.ifname) == 0) {
read_unlock(&dev_base_lock);
printk(KERN_INFO "%s: Passing PCI info of %s to user\n",
THIS_MODULE->name, pd.ifname);
adapter = netdev_priv(netdev);
ret = copy_to_user(&((PciDevice __user *)arg)->pa,
&adapter->pa,
sizeof(struct PciAddress));
if (ret) return -1;
ret = copy_to_user(&((PciDevice __user *)arg)->numa_socket,
&adapter->numa_socket,
sizeof(adapter->numa_socket));
if (ret) return -1;
return 0;
}
netdev = next_net_device(netdev);
}
read_unlock(&dev_base_lock);
ret = -1;
}
break;
default:
ret = -ENOTTY;
break;
@ -189,7 +235,7 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return ret;
fail_pciaddr:
fail_bdnumber:
unregister_netdev(netdev);
fail_ioremap:
@ -208,7 +254,7 @@ iface_pci_init_module(void)
{
int ret;
ret = register_chrdev(MAJOR_NO /* MAJOR */,
ret = register_chrdev(0 /* MAJOR */,
DEV_NAME /*NAME*/,
&igb_net_fops);
if (ret < 0) {
@ -216,6 +262,12 @@ iface_pci_init_module(void)
THIS_MODULE->name);
return ret;
}
printk(KERN_INFO "%s: Loaded\n",
THIS_MODULE->name);
/* record major number */
major_no = ret;
return 0;
}
@ -224,7 +276,7 @@ static void __exit
iface_pci_exit_module(void)
{
clear_all_netdevices();
unregister_chrdev(MAJOR_NO, DEV_NAME);
unregister_chrdev(major_no, DEV_NAME);
}
/*--------------------------------------------------------------------------*/
module_init(iface_pci_init_module);

View File

@ -14,7 +14,9 @@ struct net_adapter {
unsigned char mac_addr[ETH_ALEN];
u16 bd_number;
bool netdev_registered;
int numa_socket;
struct net_device_stats nstats;
struct PciAddress pa;
};
/*--------------------------------------------------------------------------*/
/**

View File

@ -1,17 +1,46 @@
#ifndef __DPDK_IFACE_COMMON_H__
#define __DPDK_IFACE_COMMON_H__
/*--------------------------------------------------------------------------*/
/* major number */
#define MAJOR_NO 511 //1110
/* for IFNAMSIZ (user space only; the kernel provides its own definition) */
#ifndef __KERNEL__
#include <net/if.h>
#endif
#include <asm/bitsperlong.h>
/*--------------------------------------------------------------------------*/
/* dev name */
#define DEV_NAME "dpdk-iface"
#define DEV_PATH "/dev/"DEV_NAME
#define DEV_PROC_PATH "/proc/devices"
/* ioctl# */
#define SEND_STATS 0
#define CREATE_IFACE 1
#define CLEAR_IFACE 4
#define FETCH_PCI_ADDRESS 5
/* max qid */
#define MAX_QID 128
#ifndef MAX_DEVICES
#define MAX_DEVICES 128
#endif
#define PCI_DOM "%04hX"
#define PCI_BUS "%02hhX"
#define PCI_DEVICE "%02hhX"
#define PCI_FUNC "%01hhX"
#define PCI_LENGTH 13
/*--------------------------------------------------------------------------*/
/*
 * PCI address of a device, split into its four components.
 * The PCI_DOM, PCI_BUS, PCI_DEVICE and PCI_FUNC format macros defined
 * in this header have conversion widths matching these field types
 * (%hX for the 16-bit domain, %hhX for the three 8-bit fields).
 */
typedef struct PciAddress {
/* PCI domain (segment) number */
uint16_t domain;
/* bus number */
uint8_t bus;
/* device (slot) number */
uint8_t device;
/* function number */
uint8_t function;
} PciAddress;
/*--------------------------------------------------------------------------*/
/*
 * Record describing one device: an identifier (either a pointer to a
 * MAC address or an interface name, sharing storage), its PCI address
 * and its NUMA socket.
 *
 * NOTE(review): presumably this is the argument block exchanged through
 * the CREATE_IFACE / FETCH_PCI_ADDRESS ioctls declared above -- confirm
 * against the callers before changing the layout.
 *
 * NOTE(review): the aligned attribute takes a byte count, so
 * aligned(__BITS_PER_LONG) requests __BITS_PER_LONG *bytes* of alignment
 * for the PciDevice typedef; confirm that this (rather than the size of
 * a long in bytes) is what was intended.
 */
typedef struct PciDevice {
/* the two members below overlap; which one is valid depends on the user */
union {
/* pointer to the bytes of an Ethernet (MAC) address */
uint8_t *ports_eth_addr;
/* interface name, at most IFNAMSIZ bytes */
char ifname[IFNAMSIZ];
};
/* PCI address of the device */
PciAddress pa;
/* NUMA socket the device belongs to */
int numa_socket;
} PciDevice __attribute__((aligned(__BITS_PER_LONG)));
/*--------------------------------------------------------------------------*/
#endif /* __DPDK_IFACE_COMMON_H__ */

View File

@ -1,36 +1,213 @@
#include <rte_ethdev.h>
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <dirent.h>
#include <rte_version.h>
#include <rte_ethdev.h>
#include "dpdk_iface_common.h"
/*--------------------------------------------------------------------------*/
//#define DEBUG 1
#define SYSFS_PCI_DRIVER_PATH "/sys/bus/pci/drivers/"
#define SYSFS_PCI_IGB_UIO SYSFS_PCI_DRIVER_PATH"igb_uio"
#define SYSFS_PCI_VFIO_PCI SYSFS_PCI_DRIVER_PATH"vfio-pci"
#define SYSFS_PCI_UIOPCIGEN SYSFS_PCI_DRIVER_PATH"uio_pci_generic"
#define RTE_ARGC_MAX (RTE_MAX_ETHPORTS << 1) + 7
/*--------------------------------------------------------------------------*/
typedef struct {
PciDevice pd;
struct rte_eth_dev_info dev_details;
struct ether_addr ports_eth_addr;
} DevInfo;
static DevInfo di[RTE_MAX_ETHPORTS];
/*--------------------------------------------------------------------------*/
/**
 * scandir() filter used to pick PCI device entries out of a driver
 * directory: an entry qualifies when it is a symbolic link and its
 * name contains a ':' character.
 *
 * Returns 1 when the entry should be kept, 0 otherwise.
 */
int
IsPciEnt(const struct dirent *entry)
{
	/* anything that is not a symlink can be rejected right away */
	if (entry->d_type != DT_LNK)
		return 0;

	return (strstr(entry->d_name, ":") != NULL) ? 1 : 0;
}
/*--------------------------------------------------------------------------*/
/**
* Similar to strverscmp(), but sorts in hexadecimal context
*/
int
localversionsort(const void *elem1, const void *elem2)
{
uint16_t domain1, domain2;
uint8_t bus1, bus2, device1, device2, function1, function2;
DevInfo *d1 = (DevInfo *)elem1;
DevInfo *d2 = (DevInfo *)elem2;
domain1 = d1->pd.pa.domain;
domain2 = d2->pd.pa.domain;
bus1 = d1->pd.pa.bus;
bus2 = d2->pd.pa.bus;
device1 = d1->pd.pa.device;
device2 = d2->pd.pa.device;
function1 = d1->pd.pa.function;
function2 = d2->pd.pa.function;
if (domain1 < domain2) return -1;
if (domain2 < domain1) return 1;
if (bus1 < bus2) return -1;
if (bus2 < bus1) return 1;
if (device1 < device2) return -1;
if (device2 < device1) return 1;
if (function1 < function2)
return -1;
if (function2 < function1)
return 1;
return 0;
}
/*--------------------------------------------------------------------------*/
int
probe_all_rte_devices(char **argv, int *argc)
{
struct dirent **dirlist;
int pci_index, total_files, i, j;
/* reset pci_index */
pci_index = 0;
for (j = 0; j < 3; j++) {
switch (j) {
case 0:
/* scan igb_uio first */
total_files = scandir(SYSFS_PCI_IGB_UIO, &dirlist,
IsPciEnt, versionsort);
break;
case 1:
/* scan vfio_pci next */
total_files = scandir(SYSFS_PCI_VFIO_PCI, &dirlist,
IsPciEnt, versionsort);
break;
case 2:
/* finally scan uio_pci_generic */
total_files = scandir(SYSFS_PCI_UIOPCIGEN, &dirlist,
IsPciEnt, versionsort);
break;
default:
fprintf(stderr, "Control can never come here!\n");
goto panic_err;
}
for (i = 0; i < total_files; i++, pci_index++) {
argv[*argc] = strdup("-w");
argv[*argc + 1] = strdup(dirlist[i]->d_name);
if (argv[*argc] == NULL ||
argv[*argc + 1] == NULL)
goto alloc_err;
*argc += 2;
if (sscanf(dirlist[i]->d_name, PCI_DOM":"PCI_BUS":"
PCI_DEVICE"."PCI_FUNC,
&di[pci_index].pd.pa.domain,
&di[pci_index].pd.pa.bus,
&di[pci_index].pd.pa.device,
&di[pci_index].pd.pa.function) != 4)
goto sscanf_err;
free(dirlist[i]);
}
//free(dirlist);
}
/* now sort all recorded entries */
qsort(di, pci_index, sizeof(DevInfo), localversionsort);
return pci_index;
sscanf_err:
fprintf(stderr, "Unable to retrieve pci address!\n");
exit(EXIT_FAILURE);
alloc_err:
fprintf(stderr, "Can't allocate memory for argv items!\n");
exit(EXIT_FAILURE);
panic_err:
fprintf(stderr, "Could not open the directory!\n");
exit(EXIT_FAILURE);
}
/*--------------------------------------------------------------------------*/
/**
 * Look up the major number of the DEV_NAME device by scanning
 * DEV_PROC_PATH line by line. The first line that contains DEV_NAME
 * and parses as "<number> <word>" supplies the result.
 *
 * @return the major number, or -1 if the file can't be opened or no
 *         matching line is found
 */
int
fetch_major_no()
{
	FILE *f;
	int major_no, parsed_no;
	char *line;
	size_t len;
	char dummy[512];

	major_no = -1;
	len = 0;
	line = NULL;

	f = fopen(DEV_PROC_PATH, "r");
	if (f == NULL) {
		fprintf(stderr, "Can't open %s file\n", DEV_PROC_PATH);
		return -1;
	}

	/* getline() re-uses (and grows) the same buffer on every call */
	while (getline(&line, &len, f) != -1) {
		if (strstr(line, DEV_NAME) == NULL)
			continue;
		/* field width keeps the name within dummy[] */
		if (sscanf(line, "%d %511s", &parsed_no, dummy) == 2) {
			/* commit only a fully parsed line */
			major_no = parsed_no;
			break;
		}
	}

	/* the buffer must be released even when getline() failed */
	free(line);
	/* close the file descriptor */
	fclose(f);

	return major_no;
}
/*--------------------------------------------------------------------------*/
int
main(int argc, char **argv)
{
int ret, fd, num_devices;
int ret, fd, num_devices, i;
dev_t dev;
char *cpumaskbuf = "0x1";
char *mem_channels = "4";
char *rte_argv[] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto",
""
char *rte_argv[RTE_ARGC_MAX] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto"
};
const int rte_argc = 6;
typedef struct {
struct ether_addr ports_eth_addr;
struct rte_eth_dev_info dev_details;
} dev_info;
dev_info di[RTE_MAX_ETHPORTS];
int rte_argc = 6;
ret = probe_all_rte_devices(rte_argv, &rte_argc);
#if DEBUG
for (i = 0; i < ret; i++) {
fprintf(stderr, "Pci Address: %04hX:%02hhX:%02hhX.%01hhX\n",
di[i].pd.pa.domain,
di[i].pd.pa.bus,
di[i].pd.pa.device,
di[i].pd.pa.function);
}
#endif
if (geteuid()) {
fprintf(stderr, "[CAUTION] Run the app as root!\n");
exit(EXIT_FAILURE);
@ -43,7 +220,11 @@ main(int argc, char **argv)
"\033[32m not present. \033[0m \n");
/* create dpdk-iface device node entry */
#if 0
dev = makedev(MAJOR_NO, 0);
#else
dev = makedev(fetch_major_no(), 0);
#endif
ret = mknod(DEV_PATH, S_IFCHR | O_RDWR, dev);
if (ret == 0)
fprintf(stderr, "Creating device node entry...");
@ -86,10 +267,16 @@ main(int argc, char **argv)
}
for (ret = 0; ret < num_devices; ret++) {
di[ret].pd.ports_eth_addr = &di[ret].ports_eth_addr.addr_bytes[0];
/* get mac addr entries of detected dpdk ports */
rte_eth_macaddr_get(ret, &di[ret].ports_eth_addr);
/* check port capabilities/info */
rte_eth_dev_info_get(ret, &di[ret].dev_details);
/* get numa socket location for future socket-mem field */
if ((di[ret].pd.numa_socket=rte_eth_dev_socket_id(ret)) == -1) {
fprintf(stderr, "Can't determine socket ID!\n");
exit(EXIT_FAILURE);
}
}
fprintf(stderr, "\033[32m done. \033[0m \n");
@ -104,23 +291,55 @@ main(int argc, char **argv)
/* clear all previous entries */
fprintf(stderr, "Clearing previous entries\n");
ioctl(fd, CLEAR_IFACE, di[0].ports_eth_addr.addr_bytes);
ret = ioctl(fd, CLEAR_IFACE, di[0].ports_eth_addr.addr_bytes);
if (ret == -1) {
fprintf(stderr, "ioctl call failed!\n");
return EXIT_FAILURE;
}
/* register the newly detected dpdk ports */
for (ret = 0; ret < num_devices; ret++) {
if (strcmp(di[ret].dev_details.driver_name, "net_mlx4") &&
strcmp(di[ret].dev_details.driver_name, "net_mlx5")) {
fprintf(stderr, "Registering port %d (%02X:%02X:%02X:%02X:%02X:%02X) to mTCP stack\n",
fprintf(stderr, "Registering port %d (%02X:%02X:%02X:%02X:%02X:%02X) to mTCP stack",
ret,
di[ret].ports_eth_addr.addr_bytes[0], di[ret].ports_eth_addr.addr_bytes[1],
di[ret].ports_eth_addr.addr_bytes[2], di[ret].ports_eth_addr.addr_bytes[3],
di[ret].ports_eth_addr.addr_bytes[4], di[ret].ports_eth_addr.addr_bytes[5]);
ioctl(fd, 1, di[ret].ports_eth_addr.addr_bytes);
di[ret].ports_eth_addr.addr_bytes[0],
di[ret].ports_eth_addr.addr_bytes[1],
di[ret].ports_eth_addr.addr_bytes[2],
di[ret].ports_eth_addr.addr_bytes[3],
di[ret].ports_eth_addr.addr_bytes[4],
di[ret].ports_eth_addr.addr_bytes[5]);
di[ret].pd.ports_eth_addr = di[ret].ports_eth_addr.addr_bytes;
if (ioctl(fd, CREATE_IFACE, &di[ret].pd) == -1) {
fprintf(stderr, "ioctl call failed!\n");
}
fprintf(stderr, " (%s).\n",
di[ret].pd.ifname);
}
}
/* close the fd */
close(fd);
#if 0
/*
* XXX: It seems that there is a bug in the RTE SDK.
* The dynamically allocated rte_argv params are left
* as dangling pointers. Freeing them causes program
* to crash.
*/
/* free up all resources */
for (; rte_argc >= 6; rte_argc--) {
if (rte_argv[rte_argc] != NULL) {
fprintf(stderr, "Cleaning up rte_argv[%d]: %s (%p)\n",
rte_argc, rte_argv[rte_argc], rte_argv[rte_argc]);
free(rte_argv[rte_argc]);
rte_argv[rte_argc] = NULL;
}
}
#endif
return EXIT_SUCCESS;
}
/*--------------------------------------------------------------------------*/

View File

@ -24,6 +24,22 @@ GCC_OPT = -m64
else
GCC_OPT =
endif
#
# DBGMSG = Enable macro to print all ingress icmp, ip & tcp packets (in _log file)
# DBGFUNC = Enable macro to print func trace as followed by an ingress packet (in _log file)
# STREAM = Enable macro to print tcp_streams module operations (in _log file)
# STATE = Enable macro to print change in tcp states (in _log file)
# STAT = Not used
# APP = Enable macro to print app-specific debug statements (in _log file)
# EPOLL = Enable macro to print epoll-related operations (in _log file)
# DUMP_STREAM = Enable macro to print tcp_streams (in _log file)
# NETSTAT = Enable macro to print net stats
# INFO = Enable macro to print general statements
# DBGERR = Enable macro to print error statements
# DBGCERR = Enable macro to print error statements
#
GCC_OPT += -Wall -fPIC -fgnu89-inline -Werror
#DBG_OPT = -DDBGMSG -DDBGFUNC -DSTREAM -DSTATE -DTSTAT -DAPP -DEPOLL
#DBG_OPT = -DDBGMSG -DDBGFUNC -DSTREAM -DSTATE
@ -122,12 +138,16 @@ $(MTCP_HDR):
cp $(INC_DIR)/$@ $(MTCP_HDR_DIR)/$@
clean: clean-library
rm -f *.o *~ core
rm -f .*.d
$(MSG) " CLEAN *.o's"
$(HIDE) rm -f *.o *~ core
$(MSG) " CLEAN *.d's"
$(HIDE) rm -f .*.d
clean-library:
rm -f $(MTCP_LIB_DIR)/*
rm -f $(MTCP_HDR_DIR)/*
$(MSG) " CLEAN *.a"
$(HIDE) rm -f $(MTCP_LIB_DIR)/*
$(MSG) " CLEAN *.h"
$(HIDE) rm -f $(MTCP_HDR_DIR)/*
distclean: clean
rm -f Makefile

View File

@ -59,7 +59,7 @@ struct arp_manager
struct arp_manager g_arpm;
/*----------------------------------------------------------------------------*/
void
DumpARPPacket(struct arphdr *arph);
DumpARPPacket(mtcp_manager_t mtcp, struct arphdr *arph);
/*----------------------------------------------------------------------------*/
int
InitARPTable()
@ -161,7 +161,7 @@ ARPOutput(struct mtcp_manager *mtcp, int nif, int opcode,
memset(arph->pad, 0, ARP_PAD_LEN);
#if DBGMSG
DumpARPPacket(arph);
DumpARPPacket(mtcp, arph);
#endif
return 0;
@ -285,7 +285,7 @@ ProcessARPPacket(mtcp_manager_t mtcp, uint32_t cur_ts,
return TRUE;
#if DBGMSG
DumpARPPacket(arph);
DumpARPPacket(mtcp, arph);
#endif
switch (ntohs(arph->ar_op)) {
@ -357,26 +357,26 @@ PrintARPTable()
}
/*----------------------------------------------------------------------------*/
void
DumpARPPacket(struct arphdr *arph)
DumpARPPacket(mtcp_manager_t mtcp, struct arphdr *arph)
{
uint8_t *t;
fprintf(stderr, "ARP header: \n");
fprintf(stderr, "Hardware type: %d (len: %d), "
"protocol type: %d (len: %d), opcode: %d\n",
ntohs(arph->ar_hrd), arph->ar_hln,
ntohs(arph->ar_pro), arph->ar_pln, ntohs(arph->ar_op));
thread_printf(mtcp, mtcp->log_fp, "ARP header: \n");
thread_printf(mtcp, mtcp->log_fp, "Hardware type: %d (len: %d), "
"protocol type: %d (len: %d), opcode: %d\n",
ntohs(arph->ar_hrd), arph->ar_hln,
ntohs(arph->ar_pro), arph->ar_pln, ntohs(arph->ar_op));
t = (uint8_t *)&arph->ar_sip;
fprintf(stderr, "Sender IP: %u.%u.%u.%u, "
"haddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
t[0], t[1], t[2], t[3],
arph->ar_sha[0], arph->ar_sha[1], arph->ar_sha[2],
arph->ar_sha[3], arph->ar_sha[4], arph->ar_sha[5]);
thread_printf(mtcp, mtcp->log_fp, "Sender IP: %u.%u.%u.%u, "
"haddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
t[0], t[1], t[2], t[3],
arph->ar_sha[0], arph->ar_sha[1], arph->ar_sha[2],
arph->ar_sha[3], arph->ar_sha[4], arph->ar_sha[5]);
t = (uint8_t *)&arph->ar_tip;
fprintf(stderr, "Target IP: %u.%u.%u.%u, "
"haddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
t[0], t[1], t[2], t[3],
arph->ar_tha[0], arph->ar_tha[1], arph->ar_tha[2],
arph->ar_tha[3], arph->ar_tha[4], arph->ar_tha[5]);
thread_printf(mtcp, mtcp->log_fp, "Target IP: %u.%u.%u.%u, "
"haddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
t[0], t[1], t[2], t[3],
arph->ar_tha[0], arph->ar_tha[1], arph->ar_tha[2],
arph->ar_tha[3], arph->ar_tha[4], arph->ar_tha[5]);
}
/*----------------------------------------------------------------------------*/

View File

@ -21,15 +21,31 @@
/* for if_nametoindex */
#include <net/if.h>
#define MAX_ROUTE_ENTRY 64
#define MAX_OPTLINE_LEN 1024
#define ALL_STRING "all"
#define MAX_ROUTE_ENTRY 64
#define MAX_OPTLINE_LEN 1024
#define ALL_STRING "all"
static const char *route_file = "config/route.conf";
static const char *arp_file = "config/arp.conf";
struct mtcp_manager *g_mtcp[MAX_CPUS] = {NULL};
struct mtcp_config CONFIG = {0};
addr_pool_t ap[ETH_NUM] = {NULL};
static const char *route_file = "config/route.conf";
static const char *arp_file = "config/arp.conf";
struct mtcp_manager *g_mtcp[MAX_CPUS] = {NULL};
struct mtcp_config CONFIG = {
/* set default configuration */
.max_concurrency = 10000,
.max_num_buffers = 10000,
.rcvbuf_size = -1,
.sndbuf_size = -1,
.tcp_timeout = TCP_TIMEOUT,
.tcp_timewait = TCP_TIMEWAIT,
.num_mem_ch = 0,
#ifdef ENABLE_ONVM
.onvm_inst = (uint16_t) -1,
.onvm_dest = (uint16_t) -1,
.onvm_serv = (uint16_t) -1
#endif
};
addr_pool_t ap[ETH_NUM] = {NULL};
static char port_list[MAX_OPTLINE_LEN] = "";
static char port_stat_list[MAX_OPTLINE_LEN] = "";
/* total cpus detected in the mTCP stack*/
int num_cpus;
/* this should be equal to num_cpus */
@ -475,14 +491,14 @@ LoadARPTable()
numEntry = GetIntValue(p + sizeof(ARP_ENTRY));
if (numEntry <= 0) {
fprintf(stderr, "Wrong entry in arp.conf: %s\n", p);
exit(-1);
exit(EXIT_FAILURE);
}
#if 0
CONFIG.arp.entry = (struct arp_entry *)
calloc(numEntry + MAX_ARPENTRY, sizeof(struct arp_entry));
if (CONFIG.arp.entry == NULL) {
fprintf(stderr, "Wrong entry in arp.conf: %s\n", p);
exit(-1);
exit(EXIT_FAILURE);
}
#endif
hasNumEntry = 1;
@ -491,7 +507,7 @@ LoadARPTable()
fprintf(stderr,
"Error in arp.conf: more entries than "
"are specifed, entry=%s\n", p);
exit(-1);
exit(EXIT_FAILURE);
}
EnrollARPTableEntry(p);
numEntry--;
@ -521,6 +537,18 @@ SetMultiProcessSupport(char *multiprocess_details)
return 0;
}
/*----------------------------------------------------------------------------*/
/*
 * Remember the configured port list in port_list.
 * The copy is bounded by the size of port_list; longer input is
 * truncated instead of overflowing the buffer.
 */
static inline void
SaveInterfaceInfo(char *dev_name_list)
{
	strncpy(port_list, dev_name_list, sizeof(port_list) - 1);
	/* strncpy() does not terminate the string on truncation */
	port_list[sizeof(port_list) - 1] = '\0';
}
/*----------------------------------------------------------------------------*/
/*
 * Remember the configured stat_print value in port_stat_list.
 * The copy is bounded by the size of port_stat_list; longer input is
 * truncated instead of overflowing the buffer.
 */
static inline void
SaveInterfaceStatList(char *dev_name_list)
{
	strncpy(port_stat_list, dev_name_list, sizeof(port_stat_list) - 1);
	/* strncpy() does not terminate the string on truncation */
	port_stat_list[sizeof(port_stat_list) - 1] = '\0';
}
/*----------------------------------------------------------------------------*/
static int
ParseConfiguration(char *line)
{
@ -556,6 +584,10 @@ ParseConfiguration(char *line)
return -1;
}
num_cpus = CONFIG.num_cores;
} else if (strcmp(p, "core_mask") == 0) {
#ifndef DISABLE_DPDK
mpz_set_str(CONFIG._cpumask, q, 16);
#endif
} else if (strcmp(p, "max_concurrency") == 0) {
CONFIG.max_concurrency = mystrtol(q, 10);
if (CONFIG.max_concurrency < 0) {
@ -591,19 +623,12 @@ ParseConfiguration(char *line)
CONFIG.tcp_timewait = SEC_TO_USEC(CONFIG.tcp_timewait) / TIME_TICK;
}
} else if (strcmp(p, "stat_print") == 0) {
int i;
for (i = 0; i < CONFIG.eths_num; i++) {
if (strcmp(CONFIG.eths[i].dev_name, q) == 0) {
CONFIG.eths[i].stat_print = TRUE;
}
}
SaveInterfaceStatList(q);
} else if (strcmp(p, "port") == 0) {
if(strncmp(q, ALL_STRING, sizeof(ALL_STRING)) == 0) {
SetInterfaceInfo(q);
} else {
SetInterfaceInfo(line + strlen(p) + 1);
}
if(strncmp(q, ALL_STRING, sizeof(ALL_STRING)) == 0)
SaveInterfaceInfo(q);
else
SaveInterfaceInfo(line + strlen(p) + 1);
} else if (strcmp(p, "io") == 0) {
AssignIOModule(q);
if (CheckIOModuleAccessPermissions() == -1) {
@ -647,19 +672,8 @@ LoadConfiguration(const char *fname)
return -1;
}
/* set default configuration */
CONFIG.num_cores = num_cpus;
CONFIG.max_concurrency = 10000;
CONFIG.max_num_buffers = 10000;
CONFIG.rcvbuf_size = -1;
CONFIG.sndbuf_size = -1;
CONFIG.tcp_timeout = TCP_TIMEOUT;
CONFIG.tcp_timewait = TCP_TIMEWAIT;
CONFIG.num_mem_ch = 0;
#ifdef ENABLE_ONVM
CONFIG.onvm_inst = (uint16_t) -1;
CONFIG.onvm_dest = (uint16_t) -1;
CONFIG.onvm_serv = (uint16_t) -1;
#ifndef DISABLE_DPDK
mpz_init(CONFIG._cpumask);
#endif
while (1) {
char *p;
@ -700,6 +714,8 @@ LoadConfiguration(const char *fname)
if (CONFIG.rcvbuf_size == -1 && CONFIG.sndbuf_size == -1)
CONFIG.sndbuf_size = CONFIG.rcvbuf_size = 8192;
return SetNetEnv(port_list, port_stat_list);
return 0;
}
/*----------------------------------------------------------------------------*/

View File

@ -64,7 +64,11 @@ struct mtcp_thread_context *g_pctx[MAX_CPUS] = {0};
struct log_thread_context *g_logctx[MAX_CPUS] = {0};
/*----------------------------------------------------------------------------*/
static pthread_t g_thread[MAX_CPUS] = {0};
#if defined (PKTDUMP) || (DBGMSG) || (DBGFUNC) || \
(STREAM) || (STATE) || (STAT) || (APP) || (EPOLL) \
|| (DUMP_STREAM)
static pthread_t log_thread[MAX_CPUS] = {0};
#endif
/*----------------------------------------------------------------------------*/
static sem_t g_init_sem[MAX_CPUS];
static int running[MAX_CPUS] = {0};
@ -1180,8 +1184,8 @@ mtcp_create_context(int cpu)
if (cpu >= CONFIG.num_cores) {
TRACE_ERROR("Failed initialize new mtcp context. "
"Requested cpu id %d exceed the number of cores %d configured to use.\n",
cpu, CONFIG.num_cores);
"Requested cpu id %d exceed the number of cores %d configured to use.\n",
cpu, CONFIG.num_cores);
return NULL;
}
@ -1215,6 +1219,9 @@ mtcp_create_context(int cpu)
return NULL;
}
InitLogThreadContext(g_logctx[cpu], cpu);
#if defined (PKTDUMP) || (DBGMSG) || (DBGFUNC) || \
(STREAM) || (STATE) || (STAT) || (APP) || \
(EPOLL) || (DUMP_STREAM)
if (pthread_create(&log_thread[cpu],
NULL, ThreadLogMain, (void *)g_logctx[cpu])) {
perror("pthread_create");
@ -1223,13 +1230,14 @@ mtcp_create_context(int cpu)
free(mctx);
return NULL;
}
#endif
#ifndef DISABLE_DPDK
/* Wake up mTCP threads (wake up I/O threads) */
if (current_iomodule_func == &dpdk_module_func) {
int master;
master = rte_get_master_lcore();
if (master == cpu) {
if (master == whichCoreID(cpu)) {
lcore_config[master].ret = 0;
lcore_config[master].state = FINISHED;
@ -1239,7 +1247,7 @@ mtcp_create_context(int cpu)
return NULL;
}
} else
rte_eal_remote_launch(MTCPDPDKRunThread, mctx, cpu);
rte_eal_remote_launch(MTCPDPDKRunThread, mctx, whichCoreID(cpu));
} else
#endif
{
@ -1317,7 +1325,10 @@ mtcp_free_context(mctx_t mctx)
ret = write(log_ctx->pair_sp_fd, "F", 1);
assert(ret == 1);
UNUSED(ret);
#if defined (PKTDUMP) || (DBGMSG) || (DBGFUNC) || (STREAM)\
|| (STATE) || (STAT) || (APP) || (EPOLL) || (DUMP_STREAM)
pthread_join(log_thread[ctx->cpu], NULL);
#endif
fclose(mtcp->log_fp);
TRACE_LOG("Log thread %d joined.\n", mctx->cpu);
@ -1553,6 +1564,9 @@ mtcp_destroy()
for (i = 0; i < CONFIG.eths_num; i++)
DestroyAddressPool(ap[i]);
#ifndef DISABLE_DPDK
mpz_clear(CONFIG._cpumask);
#endif
TRACE_INFO("All MTCP threads are joined.\n");
}
/*----------------------------------------------------------------------------*/

View File

@ -16,12 +16,14 @@
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <gmp.h>
#include <mtcp.h>
#endif
#define MAX_FILE_NAME 1024
/*----------------------------------------------------------------------------*/
int
inline int
GetNumCPUs()
{
return sysconf(_SC_NPROCESSORS_ONLN);
@ -33,6 +35,27 @@ Gettid()
return syscall(__NR_gettid);
}
/*----------------------------------------------------------------------------*/
/*
 * Map a thread number to a core id using CONFIG._cpumask.
 *
 * When the mask is zero, or DPDK support is compiled out, the thread
 * number is returned unchanged. Otherwise the result is the position of
 * the (thread_no + 1)-th set bit of the mask, counting from bit 0.
 * If thread_no does not correspond to a set bit, thread_no itself is
 * returned.
 */
inline int
whichCoreID(int thread_no)
{
#ifndef DISABLE_DPDK
	int nth_set, cpu_id, limit;

	/*
	 * mpz_sgn() inspects the whole number; mpz_get_ui() would only
	 * look at the least significant bits that fit an unsigned long.
	 */
	if (mpz_sgn(CONFIG._cpumask) == 0)
		return thread_no;

	limit = mpz_popcount(CONFIG._cpumask);
	/* walk the mask upwards until every set bit has been visited */
	for (cpu_id = 0, nth_set = 0; nth_set < limit; cpu_id++) {
		if (mpz_tstbit(CONFIG._cpumask, cpu_id)) {
			if (thread_no == nth_set)
				return cpu_id;
			nth_set++;
		}
	}
#endif
	return thread_no;
}
/*----------------------------------------------------------------------------*/
int
mtcp_core_affinitize(int cpu)
{
@ -42,6 +65,8 @@ mtcp_core_affinitize(int cpu)
n = GetNumCPUs();
cpu = whichCoreID(cpu);
if (cpu < 0 || cpu >= (int) n) {
errno = -EINVAL;
return -1;

View File

@ -32,6 +32,8 @@
/* for ip defragging */
#include <rte_ip_frag.h>
#endif
/* for ioctl funcs */
#include <dpdk_iface_common.h>
/*----------------------------------------------------------------------------*/
/* Essential macros */
#define MAX_RX_QUEUE_PER_LCORE MAX_CPUS
@ -185,7 +187,6 @@ struct dpdk_private_context {
} __rte_cache_aligned;
#ifdef ENABLE_STATS_IOCTL
#define DEV_NAME "/dev/dpdk-iface"
/**
* stats struct passed on from user space to the driver
*/
@ -266,9 +267,9 @@ dpdk_init_handle(struct mtcp_thread_context *ctxt)
#endif /* !IP_DEFRAG */
#ifdef ENABLE_STATS_IOCTL
dpc->fd = open(DEV_NAME, O_RDWR);
dpc->fd = open(DEV_PATH, O_RDWR);
if (dpc->fd == -1) {
TRACE_ERROR("Can't open " DEV_NAME " for context->cpu: %d! "
TRACE_ERROR("Can't open " DEV_PATH " for context->cpu: %d! "
"Are you using mlx4/mlx5 driver?\n",
ctxt->cpu);
}
@ -340,7 +341,8 @@ dpdk_send_pkts(struct mtcp_thread_context *ctxt, int ifidx)
ss.qid = ctxt->cpu;
ss.dev = portid;
/* pass the info now */
ioctl(dpc->fd, 0, &ss);
if (ioctl(dpc->fd, SEND_STATS, &ss) == -1)
TRACE_ERROR("Can't update iface stats!\n");
dpc->cur_ts = mtcp->cur_ts;
if (ctxt->cpu == 0)
rte_eth_stats_reset(portid);
@ -623,7 +625,7 @@ dpdk_load_module(void)
/* for Ethernet flow control settings */
struct rte_eth_fc_conf fc_conf;
/* setting the rss key */
static const uint8_t key[] = {
static uint8_t key[] = {
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 10 */
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 20 */
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, /* 30 */
@ -632,7 +634,7 @@ dpdk_load_module(void)
0x05, 0x05 /* 60 - 8 */
};
port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)&key;
port_conf.rx_adv_conf.rss_conf.rss_key = (uint8_t *)key;
port_conf.rx_adv_conf.rss_conf.rss_key_len = sizeof(key);
if (!CONFIG.multi_process || (CONFIG.multi_process && CONFIG.multi_process_is_master)) {
@ -728,13 +730,13 @@ dpdk_load_module(void)
memset(&fc_conf, 0, sizeof(fc_conf));
ret = rte_eth_dev_flow_ctrl_get(portid, &fc_conf);
if (ret != 0)
rte_exit(EXIT_FAILURE, "Failed to get flow control info!\n");
TRACE_INFO("Failed to get flow control info!\n");
/* and just disable the rx/tx flow control */
fc_conf.mode = RTE_FC_NONE;
ret = rte_eth_dev_flow_ctrl_set(portid, &fc_conf);
if (ret != 0)
rte_exit(EXIT_FAILURE, "Failed to set flow control info!: errno: %d\n",
TRACE_INFO("Failed to set flow control info!: errno: %d\n",
ret);
#ifdef DEBUG

View File

@ -13,7 +13,7 @@
#define IP_NEXT_PTR(iph) ((uint8_t *)iph + (iph->ihl << 2))
/*----------------------------------------------------------------------------*/
void
DumpICMPPacket(struct icmphdr *icmph, uint32_t saddr, uint32_t daddr);
DumpICMPPacket(mtcp_manager_t mtcp, struct icmphdr *icmph, uint32_t saddr, uint32_t daddr);
/*----------------------------------------------------------------------------*/
static uint16_t
ICMPChecksum(uint16_t *icmph, int len)
@ -69,7 +69,7 @@ ICMPOutput(struct mtcp_manager *mtcp, uint32_t saddr, uint32_t daddr,
ICMPChecksum((uint16_t *)icmph, sizeof(struct icmphdr) + len);
#if DBGMSG
DumpICMPPacket(icmph, saddr, daddr);
DumpICMPPacket(mtcp, icmph, saddr, daddr);
#endif
return 0;
}
@ -142,23 +142,23 @@ ProcessICMPPacket(mtcp_manager_t mtcp, struct iphdr *iph, int len)
}
/*----------------------------------------------------------------------------*/
void
DumpICMPPacket(struct icmphdr *icmph, uint32_t saddr, uint32_t daddr)
DumpICMPPacket(mtcp_manager_t mtcp, struct icmphdr *icmph, uint32_t saddr, uint32_t daddr)
{
uint8_t *t;
fprintf(stderr, "ICMP header: \n");
fprintf(stderr, "Type: %d, "
"Code: %d, ID: %d, Sequence: %d\n",
icmph->icmp_type, icmph->icmp_code,
ntohs(ICMP_ECHO_GET_ID(icmph)), ntohs(ICMP_ECHO_GET_SEQ(icmph)));
thread_printf(mtcp, mtcp->log_fp, "ICMP header: \n");
thread_printf(mtcp, mtcp->log_fp, "Type: %d, "
"Code: %d, ID: %d, Sequence: %d\n",
icmph->icmp_type, icmph->icmp_code,
ntohs(ICMP_ECHO_GET_ID(icmph)), ntohs(ICMP_ECHO_GET_SEQ(icmph)));
t = (uint8_t *)&saddr;
fprintf(stderr, "Sender IP: %u.%u.%u.%u\n",
t[0], t[1], t[2], t[3]);
thread_printf(mtcp, mtcp->log_fp, "Sender IP: %u.%u.%u.%u\n",
t[0], t[1], t[2], t[3]);
t = (uint8_t *)&daddr;
fprintf(stderr, "Target IP: %u.%u.%u.%u\n",
t[0], t[1], t[2], t[3]);
thread_printf(mtcp, mtcp->log_fp, "Target IP: %u.%u.%u.%u\n",
t[0], t[1], t[2], t[3]);
}
/*----------------------------------------------------------------------------*/
#undef IP_NEXT_PTR

View File

@ -35,10 +35,6 @@ PrintInterfaceInfo();
void
PrintRoutingTable();
/* set socket modes */
int
SetSocketMode(int8_t socket_mode);
/* fetch mask from prefix */
uint32_t
MaskFromPrefix(int prefix);

View File

@ -1,6 +1,8 @@
#ifndef CPU_H
#define CPU_H
int GetNumCPUs();
inline int GetNumCPUs();
inline int whichCoreID(int thread_no);
#endif /* CPU_H */

View File

@ -0,0 +1 @@
../../../dpdk-iface-kmod/dpdk_iface_common.h

View File

@ -72,7 +72,7 @@ typedef struct io_module_func {
} io_module_func __attribute__((aligned(__WORDSIZE)));
/*----------------------------------------------------------------------------*/
/* set I/O module context */
int SetInterfaceInfo(char *);
int SetNetEnv(char *port_list, char *port_stat_list);
/* retrieve device-specific endian type */
int FetchEndianType();

View File

@ -6,6 +6,9 @@
#include <sys/time.h>
#include <sys/queue.h>
#include <pthread.h>
#ifndef DISABLE_DPDK
#include <gmp.h>
#endif
#include "memory_mgt.h"
#include "tcp_ring_buffer.h"
@ -39,32 +42,32 @@
#define ETHERNET_HEADER_LEN 14 // sizeof(struct ethhdr)
#define IP_HEADER_LEN 20 // sizeof(struct iphdr)
#define TCP_HEADER_LEN 20 // sizeof(struct tcphdr)
#define TOTAL_TCP_HEADER_LEN 54 // total header length
#define TOTAL_TCP_HEADER_LEN 54 // total header length
/* configurations */
#define BACKLOG_SIZE (10*1024)
#define MAX_PKT_SIZE (2*1024)
#define ETH_NUM 4
#define BACKLOG_SIZE (10*1024)
#define MAX_PKT_SIZE (2*1024)
#define ETH_NUM MAX_DEVICES
#define TCP_OPT_TIMESTAMP_ENABLED TRUE /* enabled for rtt measure */
#define TCP_OPT_SACK_ENABLED FALSE /* not implemented */
#define TCP_OPT_TIMESTAMP_ENABLED TRUE /* enabled for rtt measure */
#define TCP_OPT_SACK_ENABLED FALSE /* not implemented */
#define LOCK_STREAM_QUEUE FALSE
#define USE_SPIN_LOCK TRUE
#define INTR_SLEEPING_MTCP TRUE
#define PROMISCUOUS_MODE TRUE
#define LOCK_STREAM_QUEUE FALSE
#define USE_SPIN_LOCK TRUE
#define INTR_SLEEPING_MTCP TRUE
#define PROMISCUOUS_MODE TRUE
/* blocking api became obsolete */
#define BLOCKING_SUPPORT FALSE
#define BLOCKING_SUPPORT FALSE
#ifndef MAX_CPUS
#define MAX_CPUS 16
#define MAX_CPUS 16
#endif
/*----------------------------------------------------------------------------*/
/* Statistics */
#ifdef NETSTAT
#define NETSTAT_PERTHREAD TRUE
#define NETSTAT_TOTAL TRUE
#define NETSTAT_PERTHREAD TRUE
#define NETSTAT_TOTAL TRUE
#endif /* NETSTAT */
#define RTM_STAT FALSE
/*----------------------------------------------------------------------------*/
@ -135,9 +138,6 @@ struct arp_table
/*----------------------------------------------------------------------------*/
struct mtcp_config
{
/* socket mode */
int8_t socket_mode;
/* network interface config */
struct eth_table *eths;
int *nif_to_eidx; // mapping physic port indexes to that of the configured port-list
@ -154,6 +154,9 @@ struct mtcp_config
int num_cores;
int num_mem_ch;
int max_concurrency;
#ifndef DISABLE_DPDK
mpz_t _cpumask;
#endif
int max_num_buffers;
int rcvbuf_size;

View File

@ -13,8 +13,12 @@
/* for ioctl */
#include <sys/ioctl.h>
#ifndef DISABLE_DPDK
/* capacity of the argv vector handed to the EAL; parenthesized so the
 * macro expands safely inside larger expressions */
#define RTE_ARGC_MAX ((RTE_MAX_ETHPORTS << 1) + 9)
/* for dpdk ethernet functions (get mac addresses) */
#include <rte_ethdev.h>
#include <dpdk_iface_common.h>
/* for ceil func */
#include <math.h>
#endif
/* for TRACE_* */
#include "debug.h"
@ -43,6 +47,13 @@
io_module_func *current_iomodule_func = &dpdk_module_func;
#ifndef DISABLE_DPDK
enum rte_proc_type_t eal_proc_type_detect(void);
/**
* DPDK's RTE consumes some huge pages for internal bookkeeping.
* Therefore, it is not always safe to reserve the exact amount
* of pages for our stack (e.g. dividing requested mem, in MB, by
* (1<<20) would be insufficient). Hence, the following value.
*/
#define RTE_SOCKET_MEM_SHIFT ((1<<19)|(1<<18))
#endif
/*----------------------------------------------------------------------------*/
#define ALL_STRING "all"
@ -85,8 +96,68 @@ GetNumQueues()
}
#endif /* !PSIO */
/*----------------------------------------------------------------------------*/
#ifndef DISABLE_DPDK
/**
* returns max numa ID while probing for rte devices
*/
static int
probe_all_rte_devices(char **argv, int *argc, char *dev_name_list)
{
PciDevice pd;
int fd, numa_id = -1;
static char end[] = "";
static const char delim[] = " \t";
static char *dev_tokenizer;
char *dev_token, *saveptr;
dev_tokenizer = strdup(dev_name_list);
if (dev_tokenizer == NULL) {
TRACE_ERROR("Can't allocate memory for dev_tokenizer!\n");
exit(EXIT_FAILURE);
}
fd = open(DEV_PATH, O_RDONLY);
if (fd == -1) {
TRACE_ERROR("Error opening dpdk-face!\n");
exit(EXIT_FAILURE);
}
dev_token = strtok_r(dev_tokenizer, delim, &saveptr);
while (dev_token != NULL) {
strcpy(pd.ifname, dev_token);
if (ioctl(fd, FETCH_PCI_ADDRESS, &pd) == -1) {
TRACE_DBG("Could not find pci info on dpdk "
"device: %s. Is it a dpdk-attached "
"interface?\n", dev_token);
goto loop_over;
}
argv[*argc] = strdup("-w");
argv[*argc + 1] = calloc(PCI_LENGTH, 1);
if (argv[*argc] == NULL ||
argv[*argc + 1] == NULL) {
TRACE_ERROR("Memory allocation error!\n");
exit(EXIT_FAILURE);
}
sprintf(argv[*argc + 1], PCI_DOM":"PCI_BUS":"
PCI_DEVICE"."PCI_FUNC,
pd.pa.domain, pd.pa.bus, pd.pa.device,
pd.pa.function);
*argc += 2;
if (pd.numa_socket > numa_id) numa_id = pd.numa_socket;
loop_over:
dev_token = strtok_r(NULL, delim, &saveptr);
}
/* add the terminating "" sequence */
argv[*argc] = end;
close(fd);
free(dev_tokenizer);
return numa_id;
}
#endif /* !DISABLE_DPDK */
/*----------------------------------------------------------------------------*/
int
SetInterfaceInfo(char* dev_name_list)
SetNetEnv(char *dev_name_list, char *port_stat_list)
{
int eidx = 0;
int i, j;
@ -182,20 +253,28 @@ SetInterfaceInfo(char* dev_name_list)
#ifndef DISABLE_DPDK
int cpu = CONFIG.num_cores;
mpz_t _cpumask;
char cpumaskbuf[30];
char mem_channels[5];
int ret;
char cpumaskbuf[32] = "";
char mem_channels[8] = "";
char socket_mem_str[32] = "";
// int i;
int ret, socket_mem;
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
/* STEP 1: first determine CPU mask */
mpz_init(_cpumask);
/* get the cpu mask */
for (ret = 0; ret < cpu; ret++)
mpz_setbit(_cpumask, ret);
gmp_sprintf(cpumaskbuf, "%ZX", _cpumask);
mpz_clear(_cpumask);
if (!mpz_cmp(_cpumask, CONFIG._cpumask)) {
/* get the cpu mask */
for (ret = 0; ret < cpu; ret++)
mpz_setbit(_cpumask, ret);
gmp_sprintf(cpumaskbuf, "%ZX", _cpumask);
} else
gmp_sprintf(cpumaskbuf, "%ZX", CONFIG._cpumask);
mpz_clear(_cpumask);
/* STEP 2: determine memory channels per socket */
/* get the mem channels per socket */
if (CONFIG.num_mem_ch == 0) {
TRACE_ERROR("DPDK module requires # of memory channels "
@ -203,18 +282,46 @@ SetInterfaceInfo(char* dev_name_list)
exit(EXIT_FAILURE);
}
sprintf(mem_channels, "%d", CONFIG.num_mem_ch);
/* initialize the rte env first, what a waste of implementation effort! */
char *argv[] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
"--proc-type=auto",
""
};
const int argc = 6;
/* STEP 3: determine socket memory */
/* get socket memory threshold (in MB) */
socket_mem =
RTE_ALIGN_CEIL((unsigned long)ceil((CONFIG.num_cores *
(CONFIG.rcvbuf_size +
CONFIG.sndbuf_size +
sizeof(struct tcp_stream) +
sizeof(struct tcp_recv_vars) +
sizeof(struct tcp_send_vars) +
sizeof(struct fragment_ctx)) *
CONFIG.max_concurrency)/RTE_SOCKET_MEM_SHIFT),
RTE_CACHE_LINE_SIZE);
/* initialize the rte env, what a waste of implementation effort! */
int argc = 6;//8;
char *argv[RTE_ARGC_MAX] = {"",
"-c",
cpumaskbuf,
"-n",
mem_channels,
#if 0
"--socket-mem",
socket_mem_str,
#endif
"--proc-type=auto"
};
ret = probe_all_rte_devices(argv, &argc, dev_name_list);
/* STEP 4: build up socket mem parameter */
sprintf(socket_mem_str, "%d", socket_mem);
#if 0
char *smsptr = socket_mem_str + strlen(socket_mem_str);
for (i = 1; i < ret + 1; i++) {
sprintf(smsptr, ",%d", socket_mem);
smsptr += strlen(smsptr);
}
TRACE_DBG("socket_mem: %s\n", socket_mem_str);
#endif
/*
* re-set getopt extern variable optind.
* this issue was a bitch to debug
@ -228,12 +335,15 @@ SetInterfaceInfo(char* dev_name_list)
/* initialize the dpdk eal env */
ret = rte_eal_init(argc, argv);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
if (ret < 0) {
TRACE_ERROR("Invalid EAL args!\n");
exit(EXIT_FAILURE);
}
/* give me the count of 'detected' ethernet ports */
num_devices = rte_eth_dev_count();
if (num_devices == 0) {
rte_exit(EXIT_FAILURE, "No Ethernet port!\n");
TRACE_ERROR("No Ethernet port!\n");
exit(EXIT_FAILURE);
}
/* get mac addr entries of 'detected' dpdk ports */
@ -314,7 +424,24 @@ SetInterfaceInfo(char* dev_name_list)
} while (iter_if != NULL);
freeifaddrs(ifap);
#if 0
/*
* XXX: It seems that there is a bug in the RTE SDK.
* The dynamically allocated rte_argv params are left
* as dangling pointers. Freeing them causes program
* to crash.
*/
/* free up all resources */
for (; rte_argc >= 9; rte_argc--) {
if (rte_argv[rte_argc] != NULL) {
fprintf(stderr, "Cleaning up rte_argv[%d]: %s (%p)\n",
rte_argc, rte_argv[rte_argc], rte_argv[rte_argc]);
free(rte_argv[rte_argc]);
rte_argv[rte_argc] = NULL;
}
}
#endif
/* check if process is primary or secondary */
CONFIG.multi_process_is_master = (eal_proc_type_detect() == RTE_PROC_PRIMARY) ?
1 : 0;
@ -378,7 +505,7 @@ SetInterfaceInfo(char* dev_name_list)
ETH_ALEN))
CONFIG.eths[eidx].ifindex = ifr.ifr_ifindex;
#endif
CONFIG.eths[eidx].ifindex = eidx;//if_nametoindex(ifr.ifr_name);
CONFIG.eths[eidx].ifindex = eidx;
TRACE_INFO("Ifindex of interface %s is: %d\n",
ifr.ifr_name, CONFIG.eths[eidx].ifindex);
#if 0
@ -391,7 +518,7 @@ SetInterfaceInfo(char* dev_name_list)
break;
}
}
devices_attached[num_devices_attached] = if_nametoindex(ifr.ifr_name);//CONFIG.eths[eidx].ifindex;
devices_attached[num_devices_attached] = if_nametoindex(ifr.ifr_name);
num_devices_attached++;
fprintf(stderr, "Total number of attached devices: %d\n",
num_devices_attached);
@ -407,8 +534,8 @@ SetInterfaceInfo(char* dev_name_list)
#ifdef ENABLE_ONVM
int cpu = CONFIG.num_cores;
mpz_t cpumask;
char cpumaskbuf[30];
char mem_channels[5];
char cpumaskbuf[32];
char mem_channels[8];
char service[6];
char instance[6];
int ret;
@ -461,12 +588,15 @@ SetInterfaceInfo(char* dev_name_list)
/* initialize the dpdk eal env */
ret = onvm_nflib_init(argc, argv, "mtcp_nf", &CONFIG.nf_info);
if (ret < 0)
rte_exit(EXIT_FAILURE, "Invalid EAL args!\n");
if (ret < 0) {
TRACE_ERROR("Invalid EAL args!\n");
exit(EXIT_FAILURE);
}
/* give me the count of 'detected' ethernet ports */
num_devices = ports->num_ports;
if (num_devices == 0) {
rte_exit(EXIT_FAILURE, "No Ethernet port!\n");
TRACE_ERROR("No Ethernet port!\n");
exit(EXIT_FAILURE);
}
num_queues = MIN(CONFIG.num_cores, MAX_CPUS);
@ -552,6 +682,10 @@ SetInterfaceInfo(char* dev_name_list)
/* the physical port index of the i-th port listed in the config file is j */
CONFIG.nif_to_eidx[j] = i;
/* finally set the port stats option `on' */
if (strcmp(CONFIG.eths[i].dev_name, port_stat_list) == 0)
CONFIG.eths[i].stat_print = TRUE;
}
return 0;

View File

@ -14,6 +14,8 @@
#include "netmap_user.h"
/* for poll */
#include <sys/poll.h>
/* for ETHER_CRC_LEN */
#include <net/ethernet.h>
/*----------------------------------------------------------------------------*/
#define MAX_PKT_BURST 64
#define ETHERNET_FRAME_SIZE 1514
@ -22,6 +24,14 @@
#define IDLE_POLL_WAIT 1 /* msecs */
#define IDLE_POLL_COUNT 10
//#define CONST_POLLING 1
/*
* Ethernet frame overhead
*/
#define ETHER_IFG 12
#define ETHER_PREAMBLE 8
#define ETHER_OVR (ETHER_CRC_LEN + ETHER_PREAMBLE + ETHER_IFG)
/*----------------------------------------------------------------------------*/
struct netmap_private_context {
@ -115,7 +125,7 @@ netmap_send_pkts(struct mtcp_thread_context *ctxt, int nif)
#ifdef NETSTAT
mtcp->nstat.tx_packets[nif]++;
mtcp->nstat.tx_bytes[nif] += pkt_size + 24;
mtcp->nstat.tx_bytes[nif] += pkt_size + ETHER_OVR;
#endif
tx_again:

View File

@ -11,9 +11,19 @@
#include "debug.h"
/* for num_devices_* */
#include "config.h"
/* for ETHER_CRC_LEN */
#include <net/ethernet.h>
/*----------------------------------------------------------------------------*/
#define PS_CHUNK_SIZE 64
#define PS_SELECT_TIMEOUT 100 /* in us */
/*
* Ethernet frame overhead
*/
#define ETHER_IFG 12
#define ETHER_PREAMBLE 8
#define ETHER_OVR (ETHER_CRC_LEN + ETHER_PREAMBLE + ETHER_IFG)
/*----------------------------------------------------------------------------*/
struct ps_device devices[MAX_DEVICES];
/*----------------------------------------------------------------------------*/
@ -159,7 +169,7 @@ psio_flush_pkts(struct mtcp_thread_context *ctx, int nif)
for (i = 0; i < send_cnt; i++) {
#ifdef NETSTAT
mtcp->nstat.tx_bytes[nif] += c_buf->info[start_idx].len + 24;
mtcp->nstat.tx_bytes[nif] += c_buf->info[start_idx].len + ETHER_OVR;
#endif
#if PKTDUMP
DumpPacket(mtcp, c_buf->buf + c_buf->info[start_idx].offset,

6
setup_linux_env.sh Normal file → Executable file
View File

@ -21,7 +21,11 @@ else
fi
# Compile dpdk and configure system
bash $RTE_SDK/usertools/dpdk-setup.sh
if [ -f $RTE_SDK/usertools/dpdk-setup.sh ]; then
bash $RTE_SDK/usertools/dpdk-setup.sh
else
bash $RTE_SDK/tools/setup.sh
fi
printf "${GREEN}Goodbye!$NC\n"

6
setup_mtcp_dpdk_env.sh Normal file → Executable file
View File

@ -29,7 +29,11 @@ else
fi
# Compile dpdk and configure system
bash $RTE_SDK/usertools/dpdk-setup.sh
if [ -f $RTE_SDK/usertools/dpdk-setup.sh ]; then
bash $RTE_SDK/usertools/dpdk-setup.sh
else
bash $RTE_SDK/tools/setup.sh
fi
# Print the user message
cd $RTE_SDK

View File

@ -36,7 +36,8 @@ $(OBJS): %.o: %.c Makefile
$(HIDE) ${CC} ${GCC_OPT} ${CFLAGS} $<
clean:
rm -rf *~ *.o
$(MSG) " CC *.o"
$(HIDE) rm -rf *~ *.o
distclean: clean
rm -f Makefile