From 49dcd7beb6e8c5ff8566e6e20b63c1e130e516f5 Mon Sep 17 00:00:00 2001
From: Vitaliy Filippov
Date: Sat, 28 Aug 2021 18:34:27 +0300
Subject: [PATCH] Basic DPDK-based ping-pong app for measuring roundtrip latency

---
 Makefile   |  63 ++++++++
 pingpong.c | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 470 insertions(+)
 create mode 100644 Makefile
 create mode 100644 pingpong.c

diff --git a/Makefile b/Makefile
new file mode 100644
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2014 Intel Corporation
+
+# binary name
+APP = pingpong
+
+# all source are stored in SRCS-y
+SRCS-y := pingpong.c
+
+PKGCONF ?= pkg-config
+
+# Build using pkg-config variables if possible
+ifeq ($(shell $(PKGCONF) --exists libdpdk && echo 0),0)
+
+all: shared
+.PHONY: shared static
+shared: build/$(APP)-shared
+	ln -sf $(APP)-shared build/$(APP)
+static: build/$(APP)-static
+	ln -sf $(APP)-static build/$(APP)
+
+PC_FILE := $(shell $(PKGCONF) --path libdpdk 2>/dev/null)
+CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk) -g
+LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
+LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)
+
+build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
+	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
+
+build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
+	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
+
+build:
+	@mkdir -p $@
+
+.PHONY: clean
+clean:
+	rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
+	test -d build && rmdir -p build || true
+
+else # Build using legacy build system
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, detect a build directory, by looking for a path with a .config
+RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_pingpong.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+endif
diff --git a/pingpong.c b/pingpong.c
new file mode 100644
--- /dev/null
+++ b/pingpong.c
@@ -0,0 +1,407 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021+ Vitaliy Filippov
+ */
+
+// DPDK-based ping-pong
+// USAGE:
+// Host 1: RECV=1 ./pingpong
+// Host 2: ./pingpong
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 1024
+#define TX_RING_SIZE 1024
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+/* MTU requested on every port; also passed as the mbuf data room size. */
+#define JUMBO_MTU 9000U
+/* Number of 0xaa filler bytes that follow the Ethernet header of a ping. */
+#define PING_PAYLOAD_SIZE 4096
+/* Statistics are printed and reset once more than this many seconds pass. */
+#define REPORT_INTERVAL_SEC 5
+
+static const struct rte_eth_conf port_conf_default = {
+	.rxmode = {
+		.max_rx_pkt_len = RTE_ETHER_MAX_LEN,
+	},
+};
+
+/*
+ * Initializes a given port using global settings and with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ *
+ * Sets up one RX and one TX queue, sets the MTU to JUMBO_MTU, starts the
+ * port and enables promiscuous mode. Returns 0 on success, otherwise -1 or
+ * the return code of the DPDK call that failed.
+ */
+static inline int
+port_init(uint16_t port, struct rte_mempool *mbuf_pool)
+{
+	struct rte_eth_conf port_conf = port_conf_default;
+	const uint16_t rx_rings = 1, tx_rings = 1;
+	uint16_t nb_rxd = RX_RING_SIZE;
+	uint16_t nb_txd = TX_RING_SIZE;
+	int retval;
+	uint16_t q;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_txconf txconf;
+	struct rte_ether_addr addr;
+
+	if (!rte_eth_dev_is_valid_port(port))
+		return -1;
+
+	retval = rte_eth_dev_info_get(port, &dev_info);
+	if (retval != 0) {
+		printf("Error during getting device (port %u) info: %s\n",
+				port, strerror(-retval));
+		return retval;
+	}
+
+	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+		port_conf.txmode.offloads |=
+			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+	/* Configure the Ethernet device. */
+	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+	if (retval != 0)
+		return retval;
+
+	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
+	if (retval != 0)
+		return retval;
+
+	/* Allocate and set up 1 RX queue per Ethernet port. */
+	for (q = 0; q < rx_rings; q++) {
+		retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
+				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+		if (retval < 0)
+			return retval;
+	}
+
+	txconf = dev_info.default_txconf;
+	txconf.offloads = port_conf.txmode.offloads;
+	/* Allocate and set up 1 TX queue per Ethernet port. */
+	for (q = 0; q < tx_rings; q++) {
+		retval = rte_eth_tx_queue_setup(port, q, nb_txd,
+				rte_eth_dev_socket_id(port), &txconf);
+		if (retval < 0)
+			return retval;
+	}
+
+	if (JUMBO_MTU > dev_info.max_mtu || JUMBO_MTU < dev_info.min_mtu) {
+		printf("Set MTU failed. MTU:%u is not in valid range, min:%u - max:%u\n",
+				JUMBO_MTU, dev_info.min_mtu, dev_info.max_mtu);
+		return -1;
+	}
+	retval = rte_eth_dev_set_mtu(port, JUMBO_MTU);
+	if (retval < 0) {
+		printf("Set MTU failed. retval=%d\n", retval);
+		return retval;
+	}
+
+	/* Start the Ethernet port. */
+	retval = rte_eth_dev_start(port);
+	if (retval < 0)
+		return retval;
+
+	/* Display the port MAC address. */
+	retval = rte_eth_macaddr_get(port, &addr);
+	if (retval != 0)
+		return retval;
+
+	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+			   " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+			port,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	/* Enable RX in promiscuous mode for the Ethernet device. */
+	retval = rte_eth_promiscuous_enable(port);
+	if (retval != 0)
+		return retval;
+
+	return 0;
+}
+
+/*
+ * Warn about every port whose NUMA node differs from the one of the calling
+ * lcore: polling such a port gives worse performance.
+ */
+static void
+warn_remote_numa_ports(void)
+{
+	uint16_t port;
+
+	RTE_ETH_FOREACH_DEV(port) {
+		const int port_socket = rte_eth_dev_socket_id(port);
+
+		/* Node 0 is a real node, only negative ids mean "unknown". */
+		if (port_socket >= 0 && port_socket != (int)rte_socket_id())
+			printf("WARNING, port %u is on remote NUMA node to "
+					"polling thread.\n\tPerformance will "
+					"not be optimal.\n", port);
+	}
+}
+
+/*
+ * Receiver (RECV=1) main loop: every frame received on a port gets its source
+ * and destination MAC addresses swapped and is sent back out of the same
+ * port. Never returns.
+ */
+static __attribute__((noreturn)) void
+lcore_recv_main(void)
+{
+	uint16_t port;
+	uint16_t buf;
+	struct rte_ether_addr s_addr;
+
+	warn_remote_numa_ports();
+
+	printf("\nCore %u echoing packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+
+	/* Run until the application is quit or killed. */
+	for (;;) {
+		RTE_ETH_FOREACH_DEV(port) {
+			/* Get burst of RX packets. */
+			struct rte_mbuf *bufs[BURST_SIZE];
+			const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+					bufs, BURST_SIZE);
+
+			if (unlikely(nb_rx == 0))
+				continue;
+
+			/* Address each frame back to its sender. */
+			for (buf = 0; buf < nb_rx; buf++) {
+				struct rte_ether_hdr *eth_hdr = rte_pktmbuf_mtod(
+						bufs[buf], struct rte_ether_hdr *);
+
+				s_addr = eth_hdr->s_addr;
+				eth_hdr->s_addr = eth_hdr->d_addr;
+				eth_hdr->d_addr = s_addr;
+			}
+
+			/* Send the burst back on the port it arrived on. */
+			const uint16_t nb_tx = rte_eth_tx_burst(port, 0,
+					bufs, nb_rx);
+
+			/* Free any unsent packets. */
+			if (unlikely(nb_tx < nb_rx)) {
+				for (buf = nb_tx; buf < nb_rx; buf++)
+					rte_pktmbuf_free(bufs[buf]);
+			}
+		}
+	}
+}
+
+static struct rte_mempool *mbuf_pool;
+/* Latency statistics of the current reporting window, in microseconds. */
+static uint64_t lat_count = 0, lat_us = 0, lat_min = 0, lat_max = 0;
+/* Running mean and sum of squared deviations (Welford's algorithm). */
+static double lat_mean = 0, lat_m2 = 0;
+
+/*
+ * Microseconds elapsed between two timestamps of the same monotonic clock.
+ * Returns 0 if 'to' is not later than 'from'.
+ */
+static uint64_t
+elapsed_us(const struct timespec *from, const struct timespec *to)
+{
+	const int64_t ns = (int64_t)(to->tv_sec - from->tv_sec) * 1000000000 +
+		(to->tv_nsec - from->tv_nsec);
+
+	return ns > 0 ? (uint64_t)ns / 1000 : 0;
+}
+
+/* Integer square root (rounded down); avoids a link dependency on libm. */
+static uint64_t
+isqrt_u64(uint64_t value)
+{
+	uint64_t root = 0;
+	uint64_t bit = UINT64_C(1) << 62;
+
+	while (bit > value)
+		bit >>= 2;
+	while (bit != 0) {
+		if (value >= root + bit) {
+			value -= root + bit;
+			root = (root >> 1) + bit;
+		} else {
+			root >>= 1;
+		}
+		bit >>= 2;
+	}
+	return root;
+}
+
+/*
+ * Sample standard deviation of the current window in whole microseconds.
+ * Needs at least two samples; returns 0 otherwise.
+ */
+static uint64_t
+latency_stddev_us(void)
+{
+	double variance;
+
+	if (lat_count < 2)
+		return 0;
+	variance = lat_m2 / (double)(lat_count - 1);
+	if (!(variance > 0))
+		return 0;
+	/* Converting an out-of-range double to uint64_t is undefined. */
+	if (variance >= 18446744073709551616.0)
+		return isqrt_u64(UINT64_MAX);
+	return isqrt_u64((uint64_t)variance);
+}
+
+/*
+ * Sender main loop: for each port build one ping frame, transmit it, busy-wait
+ * for the next received burst and account the elapsed time as one roundtrip.
+ * Statistics are printed roughly every REPORT_INTERVAL_SEC seconds. Never
+ * returns.
+ */
+static __attribute__((noreturn)) void
+lcore_send_main(void)
+{
+	uint16_t port;
+	const struct rte_ether_addr s_addr = {{0xc6,0x47,0x2c,0x07,0x8a,0x24}};
+	const struct rte_ether_addr d_addr = {{0xbc,0x01,0xa4,0x9b,0x03,0x98}};
+	/* Written into the header as is, without byte order conversion. */
+	const uint16_t ether_type = 0x0a00;
+	struct timespec ckpt, start, end;
+
+	warn_remote_numa_ports();
+
+	printf("\nCore %u sending echo packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+
+	/*
+	 * CLOCK_MONOTONIC, not CLOCK_REALTIME: wall clock adjustments must not
+	 * leak into the measured latencies.
+	 */
+	clock_gettime(CLOCK_MONOTONIC, &ckpt);
+	/* Run until the application is quit or killed. */
+	for (;;) {
+		RTE_ETH_FOREACH_DEV(port) {
+			struct rte_mbuf *pkt[BURST_SIZE];
+			struct rte_ether_hdr *eth_hdr;
+			char *data;
+			uint16_t nb_tx = 0, nb_rx = 0, buf;
+			uint64_t lat_this;
+			double delta;
+
+			clock_gettime(CLOCK_MONOTONIC, &start);
+
+			/* Build the ping: Ethernet header plus filler payload. */
+			pkt[0] = rte_pktmbuf_alloc(mbuf_pool);
+			if (pkt[0] == NULL)
+				rte_exit(EXIT_FAILURE, "Cannot allocate mbuf\n");
+			eth_hdr = (struct rte_ether_hdr *)rte_pktmbuf_append(
+					pkt[0], sizeof(*eth_hdr));
+			data = rte_pktmbuf_append(pkt[0], PING_PAYLOAD_SIZE);
+			if (eth_hdr == NULL || data == NULL)
+				rte_exit(EXIT_FAILURE, "Packet does not fit into mbuf\n");
+			eth_hdr->d_addr = d_addr;
+			eth_hdr->s_addr = s_addr;
+			eth_hdr->ether_type = ether_type;
+			memset(data, 0xaa, PING_PAYLOAD_SIZE);
+
+			/* Retry until the TX queue accepts the frame. */
+			while (nb_tx == 0)
+				nb_tx = rte_eth_tx_burst(port, 0, pkt, 1);
+
+			/* Receive reply */
+			while (nb_rx == 0)
+				nb_rx = rte_eth_rx_burst(port, 0, pkt, BURST_SIZE);
+			for (buf = 0; buf < nb_rx; buf++)
+				rte_pktmbuf_free(pkt[buf]);
+			clock_gettime(CLOCK_MONOTONIC, &end);
+
+			lat_this = elapsed_us(&start, &end);
+			lat_count++;
+			lat_us += lat_this;
+			if (lat_min == 0 || lat_min > lat_this)
+				lat_min = lat_this;
+			if (lat_max < lat_this)
+				lat_max = lat_this;
+			/* Welford update of the running mean / squared deviations. */
+			delta = lat_this - lat_mean;
+			lat_mean += delta / lat_count;
+			lat_m2 += delta * (lat_this - lat_mean);
+
+			if (end.tv_sec > ckpt.tv_sec + REPORT_INTERVAL_SEC) {
+				printf("latency: avg %" PRIu64 " us, min %" PRIu64
+						" us, max %" PRIu64 " us, stddev %" PRIu64
+						" us (%" PRIu64 " packets)\n",
+						lat_us / lat_count, lat_min, lat_max,
+						latency_stddev_us(), lat_count);
+				lat_count = lat_us = lat_min = lat_max = 0;
+				lat_mean = lat_m2 = 0;
+				ckpt = end;
+			}
+		}
+	}
+}
+
+/*
+ * The main function, which does initialization and calls the per-lcore
+ * functions.
+ */
+int
+main(int argc, char *argv[])
+{
+	unsigned nb_ports;
+	uint16_t portid;
+
+	/* Initialize the Environment Abstraction Layer (EAL). */
+	int ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+	argc -= ret;
+	argv += ret;
+
+	/* Check that there is a port to send/receive on. */
+	nb_ports = rte_eth_dev_count_avail();
+	if (!nb_ports)
+		rte_exit(EXIT_FAILURE, "Error: no ports\n");
+
+	/* Creates a new mempool in memory to hold the mbufs. */
+	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+		MBUF_CACHE_SIZE, 0, JUMBO_MTU, rte_socket_id());
+
+	if (mbuf_pool == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+	// FIXME: Use only one device
+	/* Initialize all ports. */
+	RTE_ETH_FOREACH_DEV(portid)
+		if (port_init(portid, mbuf_pool) != 0)
+			rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n",
+					portid);
+
+	if (rte_lcore_count() > 1)
+		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
+
+	/* Call lcore_main on the master core only. */
+	if (getenv("RECV"))
+		lcore_recv_main();
+	else
+		lcore_send_main();
+
+	return 0;
+}