diff --git a/dpdk-iface-kmod/dpdk_iface.c b/dpdk-iface-kmod/dpdk_iface.c index 9d1a297..18b19e9 100644 --- a/dpdk-iface-kmod/dpdk_iface.c +++ b/dpdk-iface-kmod/dpdk_iface.c @@ -188,7 +188,10 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) printk(KERN_INFO "%s: Interface %s copy to user failed!\n", THIS_MODULE->name, netdev->name); ret = -1; + goto fail_pciaddr; } + /* set numa locality */ + adapter->numa_socket = pd.numa_socket; } } break; @@ -212,7 +215,12 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ret = copy_to_user(&((PciDevice __user *)arg)->pa, &adapter->pa, sizeof(struct PciAddress)); - return -ret; + if (ret) return -1; + ret = copy_to_user(&((PciDevice __user *)arg)->numa_socket, + &adapter->numa_socket, + sizeof(adapter->numa_socket)); + if (ret) return -1; + return 0; } netdev = next_net_device(netdev); } @@ -227,7 +235,7 @@ igb_net_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ret; - + fail_pciaddr: fail_bdnumber: unregister_netdev(netdev); fail_ioremap: diff --git a/dpdk-iface-kmod/dpdk_iface.h b/dpdk-iface-kmod/dpdk_iface.h index aed5185..a498685 100644 --- a/dpdk-iface-kmod/dpdk_iface.h +++ b/dpdk-iface-kmod/dpdk_iface.h @@ -14,6 +14,7 @@ struct net_adapter { unsigned char mac_addr[ETH_ALEN]; u16 bd_number; bool netdev_registered; + int numa_socket; struct net_device_stats nstats; struct PciAddress pa; }; diff --git a/dpdk-iface-kmod/dpdk_iface_common.h b/dpdk-iface-kmod/dpdk_iface_common.h index 5734f72..416a2f5 100644 --- a/dpdk-iface-kmod/dpdk_iface_common.h +++ b/dpdk-iface-kmod/dpdk_iface_common.h @@ -40,6 +40,7 @@ typedef struct PciDevice { char ifname[IFNAMSIZ]; }; PciAddress pa; + int numa_socket; } PciDevice __attribute__((aligned(__BITS_PER_LONG))); /*--------------------------------------------------------------------------*/ #endif /* __DPDK_IFACE_COMMON_H__ */ diff --git a/dpdk-iface-kmod/dpdk_iface_main.c b/dpdk-iface-kmod/dpdk_iface_main.c index add2db9..143cea4 100644 --- a/dpdk-iface-kmod/dpdk_iface_main.c +++ b/dpdk-iface-kmod/dpdk_iface_main.c @@ -272,6 +272,11 @@ main(int argc, char **argv) rte_eth_macaddr_get(ret, &di[ret].ports_eth_addr); /* check port capabailties/info */ rte_eth_dev_info_get(ret, &di[ret].dev_details); + /* get numa socket location for future socket-mem field */ + if ((di[ret].pd.numa_socket=rte_eth_dev_socket_id(ret)) == -1) { + fprintf(stderr, "Can't determine socket ID!\n"); + exit(EXIT_FAILURE); + } } fprintf(stderr, "\033[32m done. \033[0m \n"); diff --git a/mtcp/src/io_module.c b/mtcp/src/io_module.c index 8455cf1..7ea7aab 100644 --- a/mtcp/src/io_module.c +++ b/mtcp/src/io_module.c @@ -97,11 +97,14 @@ GetNumQueues() #endif /* !PSIO */ /*----------------------------------------------------------------------------*/ #ifndef DISABLE_DPDK -static void +/** + * returns max numa ID while probing for rte devices + */ +static int probe_all_rte_devices(char **argv, int *argc, char *dev_name_list) { PciDevice pd; - int fd; + int fd, numa_id = -1; static char end[] = ""; static const char delim[] = " \t"; static char *dev_tokenizer; @@ -139,6 +142,7 @@ probe_all_rte_devices(char **argv, int *argc, char *dev_name_list) pd.pa.domain, pd.pa.bus, pd.pa.device, pd.pa.function); *argc += 2; + if (pd.numa_socket > numa_id) numa_id = pd.numa_socket; loop_over: dev_token = strtok_r(NULL, delim, &saveptr); } @@ -147,6 +151,8 @@ probe_all_rte_devices(char **argv, int *argc, char *dev_name_list) argv[*argc] = end; close(fd); free(dev_tokenizer); + + return numa_id; } #endif /* !DISABLE_DPDK */ /*----------------------------------------------------------------------------*/ @@ -247,12 +253,13 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) #ifndef DISABLE_DPDK int cpu = CONFIG.num_cores; mpz_t _cpumask; - char cpumaskbuf[32]; - char mem_channels[8]; - char socket_mem[8]; - int ret; + char cpumaskbuf[32] = ""; + char mem_channels[8] = ""; + char socket_mem_str[32] = ""; + int i, ret, socket_mem; static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS]; + /* STEP 1: first determine CPU mask */ mpz_init(_cpumask); if (!mpz_cmp(_cpumask, CONFIG._cpumask)) { @@ -265,7 +272,8 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) gmp_sprintf(cpumaskbuf, "%ZX", CONFIG._cpumask); mpz_clear(_cpumask); - + + /* STEP 2: determine memory channels per socket */ /* get the mem channels per socket */ if (CONFIG.num_mem_ch == 0) { TRACE_ERROR("DPDK module requires # of memory channels " @@ -273,9 +281,10 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) exit(EXIT_FAILURE); } sprintf(mem_channels, "%d", CONFIG.num_mem_ch); - + + /* STEP 3: determine socket memory */ /* get socket memory threshold (in MB) */ - sprintf(socket_mem, "%ld", + socket_mem = RTE_ALIGN_CEIL((unsigned long)ceil((CONFIG.num_cores * (CONFIG.rcvbuf_size + CONFIG.sndbuf_size + @@ -284,10 +293,9 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) sizeof(struct tcp_send_vars) + sizeof(struct fragment_ctx)) * CONFIG.max_concurrency)/RTE_SOCKET_MEM_SHIFT), - RTE_CACHE_LINE_SIZE)); + RTE_CACHE_LINE_SIZE); - TRACE_DBG("socket_mem: %s\n", socket_mem); - /* initialize the rte env first, what a waste of implementation effort! */ + /* initialize the rte env, what a waste of implementation effort! */ int argc = 8; char *argv[RTE_ARGC_MAX] = {"", "-c", @@ -295,11 +303,20 @@ SetNetEnv(char *dev_name_list, char *port_stat_list) "-n", mem_channels, "--socket-mem", - socket_mem, + socket_mem_str, "--proc-type=auto" }; - probe_all_rte_devices(argv, &argc, dev_name_list); + ret = probe_all_rte_devices(argv, &argc, dev_name_list); + /* STEP 4: build up socket mem parameter */ + sprintf(socket_mem_str, "%d", socket_mem); + char *smsptr = socket_mem_str + strlen(socket_mem_str); + for (i = 1; i < ret + 1; i++) { + sprintf(smsptr, ",%d", socket_mem); + smsptr += strlen(smsptr); + } + TRACE_DBG("socket_mem: %s\n", socket_mem_str); + /* * re-set getopt extern variable optind. * this issue was a bitch to debug