mTCP, io_engine, and sample applications
parent
209e31cc00
commit
d9af90df66
|
@ -0,0 +1,43 @@
|
|||
# Build rules for the mTCP example applications.

CC = gcc
#CFLAGS = -Wall -g -DINFO -DDBGERR
CFLAGS = -DNDEBUG -O3 -DINFO -DDBGERR

# Every binary that `all` builds; `clean` removes exactly this list.
TARGET = epserver epwget eppipe

UTIL_FLD = ../../util
UTIL_INC = ${UTIL_FLD}/include

EPSERVER_OBJS = epserver.o ${UTIL_FLD}/http_parsing.o ${UTIL_FLD}/tdate_parse.o
EPWGET_OBJS = epwget.o ${UTIL_FLD}/rss.o ${UTIL_FLD}/http_parsing.o ${UTIL_FLD}/tdate_parse.o
# Object list for eprelay; no rule in this Makefile links it.
EPRELAY_OBJS = eprelay.o ${UTIL_FLD}/rss.o ${UTIL_FLD}/http_parsing.o \
	${UTIL_FLD}/tdate_parse.o ${UTIL_FLD}/ring_buffer.o
EPPIPE_OBJS = eppipe.o

MTCP_FLD = ../../mtcp/lib
MTCP_INC = ../../mtcp/include
MTCP_LIB = ${MTCP_FLD}/libmtcp.a

PS_FLD = ../../../io_engine/io_engine-2.0.38.2/lib
PS_INC = ../../../io_engine/io_engine-2.0.38.2/include

INC = -I./include/ -I${UTIL_INC} -I${MTCP_INC} -I${PS_INC}
LIBS = -lnuma -lmtcp -lps -lpthread -lrt
LIB = -L${PS_FLD} -L${MTCP_FLD}

# Neither target names a file, so never let a stray file called
# "all" or "clean" shadow it.
.PHONY: all clean

all: ${TARGET}

%.o: %.c
	${CC} -c ${CFLAGS} ${INC} -o $@ $<

epserver: ${EPSERVER_OBJS} ${MTCP_LIB}
	${CC} -o epserver ${EPSERVER_OBJS} ${LIB} ${LIBS}

epwget: ${EPWGET_OBJS} ${MTCP_LIB}
	${CC} -o epwget ${EPWGET_OBJS} ${LIB} ${LIBS}

eppipe: ${EPPIPE_OBJS} ${MTCP_LIB}
	${CC} -o eppipe ${EPPIPE_OBJS} ${LIB} ${LIBS}

clean:
	rm -f *~ *.o ${TARGET}
|
|
@ -0,0 +1,37 @@
|
|||
========================================================================
|
||||
USAGE OF EXAMPLE APPLICATIONS
|
||||
========================================================================
|
||||
|
||||
epserver: a simple mtcp-epoll-based web server
|
||||
usage: ./epserver www_home [-N #cores]
|
||||
ex) ./epserver /home/notav/www -N 8
|
||||
|
||||
options:
|
||||
www_home: the directory to serve. # max files are limited to
|
||||
MAX_FILES in epserver.c:36
|
||||
-N: number of CPU cores to use. default: all existing cores
|
||||
|
||||
========================================================================
|
||||
|
||||
epwget: simple mtcp-epoll-based http request generator
|
||||
usage: ./epwget URL #requests [-N #cores] [-c concurrency]
|
||||
ex) ./epwget 10.0.0.43/example.txt 10000000 -N 8 -c 8000
|
||||
|
||||
options:
|
||||
URL: url of the content to download.
|
||||
#requests: number of requests to generate
|
||||
-N: number of CPU cores to use. default: min(# cores, # requests)
|
||||
-c: number of maximum concurrent connections. default: 100
|
||||
|
||||
notes:
|
||||
- epwget can use a range of IP addresses when it needs more concurrent
|
||||
connections than a single IP address allows. You can set it in epwget.c:33.
|
||||
- epwget overrides some part of the settings in epwget.conf and uses
|
||||
mtcp_setconf() internally to apply the input arguments to the
|
||||
configuration.
|
||||
|
||||
========================================================================
|
||||
|
||||
Contact: mtcp at list.ndsl.kaist.edu
|
||||
April 2, 2014.
|
||||
EunYoung Jeong <notav at ndsl.kaist.edu>
|
|
@ -0,0 +1,656 @@
|
|||
#define _LARGEFILE64_SOURCE
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <fcntl.h>
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <pthread.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include <mtcp_api.h>
|
||||
#include <mtcp_epoll.h>
|
||||
|
||||
#include "http_parsing.h"
|
||||
#include "debug.h"
|
||||
|
||||
/* Size of each thread's server_vars table; socket ids must stay below it. */
#define MAX_FLOW_NUM (10000)

/* Same values as rcvbuf / sndbuf in epserver.conf. */
#define RCVBUF_SIZE (2*1024)
#define SNDBUF_SIZE (8*1024)

/* Capacity passed to mtcp_epoll_create() and mtcp_epoll_wait(). */
#define MAX_EVENTS (MAX_FLOW_NUM * 3)

/* Buffer sizes for request/response headers and for the parsed URL. */
#define HTTP_HEADER_LEN 1024
#define URL_LEN 128

/* Array bounds: server threads and cached files. */
#define MAX_CPUS 16
#define MAX_FILES 30

/* NOTE: both macros evaluate their arguments twice; pass side-effect-free
 * expressions only. */
#define MAX(a, b) ((a)>(b)?(a):(b))
#define MIN(a, b) ((a)<(b)?(a):(b))

#ifndef TRUE
#define TRUE (1)
#endif

#ifndef FALSE
#define FALSE (0)
#endif

#ifndef ERROR
#define ERROR (-1)
#endif

/* When TRUE, InitializeServerThread() pins to core + num_cores / 2. */
#define HT_SUPPORT FALSE
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* One file loaded into memory by main() and served from there. */
struct file_cache
{
	/* directory entry name */
	char name[128];
	/* "<www_main>/<name>"; compared against the requested path */
	char fullname[256];
	/* length of the file in bytes */
	uint64_t size;
	/* heap buffer holding the complete file content */
	char *file;
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct server_vars
|
||||
{
|
||||
char request[HTTP_HEADER_LEN];
|
||||
int recv_len;
|
||||
int request_len;
|
||||
long int total_read, total_sent;
|
||||
uint8_t done;
|
||||
uint8_t rspheader_sent;
|
||||
uint8_t keep_alive;
|
||||
|
||||
int fidx; // file cache index
|
||||
char fname[128]; // file name
|
||||
long int fsize; // file size
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct thread_context
|
||||
{
|
||||
mctx_t mctx;
|
||||
int ep;
|
||||
struct server_vars *svars;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int num_cores;
|
||||
static int core_limit;
|
||||
static pthread_t app_thread[MAX_CPUS];
|
||||
static int done[MAX_CPUS];
|
||||
/*----------------------------------------------------------------------------*/
|
||||
const char *www_main;
|
||||
static struct file_cache fcache[MAX_FILES];
|
||||
static int nfiles;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int finished;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Map an HTTP status code to its reason phrase.
 * Only 200 and 404 are known; any other code yields NULL.
 */
static char *
StatusCodeToString(int scode)
{
	if (scode == 200) {
		return "OK";
	}
	if (scode == 404) {
		return "Not Found";
	}

	return NULL;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
CleanServerVariable(struct server_vars *sv)
|
||||
{
|
||||
sv->recv_len = 0;
|
||||
sv->request_len = 0;
|
||||
sv->total_read = 0;
|
||||
sv->total_sent = 0;
|
||||
sv->done = 0;
|
||||
sv->rspheader_sent = 0;
|
||||
sv->keep_alive = 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
CloseConnection(struct thread_context *ctx, int sockid, struct server_vars *sv)
|
||||
{
|
||||
mtcp_epoll_ctl(ctx->mctx, ctx->ep, MTCP_EPOLL_CTL_DEL, sockid, NULL);
|
||||
mtcp_close(ctx->mctx, sockid);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
SendUntilAvailable(struct thread_context *ctx, int sockid, struct server_vars *sv)
|
||||
{
|
||||
int ret;
|
||||
int sent;
|
||||
int len;
|
||||
|
||||
if (sv->done || !sv->rspheader_sent) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
sent = 0;
|
||||
ret = 1;
|
||||
while (ret > 0) {
|
||||
len = MIN(SNDBUF_SIZE, sv->fsize - sv->total_sent);
|
||||
if (len <= 0) {
|
||||
break;
|
||||
}
|
||||
ret = mtcp_write(ctx->mctx, sockid,
|
||||
fcache[sv->fidx].file + sv->total_sent, len);
|
||||
if (ret < 0) {
|
||||
TRACE_APP("Connection closed with client.\n");
|
||||
break;
|
||||
}
|
||||
TRACE_APP("Socket %d: mtcp_write try: %d, ret: %d\n", sockid, len, ret);
|
||||
sent += ret;
|
||||
sv->total_sent += ret;
|
||||
}
|
||||
|
||||
if (sv->total_sent >= fcache[sv->fidx].size) {
|
||||
struct mtcp_epoll_event ev;
|
||||
sv->done = TRUE;
|
||||
finished++;
|
||||
|
||||
if (sv->keep_alive) {
|
||||
/* if keep-alive connection, wait for the incoming request */
|
||||
ev.events = MTCP_EPOLLIN;
|
||||
ev.data.sockid = sockid;
|
||||
mtcp_epoll_ctl(ctx->mctx, ctx->ep, MTCP_EPOLL_CTL_MOD, sockid, &ev);
|
||||
|
||||
CleanServerVariable(sv);
|
||||
} else {
|
||||
/* else, close connection */
|
||||
CloseConnection(ctx, sockid, sv);
|
||||
}
|
||||
}
|
||||
|
||||
return sent;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
HandleReadEvent(struct thread_context *ctx, int sockid, struct server_vars *sv)
|
||||
{
|
||||
struct mtcp_epoll_event ev;
|
||||
char buf[HTTP_HEADER_LEN];
|
||||
char url[URL_LEN];
|
||||
char response[HTTP_HEADER_LEN];
|
||||
int scode; // status code
|
||||
time_t t_now;
|
||||
char t_str[128];
|
||||
char keepalive_str[128];
|
||||
int rd;
|
||||
int i;
|
||||
int len;
|
||||
int sent;
|
||||
|
||||
/* HTTP request handling */
|
||||
rd = mtcp_read(ctx->mctx, sockid, buf, HTTP_HEADER_LEN);
|
||||
if (rd <= 0) {
|
||||
return rd;
|
||||
}
|
||||
memcpy(sv->request + sv->recv_len,
|
||||
(char *)buf, MIN(rd, HTTP_HEADER_LEN - sv->recv_len));
|
||||
sv->recv_len += rd;
|
||||
//sv->request[rd] = '\0';
|
||||
//fprintf(stderr, "HTTP Request: \n%s", request);
|
||||
sv->request_len = find_http_header(sv->request, sv->recv_len);
|
||||
if (sv->request_len <= 0) {
|
||||
TRACE_ERROR("Socket %d: Failed to parse HTTP request header.\n"
|
||||
"read bytes: %d, recv_len: %d, "
|
||||
"request_len: %d, strlen: %ld, request: \n%s\n",
|
||||
sockid, rd, sv->recv_len,
|
||||
sv->request_len, strlen(sv->request), sv->request);
|
||||
return rd;
|
||||
}
|
||||
|
||||
http_get_url(sv->request, sv->request_len, url, URL_LEN);
|
||||
TRACE_APP("Socket %d URL: %s\n", sockid, url);
|
||||
sprintf(sv->fname, "%s%s", www_main, url);
|
||||
TRACE_APP("Socket %d File name: %s\n", sockid, sv->fname);
|
||||
|
||||
sv->keep_alive = FALSE;
|
||||
if (http_header_str_val(sv->request, "Connection: ",
|
||||
strlen("Connection: "), keepalive_str, 128)) {
|
||||
if (strstr(keepalive_str, "Keep-Alive")) {
|
||||
sv->keep_alive = TRUE;
|
||||
} else if (strstr(keepalive_str, "Close")) {
|
||||
sv->keep_alive = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Find file in cache */
|
||||
scode = 404;
|
||||
for (i = 0; i < nfiles; i++) {
|
||||
if (strcmp(sv->fname, fcache[i].fullname) == 0) {
|
||||
sv->fsize = fcache[i].size;
|
||||
sv->fidx = i;
|
||||
scode = 200;
|
||||
break;
|
||||
}
|
||||
}
|
||||
TRACE_APP("Socket %d File size: %ld (%ldMB)\n",
|
||||
sockid, sv->fsize, sv->fsize / 1024 / 1024);
|
||||
|
||||
/* Response header handling */
|
||||
time(&t_now);
|
||||
strftime(t_str, 128, "%a, %d %b %Y %X GMT", gmtime(&t_now));
|
||||
if (sv->keep_alive)
|
||||
sprintf(keepalive_str, "Keep-Alive");
|
||||
else
|
||||
sprintf(keepalive_str, "Close");
|
||||
|
||||
sprintf(response, "HTTP/1.1 %d %s\r\n"
|
||||
"Date: %s\r\n"
|
||||
"Server: Webserver on Middlebox TCP (Ubuntu)\r\n"
|
||||
"Content-Length: %ld\r\n"
|
||||
"Connection: %s\r\n\r\n",
|
||||
scode, StatusCodeToString(scode), t_str, sv->fsize, keepalive_str);
|
||||
len = strlen(response);
|
||||
TRACE_APP("Socket %d HTTP Response: \n%s", sockid, response);
|
||||
sent = mtcp_write(ctx->mctx, sockid, response, len);
|
||||
TRACE_APP("Socket %d Sent response header: try: %d, sent: %d\n",
|
||||
sockid, len, sent);
|
||||
assert(sent == len);
|
||||
sv->rspheader_sent = TRUE;
|
||||
|
||||
ev.events = MTCP_EPOLLIN | MTCP_EPOLLOUT;
|
||||
ev.data.sockid = sockid;
|
||||
mtcp_epoll_ctl(ctx->mctx, ctx->ep, MTCP_EPOLL_CTL_MOD, sockid, &ev);
|
||||
|
||||
SendUntilAvailable(ctx, sockid, sv);
|
||||
|
||||
return rd;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
AcceptConnection(struct thread_context *ctx, int listener)
|
||||
{
|
||||
mctx_t mctx = ctx->mctx;
|
||||
struct server_vars *sv;
|
||||
struct mtcp_epoll_event ev;
|
||||
int c;
|
||||
|
||||
c = mtcp_accept(mctx, listener, NULL, NULL);
|
||||
|
||||
if (c >= 0) {
|
||||
if (c >= MAX_FLOW_NUM) {
|
||||
TRACE_ERROR("Invalid socket id %d.\n", c);
|
||||
return -1;
|
||||
}
|
||||
|
||||
sv = &ctx->svars[c];
|
||||
CleanServerVariable(sv);
|
||||
TRACE_APP("New connection %d accepted.\n", c);
|
||||
ev.events = MTCP_EPOLLIN;
|
||||
ev.data.sockid = c;
|
||||
mtcp_setsock_nonblock(ctx->mctx, c);
|
||||
mtcp_epoll_ctl(mctx, ctx->ep, MTCP_EPOLL_CTL_ADD, c, &ev);
|
||||
TRACE_APP("Socket %d registered.\n", c);
|
||||
|
||||
} else {
|
||||
if (errno != EAGAIN) {
|
||||
TRACE_ERROR("mtcp_accept() error %s\n",
|
||||
strerror(errno));
|
||||
}
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct thread_context *
|
||||
InitializeServerThread(int core)
|
||||
{
|
||||
struct thread_context *ctx;
|
||||
|
||||
/* affinitize application thread to a CPU core */
|
||||
#if HT_SUPPORT
|
||||
mtcp_core_affinitize(core + (num_cores / 2));
|
||||
#else
|
||||
mtcp_core_affinitize(core);
|
||||
#endif /* HT_SUPPORT */
|
||||
|
||||
ctx = (struct thread_context *)calloc(1, sizeof(struct thread_context));
|
||||
if (!ctx) {
|
||||
TRACE_ERROR("Failed to create thread context!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* create mtcp context: this will spawn an mtcp thread */
|
||||
ctx->mctx = mtcp_create_context(core);
|
||||
if (!ctx->mctx) {
|
||||
TRACE_ERROR("Failed to create mtcp context!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* create epoll descriptor */
|
||||
ctx->ep = mtcp_epoll_create(ctx->mctx, MAX_EVENTS);
|
||||
if (ctx->ep < 0) {
|
||||
TRACE_ERROR("Failed to create epoll descriptor!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* allocate memory for server variables */
|
||||
ctx->svars = (struct server_vars *)
|
||||
calloc(MAX_FLOW_NUM, sizeof(struct server_vars));
|
||||
if (!ctx->svars) {
|
||||
TRACE_ERROR("Failed to create server_vars struct!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return ctx;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
CreateListeningSocket(struct thread_context *ctx)
|
||||
{
|
||||
int listener;
|
||||
struct mtcp_epoll_event ev;
|
||||
struct sockaddr_in saddr;
|
||||
int ret;
|
||||
|
||||
/* create socket and set it as nonblocking */
|
||||
listener = mtcp_socket(ctx->mctx, AF_INET, SOCK_STREAM, 0);
|
||||
if (listener < 0) {
|
||||
TRACE_ERROR("Failed to create listening socket!\n");
|
||||
return -1;
|
||||
}
|
||||
ret = mtcp_setsock_nonblock(ctx->mctx, listener);
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("Failed to set socket in nonblocking mode.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* bind to port 80 */
|
||||
saddr.sin_family = AF_INET;
|
||||
saddr.sin_addr.s_addr = INADDR_ANY;
|
||||
saddr.sin_port = htons(80);
|
||||
ret = mtcp_bind(ctx->mctx, listener,
|
||||
(struct sockaddr *)&saddr, sizeof(struct sockaddr_in));
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("Failed to bind to the listening socket!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* listen (backlog: 4K) */
|
||||
ret = mtcp_listen(ctx->mctx, listener, 4096);
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("mtcp_listen() failed!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* wait for incoming accept events */
|
||||
ev.events = MTCP_EPOLLIN;
|
||||
ev.data.sockid = listener;
|
||||
mtcp_epoll_ctl(ctx->mctx, ctx->ep, MTCP_EPOLL_CTL_ADD, listener, &ev);
|
||||
|
||||
return listener;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void *
|
||||
RunServerThread(void *arg)
|
||||
{
|
||||
int core = *(int *)arg;
|
||||
struct thread_context *ctx;
|
||||
mctx_t mctx;
|
||||
int listener;
|
||||
int ep;
|
||||
struct mtcp_epoll_event *events;
|
||||
int nevents;
|
||||
int i, ret;
|
||||
int do_accept;
|
||||
|
||||
/* initialization */
|
||||
ctx = InitializeServerThread(core);
|
||||
if (!ctx) {
|
||||
TRACE_ERROR("Failed to initialize server thread.\n");
|
||||
exit(-1);
|
||||
}
|
||||
mctx = ctx->mctx;
|
||||
ep = ctx->ep;
|
||||
|
||||
events = (struct mtcp_epoll_event *)
|
||||
calloc(MAX_EVENTS, sizeof(struct mtcp_epoll_event));
|
||||
if (!events) {
|
||||
TRACE_ERROR("Failed to create event struct!\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
listener = CreateListeningSocket(ctx);
|
||||
if (listener < 0) {
|
||||
TRACE_ERROR("Failed to create listening socket.\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
while (!done[core]) {
|
||||
nevents = mtcp_epoll_wait(mctx, ep, events, MAX_EVENTS, -1);
|
||||
if (nevents < 0) {
|
||||
if (errno != EINTR)
|
||||
perror("mtcp_epoll_wait");
|
||||
break;
|
||||
}
|
||||
|
||||
do_accept = FALSE;
|
||||
for (i = 0; i < nevents; i++) {
|
||||
|
||||
if (events[i].data.sockid == listener) {
|
||||
/* if the event is for the listener, accept connection */
|
||||
do_accept = TRUE;
|
||||
|
||||
} else if (events[i].events & MTCP_EPOLLERR) {
|
||||
int err;
|
||||
socklen_t len = sizeof(err);
|
||||
|
||||
/* error on the connection */
|
||||
TRACE_APP("[CPU %d] Error on socket %d\n",
|
||||
core, events[i].data.sockid);
|
||||
if (mtcp_getsockopt(mctx, events[i].data.sockid,
|
||||
SOL_SOCKET, SO_ERROR, (void *)&err, &len) == 0) {
|
||||
if (err != ETIMEDOUT) {
|
||||
fprintf(stderr, "Error on socket %d: %s\n",
|
||||
events[i].data.sockid, strerror(err));
|
||||
}
|
||||
} else {
|
||||
perror("mtcp_getsockopt");
|
||||
}
|
||||
CloseConnection(ctx, events[i].data.sockid,
|
||||
&ctx->svars[events[i].data.sockid]);
|
||||
|
||||
} else if (events[i].events & MTCP_EPOLLIN) {
|
||||
ret = HandleReadEvent(ctx, events[i].data.sockid,
|
||||
&ctx->svars[events[i].data.sockid]);
|
||||
|
||||
if (ret == 0) {
|
||||
/* connection closed by remote host */
|
||||
CloseConnection(ctx, events[i].data.sockid,
|
||||
&ctx->svars[events[i].data.sockid]);
|
||||
} else if (ret < 0) {
|
||||
/* if not EAGAIN, it's an error */
|
||||
if (errno != EAGAIN) {
|
||||
CloseConnection(ctx, events[i].data.sockid,
|
||||
&ctx->svars[events[i].data.sockid]);
|
||||
}
|
||||
}
|
||||
|
||||
} else if (events[i].events & MTCP_EPOLLOUT) {
|
||||
struct server_vars *sv = &ctx->svars[events[i].data.sockid];
|
||||
if (sv->rspheader_sent) {
|
||||
SendUntilAvailable(ctx, events[i].data.sockid, sv);
|
||||
} else {
|
||||
TRACE_APP("Socket %d: Response header not sent yet.\n",
|
||||
events[i].data.sockid);
|
||||
}
|
||||
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
/* if do_accept flag is set, accept connections */
|
||||
if (do_accept) {
|
||||
while (1) {
|
||||
ret = AcceptConnection(ctx, listener);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* destroy mtcp context: this will kill the mtcp thread */
|
||||
mtcp_destroy_context(mctx);
|
||||
pthread_exit(NULL);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
SignalHandler(int signum)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
if (app_thread[i] == pthread_self()) {
|
||||
//TRACE_INFO("Server thread %d got SIGINT\n", i);
|
||||
done[i] = TRUE;
|
||||
} else {
|
||||
if (!done[i]) {
|
||||
pthread_kill(app_thread[i], signum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
DIR *dir;
|
||||
struct dirent *ent;
|
||||
int fd;
|
||||
int ret;
|
||||
uint64_t total_read;
|
||||
|
||||
int cores[MAX_CPUS];
|
||||
int i;
|
||||
|
||||
num_cores = GetNumCPUs();
|
||||
core_limit = num_cores;
|
||||
|
||||
if (argc < 2) {
|
||||
TRACE_ERROR("$%s directory_to_service\n", argv[0]);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* open the directory to serve */
|
||||
www_main = argv[1];
|
||||
dir = opendir(www_main);
|
||||
if (!dir) {
|
||||
TRACE_ERROR("Failed to open %s.\n", www_main);
|
||||
perror("opendir");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (i = 0; i < argc - 1; i++) {
|
||||
if (strcmp(argv[i], "-N") == 0) {
|
||||
core_limit = atoi(argv[i + 1]);
|
||||
if (core_limit > num_cores) {
|
||||
TRACE_CONFIG("CPU limit should be smaller than the "
|
||||
"number of CPUS: %d\n", num_cores);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nfiles = 0;
|
||||
while ((ent = readdir(dir)) != NULL) {
|
||||
if (strcmp(ent->d_name, ".") == 0)
|
||||
continue;
|
||||
else if (strcmp(ent->d_name, "..") == 0)
|
||||
continue;
|
||||
|
||||
strcpy(fcache[nfiles].name, ent->d_name);
|
||||
sprintf(fcache[nfiles].fullname, "%s/%s", www_main, ent->d_name);
|
||||
fd = open(fcache[nfiles].fullname, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
perror("open");
|
||||
continue;
|
||||
} else {
|
||||
fcache[nfiles].size = lseek64(fd, 0, SEEK_END);
|
||||
lseek64(fd, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
fcache[nfiles].file = (char *)malloc(fcache[nfiles].size);
|
||||
if (!fcache[nfiles].file) {
|
||||
TRACE_ERROR("Failed to allocate memory for file %s\n",
|
||||
fcache[nfiles].name);
|
||||
perror("malloc");
|
||||
continue;
|
||||
}
|
||||
|
||||
TRACE_INFO("Reading %s (%lu bytes)\n",
|
||||
fcache[nfiles].name, fcache[nfiles].size);
|
||||
total_read = 0;
|
||||
while (1) {
|
||||
ret = read(fd, fcache[nfiles].file + total_read,
|
||||
fcache[nfiles].size - total_read);
|
||||
if (ret < 0) {
|
||||
break;
|
||||
} else if (ret == 0) {
|
||||
break;
|
||||
}
|
||||
total_read += ret;
|
||||
}
|
||||
if (total_read < fcache[nfiles].size) {
|
||||
free(fcache[nfiles].file);
|
||||
continue;
|
||||
}
|
||||
close(fd);
|
||||
nfiles++;
|
||||
|
||||
if (nfiles >= MAX_FILES)
|
||||
break;
|
||||
}
|
||||
|
||||
finished = 0;
|
||||
|
||||
/* initialize mtcp */
|
||||
ret = mtcp_init("epserver.conf");
|
||||
if (ret) {
|
||||
TRACE_ERROR("Failed to initialize mtcp\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
/* register signal handler to mtcp */
|
||||
mtcp_register_signal(SIGINT, SignalHandler);
|
||||
|
||||
TRACE_INFO("Application initialization finished.\n");
|
||||
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
cores[i] = i;
|
||||
done[i] = FALSE;
|
||||
|
||||
if (pthread_create(&app_thread[i],
|
||||
NULL, RunServerThread, (void *)&cores[i])) {
|
||||
perror("pthread_create");
|
||||
TRACE_ERROR("Failed to create server thread.\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
pthread_join(app_thread[i], NULL);
|
||||
}
|
||||
|
||||
mtcp_destroy();
|
||||
closedir(dir);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
############### mtcp configuration file ###############
|
||||
|
||||
# maximum concurrency per core
|
||||
max_concurrency = 10000
|
||||
|
||||
# maximum number of socket buffers per core
|
||||
# set this to a small value if there are many idle connections
|
||||
max_num_buffers = 10000
|
||||
|
||||
# receive buffer size of sockets
|
||||
rcvbuf = 2048
|
||||
|
||||
# send buffer size of sockets
|
||||
sndbuf = 8192
|
||||
|
||||
# tcp timeout seconds
|
||||
# (tcp_timeout = -1 can disable the timeout check)
|
||||
tcp_timeout = 30
|
||||
|
||||
# tcp timewait seconds
|
||||
tcp_timewait = 0
|
||||
|
||||
# interface to print stats
|
||||
stat_print = xge0
|
||||
#stat_print = xge1
|
|
@ -0,0 +1,812 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <stdint.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <pthread.h>
|
||||
#include <signal.h>
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/queue.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <mtcp_api.h>
|
||||
#include <mtcp_epoll.h>
|
||||
#include "rss.h"
|
||||
#include "http_parsing.h"
|
||||
#include "debug.h"
|
||||
|
||||
/* Upper bound for the per-core arrays. */
#define MAX_CPUS 16

/* Buffer sizes: request path, output file name, HTTP header. */
#define MAX_URL_LEN 128
#define MAX_FILE_LEN 128
#define HTTP_HEADER_LEN 1024

#define IP_RANGE 1
#define MAX_IP_STR_LEN 16

/* Size of the stack buffer used for each mtcp_read(). */
#define BUF_SIZE (8*1024)

#define CALC_MD5SUM FALSE

/*
 * Convert a struct timeval (an lvalue expression, not a pointer) to
 * milliseconds / microseconds. The argument is parenthesized so that
 * expressions such as *ptr or arr[i] expand correctly.
 */
#define TIMEVAL_TO_MSEC(t)		(((t).tv_sec * 1000) + ((t).tv_usec / 1000))
#define TIMEVAL_TO_USEC(t)		(((t).tv_sec * 1000000) + ((t).tv_usec))
/* True when timestamp a is later than b, judged by the signed difference. */
#define TS_GT(a,b)				((int64_t)((a)-(b)) > 0)

/* NOTE: both macros evaluate their arguments twice. */
#define MAX(a, b) ((a)>(b)?(a):(b))
#define MIN(a, b) ((a)<(b)?(a):(b))

#ifndef TRUE
#define TRUE (1)
#endif

#ifndef FALSE
#define FALSE (0)
#endif

#ifndef ERROR
#define ERROR (-1)
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static pthread_t app_thread[MAX_CPUS];
|
||||
static mctx_t g_mctx[MAX_CPUS];
|
||||
static int done[MAX_CPUS];
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int num_cores;
|
||||
static int core_limit;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int fio = FALSE;
|
||||
static char outfile[MAX_FILE_LEN + 1];
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static char host[MAX_IP_STR_LEN + 1];
|
||||
static char url[MAX_URL_LEN + 1];
|
||||
static in_addr_t daddr;
|
||||
static in_port_t dport;
|
||||
static in_addr_t saddr;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int total_flows;
|
||||
static int flows[MAX_CPUS];
|
||||
static int flowcnt = 0;
|
||||
static int concurrency;
|
||||
static int max_fds;
|
||||
static int response_size = 0;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Counters kept per thread while the load generator runs. */
struct wget_stat
{
	uint64_t waits;
	uint64_t events;
	/* connections started by CreateConnection() */
	uint64_t connects;
	/* bytes returned by mtcp_read() */
	uint64_t reads;
	/* bytes accepted by mtcp_write() */
	uint64_t writes;
	/* downloads finished (DownloadComplete()) */
	uint64_t completes;

	uint64_t errors;
	uint64_t timedout;

	/* response times in microseconds: running sum and maximum */
	uint64_t sum_resp_time;
	uint64_t max_resp_time;
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct thread_context
|
||||
{
|
||||
int core;
|
||||
|
||||
mctx_t mctx;
|
||||
int ep;
|
||||
struct wget_vars *wvars;
|
||||
|
||||
int target;
|
||||
int started;
|
||||
int errors;
|
||||
int incompletes;
|
||||
int done;
|
||||
int pending;
|
||||
|
||||
struct wget_stat stat;
|
||||
};
|
||||
typedef struct thread_context* thread_context_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct wget_vars
|
||||
{
|
||||
int request_sent;
|
||||
|
||||
char response[HTTP_HEADER_LEN];
|
||||
int resp_len;
|
||||
int headerset;
|
||||
uint32_t header_len;
|
||||
uint64_t file_len;
|
||||
uint64_t recv;
|
||||
uint64_t write;
|
||||
|
||||
struct timeval t_start;
|
||||
struct timeval t_end;
|
||||
|
||||
int fd;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Per-thread context and statistics pointers.
 * NOTE(review): presumably indexed by core id like g_mctx; confirm against
 * the code that fills them. */
static struct thread_context *g_ctx[MAX_CPUS];
static struct wget_stat *g_stat[MAX_CPUS];
|
||||
/*----------------------------------------------------------------------------*/
|
||||
thread_context_t
|
||||
CreateContext(int core)
|
||||
{
|
||||
thread_context_t ctx;
|
||||
|
||||
ctx = (thread_context_t)calloc(1, sizeof(struct thread_context));
|
||||
if (!ctx) {
|
||||
perror("malloc");
|
||||
TRACE_ERROR("Failed to allocate memory for thread context.\n");
|
||||
return NULL;
|
||||
}
|
||||
ctx->core = core;
|
||||
|
||||
ctx->mctx = mtcp_create_context(core);
|
||||
if (!ctx->mctx) {
|
||||
TRACE_ERROR("Failed to create mtcp context.\n");
|
||||
return NULL;
|
||||
}
|
||||
g_mctx[core] = ctx->mctx;
|
||||
|
||||
return ctx;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyContext(thread_context_t ctx)
|
||||
{
|
||||
mtcp_destroy_context(ctx->mctx);
|
||||
free(ctx);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
CreateConnection(thread_context_t ctx)
|
||||
{
|
||||
mctx_t mctx = ctx->mctx;
|
||||
struct mtcp_epoll_event ev;
|
||||
struct sockaddr_in addr;
|
||||
int sockid;
|
||||
int ret;
|
||||
|
||||
sockid = mtcp_socket(mctx, AF_INET, SOCK_STREAM, 0);
|
||||
if (sockid < 0) {
|
||||
TRACE_INFO("Failed to create socket!\n");
|
||||
return -1;
|
||||
}
|
||||
memset(&ctx->wvars[sockid], 0, sizeof(struct wget_vars));
|
||||
ret = mtcp_setsock_nonblock(mctx, sockid);
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("Failed to set socket in nonblocking mode.\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
addr.sin_family = AF_INET;
|
||||
addr.sin_addr.s_addr = daddr;
|
||||
addr.sin_port = dport;
|
||||
|
||||
ret = mtcp_connect(mctx, sockid,
|
||||
(struct sockaddr *)&addr, sizeof(struct sockaddr_in));
|
||||
if (ret < 0) {
|
||||
if (errno != EINPROGRESS) {
|
||||
perror("mtcp_connect");
|
||||
mtcp_close(mctx, sockid);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
ctx->started++;
|
||||
ctx->pending++;
|
||||
ctx->stat.connects++;
|
||||
|
||||
ev.events = MTCP_EPOLLOUT;
|
||||
ev.data.sockid = sockid;
|
||||
mtcp_epoll_ctl(mctx, ctx->ep, MTCP_EPOLL_CTL_ADD, sockid, &ev);
|
||||
|
||||
return sockid;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
CloseConnection(thread_context_t ctx, int sockid)
|
||||
{
|
||||
mtcp_epoll_ctl(ctx->mctx, ctx->ep, MTCP_EPOLL_CTL_DEL, sockid, NULL);
|
||||
mtcp_close(ctx->mctx, sockid);
|
||||
ctx->pending--;
|
||||
ctx->done++;
|
||||
assert(ctx->pending >= 0);
|
||||
while (ctx->pending < concurrency && ctx->started < ctx->target) {
|
||||
if (CreateConnection(ctx) < 0) {
|
||||
done[ctx->core] = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline int
|
||||
SendHTTPRequest(thread_context_t ctx, int sockid, struct wget_vars *wv)
|
||||
{
|
||||
char request[HTTP_HEADER_LEN];
|
||||
struct mtcp_epoll_event ev;
|
||||
int wr;
|
||||
int len;
|
||||
|
||||
wv->headerset = FALSE;
|
||||
wv->recv = 0;
|
||||
wv->header_len = wv->file_len = 0;
|
||||
|
||||
snprintf(request, HTTP_HEADER_LEN, "GET %s HTTP/1.0\r\n"
|
||||
"User-Agent: Wget/1.12 (linux-gnu)\r\n"
|
||||
"Accept: */*\r\n"
|
||||
"Host: %s\r\n"
|
||||
// "Connection: Keep-Alive\r\n\r\n",
|
||||
"Connection: Close\r\n\r\n",
|
||||
url, host);
|
||||
len = strlen(request);
|
||||
|
||||
wr = mtcp_write(ctx->mctx, sockid, request, len);
|
||||
if (wr < len) {
|
||||
TRACE_ERROR("Socket %d: Sending HTTP request failed. "
|
||||
"try: %d, sent: %d\n", sockid, len, wr);
|
||||
}
|
||||
ctx->stat.writes += wr;
|
||||
TRACE_APP("Socket %d HTTP Request of %d bytes. sent.\n", sockid, wr);
|
||||
wv->request_sent = TRUE;
|
||||
|
||||
ev.events = MTCP_EPOLLIN;
|
||||
ev.data.sockid = sockid;
|
||||
mtcp_epoll_ctl(ctx->mctx, ctx->ep, MTCP_EPOLL_CTL_MOD, sockid, &ev);
|
||||
|
||||
gettimeofday(&wv->t_start, NULL);
|
||||
|
||||
char fname[MAX_FILE_LEN + 1];
|
||||
if (fio) {
|
||||
snprintf(fname, MAX_FILE_LEN, "%s.%d", outfile, flowcnt++);
|
||||
wv->fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
||||
if (wv->fd < 0) {
|
||||
TRACE_APP("Failed to open file descriptor for %s\n", fname);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline int
|
||||
DownloadComplete(thread_context_t ctx, int sockid, struct wget_vars *wv)
|
||||
{
|
||||
#ifdef APP
|
||||
mctx_t mctx = ctx->mctx;
|
||||
#endif
|
||||
uint64_t tdiff;
|
||||
|
||||
TRACE_APP("Socket %d File download complete!\n", sockid);
|
||||
gettimeofday(&wv->t_end, NULL);
|
||||
CloseConnection(ctx, sockid);
|
||||
ctx->stat.completes++;
|
||||
if (response_size == 0) {
|
||||
response_size = wv->recv;
|
||||
fprintf(stderr, "Response size set to %d\n", response_size);
|
||||
} else {
|
||||
if (wv->recv != response_size) {
|
||||
fprintf(stderr, "Response size mismatch! mine: %ld, theirs: %d\n",
|
||||
wv->recv, response_size);
|
||||
}
|
||||
}
|
||||
tdiff = (wv->t_end.tv_sec - wv->t_start.tv_sec) * 1000000 +
|
||||
(wv->t_end.tv_usec - wv->t_start.tv_usec);
|
||||
TRACE_APP("Socket %d Total received bytes: %lu (%luMB)\n",
|
||||
sockid, wv->recv, wv->recv / 1000000);
|
||||
TRACE_APP("Socket %d Total spent time: %lu us\n", sockid, tdiff);
|
||||
if (tdiff > 0) {
|
||||
TRACE_APP("Socket %d Average bandwidth: %lf[MB/s]\n",
|
||||
sockid, (double)wv->recv / tdiff);
|
||||
}
|
||||
ctx->stat.sum_resp_time += tdiff;
|
||||
if (tdiff > ctx->stat.max_resp_time)
|
||||
ctx->stat.max_resp_time = tdiff;
|
||||
|
||||
if (fio && wv->fd > 0)
|
||||
close(wv->fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline int
|
||||
HandleReadEvent(thread_context_t ctx, int sockid, struct wget_vars *wv)
|
||||
{
|
||||
mctx_t mctx = ctx->mctx;
|
||||
char buf[BUF_SIZE];
|
||||
char *pbuf;
|
||||
int rd, copy_len;
|
||||
|
||||
rd = 1;
|
||||
while (rd > 0) {
|
||||
rd = mtcp_read(mctx, sockid, buf, BUF_SIZE);
|
||||
if (rd <= 0)
|
||||
break;
|
||||
ctx->stat.reads += rd;
|
||||
|
||||
TRACE_APP("Socket %d: mtcp_read ret: %d, total_recv: %lu, "
|
||||
"header_set: %d, header_len: %u, file_len: %lu\n",
|
||||
sockid, rd, wv->recv + rd,
|
||||
wv->headerset, wv->header_len, wv->file_len);
|
||||
|
||||
pbuf = buf;
|
||||
if (!wv->headerset) {
|
||||
copy_len = MIN(rd, HTTP_HEADER_LEN - wv->resp_len);
|
||||
memcpy(wv->response + wv->resp_len, buf, copy_len);
|
||||
wv->resp_len += copy_len;
|
||||
wv->header_len = find_http_header(wv->response, wv->resp_len);
|
||||
if (wv->header_len > 0) {
|
||||
wv->response[wv->header_len] = '\0';
|
||||
wv->file_len = http_header_long_val(wv->response,
|
||||
CONTENT_LENGTH_HDR, sizeof(CONTENT_LENGTH_HDR) - 1);
|
||||
TRACE_APP("Socket %d Parsed response header. "
|
||||
"Header length: %u, File length: %lu (%luMB)\n",
|
||||
sockid, wv->header_len,
|
||||
wv->file_len, wv->file_len / 1024 / 1024);
|
||||
wv->headerset = TRUE;
|
||||
wv->recv += (rd - (wv->resp_len - wv->header_len));
|
||||
rd = (wv->resp_len - wv->header_len);
|
||||
|
||||
pbuf += (rd - (wv->resp_len - wv->header_len));
|
||||
//printf("Successfully parse header.\n");
|
||||
//fflush(stdout);
|
||||
|
||||
} else {
|
||||
/* failed to parse response header */
|
||||
#if 0
|
||||
printf("[CPU %d] Socket %d Failed to parse response header."
|
||||
" Data: \n%s\n", ctx->core, sockid, wv->response);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
wv->recv += rd;
|
||||
rd = 0;
|
||||
ctx->stat.errors++;
|
||||
ctx->errors++;
|
||||
CloseConnection(ctx, sockid);
|
||||
return 0;
|
||||
}
|
||||
//pbuf += wv->header_len;
|
||||
//wv->recv += wv->header_len;
|
||||
//rd -= wv->header_len;
|
||||
}
|
||||
wv->recv += rd;
|
||||
|
||||
if (fio && wv->fd > 0) {
|
||||
int wr = 0;
|
||||
while (wr < rd) {
|
||||
int _wr = write(wv->fd, pbuf + wr, rd - wr);
|
||||
assert (_wr == rd - wr);
|
||||
if (_wr < 0) {
|
||||
perror("write");
|
||||
TRACE_ERROR("Failed to write.\n");
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
wr += _wr;
|
||||
wv->write += _wr;
|
||||
}
|
||||
}
|
||||
|
||||
if (wv->header_len && (wv->recv >= wv->header_len + wv->file_len)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (rd > 0) {
|
||||
if (wv->header_len && (wv->recv >= wv->header_len + wv->file_len)) {
|
||||
TRACE_APP("Socket %d Done Write: "
|
||||
"header: %u file: %lu recv: %lu write: %lu\n",
|
||||
sockid, wv->header_len, wv->file_len,
|
||||
wv->recv - wv->header_len, wv->write);
|
||||
DownloadComplete(ctx, sockid, wv);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} else if (rd == 0) {
|
||||
/* connection closed by remote host */
|
||||
TRACE_DBG("Socket %d connection closed with server.\n", sockid);
|
||||
|
||||
if (wv->header_len && (wv->recv >= wv->header_len + wv->file_len)) {
|
||||
DownloadComplete(ctx, sockid, wv);
|
||||
} else {
|
||||
ctx->stat.errors++;
|
||||
ctx->incompletes++;
|
||||
CloseConnection(ctx, sockid);
|
||||
}
|
||||
|
||||
} else if (rd < 0) {
|
||||
if (errno != EAGAIN) {
|
||||
TRACE_DBG("Socket %d: mtcp_read() error %s\n",
|
||||
sockid, strerror(errno));
|
||||
ctx->stat.errors++;
|
||||
ctx->errors++;
|
||||
CloseConnection(ctx, sockid);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#if 0
/*
 * Disabled legacy reporter: prints the per-core transaction counters in
 * g_trans[] (four cores per output line) followed by their sum, and
 * resets each counter to zero.
 *
 * The line is assembled with bounded snprintf() calls and a running
 * offset, so a large core_limit truncates the text instead of writing
 * past the end of the buffer.
 */
void
PrintStats()
{
#define LINE_LEN 2048
	char line[LINE_LEN];
	size_t used = 0;
	int total_trans = 0;
	int n;
	int i;

	line[0] = '\0';
	for (i = 0; i < core_limit; i++) {
		n = snprintf(line + used, LINE_LEN - used,
				"[CPU%2d] %7d trans/s ", i, g_trans[i]);
		if (n > 0) {
			/* snprintf reports the untruncated length; clamp to capacity */
			used += (size_t)n;
			if (used > LINE_LEN - 1)
				used = LINE_LEN - 1;
		}
		total_trans += g_trans[i];
		g_trans[i] = 0;
		if (i % 4 == 3) {
			n = snprintf(line + used, LINE_LEN - used, "\n");
			if (n > 0) {
				used += (size_t)n;
				if (used > LINE_LEN - 1)
					used = LINE_LEN - 1;
			}
		}
	}
	fprintf(stderr, "%s", line);
	fprintf(stderr, "[ ALL ] %7d trans/s\n", total_trans);

	fflush(stderr);
}
#endif
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static void
|
||||
PrintStats()
|
||||
{
|
||||
struct wget_stat total = {0};
|
||||
struct wget_stat *st;
|
||||
uint64_t avg_resp_time;
|
||||
uint64_t total_resp_time = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
st = g_stat[i];
|
||||
avg_resp_time = st->completes? st->sum_resp_time / st->completes : 0;
|
||||
#if 0
|
||||
fprintf(stderr, "[CPU%2d] epoll_wait: %5lu, event: %7lu, "
|
||||
"connect: %7lu, read: %4lu MB, write: %4lu MB, "
|
||||
"completes: %7lu (resp_time avg: %4lu, max: %6lu us), "
|
||||
"errors: %2lu (timedout: %2lu)\n",
|
||||
i, st->waits, st->events, st->connects,
|
||||
st->reads / 1000 / 1000, st->writes / 1000 / 1000,
|
||||
st->completes, avg_resp_time, st->max_resp_time,
|
||||
st->errors, st->timedout);
|
||||
#endif
|
||||
|
||||
total.waits += st->waits;
|
||||
total.events += st->events;
|
||||
total.connects += st->connects;
|
||||
total.reads += st->reads;
|
||||
total.writes += st->writes;
|
||||
total.completes += st->completes;
|
||||
total_resp_time += avg_resp_time;
|
||||
if (st->max_resp_time > total.max_resp_time)
|
||||
total.max_resp_time = st->max_resp_time;
|
||||
total.errors += st->errors;
|
||||
total.timedout += st->timedout;
|
||||
|
||||
memset(st, 0, sizeof(struct wget_stat));
|
||||
}
|
||||
fprintf(stderr, "[ ALL ] connect: %7lu, read: %4lu MB, write: %4lu MB, "
|
||||
"completes: %7lu (resp_time avg: %4lu, max: %6lu us)\n",
|
||||
total.connects,
|
||||
total.reads / 1000 / 1000, total.writes / 1000 / 1000,
|
||||
total.completes, total_resp_time / core_limit, total.max_resp_time);
|
||||
#if 0
|
||||
fprintf(stderr, "[ ALL ] epoll_wait: %5lu, event: %7lu, "
|
||||
"connect: %7lu, read: %4lu MB, write: %4lu MB, "
|
||||
"completes: %7lu (resp_time avg: %4lu, max: %6lu us), "
|
||||
"errors: %2lu (timedout: %2lu)\n",
|
||||
total.waits, total.events, total.connects,
|
||||
total.reads / 1000 / 1000, total.writes / 1000 / 1000,
|
||||
total.completes, total_resp_time / core_limit, total.max_resp_time,
|
||||
total.errors, total.timedout);
|
||||
#endif
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void *
|
||||
RunWgetMain(void *arg)
|
||||
{
|
||||
thread_context_t ctx;
|
||||
mctx_t mctx;
|
||||
int core = *(int *)arg;
|
||||
struct in_addr daddr_in;
|
||||
int n, maxevents;
|
||||
int ep;
|
||||
struct mtcp_epoll_event *events;
|
||||
int nevents;
|
||||
struct wget_vars *wvars;
|
||||
int i;
|
||||
|
||||
struct timeval cur_tv, prev_tv;
|
||||
uint64_t cur_ts, prev_ts;
|
||||
|
||||
mtcp_core_affinitize(core);
|
||||
|
||||
ctx = CreateContext(core);
|
||||
if (!ctx) {
|
||||
return NULL;
|
||||
}
|
||||
mctx = ctx->mctx;
|
||||
g_ctx[core] = ctx;
|
||||
g_stat[core] = &ctx->stat;
|
||||
srand(time(NULL));
|
||||
|
||||
mtcp_init_rss(mctx, saddr, IP_RANGE, daddr, dport);
|
||||
|
||||
n = flows[core];
|
||||
if (n == 0) {
|
||||
TRACE_DBG("Application thread %d finished.\n", core);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
ctx->target = n;
|
||||
|
||||
daddr_in.s_addr = daddr;
|
||||
fprintf(stderr, "Thread %d handles %d flows. connecting to %s:%u\n",
|
||||
core, n, inet_ntoa(daddr_in), ntohs(dport));
|
||||
|
||||
/* Initialization */
|
||||
maxevents = max_fds * 3;
|
||||
ep = mtcp_epoll_create(mctx, maxevents);
|
||||
if (ep < 0) {
|
||||
TRACE_ERROR("Failed to create epoll struct!n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
events = (struct mtcp_epoll_event *)
|
||||
calloc(maxevents, sizeof(struct mtcp_epoll_event));
|
||||
if (!events) {
|
||||
TRACE_ERROR("Failed to allocate events!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
ctx->ep = ep;
|
||||
|
||||
wvars = (struct wget_vars *)calloc(max_fds, sizeof(struct wget_vars));
|
||||
if (!wvars) {
|
||||
TRACE_ERROR("Failed to create wget variables!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
ctx->wvars = wvars;
|
||||
|
||||
ctx->started = ctx->done = ctx->pending = 0;
|
||||
ctx->errors = ctx->incompletes = 0;
|
||||
|
||||
gettimeofday(&cur_tv, NULL);
|
||||
prev_ts = TIMEVAL_TO_USEC(cur_tv);
|
||||
prev_tv = cur_tv;
|
||||
|
||||
while (!done[core]) {
|
||||
gettimeofday(&cur_tv, NULL);
|
||||
cur_ts = TIMEVAL_TO_USEC(cur_tv);
|
||||
|
||||
/* print statistics every second */
|
||||
if (core == 0 && cur_tv.tv_sec > prev_tv.tv_sec) {
|
||||
PrintStats();
|
||||
prev_tv = cur_tv;
|
||||
}
|
||||
|
||||
while (ctx->pending < concurrency && ctx->started < ctx->target) {
|
||||
if (CreateConnection(ctx) < 0) {
|
||||
done[core] = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
nevents = mtcp_epoll_wait(mctx, ep, events, maxevents, -1);
|
||||
ctx->stat.waits++;
|
||||
|
||||
if (nevents < 0) {
|
||||
if (errno != EINTR) {
|
||||
TRACE_ERROR("mtcp_epoll_wait failed! ret: %d\n", nevents);
|
||||
}
|
||||
done[core] = TRUE;
|
||||
break;
|
||||
} else {
|
||||
ctx->stat.events += nevents;
|
||||
}
|
||||
|
||||
for (i = 0; i < nevents; i++) {
|
||||
|
||||
if (events[i].events & MTCP_EPOLLERR) {
|
||||
int err;
|
||||
socklen_t len = sizeof(err);
|
||||
|
||||
TRACE_APP("[CPU %d] Error on socket %d\n",
|
||||
core, events[i].data.sockid);
|
||||
ctx->stat.errors++;
|
||||
ctx->errors++;
|
||||
if (mtcp_getsockopt(mctx, events[i].data.sockid,
|
||||
SOL_SOCKET, SO_ERROR, (void *)&err, &len) == 0) {
|
||||
if (err == ETIMEDOUT)
|
||||
ctx->stat.timedout++;
|
||||
}
|
||||
CloseConnection(ctx, events[i].data.sockid);
|
||||
|
||||
} else if (events[i].events & MTCP_EPOLLIN) {
|
||||
HandleReadEvent(ctx,
|
||||
events[i].data.sockid, &wvars[events[i].data.sockid]);
|
||||
|
||||
} else if (events[i].events == MTCP_EPOLLOUT) {
|
||||
struct wget_vars *wv = &wvars[events[i].data.sockid];
|
||||
|
||||
if (!wv->request_sent) {
|
||||
SendHTTPRequest(ctx, events[i].data.sockid, wv);
|
||||
} else {
|
||||
//TRACE_DBG("Request already sent.\n");
|
||||
}
|
||||
|
||||
} else {
|
||||
TRACE_ERROR("Socket %d: event: %s\n",
|
||||
events[i].data.sockid, EventToString(events[i].events));
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->done >= ctx->target) {
|
||||
fprintf(stdout, "[CPU %d] Completed %d connections, "
|
||||
"errors: %d incompletes: %d\n",
|
||||
ctx->core, ctx->done, ctx->errors, ctx->incompletes);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_INFO("Wget thread %d waiting for mtcp to be destroyed.\n", core);
|
||||
DestroyContext(ctx);
|
||||
|
||||
TRACE_DBG("Wget thread %d finished.\n", core);
|
||||
pthread_exit(NULL);
|
||||
return NULL;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
SignalHandler(int signum)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
done[i] = TRUE;
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
struct mtcp_conf mcfg;
|
||||
int cores[MAX_CPUS];
|
||||
int flow_per_thread;
|
||||
int flow_remainder_cnt;
|
||||
int total_concurrency = 0;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (argc < 3) {
|
||||
TRACE_CONFIG("Too few arguments!\n");
|
||||
TRACE_CONFIG("Usage: %s url #flows [output]\n", argv[0]);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (strlen(argv[1]) > MAX_URL_LEN) {
|
||||
TRACE_CONFIG("Length of URL should be smaller than %d!\n", MAX_URL_LEN);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
char* slash_p = strchr(argv[1], '/');
|
||||
if (slash_p) {
|
||||
strncpy(host, argv[1], slash_p - argv[1]);
|
||||
strncpy(url, strchr(argv[1], '/'), MAX_URL_LEN);
|
||||
} else {
|
||||
strncpy(host, argv[1], MAX_IP_STR_LEN);
|
||||
strncpy(url, "/", 1);
|
||||
}
|
||||
|
||||
daddr = inet_addr(host);
|
||||
dport = htons(80);
|
||||
saddr = INADDR_ANY;
|
||||
|
||||
total_flows = atoi(argv[2]);
|
||||
if (total_flows <= 0) {
|
||||
TRACE_CONFIG("Number of flows should be large than 0.\n");
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
num_cores = GetNumCPUs();
|
||||
core_limit = num_cores;
|
||||
concurrency = 100;
|
||||
for (i = 3; i < argc - 1; i++) {
|
||||
if (strcmp(argv[i], "-N") == 0) {
|
||||
core_limit = atoi(argv[i + 1]);
|
||||
if (core_limit > num_cores) {
|
||||
TRACE_CONFIG("CPU limit should be smaller than the "
|
||||
"number of CPUS: %d\n", num_cores);
|
||||
return FALSE;
|
||||
}
|
||||
} else if (strcmp(argv[i], "-c") == 0) {
|
||||
total_concurrency = atoi(argv[i + 1]);
|
||||
|
||||
} else if (strcmp(argv[i], "-o") == 0) {
|
||||
if (strlen(argv[i + 1]) > MAX_FILE_LEN) {
|
||||
TRACE_CONFIG("Output file length should be smaller than %d!\n",
|
||||
MAX_FILE_LEN);
|
||||
return FALSE;
|
||||
}
|
||||
fio = TRUE;
|
||||
strncpy(outfile, argv[i + 1], MAX_FILE_LEN);
|
||||
}
|
||||
}
|
||||
|
||||
if (total_flows < core_limit) {
|
||||
core_limit = total_flows;
|
||||
}
|
||||
|
||||
/* per-core concurrency = total_concurrency / # cores */
|
||||
if (total_concurrency > 0)
|
||||
concurrency = total_concurrency / core_limit;
|
||||
|
||||
/* set the max number of fds 3x larger than concurrency */
|
||||
max_fds = concurrency * 3;
|
||||
|
||||
TRACE_CONFIG("Application configuration:\n");
|
||||
TRACE_CONFIG("URL: %s\n", url);
|
||||
TRACE_CONFIG("# of total_flows: %d\n", total_flows);
|
||||
TRACE_CONFIG("# of cores: %d\n", core_limit);
|
||||
TRACE_CONFIG("Concurrency: %d\n", total_concurrency);
|
||||
if (fio) {
|
||||
TRACE_CONFIG("Output file: %s\n", outfile);
|
||||
}
|
||||
|
||||
ret = mtcp_init("epwget.conf");
|
||||
if (ret) {
|
||||
TRACE_ERROR("Failed to initialize mtcp.\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
mtcp_getconf(&mcfg);
|
||||
mcfg.max_concurrency = max_fds;
|
||||
mcfg.max_num_buffers = max_fds;
|
||||
mtcp_setconf(&mcfg);
|
||||
|
||||
mtcp_register_signal(SIGINT, SignalHandler);
|
||||
|
||||
flow_per_thread = total_flows / core_limit;
|
||||
flow_remainder_cnt = total_flows % core_limit;
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
cores[i] = i;
|
||||
done[i] = FALSE;
|
||||
flows[i] = flow_per_thread;
|
||||
|
||||
if (flow_remainder_cnt-- > 0)
|
||||
flows[i]++;
|
||||
|
||||
if (flows[i] == 0)
|
||||
continue;
|
||||
|
||||
if (pthread_create(&app_thread[i],
|
||||
NULL, RunWgetMain, (void *)&cores[i])) {
|
||||
perror("pthread_create");
|
||||
TRACE_ERROR("Failed to create wget thread.\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < core_limit; i++) {
|
||||
pthread_join(app_thread[i], NULL);
|
||||
TRACE_INFO("Wget thread %d joined.\n", i);
|
||||
}
|
||||
|
||||
mtcp_destroy();
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,18 @@
|
|||
############### mtcp configuration file ###############
|
||||
|
||||
# receive buffer size of sockets
|
||||
rcvbuf = 8192
|
||||
|
||||
# send buffer size of sockets
|
||||
sndbuf = 2048
|
||||
|
||||
# tcp timeout seconds
|
||||
# (tcp_timeout = -1 can disable the timeout check)
|
||||
tcp_timeout = 30
|
||||
|
||||
# tcp timewait seconds
|
||||
tcp_timewait = 0
|
||||
|
||||
# interface to print stats
|
||||
stat_print = xge0
|
||||
#stat_print = xge1
|
|
@ -0,0 +1,7 @@
|
|||
# This file is to configure static arp tables
|
||||
# Rename this file to arp.conf and set the appropriate values
|
||||
# (Destination IP address) (Destination MAC address)
|
||||
|
||||
ARP_ENTRY 2
|
||||
10.0.0.1 00:00:00:00:00:01
|
||||
10.0.1.1 00:00:00:00:00:02
|
|
@ -0,0 +1,25 @@
|
|||
############### mtcp configuration file ###############
|
||||
|
||||
# maximum concurrency per core
|
||||
max_concurrency = 100000
|
||||
|
||||
# maximum number of socket buffers per core
|
||||
max_num_buffers = 100000
|
||||
|
||||
# receive buffer size of sockets
|
||||
rcvbuf = 8192
|
||||
|
||||
# send buffer size of sockets
|
||||
sndbuf = 8192
|
||||
|
||||
# tcp timeout seconds
|
||||
# (tcp_timeout = -1 can disable the timeout check)
|
||||
tcp_timeout = 30
|
||||
|
||||
# tcp timewait seconds
|
||||
tcp_timewait = 0
|
||||
|
||||
# NICs to print network statistics per second
|
||||
# if enabled, mTCP will print xx Gbps and xx pps for RX and TX
|
||||
stat_print = xge0
|
||||
#stat_print = xge1
|
|
@ -0,0 +1,7 @@
|
|||
# This file is an example routing table for coffee5
|
||||
# copy this file to route.conf and give appropriate routes
|
||||
# (Destination address)/(Prefix) (Device name)
|
||||
|
||||
ROUTES 2
|
||||
10.0.0.1/24 xge0
|
||||
10.0.1.1/24 xge1
|
|
@ -0,0 +1,339 @@
|
|||
|
||||
"This software program is licensed subject to the GNU General Public License
|
||||
(GPL). Version 2, June 1991, available at
|
||||
<http://www.fsf.org/copyleft/gpl.html>"
|
||||
|
||||
GNU General Public License
|
||||
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
|
||||
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this license
|
||||
document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your freedom to
|
||||
share and change it. By contrast, the GNU General Public License is intended
|
||||
to guarantee your freedom to share and change free software--to make sure
|
||||
the software is free for all its users. This General Public License applies
|
||||
to most of the Free Software Foundation's software and to any other program
|
||||
whose authors commit to using it. (Some other Free Software Foundation
|
||||
software is covered by the GNU Library General Public License instead.) You
|
||||
can apply it to your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not price. Our
|
||||
General Public Licenses are designed to make sure that you have the freedom
|
||||
to distribute copies of free software (and charge for this service if you
|
||||
wish), that you receive source code or can get it if you want it, that you
|
||||
can change the software or use pieces of it in new free programs; and that
|
||||
you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid anyone to
|
||||
deny you these rights or to ask you to surrender the rights. These
|
||||
restrictions translate to certain responsibilities for you if you distribute
|
||||
copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether gratis or
|
||||
for a fee, you must give the recipients all the rights that you have. You
|
||||
must make sure that they, too, receive or can get the source code. And you
|
||||
must show them these terms so they know their rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and (2)
|
||||
offer you this license which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain that
|
||||
everyone understands that there is no warranty for this free software. If
|
||||
the software is modified by someone else and passed on, we want its
|
||||
recipients to know that what they have is not the original, so that any
|
||||
problems introduced by others will not reflect on the original authors'
|
||||
reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software patents. We
|
||||
wish to avoid the danger that redistributors of a free program will
|
||||
individually obtain patent licenses, in effect making the program
|
||||
proprietary. To prevent this, we have made it clear that any patent must be
|
||||
licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and modification
|
||||
follow.
|
||||
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains a notice
|
||||
placed by the copyright holder saying it may be distributed under the
|
||||
terms of this General Public License. The "Program", below, refers to any
|
||||
such program or work, and a "work based on the Program" means either the
|
||||
Program or any derivative work under copyright law: that is to say, a
|
||||
work containing the Program or a portion of it, either verbatim or with
|
||||
modifications and/or translated into another language. (Hereinafter,
|
||||
translation is included without limitation in the term "modification".)
|
||||
Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of running
|
||||
the Program is not restricted, and the output from the Program is covered
|
||||
only if its contents constitute a work based on the Program (independent
|
||||
of having been made by running the Program). Whether that is true depends
|
||||
on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's source code
|
||||
as you receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice and
|
||||
disclaimer of warranty; keep intact all the notices that refer to this
|
||||
License and to the absence of any warranty; and give any other recipients
|
||||
of the Program a copy of this License along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and you
|
||||
may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion of it,
|
||||
thus forming a work based on the Program, and copy and distribute such
|
||||
modifications or work under the terms of Section 1 above, provided that
|
||||
you also meet all of these conditions:
|
||||
|
||||
* a) You must cause the modified files to carry prominent notices stating
|
||||
that you changed the files and the date of any change.
|
||||
|
||||
* b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any part
|
||||
thereof, to be licensed as a whole at no charge to all third parties
|
||||
under the terms of this License.
|
||||
|
||||
* c) If the modified program normally reads commands interactively when
|
||||
run, you must cause it, when started running for such interactive
|
||||
use in the most ordinary way, to print or display an announcement
|
||||
including an appropriate copyright notice and a notice that there is
|
||||
no warranty (or else, saying that you provide a warranty) and that
|
||||
users may redistribute the program under these conditions, and
|
||||
telling the user how to view a copy of this License. (Exception: if
|
||||
the Program itself is interactive but does not normally print such
|
||||
an announcement, your work based on the Program is not required to
|
||||
print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If identifiable
|
||||
sections of that work are not derived from the Program, and can be
|
||||
reasonably considered independent and separate works in themselves, then
|
||||
this License, and its terms, do not apply to those sections when you
|
||||
distribute them as separate works. But when you distribute the same
|
||||
sections as part of a whole which is a work based on the Program, the
|
||||
distribution of the whole must be on the terms of this License, whose
|
||||
permissions for other licensees extend to the entire whole, and thus to
|
||||
each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of a
|
||||
storage or distribution medium does not bring the other work under the
|
||||
scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it, under
|
||||
Section 2) in object code or executable form under the terms of Sections
|
||||
1 and 2 above provided that you also do one of the following:
|
||||
|
||||
* a) Accompany it with the complete corresponding machine-readable source
|
||||
code, which must be distributed under the terms of Sections 1 and 2
|
||||
above on a medium customarily used for software interchange; or,
|
||||
|
||||
* b) Accompany it with a written offer, valid for at least three years,
|
||||
to give any third party, for a charge no more than your cost of
|
||||
physically performing source distribution, a complete machine-
|
||||
readable copy of the corresponding source code, to be distributed
|
||||
under the terms of Sections 1 and 2 above on a medium customarily
|
||||
used for software interchange; or,
|
||||
|
||||
* c) Accompany it with the information you received as to the offer to
|
||||
distribute corresponding source code. (This alternative is allowed
|
||||
only for noncommercial distribution and only if you received the
|
||||
program in object code or executable form with such an offer, in
|
||||
accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source code
|
||||
means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to control
|
||||
compilation and installation of the executable. However, as a special
|
||||
exception, the source code distributed need not include anything that is
|
||||
normally distributed (in either source or binary form) with the major
|
||||
components (compiler, kernel, and so on) of the operating system on which
|
||||
the executable runs, unless that component itself accompanies the
|
||||
executable.
|
||||
|
||||
If distribution of executable or object code is made by offering access
|
||||
to copy from a designated place, then offering equivalent access to copy
|
||||
the source code from the same place counts as distribution of the source
|
||||
code, even though third parties are not compelled to copy the source
|
||||
along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program except as
|
||||
expressly provided under this License. Any attempt otherwise to copy,
|
||||
modify, sublicense or distribute the Program is void, and will
|
||||
automatically terminate your rights under this License. However, parties
|
||||
who have received copies, or rights, from you under this License will not
|
||||
have their licenses terminated so long as such parties remain in full
|
||||
compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not signed
|
||||
it. However, nothing else grants you permission to modify or distribute
|
||||
the Program or its derivative works. These actions are prohibited by law
|
||||
if you do not accept this License. Therefore, by modifying or
|
||||
distributing the Program (or any work based on the Program), you
|
||||
indicate your acceptance of this License to do so, and all its terms and
|
||||
conditions for copying, distributing or modifying the Program or works
|
||||
based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further restrictions
|
||||
on the recipients' exercise of the rights granted herein. You are not
|
||||
responsible for enforcing compliance by third parties to this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot distribute
|
||||
so as to satisfy simultaneously your obligations under this License and
|
||||
any other pertinent obligations, then as a consequence you may not
|
||||
distribute the Program at all. For example, if a patent license would
|
||||
not permit royalty-free redistribution of the Program by all those who
|
||||
receive copies directly or indirectly through you, then the only way you
|
||||
could satisfy both it and this License would be to refrain entirely from
|
||||
distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under any
|
||||
particular circumstance, the balance of the section is intended to apply
|
||||
and the section as a whole is intended to apply in other circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is implemented
|
||||
by public license practices. Many people have made generous contributions
|
||||
to the wide range of software distributed through that system in
|
||||
reliance on consistent application of that system; it is up to the
|
||||
author/donor to decide if he or she is willing to distribute software
|
||||
through any other system and a licensee cannot impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to be
|
||||
a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in certain
|
||||
countries either by patents or by copyrighted interfaces, the original
|
||||
copyright holder who places the Program under this License may add an
|
||||
explicit geographical distribution limitation excluding those countries,
|
||||
so that distribution is permitted only in or among countries not thus
|
||||
excluded. In such case, this License incorporates the limitation as if
|
||||
written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions of
|
||||
the General Public License from time to time. Such new versions will be
|
||||
similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and
|
||||
conditions either of that version or of any later version published by
|
||||
the Free Software Foundation. If the Program does not specify a version
|
||||
number of this License, you may choose any version ever published by the
|
||||
Free Software Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free programs
|
||||
whose distribution conditions are different, write to the author to ask
|
||||
for permission. For software which is copyrighted by the Free Software
|
||||
Foundation, write to the Free Software Foundation; we sometimes make
|
||||
exceptions for this. Our decision will be guided by the two goals of
|
||||
preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
|
||||
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
||||
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH
|
||||
YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
|
||||
NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
|
||||
DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
|
||||
DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM
|
||||
(INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
|
||||
INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
|
||||
THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR
|
||||
OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it free
|
||||
software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest to
|
||||
attach them to the start of each source file to most effectively convey the
|
||||
exclusion of warranty; and each file should have at least the "copyright"
|
||||
line and a pointer to where the full notice is found.
|
||||
|
||||
one line to give the program's name and an idea of what it does.
|
||||
Copyright (C) yyyy name of author
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 2 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc., 59
|
||||
Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this when
|
||||
it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author Gnomovision comes
|
||||
with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free
|
||||
software, and you are welcome to redistribute it under certain conditions;
|
||||
type 'show c' for details.
|
||||
|
||||
The hypothetical commands 'show w' and 'show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may be
|
||||
called something other than 'show w' and 'show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
'Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
signature of Ty Coon, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Library General Public
|
||||
License instead of this License.
|
|
@ -0,0 +1,361 @@
|
|||
################################################################################
|
||||
#
|
||||
# Intel 10 Gigabit PCI Express Linux driver
|
||||
# Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms and conditions of the GNU General Public License,
|
||||
# version 2, as published by the Free Software Foundation.
|
||||
#
|
||||
# This program is distributed in the hope it will be useful, but WITHOUT
|
||||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
# more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
#
|
||||
# The full GNU General Public License is included in this distribution in
|
||||
# the file called "COPYING".
|
||||
#
|
||||
# Contact Information:
|
||||
# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
#
|
||||
################################################################################
|
||||
|
||||
###########################################################################
|
||||
# Driver files
|
||||
|
||||
# core driver files
|
||||
CFILES = ixgbe_main.c ixgbe_common.c ixgbe_api.c ixgbe_param.c \
|
||||
ixgbe_ethtool.c kcompat.c ixgbe_82598.c \
|
||||
ixgbe_82599.c \
|
||||
ixgbe_dcb.c ixgbe_dcb_82598.c \
|
||||
ixgbe_dcb_82599.c \
|
||||
ixgbe_phy.c
|
||||
HFILES = ixgbe.h ixgbe_common.h ixgbe_api.h ixgbe_osdep.h kcompat.h \
|
||||
ixgbe_dcb.h \
|
||||
ixgbe_phy.h
|
||||
ifeq (,$(BUILD_KERNEL))
|
||||
BUILD_KERNEL=$(shell uname -r)
|
||||
endif
|
||||
|
||||
DRIVER_NAME=ps_ixgbe
|
||||
|
||||
###########################################################################
|
||||
# Environment tests
|
||||
|
||||
# Kernel Search Path
# All the places we look for kernel source, in priority order.
# Two candidates are derived from $(BUILD_KERNEL) with sed:
#   - everything from the first '-' removed  (2.6.32-5-amd64 -> 2.6.32)
#   - only major.minor kept                  (2.6.32-5-amd64 -> 2.6)
# The pipeline has to run through $(shell echo ...): a bare
# "$(<text> | sed ...)" is read by make as a reference to a variable of
# that name, which is undefined and expands to the empty string.
KSP :=  /lib/modules/$(BUILD_KERNEL)/build \
        /lib/modules/$(BUILD_KERNEL)/source \
        /usr/src/linux-$(BUILD_KERNEL) \
        /usr/src/linux-$(shell echo $(BUILD_KERNEL) | sed 's/-.*//') \
        /usr/src/kernel-headers-$(BUILD_KERNEL) \
        /usr/src/kernel-source-$(BUILD_KERNEL) \
        /usr/src/linux-$(shell echo $(BUILD_KERNEL) | sed 's/\([0-9]*\.[0-9]*\)\..*/\1/') \
        /usr/src/linux
|
||||
|
||||
# prune the list down to only values that exist
|
||||
# and have an include/linux sub-directory
|
||||
test_dir = $(shell [ -e $(dir)/include/linux ] && echo $(dir))
|
||||
KSP := $(foreach dir, $(KSP), $(test_dir))
|
||||
|
||||
# we will use this first valid entry in the search path
|
||||
ifeq (,$(KSRC))
|
||||
KSRC := $(firstword $(KSP))
|
||||
endif
|
||||
|
||||
ifeq (,$(KSRC))
|
||||
$(warning *** Linux kernel source not found in any of these locations:)
|
||||
$(warning $(KSP))
|
||||
$(warning *** Install the appropriate kernel development package, e.g.)
|
||||
$(error kernel-devel, for building kernel modules and try again)
|
||||
else
|
||||
ifeq (/lib/modules/$(BUILD_KERNEL)/source, $(KSRC))
|
||||
KOBJ := /lib/modules/$(BUILD_KERNEL)/build
|
||||
else
|
||||
KOBJ := $(KSRC)
|
||||
endif
|
||||
endif
|
||||
# Version file Search Path
|
||||
VSP := $(KOBJ)/include/generated/utsrelease.h \
|
||||
$(KOBJ)/include/linux/utsrelease.h \
|
||||
$(KOBJ)/include/linux/version.h \
|
||||
/boot/vmlinuz.version.h
|
||||
|
||||
# Config file Search Path
|
||||
CSP := $(KSRC)/include/generated/autoconf.h \
|
||||
$(KSRC)/include/linux/autoconf.h \
|
||||
/boot/vmlinuz.autoconf.h
|
||||
|
||||
# prune the lists down to only files that exist
|
||||
test_file = $(shell [ -f $(file) ] && echo $(file))
|
||||
VSP := $(foreach file, $(VSP), $(test_file))
|
||||
CSP := $(foreach file, $(CSP), $(test_file))
|
||||
|
||||
# and use the first valid entry in the Search Paths
|
||||
ifeq (,$(VERSION_FILE))
|
||||
VERSION_FILE := $(firstword $(VSP))
|
||||
endif
|
||||
ifeq (,$(CONFIG_FILE))
|
||||
CONFIG_FILE := $(firstword $(CSP))
|
||||
endif
|
||||
|
||||
ifeq (,$(wildcard $(VERSION_FILE)))
|
||||
$(error Linux kernel source not configured - missing version.h)
|
||||
endif
|
||||
|
||||
ifeq (,$(wildcard $(CONFIG_FILE)))
|
||||
$(error Linux kernel source not configured - missing autoconf.h)
|
||||
endif
|
||||
|
||||
# pick a compiler
|
||||
ifneq (,$(findstring egcs-2.91.66, $(shell cat /proc/version)))
|
||||
CC := kgcc gcc cc
|
||||
else
|
||||
CC := gcc cc
|
||||
endif
|
||||
test_cc = $(shell $(cc) --version > /dev/null 2>&1 && echo $(cc))
|
||||
CC := $(foreach cc, $(CC), $(test_cc))
|
||||
CC := $(firstword $(CC))
|
||||
ifeq (,$(CC))
|
||||
$(error Compiler not found)
|
||||
endif
|
||||
|
||||
# we need to know what platform the driver is being built on
|
||||
# some additional features are only built on Intel platforms
|
||||
ARCH := $(shell uname -m | sed 's/i.86/i386/')
|
||||
ifeq ($(ARCH),alpha)
|
||||
EXTRA_CFLAGS += -ffixed-8 -mno-fp-regs
|
||||
endif
|
||||
ifeq ($(ARCH),x86_64)
|
||||
EXTRA_CFLAGS += -mcmodel=kernel -mno-red-zone
|
||||
endif
|
||||
ifeq ($(ARCH),ppc)
|
||||
EXTRA_CFLAGS += -msoft-float
|
||||
endif
|
||||
ifeq ($(ARCH),ppc64)
|
||||
EXTRA_CFLAGS += -m64 -msoft-float
|
||||
LDFLAGS += -melf64ppc
|
||||
endif
|
||||
|
||||
# extra flags for module builds
|
||||
EXTRA_CFLAGS += -DDRIVER_$(shell echo $(DRIVER_NAME) | tr '[a-z]' '[A-Z]')
|
||||
EXTRA_CFLAGS += -DDRIVER_NAME=$(DRIVER_NAME)
|
||||
EXTRA_CFLAGS += -DDRIVER_NAME_CAPS=$(shell echo $(DRIVER_NAME) | tr '[a-z]' '[A-Z]')
|
||||
# standard flags for module builds
|
||||
EXTRA_CFLAGS += -DLINUX -D__KERNEL__ -DMODULE -O3 -pipe -Wall
|
||||
EXTRA_CFLAGS += -I$(KSRC)/include -I.
|
||||
EXTRA_CFLAGS += $(shell [ -f $(KSRC)/include/linux/modversions.h ] && \
|
||||
echo "-DMODVERSIONS -DEXPORT_SYMTAB \
|
||||
-include $(KSRC)/include/linux/modversions.h")
|
||||
|
||||
EXTRA_CFLAGS += $(CFLAGS_EXTRA)
|
||||
EXTRA_CFLAGS += -DIXGBE_RSS
|
||||
# do nothing here, so it's stripped properly
|
||||
|
||||
RHC := $(KSRC)/include/linux/rhconfig.h
|
||||
ifneq (,$(wildcard $(RHC)))
|
||||
# 7.3 typo in rhconfig.h
|
||||
ifneq (,$(shell $(CC) $(CFLAGS) -E -dM $(RHC) | grep __module__bigmem))
|
||||
EXTRA_CFLAGS += -D__module_bigmem
|
||||
endif
|
||||
endif
|
||||
|
||||
# get the kernel version - we use this to find the correct install path
|
||||
KVER := $(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(VERSION_FILE) | grep UTS_RELEASE | \
|
||||
awk '{ print $$3 }' | sed 's/\"//g')
|
||||
|
||||
# assume source symlink is the same as build, otherwise adjust KOBJ
|
||||
ifneq (,$(wildcard /lib/modules/$(KVER)/build))
|
||||
ifneq ($(KSRC),$(shell readlink /lib/modules/$(KVER)/build))
|
||||
KOBJ=/lib/modules/$(KVER)/build
|
||||
endif
|
||||
endif
|
||||
|
||||
KKVER := $(shell echo $(KVER) | \
|
||||
awk '{ if ($$0 ~ /2\.[4-9]\./) print "1"; else print "0"}')
|
||||
ifeq ($(KKVER), 0)
|
||||
$(error *** Aborting the build. \
|
||||
*** This driver is not supported on kernel versions older than 2.4.0)
|
||||
endif
|
||||
|
||||
# Add DCB netlink source if our kernel is 2.6.23 or newer
|
||||
KKVER := $(shell echo $(KVER) | \
|
||||
awk '{ if ($$0 ~ /2\.[6-9]\.(2[3-9]|[3-9][0-9])/) print "1"; else print "0"}')
|
||||
ifeq ($(KKVER), 1)
|
||||
CFILES += ixgbe_dcb_nl.c
|
||||
endif
|
||||
|
||||
# Add FCoE source if FCoE is supported by the kernel
|
||||
FCOE := $(shell grep -wE 'CONFIG_FCOE|CONFIG_FCOE_MODULE' $(CONFIG_FILE) | \
|
||||
awk '{print $$3}')
|
||||
ifeq ($(FCOE), 1)
|
||||
CFILES += ixgbe_sysfs.c
|
||||
CFILES += ixgbe_fcoe.c
|
||||
HFILES += ixgbe_fcoe.h
|
||||
endif
|
||||
|
||||
# set the install path
|
||||
INSTDIR := /lib/modules/$(KVER)/kernel/drivers/net/$(DRIVER_NAME)
|
||||
|
||||
# look for CONFIG_SMP in the kernel configuration header (CONFIG_FILE)
|
||||
SMP := $(shell $(CC) $(EXTRA_CFLAGS) -E -dM $(CONFIG_FILE) | \
|
||||
grep -w CONFIG_SMP | awk '{ print $$3 }')
|
||||
ifneq ($(SMP),1)
|
||||
SMP := 0
|
||||
endif
|
||||
|
||||
ifneq ($(SMP),$(shell uname -a | grep SMP > /dev/null 2>&1 && echo 1 || echo 0))
|
||||
$(warning ***)
|
||||
ifeq ($(SMP),1)
|
||||
$(warning *** Warning: kernel source configuration (SMP))
|
||||
$(warning *** does not match running kernel (UP))
|
||||
else
|
||||
$(warning *** Warning: kernel source configuration (UP))
|
||||
$(warning *** does not match running kernel (SMP))
|
||||
endif
|
||||
$(warning *** Continuing with build,)
|
||||
$(warning *** resulting driver may not be what you want)
|
||||
$(warning ***)
|
||||
endif
|
||||
|
||||
ifeq ($(SMP),1)
|
||||
EXTRA_CFLAGS += -D__SMP__
|
||||
endif
|
||||
|
||||
###########################################################################
|
||||
# 2.4.x & 2.6.x Specific rules
|
||||
|
||||
K_VERSION:=$(shell echo $(BUILD_KERNEL) | cut -c1-3 | sed 's/2\.[56]/2\.6/')
|
||||
|
||||
ifeq ($(K_VERSION), 2.6)
|
||||
|
||||
# Makefile for 2.6.x kernel
|
||||
TARGET = $(DRIVER_NAME).ko
|
||||
|
||||
# man page
|
||||
MANSECTION = 7
|
||||
MANFILE = $(TARGET:.ko=.$(MANSECTION))
|
||||
|
||||
ifneq ($(PATCHLEVEL),)
|
||||
EXTRA_CFLAGS += $(CFLAGS_EXTRA)
|
||||
obj-m += $(DRIVER_NAME).o
|
||||
$(DRIVER_NAME)-objs := $(CFILES:.c=.o)
|
||||
else
|
||||
default:
|
||||
ifeq ($(KOBJ),$(KSRC))
|
||||
$(MAKE) -C $(KSRC) SUBDIRS=$(shell pwd) modules
|
||||
else
|
||||
$(MAKE) -C $(KSRC) O=$(KOBJ) SUBDIRS=$(shell pwd) modules
|
||||
endif
|
||||
endif
|
||||
|
||||
else # ifeq ($(K_VERSION),2.6)
|
||||
|
||||
# Makefile for 2.4.x kernel
|
||||
TARGET = $(DRIVER_NAME).o
|
||||
|
||||
# man page
|
||||
MANSECTION = 7
|
||||
MANFILE = $(TARGET:.o=.$(MANSECTION))
|
||||
|
||||
# Get rid of compile warnings in kernel header files from SuSE
|
||||
ifneq (,$(wildcard /etc/SuSE-release))
|
||||
EXTRA_CFLAGS += -Wno-sign-compare -fno-strict-aliasing
|
||||
endif
|
||||
|
||||
# Get rid of compile warnings in kernel header files from fedora
|
||||
ifneq (,$(wildcard /etc/fedora-release))
|
||||
EXTRA_CFLAGS += -fno-strict-aliasing
|
||||
endif
|
||||
|
||||
CFLAGS += $(EXTRA_CFLAGS)
|
||||
|
||||
.SILENT: $(TARGET)
|
||||
$(TARGET): $(filter-out $(TARGET), $(CFILES:.c=.o))
|
||||
$(LD) $(LDFLAGS) -r $^ -o $@
|
||||
echo; echo
|
||||
echo "**************************************************"
|
||||
echo "** $(TARGET) built for $(KVER)"
|
||||
echo -n "** SMP "
|
||||
if [ "$(SMP)" = "1" ]; \
|
||||
then echo "Enabled"; else echo "Disabled"; fi
|
||||
echo "**************************************************"
|
||||
echo
|
||||
|
||||
$(CFILES:.c=.o): $(HFILES) Makefile
|
||||
default:
|
||||
$(MAKE)
|
||||
|
||||
endif # ifeq ($(K_VERSION),2.6)
|
||||
|
||||
ifeq (,$(MANDIR))
  # find the best place to install the man page
  # Ask manpath(1) first and fall back to the MANPATH environment variable.
  # The shell variable must be written $$MANPATH: a single '$' would make
  # make expand $(M) followed by the literal text "ANPATH".
  MANPATH := $(shell (manpath 2>/dev/null || echo $$MANPATH) | sed 's/:/ /g')
  ifneq (,$(MANPATH))
    # test based on inclusion in MANPATH
    test_dir = $(findstring $(dir), $(MANPATH))
  else
    # no MANPATH, test based on directory existence
    test_dir = $(shell [ -e $(dir) ] && echo $(dir))
  endif
  # our preferred install path
  # should /usr/local/man be in here ?
  MANDIR := /usr/share/man /usr/man
  MANDIR := $(foreach dir, $(MANDIR), $(test_dir))
  MANDIR := $(firstword $(MANDIR))
endif
|
||||
ifeq (,$(MANDIR))
|
||||
# fallback to /usr/man
|
||||
MANDIR := /usr/man
|
||||
endif
|
||||
|
||||
# depmod version for rpm builds
|
||||
DEPVER := $(shell /sbin/depmod -V 2>/dev/null | \
|
||||
awk 'BEGIN {FS="."} NR==1 {print $$2}')
|
||||
|
||||
###########################################################################
|
||||
# Build rules
|
||||
|
||||
$(MANFILE).gz: ../$(MANFILE)
|
||||
gzip -c $< > $@
|
||||
|
||||
# Install the built module and its man page under $(INSTALL_MOD_PATH)
# (empty for a live install), then refresh the module dependency data.
# The staged-install depmod call uses $(KVER), the kernel release computed
# earlier in this Makefile; $(KVERSION) is never defined here.
install: default $(MANFILE).gz
	# remove all old versions of the driver
	find $(INSTALL_MOD_PATH)/lib/modules/$(KVER) -name $(TARGET) -exec rm -f {} \; || true
	find $(INSTALL_MOD_PATH)/lib/modules/$(KVER) -name $(TARGET).gz -exec rm -f {} \; || true
	install -D -m 644 $(TARGET) $(INSTALL_MOD_PATH)$(INSTDIR)/$(TARGET)
ifeq (,$(INSTALL_MOD_PATH))
	/sbin/depmod -a || true
else
  ifeq ($(DEPVER),1 )
	/sbin/depmod -r $(INSTALL_MOD_PATH) -a || true
  else
	/sbin/depmod -b $(INSTALL_MOD_PATH) -a -n $(KVER) > /dev/null || true
  endif
endif
	install -D -m 644 $(MANFILE).gz $(INSTALL_MOD_PATH)$(MANDIR)/man$(MANSECTION)/$(MANFILE).gz
	man -c -P'cat > /dev/null' $(MANFILE:.$(MANSECTION)=) || true
|
||||
|
||||
uninstall:
|
||||
if [ -e $(INSTDIR)/$(TARGET) ] ; then \
|
||||
rm -f $(INSTDIR)/$(TARGET) ; \
|
||||
fi
|
||||
/sbin/depmod -a
|
||||
if [ -e $(MANDIR)/man$(MANSECTION)/$(MANFILE).gz ] ; then \
|
||||
rm -f $(MANDIR)/man$(MANSECTION)/$(MANFILE).gz ; \
|
||||
fi
|
||||
|
||||
.PHONY: clean install
|
||||
|
||||
clean:
|
||||
ifeq ($(KOBJ),$(KSRC))
|
||||
$(MAKE) -C $(KSRC) SUBDIRS=$(shell pwd) clean
|
||||
else
|
||||
$(MAKE) -C $(KSRC) O=$(KOBJ) SUBDIRS=$(shell pwd) clean
|
||||
endif
|
||||
rm -rf $(TARGET) $(TARGET:.ko=.o) $(TARGET:.ko=.mod.c) $(TARGET:.ko=.mod.o) $(CFILES:.c=.o) $(MANFILE).gz .*cmd .tmp_versions
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
def execute(cmd):
    """Run `cmd` through the shell and return everything it wrote to stdout.

    Best-effort helper: if the process cannot be started or read, the
    failure is swallowed and None is returned. Only ordinary errors are
    swallowed; KeyboardInterrupt and SystemExit still propagate so the
    script can be interrupted.
    """
    try:
        proc = subprocess.Popen(cmd, shell = True, stdout = subprocess.PIPE)
        return proc.communicate()[0]
    except Exception:
        # deliberate best-effort: callers receive None on failure
        pass
    return None
|
||||
|
||||
if os.getuid() != 0:
|
||||
print 'You must be root!'
|
||||
sys.exit(1)
|
||||
|
||||
num_cpus = len(execute('cat /proc/cpuinfo | grep processor').strip().split('\n'))
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print 'usage: %s <interface name>' % sys.argv[0]
|
||||
sys.exit(1)
|
||||
|
||||
ifname = sys.argv[1]
|
||||
|
||||
intrmap = execute('cat /proc/interrupts | grep %s-rx-' % ifname).strip().split('\n')
|
||||
|
||||
for intr in intrmap:
|
||||
irq = int(intr.split()[0][:-1])
|
||||
name = intr.split()[-1]
|
||||
queue = int(name[name.rfind('-') + 1:])
|
||||
|
||||
cpu = queue
|
||||
|
||||
print 'echo %x > /proc/irq/%d/smp_affinity' % (1 << cpu, irq)
|
||||
execute('echo %x > /proc/irq/%d/smp_affinity' % (1 << cpu, irq))
|
|
@ -0,0 +1,73 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
ITR = 956 # interrupt throttling rate
|
||||
|
||||
def execute(cmd):
    """Run `cmd` through the shell and return everything it wrote to stdout.

    Best-effort helper: if the process cannot be started or read, the
    failure is swallowed and None is returned. Only ordinary errors are
    swallowed; KeyboardInterrupt and SystemExit still propagate so the
    script can be interrupted.
    """
    try:
        proc = subprocess.Popen(cmd, shell = True, stdout = subprocess.PIPE)
        return proc.communicate()[0]
    except Exception:
        # deliberate best-effort: callers receive None on failure
        pass
    return None
|
||||
|
||||
def get_num_interfaces():
    """Count the lspci output lines that mention 82598 or 82599.

    Each query is run through execute(); an empty result contributes zero,
    otherwise every output line counts as one interface.
    """
    total = 0
    for query in ('lspci | grep 82598', 'lspci | grep 82599'):
        listing = execute(query).strip()
        if listing == '':
            continue
        total += len(listing.split('\n'))
    return total
|
||||
|
||||
def get_num_cpus():
    """Return the number of lines in /proc/cpuinfo that contain 'processor'."""
    matches = execute('cat /proc/cpuinfo | grep processor').strip()
    return len(matches.split('\n'))
|
||||
|
||||
if os.getuid() != 0:
|
||||
print 'You must be root!'
|
||||
sys.exit(1)
|
||||
|
||||
if len(sys.argv) < 3:
|
||||
print 'usage: %s <# of RX queues> <# of TX queues>' % sys.argv[0]
|
||||
print ' You can specify 0 instead of the number of queues (one queue for each core)'
|
||||
sys.exit(1)
|
||||
|
||||
num_rx_queues = int(sys.argv[1])
|
||||
num_tx_queues = int(sys.argv[2])
|
||||
postfix = '43'
|
||||
|
||||
assert 0 <= num_rx_queues <= 16
|
||||
|
||||
num_ifs = get_num_interfaces()
|
||||
num_cpus = get_num_cpus()
|
||||
|
||||
execute('lsmod | grep ps_ixgbe > /dev/null && sudo rmmod ps_ixgbe')
|
||||
execute('insmod ./ps_ixgbe.ko RXQ=%s TXQ=%s InterruptThrottleRate=%s' %
|
||||
(','.join([str(num_rx_queues)] * num_ifs),
|
||||
','.join([str(num_tx_queues)] * num_ifs),
|
||||
','.join([str(ITR)] * num_ifs))
|
||||
)
|
||||
|
||||
time.sleep(3)
|
||||
for i in range(num_ifs):
|
||||
ifname = 'xge%d' % i
|
||||
print 'setting %s...' % ifname
|
||||
|
||||
execute('ethtool -A %s autoneg off rx off tx off' % ifname)
|
||||
execute('ifconfig %s 10.0.%d.%s mtu 1500 netmask 255.255.255.0' % (ifname, i, postfix))
|
||||
|
||||
print 'OK'
|
||||
print execute('./affinity.py %s' % ifname).strip()
|
||||
|
||||
execute('rm -f /dev/packet_shader')
|
||||
execute('mknod /dev/packet_shader c 1010 0')
|
||||
execute('chmod 666 /dev/packet_shader')
|
|
@ -0,0 +1,552 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _IXGBE_H_
|
||||
#define _IXGBE_H_
|
||||
|
||||
#ifndef IXGBE_NO_LRO
|
||||
#include <net/tcp.h>
|
||||
#endif
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#ifdef SIOCETHTOOL
|
||||
#include <linux/ethtool.h>
|
||||
#endif
|
||||
#ifdef NETIF_F_HW_VLAN_TX
|
||||
#include <linux/if_vlan.h>
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
|
||||
#define IXGBE_DCA
|
||||
#include <linux/dca.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "ixgbe_dcb.h"
|
||||
|
||||
|
||||
#include "kcompat.h"
|
||||
|
||||
#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
|
||||
#define IXGBE_FCOE
|
||||
#include "ixgbe_fcoe.h"
|
||||
#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
|
||||
|
||||
#include "ixgbe_api.h"
|
||||
|
||||
#define PFX "ixgbe: "
|
||||
#define DPRINTK(nlevel, klevel, fmt, args...) \
|
||||
((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \
|
||||
printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \
|
||||
__FUNCTION__ , ## args)))
|
||||
|
||||
|
||||
/* TX/RX descriptor defines */
|
||||
#define IXGBE_DEFAULT_TXD 4096
|
||||
#define IXGBE_MAX_TXD 4096
|
||||
#define IXGBE_MIN_TXD 64
|
||||
|
||||
#define IXGBE_DEFAULT_RXD 4096
|
||||
#define IXGBE_MAX_RXD 4096
|
||||
#define IXGBE_MIN_RXD 64
|
||||
|
||||
#define IXGBE_SUBWINDOW_BITS 10
|
||||
#define IXGBE_SUBWINDOW_SIZE (1 << IXGBE_SUBWINDOW_BITS)
|
||||
#define IXGBE_SUBWINDOW_MASK (IXGBE_SUBWINDOW_SIZE - 1)
|
||||
#define IXGBE_MAX_SUBWINDOWS (IXGBE_MAX_TXD / IXGBE_SUBWINDOW_SIZE)
|
||||
|
||||
/* flow control */
|
||||
#define IXGBE_DEFAULT_FCRTL 0x10000
|
||||
#define IXGBE_MIN_FCRTL 0x40
|
||||
#define IXGBE_MAX_FCRTL 0x7FF80
|
||||
#define IXGBE_DEFAULT_FCRTH 0x20000
|
||||
#define IXGBE_MIN_FCRTH 0x600
|
||||
#define IXGBE_MAX_FCRTH 0x7FFF0
|
||||
#define IXGBE_DEFAULT_FCPAUSE 0xFFFF
|
||||
#define IXGBE_MIN_FCPAUSE 0
|
||||
#define IXGBE_MAX_FCPAUSE 0xFFFF
|
||||
|
||||
/* Supported Rx Buffer Sizes */
|
||||
#define IXGBE_RXBUFFER_64 64 /* Used for packet split */
|
||||
#define IXGBE_RXBUFFER_128 128 /* Used for packet split */
|
||||
#define IXGBE_RXBUFFER_256 256 /* Used for packet split */
|
||||
#define IXGBE_RXBUFFER_2048 2048
|
||||
#define IXGBE_RXBUFFER_4096 4096
|
||||
#define IXGBE_RXBUFFER_8192 8192
|
||||
#define IXGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */
|
||||
|
||||
#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_256
|
||||
|
||||
#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
|
||||
|
||||
#if defined(IXGBE_DCB) || defined(IXGBE_RSS) || \
|
||||
defined(IXGBE_VMDQ)
|
||||
#define IXGBE_MQ
|
||||
#endif
|
||||
|
||||
/* How many Rx Buffers do we bundle into one write to the hardware ? */
|
||||
#define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */
|
||||
|
||||
#define IXGBE_TX_FLAGS_CSUM (u32)(1)
|
||||
#define IXGBE_TX_FLAGS_VLAN (u32)(1 << 1)
|
||||
#define IXGBE_TX_FLAGS_TSO (u32)(1 << 2)
|
||||
#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 3)
|
||||
#define IXGBE_TX_FLAGS_FCOE (u32)(1 << 4)
|
||||
#define IXGBE_TX_FLAGS_FSO (u32)(1 << 5)
|
||||
#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000
|
||||
#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000
|
||||
#define IXGBE_TX_FLAGS_VLAN_SHIFT 16
|
||||
|
||||
#define IXGBE_MAX_RSC_INT_RATE 162760
|
||||
|
||||
#ifndef IXGBE_NO_LRO
|
||||
#define IXGBE_LRO_MAX 32 /*Maximum number of LRO descriptors*/
|
||||
#define IXGBE_LRO_GLOBAL 10
|
||||
|
||||
struct ixgbe_lro_stats {
|
||||
u32 flushed;
|
||||
u32 coal;
|
||||
u32 recycled;
|
||||
};
|
||||
|
||||
struct ixgbe_lro_desc {
|
||||
struct hlist_node lro_node;
|
||||
struct sk_buff *skb;
|
||||
u32 source_ip;
|
||||
u32 dest_ip;
|
||||
u16 source_port;
|
||||
u16 dest_port;
|
||||
u16 vlan_tag;
|
||||
u16 len;
|
||||
u32 next_seq;
|
||||
u32 ack_seq;
|
||||
u16 window;
|
||||
u16 mss;
|
||||
u16 opt_bytes;
|
||||
u16 psh:1;
|
||||
u32 tsval;
|
||||
u32 tsecr;
|
||||
u32 append_cnt;
|
||||
};
|
||||
|
||||
struct ixgbe_lro_list {
|
||||
struct hlist_head active;
|
||||
struct hlist_head free;
|
||||
int active_cnt;
|
||||
struct ixgbe_lro_stats stats;
|
||||
};
|
||||
|
||||
#endif /* IXGBE_NO_LRO */
|
||||
/* per-descriptor TX bookkeeping: a timestamp, a length and a
 * next_to_watch index (no socket buffer pointer or DMA handle is kept
 * in this structure) */
|
||||
struct ixgbe_tx_buffer {
|
||||
unsigned long time_stamp;
|
||||
u16 length;
|
||||
u16 next_to_watch;
|
||||
};
|
||||
|
||||
struct ixgbe_rx_buffer {
|
||||
u16 length;
|
||||
};
|
||||
|
||||
struct ixgbe_queue_stats {
|
||||
u64 packets;
|
||||
u64 bytes;
|
||||
};
|
||||
|
||||
/* Per-ring state, declared cache-line aligned.
 * The buffer-info union carries both a TX and an RX member and "queued"
 * is marked RX-only, so the same layout appears to describe both ring
 * directions -- NOTE(review): confirm against the users of this struct. */
struct ____cacheline_aligned ixgbe_ring {
|
||||
void *desc; /* descriptor ring memory */
|
||||
/* anonymous union: the two buffer-info pointers share storage */
union {
|
||||
struct ixgbe_tx_buffer *tx_buffer_info;
|
||||
struct ixgbe_rx_buffer *rx_buffer_info;
|
||||
};
|
||||
|
||||
struct ixgbe_adapter *adapter;
|
||||
|
||||
u8 atr_sample_rate;
|
||||
u8 atr_count;
|
||||
u16 count; /* amount of descriptors */
|
||||
u16 rx_buf_len;
|
||||
u16 next_to_use;
|
||||
u16 next_to_clean;
|
||||
|
||||
u8 queue_index; /* needed for multiqueue queue management */
|
||||
|
||||
u16 head;
|
||||
u16 tail;
|
||||
|
||||
unsigned int total_bytes;
|
||||
unsigned int total_packets;
|
||||
|
||||
/* the cpu member exists only when DCA support is configured */
#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
|
||||
/* cpu for tx queue */
|
||||
int cpu;
|
||||
#endif
|
||||
u16 reg_idx; /* holds the special value that gets the
|
||||
* hardware register offset associated
|
||||
* with this ring, which is different
|
||||
* for DCB and RSS modes */
|
||||
|
||||
struct ixgbe_queue_stats stats;
|
||||
unsigned long reinit_state;
|
||||
u64 rsc_count; /* stat for coalesced packets */
|
||||
unsigned int size; /* length in bytes */
|
||||
dma_addr_t dma; /* phys. address of descriptor ring */
|
||||
|
||||
/* [queued, next_to_clean): packets waiting to be pulled */
|
||||
u16 queued; /* only used for RX */
|
||||
|
||||
/* Both arrays have IXGBE_MAX_SUBWINDOWS entries
 * (IXGBE_MAX_TXD / IXGBE_SUBWINDOW_SIZE). Presumably window[i] and
 * dma_window[i] are the CPU and DMA addresses of the same sub-window
 * -- TODO confirm. */
u8 *window[IXGBE_MAX_SUBWINDOWS];
|
||||
dma_addr_t dma_window[IXGBE_MAX_SUBWINDOWS];
|
||||
unsigned int window_size;
|
||||
|
||||
spinlock_t lock;
|
||||
wait_queue_head_t *wq;
|
||||
};
|
||||
|
||||
/* Identifiers for the ring features. Enumerators count up from
 * RING_F_NONE = 0, so RING_F_ARRAY_SIZE, being last, equals the number
 * of enumerators that precede it. */
enum ixgbe_ring_f_enum {
|
||||
RING_F_NONE = 0,
|
||||
RING_F_DCB,
|
||||
RING_F_VMDQ,
|
||||
RING_F_RXQ,
|
||||
RING_F_TXQ,
|
||||
RING_F_FDIR,
|
||||
RING_F_ARRAY_SIZE /* must be last in enum set */
|
||||
};
|
||||
|
||||
#define IXGBE_MAX_DCB_INDICES 8
|
||||
#define IXGBE_MAX_RSS_INDICES 16
|
||||
#define IXGBE_MAX_VMDQ_INDICES 64
|
||||
#define IXGBE_MAX_FDIR_INDICES 64
|
||||
struct ixgbe_ring_feature {
|
||||
int indices;
|
||||
int mask;
|
||||
};
|
||||
|
||||
#define MAX_RX_QUEUES 128
|
||||
#define MAX_TX_QUEUES 128
|
||||
|
||||
#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
|
||||
? 8 : 1)
|
||||
#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
|
||||
|
||||
/* MAX_MSIX_Q_VECTORS of these are allocated,
|
||||
* but we only use one per queue-specific vector.
|
||||
*/
|
||||
struct ixgbe_q_vector {
|
||||
struct ixgbe_adapter *adapter;
|
||||
unsigned int v_idx; /* index of q_vector within array, also used for
|
||||
* finding the bit in EICR and friends that
|
||||
* represents the vector for this ring */
|
||||
#ifdef CONFIG_IXGBE_NAPI
|
||||
struct napi_struct napi;
|
||||
#endif
|
||||
DECLARE_BITMAP(rxr_idx, MAX_RX_QUEUES); /* Rx ring indices */
|
||||
DECLARE_BITMAP(txr_idx, MAX_TX_QUEUES); /* Tx ring indices */
|
||||
u8 rxr_count; /* Rx ring count assigned to this vector */
|
||||
u8 txr_count; /* Tx ring count assigned to this vector */
|
||||
u8 tx_itr;
|
||||
u8 rx_itr;
|
||||
u32 eitr;
|
||||
#ifndef IXGBE_NO_LRO
|
||||
struct ixgbe_lro_list *lrolist; /* LRO list for queue vector*/
|
||||
#endif
|
||||
char name[IFNAMSIZ + 9];
|
||||
#ifndef HAVE_NETDEV_NAPI_LIST
|
||||
struct net_device poll_dev;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
/*
 * Convert between interrupts/second and the value the EITR register uses.
 * The same formula works in both directions, hence the alias below.
 * A zero argument maps to 8, the lowest value supported by all of the
 * ixgbe hardware.
 */
#define EITR_INTS_PER_SEC_TO_REG(_eitr) \
	(((_eitr) != 0) ? (1000000000 / ((_eitr) * 256)) : 8)
#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG
|
||||
|
||||
/*
 * Number of unused descriptors in ring R (a pointer to a structure with
 * count, next_to_clean and next_to_use members).  One slot is always held
 * back, so an empty ring reports count - 1.  R is evaluated several times;
 * do not pass an expression with side effects.
 */
#define IXGBE_DESC_UNUSED(R) \
	((((R)->next_to_clean <= (R)->next_to_use) ? (R)->count : 0) + \
	 (R)->next_to_clean - (R)->next_to_use - 1)
|
||||
|
||||
/*
 * Typed accessors for entry i of a ring's descriptor array.  R is a ring
 * structure (not a pointer) whose desc member holds the array base; each
 * macro yields a pointer to the i-th descriptor of the named type.
 */
#define IXGBE_RX_DESC_ADV(R, i) \
	(&(((union ixgbe_adv_rx_desc *)((R).desc))[i]))
#define IXGBE_TX_DESC_ADV(R, i) \
	(&(((union ixgbe_adv_tx_desc *)((R).desc))[i]))
#define IXGBE_TX_CTXTDESC_ADV(R, i) \
	(&(((struct ixgbe_adv_tx_context_desc *)((R).desc))[i]))
|
||||
|
||||
#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128

/*
 * Vectors that are not tied to a queue: one "other" vector, plus one more
 * when the driver is built with IXGBE_TCP_TIMER.
 */
#ifndef IXGBE_TCP_TIMER
#define TCP_TIMER_VECTOR 0
#else
#define TCP_TIMER_VECTOR 1
#endif
#define OTHER_VECTOR 1
#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR)
|
||||
|
||||
/* Per-MAC MSI-X vector limits. */
#define IXGBE_MAX_MSIX_VECTORS_82599	64
#define IXGBE_MAX_MSIX_Q_VECTORS_82599	64
#define IXGBE_MAX_MSIX_Q_VECTORS_82598	16
#define IXGBE_MAX_MSIX_VECTORS_82598	18

/*
 * Used only to size arrays in the adapter struct, so the larger (82599)
 * limits are taken.  On 82598 some array entries stay unused, which is not
 * a big deal.  82599 can assign 64 queue vectors thanks to its
 * extended-extended interrupt registers; 82598 is limited to 16.
 */
#define MAX_MSIX_Q_VECTORS	IXGBE_MAX_MSIX_Q_VECTORS_82599
#define MAX_MSIX_COUNT		IXGBE_MAX_MSIX_VECTORS_82599

#if 0
#define MIN_MSIX_Q_VECTORS 2
#else
/* no TX interrupt - Sangjin */
#define MIN_MSIX_Q_VECTORS 1
#endif
/* Smallest usable vector count: minimum queue vectors plus non-queue ones. */
#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
|
||||
|
||||
/* board specific private data structure */
|
||||
struct ixgbe_adapter {
|
||||
struct timer_list watchdog_timer;
|
||||
#ifdef NETIF_F_HW_VLAN_TX
|
||||
struct vlan_group *vlgrp;
|
||||
#endif
|
||||
int bd_number;
|
||||
struct work_struct reset_task;
|
||||
struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
|
||||
struct ixgbe_dcb_config dcb_cfg;
|
||||
struct ixgbe_dcb_config temp_dcb_cfg;
|
||||
u8 dcb_set_bitmap;
|
||||
enum ixgbe_fc_mode last_lfc_mode;
|
||||
|
||||
int numa_node;
|
||||
|
||||
/* Interrupt Throttle Rate */
|
||||
u32 itr_setting;
|
||||
u16 eitr_low;
|
||||
u16 eitr_high;
|
||||
|
||||
/* TX */
|
||||
struct ixgbe_ring *tx_ring; /* One per active queue */
|
||||
int num_tx_queues;
|
||||
u64 restart_queue;
|
||||
u64 hw_csum_tx_good;
|
||||
u64 lsc_int;
|
||||
u64 hw_tso_ctxt;
|
||||
u64 hw_tso6_ctxt;
|
||||
u32 tx_timeout_count;
|
||||
bool detect_tx_hung;
|
||||
|
||||
/* RX */
|
||||
struct ixgbe_ring *rx_ring; /* One per active queue */
|
||||
int num_rx_queues;
|
||||
int num_rx_pools; /* == num_rx_queues in 82598 */
|
||||
int num_rx_queues_per_pool; /* 1 if 82598, can be many if 82599 */
|
||||
u64 hw_csum_rx_error;
|
||||
u64 hw_rx_no_dma_resources;
|
||||
u64 hw_csum_rx_good;
|
||||
u64 non_eop_descs;
|
||||
#ifndef CONFIG_IXGBE_NAPI
|
||||
u64 rx_dropped_backlog; /* count drops from rx intr handler */
|
||||
#endif
|
||||
int num_msix_vectors;
|
||||
int max_msix_q_vectors; /* true count of q_vectors for device */
|
||||
struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
|
||||
struct msix_entry *msix_entries;
|
||||
#ifdef IXGBE_TCP_TIMER
|
||||
irqreturn_t (*msix_handlers[MAX_MSIX_COUNT])(int irq, void *data,
|
||||
struct pt_regs *regs);
|
||||
#endif
|
||||
|
||||
u32 alloc_rx_page_failed;
|
||||
u32 alloc_rx_buff_failed;
|
||||
|
||||
/* Some features need tri-state capability,
|
||||
* thus the additional *_CAPABLE flags.
|
||||
*/
|
||||
u32 flags;
|
||||
#define IXGBE_FLAG_RX_CSUM_ENABLED (u32)(1)
|
||||
#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 1)
|
||||
#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 2)
|
||||
#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 3)
|
||||
#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 4)
|
||||
#ifndef IXGBE_NO_LLI
|
||||
#define IXGBE_FLAG_LLI_PUSH (u32)(1 << 5)
|
||||
#endif
|
||||
#define IXGBE_FLAG_RX_1BUF_CAPABLE (u32)(1 << 6)
|
||||
#define IXGBE_FLAG_RX_PS_CAPABLE (u32)(1 << 7)
|
||||
#define IXGBE_FLAG_RX_PS_ENABLED (u32)(1 << 8)
|
||||
#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 9)
|
||||
#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 10)
|
||||
#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 11)
|
||||
#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)(1 << 12)
|
||||
#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 13)
|
||||
#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 14)
|
||||
#define IXGBE_FLAG_DCB_CAPABLE (u32)(1 << 15)
|
||||
#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 16)
|
||||
#define IXGBE_FLAG_RSS_CAPABLE (u32)(1 << 17)
|
||||
#define IXGBE_FLAG_VMDQ_CAPABLE (u32)(1 << 18)
|
||||
#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 19)
|
||||
#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 20)
|
||||
#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 22)
|
||||
#define IXGBE_FLAG_IN_WATCHDOG_TASK (u32)(1 << 23)
|
||||
#define IXGBE_FLAG_IN_SFP_LINK_TASK (u32)(1 << 24)
|
||||
#define IXGBE_FLAG_IN_SFP_MOD_TASK (u32)(1 << 25)
|
||||
#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 26)
|
||||
#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 27)
|
||||
|
||||
/* added - Sangjin */
|
||||
#define IXGBE_FLAG_RX_KERNEL_ENABLE (u32)(1 << 28)
|
||||
|
||||
u32 flags2;
|
||||
#ifndef IXGBE_NO_HW_RSC
|
||||
#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1)
|
||||
#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1)
|
||||
#endif /* IXGBE_NO_HW_RSC */
|
||||
#ifndef IXGBE_NO_LRO
|
||||
#define IXGBE_FLAG2_SWLRO_ENABLED (u32)(1 << 2)
|
||||
#endif /* IXGBE_NO_LRO */
|
||||
#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE (u32)(1 << 3)
|
||||
|
||||
/* default to trying for four seconds */
|
||||
#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
|
||||
|
||||
/* OS defined structs */
|
||||
struct net_device *netdev;
|
||||
struct pci_dev *pdev;
|
||||
struct net_device_stats net_stats;
|
||||
#ifndef IXGBE_NO_LRO
|
||||
struct ixgbe_lro_stats lro_stats;
|
||||
#endif
|
||||
|
||||
#ifdef ETHTOOL_TEST
|
||||
u32 test_icr;
|
||||
struct ixgbe_ring test_tx_ring;
|
||||
struct ixgbe_ring test_rx_ring;
|
||||
#endif
|
||||
|
||||
/* structs defined in ixgbe_hw.h */
|
||||
struct ixgbe_hw hw;
|
||||
u16 msg_enable;
|
||||
struct ixgbe_hw_stats stats;
|
||||
#ifndef IXGBE_NO_LLI
|
||||
u32 lli_port;
|
||||
u32 lli_size;
|
||||
u64 lli_int;
|
||||
u32 lli_etype;
|
||||
u32 lli_vlan_pri;
|
||||
#endif /* IXGBE_NO_LLI */
|
||||
/* Interrupt Throttle Rate */
|
||||
u32 eitr_param;
|
||||
|
||||
unsigned long state;
|
||||
u32 *config_space;
|
||||
u64 tx_busy;
|
||||
unsigned int tx_ring_count;
|
||||
unsigned int rx_ring_count;
|
||||
|
||||
u32 link_speed;
|
||||
bool link_up;
|
||||
unsigned long link_check_timeout;
|
||||
|
||||
struct work_struct watchdog_task;
|
||||
struct work_struct sfp_task;
|
||||
struct timer_list sfp_timer;
|
||||
struct work_struct multispeed_fiber_task;
|
||||
struct work_struct sfp_config_module_task;
|
||||
u64 flm;
|
||||
u32 fdir_pballoc;
|
||||
u32 atr_sample_rate;
|
||||
spinlock_t fdir_perfect_lock;
|
||||
struct work_struct fdir_reinit_task;
|
||||
u64 rsc_count;
|
||||
u32 wol;
|
||||
u16 eeprom_version;
|
||||
bool netdev_registered;
|
||||
char lsc_int_name[IFNAMSIZ + 9];
|
||||
#ifdef IXGBE_TCP_TIMER
|
||||
char tcp_timer_name[IFNAMSIZ + 9];
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
 * Adapter state identifiers, numbered consecutively from 0.
 * NOTE(review): presumably used as bit numbers within adapter->state --
 * confirm against the users in ixgbe_main.c.
 */
enum ixbge_state_t {
	__IXGBE_TESTING              = 0,
	__IXGBE_RESETTING            = 1,
	__IXGBE_DOWN                 = 2,
	__IXGBE_FDIR_INIT_DONE       = 3,
	__IXGBE_SFP_MODULE_NOT_FOUND = 4
};
|
||||
|
||||
#ifdef CONFIG_DCB
|
||||
extern struct dcbnl_rtnl_ops dcbnl_ops;
|
||||
extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg,
|
||||
struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max);
|
||||
#endif
|
||||
|
||||
/* needed by ixgbe_main.c */
|
||||
extern int ixgbe_validate_mac_addr(u8 *mc_addr);
|
||||
extern void ixgbe_check_options(struct ixgbe_adapter *adapter);
|
||||
extern void ixgbe_assign_netdev_ops(struct net_device *netdev);
|
||||
|
||||
/* needed by ixgbe_ethtool.c */
|
||||
extern char ixgbe_driver_name[];
|
||||
extern const char ixgbe_driver_version[];
|
||||
|
||||
extern int ixgbe_up(struct ixgbe_adapter *adapter);
|
||||
extern void ixgbe_down(struct ixgbe_adapter *adapter);
|
||||
extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
|
||||
extern void ixgbe_reset(struct ixgbe_adapter *adapter);
|
||||
extern void ixgbe_set_ethtool_ops(struct net_device *netdev);
|
||||
extern int ixgbe_setup_rx_resources(struct ixgbe_adapter *,struct ixgbe_ring *);
|
||||
extern int ixgbe_setup_tx_resources(struct ixgbe_adapter *,struct ixgbe_ring *);
|
||||
extern void ixgbe_free_rx_resources(struct ixgbe_adapter *,struct ixgbe_ring *);
|
||||
extern void ixgbe_free_tx_resources(struct ixgbe_adapter *,struct ixgbe_ring *);
|
||||
extern void ixgbe_update_stats(struct ixgbe_adapter *adapter);
|
||||
extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
|
||||
extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
|
||||
extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev);
|
||||
|
||||
|
||||
void ixgbe_set_rx_mode(struct net_device *netdev);
|
||||
|
||||
#ifdef ETHTOOL_OPS_COMPAT
|
||||
extern int ethtool_ioctl(struct ifreq *ifr);
|
||||
|
||||
#endif
|
||||
extern int ixgbe_dcb_netlink_register(void);
|
||||
extern int ixgbe_dcb_netlink_unregister(void);
|
||||
|
||||
|
||||
#endif /* _IXGBE_H_ */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,959 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "ixgbe_api.h"
|
||||
#include "ixgbe_common.h"
|
||||
|
||||
extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw);
|
||||
extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw);
|
||||
|
||||
/**
|
||||
* ixgbe_init_shared_code - Initialize the shared code
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* This will assign function pointers and assign the MAC type and PHY code.
|
||||
* Does not touch the hardware. This function must be called prior to any
|
||||
* other function in the shared code. The ixgbe_hw structure should be
|
||||
* memset to 0 prior to calling this function. The following fields in
|
||||
* hw structure should be filled in prior to calling this function:
|
||||
* hw_addr, back, device_id, vendor_id, subsystem_device_id,
|
||||
* subsystem_vendor_id, and revision_id
|
||||
**/
|
||||
s32 ixgbe_init_shared_code(struct ixgbe_hw *hw)
|
||||
{
|
||||
s32 status;
|
||||
|
||||
/*
|
||||
* Set the mac type
|
||||
*/
|
||||
ixgbe_set_mac_type(hw);
|
||||
|
||||
switch (hw->mac.type) {
|
||||
case ixgbe_mac_82598EB:
|
||||
status = ixgbe_init_ops_82598(hw);
|
||||
break;
|
||||
case ixgbe_mac_82599EB:
|
||||
status = ixgbe_init_ops_82599(hw);
|
||||
break;
|
||||
default:
|
||||
status = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
|
||||
break;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_set_mac_type - Sets MAC type
|
||||
* @hw: pointer to the HW structure
|
||||
*
|
||||
* This function sets the mac type of the adapter based on the
|
||||
* vendor ID and device ID stored in the hw structure.
|
||||
**/
|
||||
s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
|
||||
{
|
||||
s32 ret_val = 0;
|
||||
|
||||
if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) {
|
||||
switch (hw->device_id) {
|
||||
case IXGBE_DEV_ID_82598:
|
||||
case IXGBE_DEV_ID_82598_BX:
|
||||
case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
|
||||
case IXGBE_DEV_ID_82598AF_DUAL_PORT:
|
||||
case IXGBE_DEV_ID_82598AT:
|
||||
case IXGBE_DEV_ID_82598AT2:
|
||||
case IXGBE_DEV_ID_82598EB_CX4:
|
||||
case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
|
||||
case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
|
||||
case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
|
||||
case IXGBE_DEV_ID_82598EB_XF_LR:
|
||||
case IXGBE_DEV_ID_82598EB_SFP_LOM:
|
||||
hw->mac.type = ixgbe_mac_82598EB;
|
||||
break;
|
||||
case IXGBE_DEV_ID_82599_KX4:
|
||||
case IXGBE_DEV_ID_82599_XAUI_LOM:
|
||||
case IXGBE_DEV_ID_82599_SFP:
|
||||
case IXGBE_DEV_ID_82599_T3_LOM:
|
||||
hw->mac.type = ixgbe_mac_82599EB;
|
||||
break;
|
||||
default:
|
||||
ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n",
|
||||
hw->mac.type, ret_val);
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_init_hw - Initialize the hardware
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Initialize the hardware by resetting and then starting the hardware
|
||||
**/
|
||||
s32 ixgbe_init_hw(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_reset_hw - Performs a hardware reset
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Resets the hardware by resetting the transmit and receive units, masks and
|
||||
* clears all interrupts, performs a PHY reset, and performs a MAC reset
|
||||
**/
|
||||
s32 ixgbe_reset_hw(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_start_hw - Prepares hardware for Rx/Tx
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Starts the hardware by filling the bus info structure and media type,
|
||||
* clears all on chip counters, initializes receive address registers,
|
||||
* multicast table, VLAN filter table, calls routine to setup link and
|
||||
* flow control settings, and leaves transmit and receive units disabled
|
||||
* and uninitialized.
|
||||
**/
|
||||
s32 ixgbe_start_hw(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_clear_hw_cntrs - Clear hardware counters
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Clears all hardware statistics counters by reading them from the hardware
|
||||
* Statistics counters are clear on read.
|
||||
**/
|
||||
s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_media_type - Get media type
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Returns the media type (fiber, copper, backplane)
|
||||
**/
|
||||
enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw),
|
||||
ixgbe_media_type_unknown);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_mac_addr - Get MAC address
|
||||
* @hw: pointer to hardware structure
|
||||
* @mac_addr: Adapter MAC address
|
||||
*
|
||||
* Reads the adapter's MAC address from the first Receive Address Register
|
||||
* (RAR0) A reset of the adapter must have been performed prior to calling
|
||||
* this function in order for the MAC address to have been loaded from the
|
||||
* EEPROM into RAR0
|
||||
**/
|
||||
s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr,
|
||||
(hw, mac_addr), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_san_mac_addr - Get SAN MAC address
|
||||
* @hw: pointer to hardware structure
|
||||
* @san_mac_addr: SAN MAC address
|
||||
*
|
||||
* Reads the SAN MAC address from the EEPROM, if it's available. This is
|
||||
* per-port, so set_lan_id() must be called before reading the addresses.
|
||||
**/
|
||||
s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr,
|
||||
(hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_set_san_mac_addr - Write a SAN MAC address
|
||||
* @hw: pointer to hardware structure
|
||||
* @san_mac_addr: SAN MAC address
|
||||
*
|
||||
* Writes A SAN MAC address to the EEPROM.
|
||||
**/
|
||||
s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr,
|
||||
(hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_device_caps - Get additional device capabilities
|
||||
* @hw: pointer to hardware structure
|
||||
* @device_caps: the EEPROM word for device capabilities
|
||||
*
|
||||
* Reads the extra device capabilities from the EEPROM
|
||||
**/
|
||||
s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_device_caps,
|
||||
(hw, device_caps), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_bus_info - Set PCI bus info
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
|
||||
**/
|
||||
s32 ixgbe_get_bus_info(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_num_of_tx_queues - Get Tx queues
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Returns the number of transmit queues for the given adapter.
|
||||
**/
|
||||
u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw)
|
||||
{
|
||||
return hw->mac.max_tx_queues;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_num_of_rx_queues - Get Rx queues
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Returns the number of receive queues for the given adapter.
|
||||
**/
|
||||
u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw)
|
||||
{
|
||||
return hw->mac.max_rx_queues;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_stop_adapter - Disable Rx/Tx units
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
|
||||
* disables transmit and receive units. The adapter_stopped flag is used by
|
||||
* the shared code and drivers to determine if the adapter is in a stopped
|
||||
* state and should not touch the hardware.
|
||||
**/
|
||||
s32 ixgbe_stop_adapter(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_read_pba_num - Reads part number from EEPROM
|
||||
* @hw: pointer to hardware structure
|
||||
* @pba_num: stores the part number from the EEPROM
|
||||
*
|
||||
* Reads the part number from the EEPROM.
|
||||
**/
|
||||
s32 ixgbe_read_pba_num(struct ixgbe_hw *hw, u32 *pba_num)
|
||||
{
|
||||
return ixgbe_read_pba_num_generic(hw, pba_num);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_identify_phy - Get PHY type
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Determines the physical layer module found on the current adapter.
|
||||
**/
|
||||
s32 ixgbe_identify_phy(struct ixgbe_hw *hw)
|
||||
{
|
||||
s32 status = 0;
|
||||
|
||||
if (hw->phy.type == ixgbe_phy_unknown) {
|
||||
status = ixgbe_call_func(hw,
|
||||
hw->phy.ops.identify,
|
||||
(hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_reset_phy - Perform a PHY reset
|
||||
* @hw: pointer to hardware structure
|
||||
**/
|
||||
s32 ixgbe_reset_phy(struct ixgbe_hw *hw)
|
||||
{
|
||||
s32 status = 0;
|
||||
|
||||
if (hw->phy.type == ixgbe_phy_unknown) {
|
||||
if (ixgbe_identify_phy(hw) != 0)
|
||||
status = IXGBE_ERR_PHY;
|
||||
}
|
||||
|
||||
if (status == 0) {
|
||||
status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_phy_firmware_version -
|
||||
* @hw: pointer to hardware structure
|
||||
* @firmware_version: pointer to firmware version
|
||||
**/
|
||||
s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version)
|
||||
{
|
||||
s32 status = 0;
|
||||
|
||||
status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version,
|
||||
(hw, firmware_version),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_read_phy_reg - Read PHY register
|
||||
* @hw: pointer to hardware structure
|
||||
* @reg_addr: 32 bit address of PHY register to read
|
||||
* @phy_data: Pointer to read data from PHY register
|
||||
*
|
||||
* Reads a value from a specified PHY register
|
||||
**/
|
||||
s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
|
||||
u16 *phy_data)
|
||||
{
|
||||
if (hw->phy.id == 0)
|
||||
ixgbe_identify_phy(hw);
|
||||
|
||||
return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr,
|
||||
device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_write_phy_reg - Write PHY register
|
||||
* @hw: pointer to hardware structure
|
||||
* @reg_addr: 32 bit PHY register to write
|
||||
* @phy_data: Data to write to the PHY register
|
||||
*
|
||||
* Writes a value to specified PHY register
|
||||
**/
|
||||
s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
|
||||
u16 phy_data)
|
||||
{
|
||||
if (hw->phy.id == 0)
|
||||
ixgbe_identify_phy(hw);
|
||||
|
||||
return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr,
|
||||
device_type, phy_data), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_setup_phy_link - Restart PHY autoneg
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Restart autonegotiation and PHY and waits for completion.
|
||||
**/
|
||||
s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_check_phy_link - Determine link and speed status
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Reads a PHY register to determine if link is up and the current speed for
|
||||
* the PHY.
|
||||
**/
|
||||
s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
|
||||
bool *link_up)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed,
|
||||
link_up), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_setup_phy_link_speed - Set auto advertise
|
||||
* @hw: pointer to hardware structure
|
||||
* @speed: new link speed
|
||||
* @autoneg: true if autonegotiation enabled
|
||||
*
|
||||
* Sets the auto advertised capabilities
|
||||
**/
|
||||
s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
|
||||
bool autoneg,
|
||||
bool autoneg_wait_to_complete)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed,
|
||||
autoneg, autoneg_wait_to_complete),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_setup_link - Configure link settings
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Configures link settings based on values in the ixgbe_hw struct.
|
||||
* Restarts the link. Performs autonegotiation if needed.
|
||||
**/
|
||||
s32 ixgbe_setup_link(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_check_link - Get link and speed status
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Reads the links register to determine if link is up and the current speed
|
||||
**/
|
||||
s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
|
||||
bool *link_up, bool link_up_wait_to_complete)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed,
|
||||
link_up, link_up_wait_to_complete),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_setup_link_speed - Set link speed
|
||||
* @hw: pointer to hardware structure
|
||||
* @speed: new link speed
|
||||
* @autoneg: true if autonegotiation enabled
|
||||
*
|
||||
* Set the link speed and restarts the link.
|
||||
**/
|
||||
s32 ixgbe_setup_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
|
||||
bool autoneg,
|
||||
bool autoneg_wait_to_complete)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.setup_link_speed, (hw, speed,
|
||||
autoneg, autoneg_wait_to_complete),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_link_capabilities - Returns link capabilities
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Determines the link capabilities of the current configuration.
|
||||
**/
|
||||
s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
|
||||
bool *autoneg)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw,
|
||||
speed, autoneg), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_led_on - Turn on LEDs
|
||||
* @hw: pointer to hardware structure
|
||||
* @index: led number to turn on
|
||||
*
|
||||
* Turns on the software controllable LEDs.
|
||||
**/
|
||||
s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_led_off - Turn off LEDs
|
||||
* @hw: pointer to hardware structure
|
||||
* @index: led number to turn off
|
||||
*
|
||||
* Turns off the software controllable LEDs.
|
||||
**/
|
||||
s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_blink_led_start - Blink LEDs
|
||||
* @hw: pointer to hardware structure
|
||||
* @index: led number to blink
|
||||
*
|
||||
* Blink LED based on index.
|
||||
**/
|
||||
s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_blink_led_stop - Stop blinking LEDs
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Stop blinking LED based on index.
|
||||
**/
|
||||
s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_init_eeprom_params - Initialize EEPROM parameters
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Initializes the EEPROM parameters ixgbe_eeprom_info within the
|
||||
* ixgbe_hw struct in order to set up EEPROM access.
|
||||
**/
|
||||
s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* ixgbe_write_eeprom - Write word to EEPROM
|
||||
* @hw: pointer to hardware structure
|
||||
* @offset: offset within the EEPROM to be written to
|
||||
* @data: 16 bit word to be written to the EEPROM
|
||||
*
|
||||
* Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not
|
||||
* called after this function, the EEPROM will most likely contain an
|
||||
* invalid checksum.
|
||||
**/
|
||||
s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_read_eeprom - Read word from EEPROM
|
||||
* @hw: pointer to hardware structure
|
||||
* @offset: offset within the EEPROM to be read
|
||||
* @data: read 16 bit value from EEPROM
|
||||
*
|
||||
* Reads 16 bit value from EEPROM
|
||||
**/
|
||||
s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_validate_eeprom_checksum - Validate EEPROM checksum
|
||||
* @hw: pointer to hardware structure
|
||||
* @checksum_val: calculated checksum
|
||||
*
|
||||
* Performs checksum calculation and validates the EEPROM checksum
|
||||
**/
|
||||
s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum,
|
||||
(hw, checksum_val), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_eeprom_update_checksum - Updates the EEPROM checksum
|
||||
* @hw: pointer to hardware structure
|
||||
**/
|
||||
s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_insert_mac_addr - Find a RAR for this mac address
|
||||
* @hw: pointer to hardware structure
|
||||
* @addr: Address to put into receive address register
|
||||
* @vmdq: VMDq pool to assign
|
||||
*
|
||||
* Puts an ethernet address into a receive address register, or
|
||||
* finds the rar that it is aleady in; adds to the pool list
|
||||
**/
|
||||
s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr,
|
||||
(hw, addr, vmdq),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_set_rar - Set Rx address register
|
||||
* @hw: pointer to hardware structure
|
||||
* @index: Receive address register to write
|
||||
* @addr: Address to put into receive address register
|
||||
* @vmdq: VMDq "set"
|
||||
* @enable_addr: set flag that address is active
|
||||
*
|
||||
* Puts an ethernet address into a receive address register.
|
||||
**/
|
||||
s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
|
||||
u32 enable_addr)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq,
|
||||
enable_addr), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_clear_rar - Clear Rx address register
|
||||
* @hw: pointer to hardware structure
|
||||
* @index: Receive address register to write
|
||||
*
|
||||
* Puts an ethernet address into a receive address register.
|
||||
**/
|
||||
s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_set_vmdq - Associate a VMDq index with a receive address
|
||||
* @hw: pointer to hardware structure
|
||||
* @rar: receive address register index to associate with VMDq index
|
||||
* @vmdq: VMDq set or pool index
|
||||
**/
|
||||
s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address
|
||||
* @hw: pointer to hardware structure
|
||||
* @rar: receive address register index to disassociate with VMDq index
|
||||
* @vmdq: VMDq set or pool index
|
||||
**/
|
||||
s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_init_rx_addrs - Initializes receive address filters.
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Places the MAC address in receive address register 0 and clears the rest
|
||||
* of the receive address registers. Clears the multicast table. Assumes
|
||||
* the receiver is in reset when the routine is called.
|
||||
**/
|
||||
s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_num_rx_addrs - Returns the number of RAR entries.
|
||||
* @hw: pointer to hardware structure
|
||||
**/
|
||||
u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw)
|
||||
{
|
||||
return hw->mac.num_rar_entries;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses
|
||||
* @hw: pointer to hardware structure
|
||||
* @addr_list: the list of new multicast addresses
|
||||
* @addr_count: number of addresses
|
||||
* @func: iterator function to walk the multicast address list
|
||||
*
|
||||
* The given list replaces any existing list. Clears the secondary addrs from
|
||||
* receive address registers. Uses unused receive address registers for the
|
||||
* first secondary addresses, and falls back to promiscuous mode as needed.
|
||||
**/
|
||||
s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
|
||||
u32 addr_count, ixgbe_mc_addr_itr func)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw,
|
||||
addr_list, addr_count, func),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses
|
||||
* @hw: pointer to hardware structure
|
||||
* @mc_addr_list: the list of new multicast addresses
|
||||
* @mc_addr_count: number of addresses
|
||||
* @func: iterator function to walk the multicast address list
|
||||
*
|
||||
* The given list replaces any existing list. Clears the MC addrs from receive
|
||||
* address registers and the multicast table. Uses unused receive address
|
||||
* registers for the first multicast addresses, and hashes the rest into the
|
||||
* multicast table.
|
||||
**/
|
||||
s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
|
||||
u32 mc_addr_count, ixgbe_mc_addr_itr func)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw,
|
||||
mc_addr_list, mc_addr_count, func),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_enable_mc - Enable multicast address in RAR
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Enables multicast address in RAR and the use of the multicast hash table.
|
||||
**/
|
||||
s32 ixgbe_enable_mc(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_disable_mc - Disable multicast address in RAR
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Disables multicast address in RAR and the use of the multicast hash table.
|
||||
**/
|
||||
s32 ixgbe_disable_mc(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_clear_vfta - Clear VLAN filter table
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Clears the VLAN filer table, and the VMDq index associated with the filter
|
||||
**/
|
||||
s32 ixgbe_clear_vfta(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_set_vfta - Set VLAN filter table
|
||||
* @hw: pointer to hardware structure
|
||||
* @vlan: VLAN id to write to VLAN filter
|
||||
* @vind: VMDq output index that maps queue to VLAN id in VFTA
|
||||
* @vlan_on: boolean flag to turn on/off VLAN in VFTA
|
||||
*
|
||||
* Turn on/off specified VLAN in the VLAN filter table.
|
||||
**/
|
||||
s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind,
|
||||
vlan_on), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_fc_enable - Enable flow control
|
||||
* @hw: pointer to hardware structure
|
||||
* @packetbuf_num: packet buffer number (0-7)
|
||||
*
|
||||
* Configures the flow control settings based on SW configuration.
|
||||
**/
|
||||
s32 ixgbe_fc_enable(struct ixgbe_hw *hw, s32 packetbuf_num)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw, packetbuf_num),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_read_analog_reg8 - Reads 8 bit analog register
|
||||
* @hw: pointer to hardware structure
|
||||
* @reg: analog register to read
|
||||
* @val: read value
|
||||
*
|
||||
* Performs write operation to analog register specified.
|
||||
**/
|
||||
s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg,
|
||||
val), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_write_analog_reg8 - Writes 8 bit analog register
|
||||
* @hw: pointer to hardware structure
|
||||
* @reg: analog register to write
|
||||
* @val: value to write
|
||||
*
|
||||
* Performs write operation to Atlas analog register specified.
|
||||
**/
|
||||
s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg,
|
||||
val), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_init_uta_tables - Initializes Unicast Table Arrays.
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Initializes the Unicast Table Arrays to zero on device load. This
|
||||
* is part of the Rx init addr execution path.
|
||||
**/
|
||||
s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address
|
||||
* @hw: pointer to hardware structure
|
||||
* @byte_offset: byte offset to read
|
||||
* @data: value read
|
||||
*
|
||||
* Performs byte read operation to SFP module's EEPROM over I2C interface.
|
||||
**/
|
||||
s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
|
||||
u8 *data)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset,
|
||||
dev_addr, data), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_write_i2c_byte - Writes 8 bit word over I2C
|
||||
* @hw: pointer to hardware structure
|
||||
* @byte_offset: byte offset to write
|
||||
* @data: value to write
|
||||
*
|
||||
* Performs byte write operation to SFP module's EEPROM over I2C interface
|
||||
* at a specified device address.
|
||||
**/
|
||||
s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
|
||||
u8 data)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset,
|
||||
dev_addr, data), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface
|
||||
* @hw: pointer to hardware structure
|
||||
* @byte_offset: EEPROM byte offset to write
|
||||
* @eeprom_data: value to write
|
||||
*
|
||||
* Performs byte write operation to SFP module's EEPROM over I2C interface.
|
||||
**/
|
||||
s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw,
|
||||
u8 byte_offset, u8 eeprom_data)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom,
|
||||
(hw, byte_offset, eeprom_data),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface
|
||||
* @hw: pointer to hardware structure
|
||||
* @byte_offset: EEPROM byte offset to read
|
||||
* @eeprom_data: value read
|
||||
*
|
||||
* Performs byte read operation to SFP module's EEPROM over I2C interface.
|
||||
**/
|
||||
s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom,
|
||||
(hw, byte_offset, eeprom_data),
|
||||
IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_get_supported_physical_layer - Returns physical layer type
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Determines physical layer capabilities of the current configuration.
|
||||
**/
|
||||
u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer,
|
||||
(hw), IXGBE_PHYSICAL_LAYER_UNKNOWN);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_enable_rx_dma - Enables Rx DMA unit, dependant on device specifics
|
||||
* @hw: pointer to hardware structure
|
||||
* @regval: bitfield to write to the Rx DMA register
|
||||
*
|
||||
* Enables the Rx DMA unit of the device.
|
||||
**/
|
||||
s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma,
|
||||
(hw, regval), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore
|
||||
* @hw: pointer to hardware structure
|
||||
* @mask: Mask to specify which semaphore to acquire
|
||||
*
|
||||
* Acquires the SWFW semaphore through SW_FW_SYNC register for the specified
|
||||
* function (CSR, PHY0, PHY1, EEPROM, Flash)
|
||||
**/
|
||||
s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
|
||||
{
|
||||
return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync,
|
||||
(hw, mask), IXGBE_NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_release_swfw_semaphore - Release SWFW semaphore
|
||||
* @hw: pointer to hardware structure
|
||||
* @mask: Mask to specify which semaphore to release
|
||||
*
|
||||
* Releases the SWFW semaphore through SW_FW_SYNC register for the specified
|
||||
* function (CSR, PHY0, PHY1, EEPROM, Flash)
|
||||
**/
|
||||
void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask)
|
||||
{
|
||||
if (hw->mac.ops.release_swfw_sync)
|
||||
hw->mac.ops.release_swfw_sync(hw, mask);
|
||||
}
|
||||
|
|
@ -0,0 +1,163 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _IXGBE_API_H_
#define _IXGBE_API_H_

#include "ixgbe_type.h"

/* Shared-code entry point */
s32 ixgbe_init_shared_code(struct ixgbe_hw *hw);

/* MAC / adapter level operations */
s32 ixgbe_set_mac_type(struct ixgbe_hw *hw);
s32 ixgbe_init_hw(struct ixgbe_hw *hw);
s32 ixgbe_reset_hw(struct ixgbe_hw *hw);
s32 ixgbe_start_hw(struct ixgbe_hw *hw);
s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw);
enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw);
s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr);
s32 ixgbe_get_bus_info(struct ixgbe_hw *hw);
u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw);
u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw);
s32 ixgbe_stop_adapter(struct ixgbe_hw *hw);
s32 ixgbe_read_pba_num(struct ixgbe_hw *hw, u32 *pba_num);

/* PHY identification and register access */
s32 ixgbe_identify_phy(struct ixgbe_hw *hw);
s32 ixgbe_reset_phy(struct ixgbe_hw *hw);
s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
                       u16 *phy_data);
s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
                        u16 phy_data);

/* Link and LED control */
s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw);
s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
                         bool *link_up);
s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
                               bool autoneg, bool autoneg_wait_to_complete);
s32 ixgbe_setup_link(struct ixgbe_hw *hw);
s32 ixgbe_setup_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed,
                           bool autoneg, bool autoneg_wait_to_complete);
s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
                     bool *link_up, bool link_up_wait_to_complete);
s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
                                bool *autoneg);
s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index);

/* EEPROM access */
s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw);
s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data);
s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data);
s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val);
s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw);

/* Receive address, multicast and VLAN filtering */
s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
                  u32 enable_addr);
s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw);
u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw);
s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list,
                              u32 addr_count, ixgbe_mc_addr_itr func);
s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list,
                              u32 mc_addr_count, ixgbe_mc_addr_itr func);
void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq);
s32 ixgbe_enable_mc(struct ixgbe_hw *hw);
s32 ixgbe_disable_mc(struct ixgbe_hw *hw);
s32 ixgbe_clear_vfta(struct ixgbe_hw *hw);
s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on);

/* Flow control */
s32 ixgbe_fc_enable(struct ixgbe_hw *hw, s32 packetbuf_num);

/* Miscellaneous MAC/PHY helpers */
void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr);
s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw,
                                   u16 *firmware_version);
s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val);
s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val);
s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw);
s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data);
u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw);
s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval);

/* 82599 flow director (FDIR) tables and filters */
s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 pballoc);
s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 pballoc);
s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
                                          struct ixgbe_atr_input *input,
                                          u8 queue);
s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw,
                                        struct ixgbe_atr_input *input,
                                        u16 soft_id, u8 queue);

/* 82599 ATR input: hash and field setters */
u16 ixgbe_atr_compute_hash_82599(struct ixgbe_atr_input *input, u32 key);
s32 ixgbe_atr_set_vlan_id_82599(struct ixgbe_atr_input *input, u16 vlan_id);
s32 ixgbe_atr_set_src_ipv4_82599(struct ixgbe_atr_input *input, u32 src_addr);
s32 ixgbe_atr_set_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 dst_addr);
s32 ixgbe_atr_set_src_ipv6_82599(struct ixgbe_atr_input *input, u32 src_addr_1,
                                 u32 src_addr_2, u32 src_addr_3,
                                 u32 src_addr_4);
s32 ixgbe_atr_set_dst_ipv6_82599(struct ixgbe_atr_input *input, u32 dst_addr_1,
                                 u32 dst_addr_2, u32 dst_addr_3,
                                 u32 dst_addr_4);
s32 ixgbe_atr_set_src_port_82599(struct ixgbe_atr_input *input, u16 src_port);
s32 ixgbe_atr_set_dst_port_82599(struct ixgbe_atr_input *input, u16 dst_port);
s32 ixgbe_atr_set_flex_byte_82599(struct ixgbe_atr_input *input, u16 flex_byte);
s32 ixgbe_atr_set_vm_pool_82599(struct ixgbe_atr_input *input, u8 vm_pool);
s32 ixgbe_atr_set_l4type_82599(struct ixgbe_atr_input *input, u8 l4type);

/* 82599 ATR input: field getters */
s32 ixgbe_atr_get_vlan_id_82599(struct ixgbe_atr_input *input, u16 *vlan_id);
s32 ixgbe_atr_get_src_ipv4_82599(struct ixgbe_atr_input *input, u32 *src_addr);
s32 ixgbe_atr_get_dst_ipv4_82599(struct ixgbe_atr_input *input, u32 *dst_addr);
s32 ixgbe_atr_get_src_ipv6_82599(struct ixgbe_atr_input *input, u32 *src_addr_1,
                                 u32 *src_addr_2, u32 *src_addr_3,
                                 u32 *src_addr_4);
s32 ixgbe_atr_get_dst_ipv6_82599(struct ixgbe_atr_input *input, u32 *dst_addr_1,
                                 u32 *dst_addr_2, u32 *dst_addr_3,
                                 u32 *dst_addr_4);
s32 ixgbe_atr_get_src_port_82599(struct ixgbe_atr_input *input, u16 *src_port);
s32 ixgbe_atr_get_dst_port_82599(struct ixgbe_atr_input *input, u16 *dst_port);
s32 ixgbe_atr_get_flex_byte_82599(struct ixgbe_atr_input *input,
                                  u16 *flex_byte);
s32 ixgbe_atr_get_vm_pool_82599(struct ixgbe_atr_input *input, u8 *vm_pool);
s32 ixgbe_atr_get_l4type_82599(struct ixgbe_atr_input *input, u8 *l4type);

/* I2C access, SAN MAC address, device caps and SW/FW semaphore */
s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
                        u8 *data);
s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr,
                         u8 data);
s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data);
s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr);
s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps);
s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);
void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask);

#endif /* _IXGBE_API_H_ */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,83 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _IXGBE_COMMON_H_
#define _IXGBE_COMMON_H_

#include "ixgbe_type.h"

/* Generic init / start / stop and bus information */
s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw);
s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw);
s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw);
s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
s32 ixgbe_read_pba_num_generic(struct ixgbe_hw *hw, u32 *pba_num);
s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw);
s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);

/* LED on/off */
s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);

/* EEPROM access */
s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
s32 ixgbe_read_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
                                       u16 *data);
s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
                                           u16 *checksum_val);
s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);

/* Receive address and multicast filtering, Rx DMA */
s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
                          u32 enable_addr);
s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list,
                                      u32 mc_addr_count,
                                      ixgbe_mc_addr_itr func);
s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list,
                                      u32 addr_count, ixgbe_mc_addr_itr func);
void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq);
s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);

/* Flow control */
s32 ixgbe_setup_fc(struct ixgbe_hw *hw, s32 packetbuf_num);
s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num);
s32 ixgbe_fc_autoneg(struct ixgbe_hw *hw);

/* MAC address validation, SW/FW semaphore, PCIe master disable */
s32 ixgbe_validate_mac_addr(u8 *mac_addr);
s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask);
void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask);
s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw);

/* Analog register access and LED blinking */
s32 ixgbe_read_analog_reg8_generic(struct ixgbe_hw *hw, u32 reg, u8 *val);
s32 ixgbe_write_analog_reg8_generic(struct ixgbe_hw *hw, u32 reg, u8 val);
s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);

#endif /* _IXGBE_COMMON_H_ */
|
|
@ -0,0 +1,350 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#include "ixgbe_type.h"
|
||||
#include "ixgbe_dcb.h"
|
||||
#include "ixgbe_dcb_82598.h"
|
||||
#include "ixgbe_dcb_82599.h"
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_check_config - Check DCB settings against the DCB rules.
|
||||
* @dcb_config: Pointer to DCB config structure
|
||||
*
|
||||
* This function checks DCB rules for DCB settings.
|
||||
* The following rules are checked:
|
||||
* 1. The sum of bandwidth percentages of all Bandwidth Groups must total 100%.
|
||||
* 2. The sum of bandwidth percentages of all Traffic Classes within a Bandwidth
|
||||
* Group must total 100.
|
||||
* 3. A Traffic Class should not be set to both Link Strict Priority
|
||||
* and Group Strict Priority.
|
||||
* 4. Link strict Bandwidth Groups can only have link strict traffic classes
|
||||
* with zero bandwidth.
|
||||
*/
|
||||
s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
s32 ret_val = 0;
|
||||
u8 i, j, bw = 0, bw_id;
|
||||
u8 bw_sum[2][MAX_BW_GROUP];
|
||||
bool link_strict[2][MAX_BW_GROUP];
|
||||
|
||||
memset(bw_sum, 0, sizeof(bw_sum));
|
||||
memset(link_strict, 0, sizeof(link_strict));
|
||||
|
||||
/* First Tx, then Rx */
|
||||
for (i = 0; i < 2; i++) {
|
||||
/* Check each traffic class for rule violation */
|
||||
for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
|
||||
p = &dcb_config->tc_config[j].path[i];
|
||||
|
||||
bw = p->bwg_percent;
|
||||
bw_id = p->bwg_id;
|
||||
|
||||
if (bw_id >= MAX_BW_GROUP) {
|
||||
ret_val = DCB_ERR_CONFIG;
|
||||
goto err_config;
|
||||
}
|
||||
if (p->prio_type == prio_link) {
|
||||
link_strict[i][bw_id] = true;
|
||||
/* Link strict should have zero bandwidth */
|
||||
if (bw) {
|
||||
ret_val = DCB_ERR_LS_BW_NONZERO;
|
||||
goto err_config;
|
||||
}
|
||||
} else if (!bw) {
|
||||
/*
|
||||
* Traffic classes without link strict
|
||||
* should have non-zero bandwidth.
|
||||
*/
|
||||
ret_val = DCB_ERR_TC_BW_ZERO;
|
||||
goto err_config;
|
||||
}
|
||||
bw_sum[i][bw_id] += bw;
|
||||
}
|
||||
|
||||
bw = 0;
|
||||
|
||||
/* Check each bandwidth group for rule violation */
|
||||
for (j = 0; j < MAX_BW_GROUP; j++) {
|
||||
bw += dcb_config->bw_percentage[i][j];
|
||||
/*
|
||||
* Sum of bandwidth percentages of all traffic classes
|
||||
* within a Bandwidth Group must total 100 except for
|
||||
* link strict group (zero bandwidth).
|
||||
*/
|
||||
if (link_strict[i][j]) {
|
||||
if (bw_sum[i][j]) {
|
||||
/*
|
||||
* Link strict group should have zero
|
||||
* bandwidth.
|
||||
*/
|
||||
ret_val = DCB_ERR_LS_BWG_NONZERO;
|
||||
goto err_config;
|
||||
}
|
||||
} else if (bw_sum[i][j] != BW_PERCENT &&
|
||||
bw_sum[i][j] != 0) {
|
||||
ret_val = DCB_ERR_TC_BW;
|
||||
goto err_config;
|
||||
}
|
||||
}
|
||||
|
||||
if (bw != BW_PERCENT) {
|
||||
ret_val = DCB_ERR_BW_GROUP;
|
||||
goto err_config;
|
||||
}
|
||||
}
|
||||
|
||||
return DCB_SUCCESS;
|
||||
|
||||
err_config:
|
||||
hw_dbg(hw, "DCB error code %d while checking %s settings.\n",
|
||||
ret_val, (j == DCB_TX_CONFIG) ? "Tx" : "Rx");
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits
|
||||
* @ixgbe_dcb_config: Struct containing DCB settings.
|
||||
* @direction: Configuring either Tx or Rx.
|
||||
*
|
||||
* This function calculates the credits allocated to each traffic class.
|
||||
* It should be called only after the rules are checked by
|
||||
* ixgbe_dcb_check_config().
|
||||
*/
|
||||
s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *dcb_config,
|
||||
u8 direction)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
s32 ret_val = 0;
|
||||
/* Initialization values default for Tx settings */
|
||||
u32 credit_refill = 0;
|
||||
u32 credit_max = 0;
|
||||
u16 link_percentage = 0;
|
||||
u8 bw_percent = 0;
|
||||
u8 i;
|
||||
|
||||
if (dcb_config == NULL) {
|
||||
ret_val = DCB_ERR_CONFIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Find out the link percentage for each TC first */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[direction];
|
||||
bw_percent = dcb_config->bw_percentage[direction][p->bwg_id];
|
||||
|
||||
link_percentage = p->bwg_percent;
|
||||
/* Must be careful of integer division for very small nums */
|
||||
link_percentage = (link_percentage * bw_percent) / 100;
|
||||
if (p->bwg_percent > 0 && link_percentage == 0)
|
||||
link_percentage = 1;
|
||||
|
||||
/* Save link_percentage for reference */
|
||||
p->link_percent = (u8)link_percentage;
|
||||
|
||||
/* Calculate credit refill and save it */
|
||||
credit_refill = link_percentage * MINIMUM_CREDIT_REFILL;
|
||||
p->data_credits_refill = (u16)credit_refill;
|
||||
|
||||
/* Calculate maximum credit for the TC */
|
||||
credit_max = (link_percentage * MAX_CREDIT) / 100;
|
||||
|
||||
/*
|
||||
* Adjustment based on rule checking, if the percentage
|
||||
* of a TC is too small, the maximum credit may not be
|
||||
* enough to send out a jumbo frame in data plane arbitration.
|
||||
*/
|
||||
if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_JUMBO))
|
||||
credit_max = MINIMUM_CREDIT_FOR_JUMBO;
|
||||
|
||||
if (direction == DCB_TX_CONFIG) {
|
||||
/*
|
||||
* Adjustment based on rule checking, if the
|
||||
* percentage of a TC is too small, the maximum
|
||||
* credit may not be enough to send out a TSO
|
||||
* packet in descriptor plane arbitration.
|
||||
*/
|
||||
if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_TSO))
|
||||
credit_max = MINIMUM_CREDIT_FOR_TSO;
|
||||
|
||||
dcb_config->tc_config[i].desc_credits_max =
|
||||
(u16)credit_max;
|
||||
}
|
||||
|
||||
p->data_credits_max = (u16)credit_max;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_get_tc_stats - Returns status of each traffic class
|
||||
* @hw: pointer to hardware structure
|
||||
* @stats: pointer to statistics structure
|
||||
* @tc_count: Number of elements in bwg_array.
|
||||
*
|
||||
* This function returns the status data for each of the Traffic Classes in use.
|
||||
*/
|
||||
s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_get_tc_stats_82598(hw, stats, tc_count);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_get_tc_stats_82599(hw, stats, tc_count);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_get_pfc_stats - Returns CBFC status of each traffic class
|
||||
* @hw: pointer to hardware structure
|
||||
* @stats: pointer to statistics structure
|
||||
* @tc_count: Number of elements in bwg_array.
|
||||
*
|
||||
* This function returns the CBFC status data for each of the Traffic Classes.
|
||||
*/
|
||||
s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_get_pfc_stats_82598(hw, stats, tc_count);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_get_pfc_stats_82599(hw, stats, tc_count);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_rx_arbiter - Config Rx arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Rx Data Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_config_rx_arbiter_82599(hw, dcb_config);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tx_desc_arbiter - Config Tx Desc arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Tx Descriptor Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_config_tx_desc_arbiter_82599(hw, dcb_config);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tx_data_arbiter - Config Tx data arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Tx Data Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_config_tx_data_arbiter_82599(hw, dcb_config);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_pfc - Config priority flow control
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Priority Flow Control for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_config_pfc_82598(hw, dcb_config);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_config_pfc_82599(hw, dcb_config);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tc_stats - Config traffic class statistics
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Configure queue statistics registers, all queues belonging to same traffic
|
||||
* class uses a single set of queue statistics counters.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_config_tc_stats_82598(hw);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_config_tc_stats_82599(hw);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_hw_config - Config and enable DCB
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure dcb settings and enable dcb mode.
|
||||
*/
|
||||
s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret = 0;
|
||||
if (hw->mac.type == ixgbe_mac_82598EB)
|
||||
ret = ixgbe_dcb_hw_config_82598(hw, dcb_config);
|
||||
else if (hw->mac.type == ixgbe_mac_82599EB)
|
||||
ret = ixgbe_dcb_hw_config_82599(hw, dcb_config);
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _DCB_CONFIG_H_
|
||||
#define _DCB_CONFIG_H_
|
||||
|
||||
#include "ixgbe_type.h"
|
||||
|
||||
/* DCB data structures */
|
||||
|
||||
/* Array sizes and limits */
#define IXGBE_MAX_PACKET_BUFFERS	8
#define MAX_USER_PRIORITY		8
#define MAX_TRAFFIC_CLASS		8
#define MAX_BW_GROUP			8
#define BW_PERCENT			100	/* a full bandwidth allocation */

/* Direction indices into the per-direction (Tx/Rx) arrays */
#define DCB_TX_CONFIG			0
#define DCB_RX_CONFIG			1

/* DCB error codes */
#define DCB_SUCCESS			0
#define DCB_ERR_CONFIG			-1
#define DCB_ERR_PARAM			-2

/* Errors that apply to both the transmit and the receive settings */
#define DCB_ERR_BW_GROUP		-3	/* bad bandwidth group allocation */
#define DCB_ERR_TC_BW			-4	/* bad traffic class bandwidth allocation */
#define DCB_ERR_LS_GS			-5	/* TC is both link strict and group strict */
#define DCB_ERR_LS_BW_NONZERO		-6	/* link strict TC has non-zero bandwidth */
#define DCB_ERR_LS_BWG_NONZERO		-7	/* link strict BWG has non-zero bandwidth */
#define DCB_ERR_TC_BW_ZERO		-8	/* TC has zero bandwidth */

#define DCB_NOT_IMPLEMENTED		0x7FFFFFFF
|
||||
|
||||
struct dcb_pfc_tc_debug {
|
||||
u8 tc;
|
||||
u8 pause_status;
|
||||
u64 pause_quanta;
|
||||
};
|
||||
|
||||
/* Strict priority setting of a traffic class (exactly one value applies) */
enum strict_prio_type {
	prio_none  = 0,	/* no strict priority */
	prio_group = 1,	/* group strict priority */
	prio_link  = 2	/* link strict priority */
};
|
||||
|
||||
/* DCB capability flags (one bit each) */
#define IXGBE_DCB_PG_SUPPORT		0x00000001
#define IXGBE_DCB_PFC_SUPPORT		0x00000002
#define IXGBE_DCB_BCN_SUPPORT		0x00000004
#define IXGBE_DCB_UP2TC_SUPPORT		0x00000008
#define IXGBE_DCB_GSP_SUPPORT		0x00000010

/* Traffic-class count bitmap value meaning 8 traffic classes */
#define IXGBE_DCB_8_TC_SUPPORT		0x80
|
||||
|
||||
struct dcb_support {
|
||||
/* DCB capabilities */
|
||||
u32 capabilities;
|
||||
|
||||
/* Each bit represents a number of TCs configurable in the hw.
|
||||
* If 8 traffic classes can be configured, the value is 0x80.
|
||||
*/
|
||||
u8 traffic_classes;
|
||||
u8 pfc_traffic_classes;
|
||||
};
|
||||
|
||||
/* Traffic class bandwidth allocation per direction */
|
||||
struct tc_bw_alloc {
|
||||
u8 bwg_id; /* Bandwidth Group (BWG) ID */
|
||||
u8 bwg_percent; /* % of BWG's bandwidth */
|
||||
u8 link_percent; /* % of link bandwidth */
|
||||
u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */
|
||||
u16 data_credits_refill; /* Credit refill amount in 64B granularity */
|
||||
u16 data_credits_max; /* Max credits for a configured packet buffer
|
||||
* in 64B granularity.*/
|
||||
enum strict_prio_type prio_type; /* Link or Group Strict Priority */
|
||||
};
|
||||
|
||||
/* Priority flow control setting of a traffic class */
enum dcb_pfc_type {
	pfc_disabled     = 0,
	pfc_enabled_full = 1,
	pfc_enabled_tx   = 2,
	pfc_enabled_rx   = 3
};
|
||||
|
||||
/* Traffic class configuration */
|
||||
struct tc_configuration {
|
||||
struct tc_bw_alloc path[2]; /* One each for Tx/Rx */
|
||||
enum dcb_pfc_type dcb_pfc; /* Class based flow control setting */
|
||||
|
||||
u16 desc_credits_max; /* For Tx Descriptor arbitration */
|
||||
u8 tc; /* Traffic class (TC) */
|
||||
};
|
||||
|
||||
/* Rx packet buffer allocation (PBA) layouts */
enum dcb_rx_pba_cfg {
	pba_equal = 0,	/* PBA[0-7] each use 64KB FIFO */
	pba_80_48 = 1	/* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */
};
|
||||
|
||||
struct dcb_num_tcs {
|
||||
u8 pg_tcs;
|
||||
u8 pfc_tcs;
|
||||
};
|
||||
|
||||
struct ixgbe_dcb_config {
|
||||
struct tc_configuration tc_config[MAX_TRAFFIC_CLASS];
|
||||
struct dcb_support support;
|
||||
struct dcb_num_tcs num_tcs;
|
||||
u8 bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */
|
||||
bool pfc_mode_enable;
|
||||
bool round_robin_enable;
|
||||
|
||||
enum dcb_rx_pba_cfg rx_pba_cfg;
|
||||
|
||||
u32 dcb_cfg_version; /* Not used...OS-specific? */
|
||||
u32 link_speed; /* For bandwidth allocation validation purpose */
|
||||
};
|
||||
|
||||
/* DCB driver APIs */
|
||||
|
||||
/* DCB rule checking function.*/
|
||||
s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *config);
|
||||
|
||||
/* DCB credits calculation */
|
||||
s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *config,
|
||||
u8 direction);
|
||||
|
||||
/* DCB PFC functions */
|
||||
s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count);
|
||||
|
||||
/* DCB traffic class stats */
|
||||
s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *);
|
||||
s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count);
|
||||
|
||||
/* DCB config arbiters */
|
||||
s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
|
||||
/* DCB hw initialization */
|
||||
s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw, struct ixgbe_dcb_config *config);
|
||||
|
||||
|
||||
/* DCB definitions for credit calculation (credits are in 64B units) */
#define MAX_CREDIT_REFILL		511	/* 0x1FF * 64B = 32704B */
#define MINIMUM_CREDIT_REFILL		5	/* 5*64B = 320B */
#define MINIMUM_CREDIT_FOR_JUMBO	145	/* 145 = UpperBound((9*1024+54)/64B)
						 * for 9KB jumbo frame */
/*
 * MAX TSO packet size supported in DCB mode.  Parenthesized so the macro
 * expands correctly inside larger expressions (e.g. as a divisor).
 */
#define DCB_MAX_TSO_SIZE		(32 * 1024)
#define MINIMUM_CREDIT_FOR_TSO		(DCB_MAX_TSO_SIZE / 64 + 1) /* 513 for 32KB TSO
								     * packet */
#define MAX_CREDIT			4095	/* Maximum credit supported:
						 * 256 * 1024 / 64B - 1 */
|
||||
|
||||
#endif /* _DCB_CONFIG_H_ */
|
|
@ -0,0 +1,408 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#include "ixgbe_type.h"
|
||||
#include "ixgbe_dcb.h"
|
||||
#include "ixgbe_dcb_82598.h"
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_get_tc_stats_82598 - Return status data for each traffic class
|
||||
* @hw: pointer to hardware structure
|
||||
* @stats: pointer to statistics structure
|
||||
* @tc_count: Number of elements in bwg_array.
|
||||
*
|
||||
* This function returns the status data for each of the Traffic Classes in use.
|
||||
*/
|
||||
s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count)
|
||||
{
|
||||
int tc;
|
||||
|
||||
if (tc_count > MAX_TRAFFIC_CLASS)
|
||||
return DCB_ERR_PARAM;
|
||||
/* Statistics pertaining to each traffic class */
|
||||
for (tc = 0; tc < tc_count; tc++) {
|
||||
/* Transmitted Packets */
|
||||
stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc));
|
||||
/* Transmitted Bytes */
|
||||
stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc));
|
||||
/* Received Packets */
|
||||
stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc));
|
||||
/* Received Bytes */
|
||||
stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc));
|
||||
|
||||
#if 0
|
||||
/* Can we get rid of these?? Consequently, getting rid
|
||||
* of the tc_stats structure.
|
||||
*/
|
||||
tc_stats_array[up]->in_overflow_discards = 0;
|
||||
tc_stats_array[up]->out_overflow_discards = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_get_pfc_stats_82598 - Returns CBFC status data
|
||||
* @hw: pointer to hardware structure
|
||||
* @stats: pointer to statistics structure
|
||||
* @tc_count: Number of elements in bwg_array.
|
||||
*
|
||||
* This function returns the CBFC status data for each of the Traffic Classes.
|
||||
*/
|
||||
s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count)
|
||||
{
|
||||
int tc;
|
||||
|
||||
if (tc_count > MAX_TRAFFIC_CLASS)
|
||||
return DCB_ERR_PARAM;
|
||||
for (tc = 0; tc < tc_count; tc++) {
|
||||
/* Priority XOFF Transmitted */
|
||||
stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc));
|
||||
/* Priority XOFF Received */
|
||||
stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(tc));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_packet_buffers_82598 - Configure packet buffers
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure packet buffers for DCB mode.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_packet_buffers_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret_val = 0;
|
||||
u32 value = IXGBE_RXPBSIZE_64KB;
|
||||
u8 i = 0;
|
||||
|
||||
/* Setup Rx packet buffer sizes */
|
||||
switch (dcb_config->rx_pba_cfg) {
|
||||
case pba_80_48:
|
||||
/* Setup the first four at 80KB */
|
||||
value = IXGBE_RXPBSIZE_80KB;
|
||||
for (; i < 4; i++)
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
|
||||
/* Setup the last four at 48KB...don't re-init i */
|
||||
value = IXGBE_RXPBSIZE_48KB;
|
||||
/* Fall Through */
|
||||
case pba_equal:
|
||||
default:
|
||||
for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
|
||||
|
||||
/* Setup Tx packet buffer sizes */
|
||||
for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i),
|
||||
IXGBE_TXPBSIZE_40KB);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Rx Data Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
u32 reg = 0;
|
||||
u32 credit_refill = 0;
|
||||
u32 credit_max = 0;
|
||||
u8 i = 0;
|
||||
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg);
|
||||
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
|
||||
/* Enable Arbiter */
|
||||
reg &= ~IXGBE_RMCS_ARBDIS;
|
||||
/* Enable Receive Recycle within the BWG */
|
||||
reg |= IXGBE_RMCS_RRM;
|
||||
/* Enable Deficit Fixed Priority arbitration*/
|
||||
reg |= IXGBE_RMCS_DFP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
|
||||
|
||||
/* Configure traffic class credits and priority */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG];
|
||||
credit_refill = p->data_credits_refill;
|
||||
credit_max = p->data_credits_max;
|
||||
|
||||
reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT);
|
||||
|
||||
if (p->prio_type == prio_link)
|
||||
reg |= IXGBE_RT2CR_LSP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg);
|
||||
}
|
||||
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
|
||||
reg |= IXGBE_RDRXCTL_RDMTS_1_2;
|
||||
reg |= IXGBE_RDRXCTL_MPBEN;
|
||||
reg |= IXGBE_RDRXCTL_MCEN;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
|
||||
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
|
||||
/* Make sure there is enough descriptors before arbitration */
|
||||
reg &= ~IXGBE_RXCTRL_DMBYPS;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Tx Descriptor Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
u32 reg, max_credits;
|
||||
u8 i;
|
||||
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_DPMCS);
|
||||
|
||||
/* Enable arbiter */
|
||||
reg &= ~IXGBE_DPMCS_ARBDIS;
|
||||
if (!(dcb_config->round_robin_enable)) {
|
||||
/* Enable DFP and Recycle mode */
|
||||
reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM);
|
||||
}
|
||||
reg |= IXGBE_DPMCS_TSOEF;
|
||||
/* Configure Max TSO packet size 34KB including payload and headers */
|
||||
reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT);
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg);
|
||||
|
||||
/* Configure traffic class credits and priority */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
|
||||
max_credits = dcb_config->tc_config[i].desc_credits_max;
|
||||
reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT;
|
||||
reg |= p->data_credits_refill;
|
||||
reg |= (u32)(p->bwg_id) << IXGBE_TDTQ2TCCR_BWG_SHIFT;
|
||||
|
||||
if (p->prio_type == prio_group)
|
||||
reg |= IXGBE_TDTQ2TCCR_GSP;
|
||||
|
||||
if (p->prio_type == prio_link)
|
||||
reg |= IXGBE_TDTQ2TCCR_LSP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Tx Data Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
u32 reg;
|
||||
u8 i;
|
||||
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
|
||||
/* Enable Data Plane Arbiter */
|
||||
reg &= ~IXGBE_PDPMCS_ARBDIS;
|
||||
/* Enable DFP and Transmit Recycle Mode */
|
||||
reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM);
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg);
|
||||
|
||||
/* Configure traffic class credits and priority */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
|
||||
reg = p->data_credits_refill;
|
||||
reg |= (u32)(p->data_credits_max) << IXGBE_TDPT2TCCR_MCL_SHIFT;
|
||||
reg |= (u32)(p->bwg_id) << IXGBE_TDPT2TCCR_BWG_SHIFT;
|
||||
|
||||
if (p->prio_type == prio_group)
|
||||
reg |= IXGBE_TDPT2TCCR_GSP;
|
||||
|
||||
if (p->prio_type == prio_link)
|
||||
reg |= IXGBE_TDPT2TCCR_LSP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg);
|
||||
}
|
||||
|
||||
/* Enable Tx packet buffer division */
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
|
||||
reg |= IXGBE_DTXCTL_ENDBUBD;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_pfc_82598 - Config priority flow control
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Priority Flow Control for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
u32 reg, rx_pba_size;
|
||||
u8 i;
|
||||
|
||||
if (!dcb_config->pfc_mode_enable)
|
||||
goto out;
|
||||
|
||||
/* Enable Transmit Priority Flow Control */
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
|
||||
reg &= ~IXGBE_RMCS_TFCE_802_3X;
|
||||
/* correct the reporting of our flow control status */
|
||||
reg |= IXGBE_RMCS_TFCE_PRIORITY;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
|
||||
|
||||
/* Enable Receive Priority Flow Control */
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
|
||||
reg &= ~IXGBE_FCTRL_RFCE;
|
||||
reg |= IXGBE_FCTRL_RPFCE;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
|
||||
|
||||
/*
|
||||
* Configure flow control thresholds and enable priority flow control
|
||||
* for each traffic class.
|
||||
*/
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
if (dcb_config->rx_pba_cfg == pba_equal) {
|
||||
rx_pba_size = IXGBE_RXPBSIZE_64KB;
|
||||
} else {
|
||||
rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB
|
||||
: IXGBE_RXPBSIZE_48KB;
|
||||
}
|
||||
|
||||
reg = ((rx_pba_size >> 5) & 0xFFF0);
|
||||
if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
|
||||
dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
|
||||
reg |= IXGBE_FCRTL_XONE;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg);
|
||||
|
||||
reg = ((rx_pba_size >> 2) & 0xFFF0);
|
||||
if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx ||
|
||||
dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full)
|
||||
reg |= IXGBE_FCRTH_FCEN;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg);
|
||||
}
|
||||
|
||||
/* Configure pause time */
|
||||
for (i = 0; i < (MAX_TRAFFIC_CLASS >> 1); i++)
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), 0x68006800);
|
||||
|
||||
/* Configure flow control refresh threshold value */
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCRTV, 0x3400);
|
||||
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Configure queue statistics registers, all queues belonging to same traffic
|
||||
* class uses a single set of queue statistics counters.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
|
||||
{
|
||||
u32 reg = 0;
|
||||
u8 i = 0;
|
||||
u8 j = 0;
|
||||
|
||||
/* Receive Queues stats setting - 8 queues per statistics reg */
|
||||
for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) {
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i));
|
||||
reg |= ((0x1010101) * j);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1));
|
||||
reg |= ((0x1010101) * j);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg);
|
||||
}
|
||||
/* Transmit Queues stats setting - 4 queues per statistics reg*/
|
||||
for (i = 0; i < 8; i++) {
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i));
|
||||
reg |= ((0x1010101) * i);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_hw_config_82598 - Config and enable DCB
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure dcb settings and enable dcb mode.
|
||||
*/
|
||||
s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
|
||||
ixgbe_dcb_config_packet_buffers_82598(hw, dcb_config);
|
||||
ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config);
|
||||
ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config);
|
||||
ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config);
|
||||
ixgbe_dcb_config_pfc_82598(hw, dcb_config);
|
||||
ixgbe_dcb_config_tc_stats_82598(hw);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _DCB_82598_CONFIG_H_
|
||||
#define _DCB_82598_CONFIG_H_
|
||||
|
||||
/* DCB register definitions */
|
||||
|
||||
#define IXGBE_DPMCS_MTSOS_SHIFT 16
|
||||
#define IXGBE_DPMCS_TDPAC 0x00000001 /* 0 Round Robin,
|
||||
* 1 DFP - Deficit Fixed Priority */
|
||||
#define IXGBE_DPMCS_TRM 0x00000010 /* Transmit Recycle Mode */
|
||||
#define IXGBE_DPMCS_ARBDIS 0x00000040 /* DCB arbiter disable */
|
||||
#define IXGBE_DPMCS_TSOEF 0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */
|
||||
|
||||
#define IXGBE_RUPPBMR_MQA 0x80000000 /* Enable UP to queue mapping */
|
||||
|
||||
#define IXGBE_RT2CR_MCL_SHIFT 12 /* Offset to Max Credit Limit setting */
|
||||
#define IXGBE_RT2CR_LSP 0x80000000 /* LSP enable bit */
|
||||
|
||||
#define IXGBE_RDRXCTL_MPBEN 0x00000010 /* DMA config for multiple packet
|
||||
* buffers enable */
|
||||
#define IXGBE_RDRXCTL_MCEN 0x00000040 /* DMA config for multiple cores
|
||||
* (RSS) enable */
|
||||
|
||||
#define IXGBE_TDTQ2TCCR_MCL_SHIFT 12
|
||||
#define IXGBE_TDTQ2TCCR_BWG_SHIFT 9
|
||||
#define IXGBE_TDTQ2TCCR_GSP 0x40000000
|
||||
#define IXGBE_TDTQ2TCCR_LSP 0x80000000
|
||||
|
||||
#define IXGBE_TDPT2TCCR_MCL_SHIFT 12
|
||||
#define IXGBE_TDPT2TCCR_BWG_SHIFT 9
|
||||
#define IXGBE_TDPT2TCCR_GSP 0x40000000
|
||||
#define IXGBE_TDPT2TCCR_LSP 0x80000000
|
||||
|
||||
#define IXGBE_PDPMCS_TPPAC 0x00000020 /* 0 Round Robin,
|
||||
* 1 DFP - Deficit Fixed Priority */
|
||||
#define IXGBE_PDPMCS_ARBDIS 0x00000040 /* Arbiter disable */
|
||||
#define IXGBE_PDPMCS_TRM 0x00000100 /* Transmit Recycle Mode enable */
|
||||
|
||||
#define IXGBE_DTXCTL_ENDBUBD 0x00000004 /* Enable DBU buffer division */
|
||||
|
||||
#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */
|
||||
|
||||
/* DCB hardware-specific driver APIs */
|
||||
|
||||
/* DCB PFC functions */
|
||||
s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count);
|
||||
|
||||
/* DCB traffic class stats */
|
||||
s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count);
|
||||
|
||||
/* DCB config arbiters */
|
||||
s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
|
||||
/* DCB hw initialization */
|
||||
s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *config);
|
||||
|
||||
#endif /* _DCB_82598_CONFIG_H_ */
|
|
@ -0,0 +1,501 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
#include "ixgbe_type.h"
|
||||
#include "ixgbe_dcb.h"
|
||||
#include "ixgbe_dcb_82599.h"
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_get_tc_stats_82599 - Returns status for each traffic class
|
||||
* @hw: pointer to hardware structure
|
||||
* @stats: pointer to statistics structure
|
||||
* @tc_count: Number of elements in bwg_array.
|
||||
*
|
||||
* This function returns the status data for each of the Traffic Classes in use.
|
||||
*/
|
||||
s32 ixgbe_dcb_get_tc_stats_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count)
|
||||
{
|
||||
int tc;
|
||||
|
||||
if (tc_count > MAX_TRAFFIC_CLASS)
|
||||
return DCB_ERR_PARAM;
|
||||
/* Statistics pertaining to each traffic class */
|
||||
for (tc = 0; tc < tc_count; tc++) {
|
||||
/* Transmitted Packets */
|
||||
stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc));
|
||||
/* Transmitted Bytes */
|
||||
stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc));
|
||||
/* Received Packets */
|
||||
stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc));
|
||||
/* Received Bytes */
|
||||
stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc));
|
||||
|
||||
#if 0
|
||||
/* Can we get rid of these?? Consequently, getting rid
|
||||
* of the tc_stats structure.
|
||||
*/
|
||||
tc_stats_array[up]->in_overflow_discards = 0;
|
||||
tc_stats_array[up]->out_overflow_discards = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_get_pfc_stats_82599 - Return CBFC status data
|
||||
* @hw: pointer to hardware structure
|
||||
* @stats: pointer to statistics structure
|
||||
* @tc_count: Number of elements in bwg_array.
|
||||
*
|
||||
* This function returns the CBFC status data for each of the Traffic Classes.
|
||||
*/
|
||||
s32 ixgbe_dcb_get_pfc_stats_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count)
|
||||
{
|
||||
int tc;
|
||||
|
||||
if (tc_count > MAX_TRAFFIC_CLASS)
|
||||
return DCB_ERR_PARAM;
|
||||
for (tc = 0; tc < tc_count; tc++) {
|
||||
/* Priority XOFF Transmitted */
|
||||
stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc));
|
||||
/* Priority XOFF Received */
|
||||
stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(tc));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_packet_buffers_82599 - Configure DCB packet buffers
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure packet buffers for DCB mode.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_packet_buffers_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
s32 ret_val = 0;
|
||||
u32 value = IXGBE_RXPBSIZE_64KB;
|
||||
u8 i = 0;
|
||||
|
||||
/* Setup Rx packet buffer sizes */
|
||||
switch (dcb_config->rx_pba_cfg) {
|
||||
case pba_80_48:
|
||||
/* Setup the first four at 80KB */
|
||||
value = IXGBE_RXPBSIZE_80KB;
|
||||
for (; i < 4; i++)
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
|
||||
/* Setup the last four at 48KB...don't re-init i */
|
||||
value = IXGBE_RXPBSIZE_48KB;
|
||||
/* Fall Through */
|
||||
case pba_equal:
|
||||
default:
|
||||
for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value);
|
||||
|
||||
/* Setup Tx packet buffer sizes */
|
||||
for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i),
|
||||
IXGBE_TXPBSIZE_20KB);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i),
|
||||
IXGBE_TXPBTHRESH_DCB);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_rx_arbiter_82599 - Config Rx Data arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Rx Packet Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
u32 reg = 0;
|
||||
u32 credit_refill = 0;
|
||||
u32 credit_max = 0;
|
||||
u8 i = 0;
|
||||
|
||||
/*
|
||||
* Disable the arbiter before changing parameters
|
||||
* (always enable recycle mode; WSP)
|
||||
*/
|
||||
reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
|
||||
|
||||
/* Map all traffic classes to their UP, 1 to 1 */
|
||||
reg = 0;
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++)
|
||||
reg |= (i << (i * IXGBE_RTRUP2TC_UP_SHIFT));
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, reg);
|
||||
|
||||
/* Configure traffic class credits and priority */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG];
|
||||
|
||||
credit_refill = p->data_credits_refill;
|
||||
credit_max = p->data_credits_max;
|
||||
reg = credit_refill | (credit_max << IXGBE_RTRPT4C_MCL_SHIFT);
|
||||
|
||||
reg |= (u32)(p->bwg_id) << IXGBE_RTRPT4C_BWG_SHIFT;
|
||||
|
||||
if (p->prio_type == prio_link)
|
||||
reg |= IXGBE_RTRPT4C_LSP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTRPT4C(i), reg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Configure Rx packet plane (recycle mode; WSP) and
|
||||
* enable arbiter
|
||||
*/
|
||||
reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tx_desc_arbiter_82599 - Config Tx Desc. arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Tx Descriptor Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
u32 reg, max_credits;
|
||||
u8 i;
|
||||
|
||||
/* Clear the per-Tx queue credits; we use per-TC instead */
|
||||
for (i = 0; i < 128; i++) {
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTDT1C, 0);
|
||||
}
|
||||
|
||||
/* Configure traffic class credits and priority */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
|
||||
max_credits = dcb_config->tc_config[i].desc_credits_max;
|
||||
reg = max_credits << IXGBE_RTTDT2C_MCL_SHIFT;
|
||||
reg |= p->data_credits_refill;
|
||||
reg |= (u32)(p->bwg_id) << IXGBE_RTTDT2C_BWG_SHIFT;
|
||||
|
||||
if (p->prio_type == prio_group)
|
||||
reg |= IXGBE_RTTDT2C_GSP;
|
||||
|
||||
if (p->prio_type == prio_link)
|
||||
reg |= IXGBE_RTTDT2C_LSP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTDT2C(i), reg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Configure Tx descriptor plane (recycle mode; WSP) and
|
||||
* enable arbiter
|
||||
*/
|
||||
reg = IXGBE_RTTDCS_TDPAC | IXGBE_RTTDCS_TDRM;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tx_data_arbiter_82599 - Config Tx Data arbiter
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Tx Packet Arbiter and credits for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
struct tc_bw_alloc *p;
|
||||
u32 reg;
|
||||
u8 i;
|
||||
|
||||
/*
|
||||
* Disable the arbiter before changing parameters
|
||||
* (always enable recycle mode; SP; arb delay)
|
||||
*/
|
||||
reg = IXGBE_RTTPCS_TPPAC | IXGBE_RTTPCS_TPRM |
|
||||
(IXGBE_RTTPCS_ARBD_DCB << IXGBE_RTTPCS_ARBD_SHIFT) |
|
||||
IXGBE_RTTPCS_ARBDIS;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTPCS, reg);
|
||||
|
||||
/* Map all traffic classes to their UP, 1 to 1 */
|
||||
reg = 0;
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++)
|
||||
reg |= (i << (i * IXGBE_RTTUP2TC_UP_SHIFT));
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTUP2TC, reg);
|
||||
|
||||
/* Configure traffic class credits and priority */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG];
|
||||
reg = p->data_credits_refill;
|
||||
reg |= (u32)(p->data_credits_max) << IXGBE_RTTPT2C_MCL_SHIFT;
|
||||
reg |= (u32)(p->bwg_id) << IXGBE_RTTPT2C_BWG_SHIFT;
|
||||
|
||||
if (p->prio_type == prio_group)
|
||||
reg |= IXGBE_RTTPT2C_GSP;
|
||||
|
||||
if (p->prio_type == prio_link)
|
||||
reg |= IXGBE_RTTPT2C_LSP;
|
||||
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTPT2C(i), reg);
|
||||
}
|
||||
|
||||
/*
|
||||
* Configure Tx packet plane (recycle mode; SP; arb delay) and
|
||||
* enable arbiter
|
||||
*/
|
||||
reg = IXGBE_RTTPCS_TPPAC | IXGBE_RTTPCS_TPRM |
|
||||
(IXGBE_RTTPCS_ARBD_DCB << IXGBE_RTTPCS_ARBD_SHIFT);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTPCS, reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_pfc_82599 - Configure priority flow control
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure Priority Flow Control (PFC) for each traffic class.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
u32 i, reg, rx_pba_size;
|
||||
|
||||
/* If PFC is disabled globally then fall back to LFC. */
|
||||
if (!dcb_config->pfc_mode_enable) {
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++)
|
||||
hw->mac.ops.fc_enable(hw, i);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Configure PFC Tx thresholds per TC */
|
||||
for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
|
||||
if (dcb_config->rx_pba_cfg == pba_equal)
|
||||
rx_pba_size = IXGBE_RXPBSIZE_64KB;
|
||||
else
|
||||
rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB
|
||||
: IXGBE_RXPBSIZE_48KB;
|
||||
|
||||
reg = ((rx_pba_size >> 5) & 0xFFE0);
|
||||
if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full ||
|
||||
dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx)
|
||||
reg |= IXGBE_FCRTL_XONE;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), reg);
|
||||
|
||||
reg = ((rx_pba_size >> 2) & 0xFFE0);
|
||||
if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full ||
|
||||
dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx)
|
||||
reg |= IXGBE_FCRTH_FCEN;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), reg);
|
||||
}
|
||||
|
||||
/* Configure pause time (2 TCs per register) */
|
||||
reg = hw->fc.pause_time | (hw->fc.pause_time << 16);
|
||||
for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++)
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
|
||||
|
||||
/* Configure flow control refresh threshold value */
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
|
||||
|
||||
/* Enable Transmit PFC */
|
||||
reg = IXGBE_FCCFG_TFCE_PRIORITY;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_FCCFG, reg);
|
||||
|
||||
/*
|
||||
* Enable Receive PFC
|
||||
* We will always honor XOFF frames we receive when
|
||||
* we are in PFC mode.
|
||||
*/
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
|
||||
reg &= ~IXGBE_MFLCN_RFCE;
|
||||
reg |= IXGBE_MFLCN_RPFCE;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_MFLCN, reg);
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_tc_stats_82599 - Config traffic class statistics
|
||||
* @hw: pointer to hardware structure
|
||||
*
|
||||
* Configure queue statistics registers, all queues belonging to same traffic
|
||||
* class uses a single set of queue statistics counters.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw)
|
||||
{
|
||||
u32 reg = 0;
|
||||
u8 i = 0;
|
||||
|
||||
/*
|
||||
* Receive Queues stats setting
|
||||
* 32 RQSMR registers, each configuring 4 queues.
|
||||
* Set all 16 queues of each TC to the same stat
|
||||
* with TC 'n' going to stat 'n'.
|
||||
*/
|
||||
for (i = 0; i < 32; i++) {
|
||||
reg = 0x01010101 * (i / 4);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
|
||||
}
|
||||
/*
|
||||
* Transmit Queues stats setting
|
||||
* 32 TQSM registers, each controlling 4 queues.
|
||||
* Set all queues of each TC to the same stat
|
||||
* with TC 'n' going to stat 'n'.
|
||||
* Tx queues are allocated non-uniformly to TCs:
|
||||
* 32, 32, 16, 16, 8, 8, 8, 8.
|
||||
*/
|
||||
for (i = 0; i < 32; i++) {
|
||||
if (i < 8)
|
||||
reg = 0x00000000;
|
||||
else if (i < 16)
|
||||
reg = 0x01010101;
|
||||
else if (i < 20)
|
||||
reg = 0x02020202;
|
||||
else if (i < 24)
|
||||
reg = 0x03030303;
|
||||
else if (i < 26)
|
||||
reg = 0x04040404;
|
||||
else if (i < 28)
|
||||
reg = 0x05050505;
|
||||
else if (i < 30)
|
||||
reg = 0x06060606;
|
||||
else
|
||||
reg = 0x07070707;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_TQSM(i), reg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_config_82599 - Configure general DCB parameters
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure general DCB parameters.
|
||||
*/
|
||||
s32 ixgbe_dcb_config_82599(struct ixgbe_hw *hw)
|
||||
{
|
||||
u32 reg;
|
||||
u32 q;
|
||||
|
||||
/* Disable the Tx desc arbiter so that MTQC can be changed */
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
|
||||
reg |= IXGBE_RTTDCS_ARBDIS;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
|
||||
|
||||
/* Enable DCB for Rx with 8 TCs */
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
|
||||
switch (reg & IXGBE_MRQC_MRQE_MASK) {
|
||||
case 0:
|
||||
case IXGBE_MRQC_RT4TCEN:
|
||||
/* RSS disabled cases */
|
||||
reg = (reg & ~IXGBE_MRQC_MRQE_MASK) | IXGBE_MRQC_RT8TCEN;
|
||||
break;
|
||||
case IXGBE_MRQC_RSSEN:
|
||||
case IXGBE_MRQC_RTRSS4TCEN:
|
||||
/* RSS enabled cases */
|
||||
reg = (reg & ~IXGBE_MRQC_MRQE_MASK) | IXGBE_MRQC_RTRSS8TCEN;
|
||||
break;
|
||||
default:
|
||||
/* Unsupported value, assume stale data, overwrite no RSS */
|
||||
reg = (reg & ~IXGBE_MRQC_MRQE_MASK) | IXGBE_MRQC_RT8TCEN;
|
||||
}
|
||||
IXGBE_WRITE_REG(hw, IXGBE_MRQC, reg);
|
||||
|
||||
/* Enable DCB for Tx with 8 TCs */
|
||||
reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg);
|
||||
|
||||
/* Disable drop for all queues */
|
||||
for (q=0; q < 128; q++) {
|
||||
IXGBE_WRITE_REG(hw, IXGBE_QDE, q << IXGBE_QDE_IDX_SHIFT);
|
||||
}
|
||||
|
||||
/* Enable the Tx desc arbiter */
|
||||
reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
|
||||
reg &= ~IXGBE_RTTDCS_ARBDIS;
|
||||
IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* ixgbe_dcb_hw_config_82599 - Configure and enable DCB
|
||||
* @hw: pointer to hardware structure
|
||||
* @dcb_config: pointer to ixgbe_dcb_config structure
|
||||
*
|
||||
* Configure dcb settings and enable dcb mode.
|
||||
*/
|
||||
s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config)
|
||||
{
|
||||
u32 pap = 0;
|
||||
|
||||
ixgbe_dcb_config_packet_buffers_82599(hw, dcb_config);
|
||||
ixgbe_dcb_config_82599(hw);
|
||||
ixgbe_dcb_config_rx_arbiter_82599(hw, dcb_config);
|
||||
ixgbe_dcb_config_tx_desc_arbiter_82599(hw, dcb_config);
|
||||
ixgbe_dcb_config_tx_data_arbiter_82599(hw, dcb_config);
|
||||
ixgbe_dcb_config_pfc_82599(hw, dcb_config);
|
||||
ixgbe_dcb_config_tc_stats_82599(hw);
|
||||
|
||||
/*
|
||||
* TODO: For DCB SV purpose only,
|
||||
* remove it before product release
|
||||
*/
|
||||
if (dcb_config->link_speed > 0 && dcb_config->link_speed <= 9) {
|
||||
pap = IXGBE_READ_REG(hw, IXGBE_PAP);
|
||||
pap |= (dcb_config->link_speed << 16);
|
||||
IXGBE_WRITE_REG(hw, IXGBE_PAP, pap);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _DCB_82599_CONFIG_H_
|
||||
#define _DCB_82599_CONFIG_H_
|
||||
|
||||
/* DCB register definitions */
|
||||
#define IXGBE_RTTDCS_TDPAC 0x00000001 /* 0 Round Robin,
|
||||
* 1 WSP - Weighted Strict Priority
|
||||
*/
|
||||
#define IXGBE_RTTDCS_VMPAC 0x00000002 /* 0 Round Robin,
|
||||
* 1 WRR - Weighted Round Robin
|
||||
*/
|
||||
#define IXGBE_RTTDCS_TDRM 0x00000010 /* Transmit Recycle Mode */
|
||||
#define IXGBE_RTTDCS_BDPM 0x00400000 /* Bypass Data Pipe - must clear! */
|
||||
#define IXGBE_RTTDCS_BPBFSM 0x00800000 /* Bypass PB Free Space - must
|
||||
* clear!
|
||||
*/
|
||||
#define IXGBE_RTTDCS_SPEED_CHG 0x80000000 /* Link speed change */
|
||||
|
||||
/* Receive UP2TC mapping */
|
||||
#define IXGBE_RTRUP2TC_UP_SHIFT 3
|
||||
/* Transmit UP2TC mapping */
|
||||
#define IXGBE_RTTUP2TC_UP_SHIFT 3
|
||||
|
||||
#define IXGBE_RTRPT4C_MCL_SHIFT 12 /* Offset to Max Credit Limit setting */
|
||||
#define IXGBE_RTRPT4C_BWG_SHIFT 9 /* Offset to BWG index */
|
||||
#define IXGBE_RTRPT4C_GSP 0x40000000 /* GSP enable bit */
|
||||
#define IXGBE_RTRPT4C_LSP 0x80000000 /* LSP enable bit */
|
||||
|
||||
#define IXGBE_RDRXCTL_MPBEN 0x00000010 /* DMA config for multiple packet
|
||||
* buffers enable
|
||||
*/
|
||||
#define IXGBE_RDRXCTL_MCEN 0x00000040 /* DMA config for multiple cores
|
||||
* (RSS) enable
|
||||
*/
|
||||
|
||||
/* RTRPCS Bit Masks */
|
||||
#define IXGBE_RTRPCS_RRM 0x00000002 /* Receive Recycle Mode enable */
|
||||
/* Receive Arbitration Control: 0 Round Robin, 1 DFP */
|
||||
#define IXGBE_RTRPCS_RAC 0x00000004
|
||||
#define IXGBE_RTRPCS_ARBDIS 0x00000040 /* Arbitration disable bit */
|
||||
|
||||
/* RTTDT2C Bit Masks */
|
||||
#define IXGBE_RTTDT2C_MCL_SHIFT 12
|
||||
#define IXGBE_RTTDT2C_BWG_SHIFT 9
|
||||
#define IXGBE_RTTDT2C_GSP 0x40000000
|
||||
#define IXGBE_RTTDT2C_LSP 0x80000000
|
||||
|
||||
#define IXGBE_RTTPT2C_MCL_SHIFT 12
|
||||
#define IXGBE_RTTPT2C_BWG_SHIFT 9
|
||||
#define IXGBE_RTTPT2C_GSP 0x40000000
|
||||
#define IXGBE_RTTPT2C_LSP 0x80000000
|
||||
|
||||
/* RTTPCS Bit Masks */
|
||||
#define IXGBE_RTTPCS_TPPAC 0x00000020 /* 0 Round Robin,
|
||||
* 1 SP - Strict Priority
|
||||
*/
|
||||
#define IXGBE_RTTPCS_ARBDIS 0x00000040 /* Arbiter disable */
|
||||
#define IXGBE_RTTPCS_TPRM 0x00000100 /* Transmit Recycle Mode enable */
|
||||
#define IXGBE_RTTPCS_ARBD_SHIFT 22
|
||||
#define IXGBE_RTTPCS_ARBD_DCB 0x4 /* Arbitration delay in DCB mode */
|
||||
|
||||
#define IXGBE_TXPBSIZE_20KB 0x00005000 /* 20KB Packet Buffer */
|
||||
#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */
|
||||
#define IXGBE_RXPBSIZE_128KB 0x00020000 /* 128KB Packet Buffer */
|
||||
|
||||
#define IXGBE_TXPBTHRESH_DCB 0xA /* THRESH value for DCB mode */
|
||||
|
||||
|
||||
/* DCB hardware-specific driver APIs */
|
||||
|
||||
/* DCB PFC functions */
|
||||
s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_get_pfc_stats_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count);
|
||||
|
||||
/* DCB traffic class stats */
|
||||
s32 ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_dcb_get_tc_stats_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_hw_stats *stats,
|
||||
u8 tc_count);
|
||||
|
||||
/* DCB config arbiters */
|
||||
s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *dcb_config);
|
||||
|
||||
/* DCB hw initialization */
|
||||
s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw,
|
||||
struct ixgbe_dcb_config *config);
|
||||
|
||||
#endif /* _DCB_82599_CONFIG_H_ */
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,29 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "ixgbe.h"
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _IXGBE_FCOE_H
|
||||
#define _IXGBE_FCOE_H
|
||||
|
||||
|
||||
#endif /* _IXGBE_FCOE_H */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,107 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
/* glue for the OS independent part of ixgbe
|
||||
* includes register access macros
|
||||
*/
|
||||
|
||||
#ifndef _IXGBE_OSDEP_H_
|
||||
#define _IXGBE_OSDEP_H_
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/sched.h>
|
||||
#include "kcompat.h"
|
||||
|
||||
|
||||
#ifndef msleep
|
||||
#define msleep(x) do { if(in_interrupt()) { \
|
||||
/* Don't mdelay in interrupt context! */ \
|
||||
BUG(); \
|
||||
} else { \
|
||||
msleep(x); \
|
||||
} } while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#undef ASSERT
|
||||
|
||||
#ifdef DBG
|
||||
#define hw_dbg(hw, S, A...) printk(KERN_DEBUG S, ## A)
|
||||
#else
|
||||
#define hw_dbg(hw, S, A...) do {} while (0)
|
||||
#endif
|
||||
|
||||
/*
 * IXGBE_WRITE_REG - write a 32-bit value to a device register
 * @a: pointer to a structure with an hw_addr member
 * @reg: register offset, added to (a)->hw_addr
 * @value: value to write
 *
 * With DBG defined, writes to the interrupt registers listed below are
 * logged with printk before the write. In the debug variant @reg and
 * @value are captured once, so an argument with side effects behaves the
 * same as in the non-debug variant, which also evaluates each once.
 */
#ifdef DBG
#define IXGBE_WRITE_REG(a, reg, value) do { \
	u32 ixgbe_dbg_reg_ = (reg); \
	u32 ixgbe_dbg_val_ = (value); \
	switch (ixgbe_dbg_reg_) { \
	case IXGBE_EIMS: \
	case IXGBE_EIMC: \
	case IXGBE_EIAM: \
	case IXGBE_EIAC: \
	case IXGBE_EICR: \
	case IXGBE_EICS: \
		printk("%s: Reg - 0x%05X, value - 0x%08X\n", __FUNCTION__, \
		       ixgbe_dbg_reg_, ixgbe_dbg_val_); \
		break; \
	default: \
		break; \
	} \
	writel(ixgbe_dbg_val_, ((a)->hw_addr + ixgbe_dbg_reg_)); \
} while (0)
#else
#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg)))
#endif
|
||||
|
||||
#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg))
|
||||
|
||||
#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \
|
||||
writel((value), ((a)->hw_addr + (reg) + ((offset) << 2))))
|
||||
|
||||
#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \
|
||||
readl((a)->hw_addr + (reg) + ((offset) << 2)))
|
||||
|
||||
#ifndef writeq
/*
 * Fallback 64-bit register write for platforms without writeq: the low
 * 32 bits go to addr and the high 32 bits to addr + 4, as two separate
 * writel() calls (so the pair is not a single access).
 *
 * Wrapped in do { } while (0) so the macro is one statement and is safe
 * inside an unbraced if/else. Both arguments are evaluated twice; do not
 * pass expressions with side effects.
 */
#define writeq(val, addr) do { \
	writel((u32)(val), (addr)); \
	writel((u32)((val) >> 32), ((addr) + 4)); \
} while (0)
#endif
|
||||
|
||||
#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg)))
|
||||
|
||||
#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS)
|
||||
struct ixgbe_hw;
|
||||
extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg);
|
||||
extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value);
|
||||
#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word
|
||||
#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word
|
||||
#define IXGBE_EEPROM_GRANT_ATTEMPS 100
|
||||
#define IXGBE_HTONL(_i) htonl(_i)
|
||||
#define IXGBE_HTONS(_i) htons(_i)
|
||||
|
||||
#endif /* _IXGBE_OSDEP_H_ */
|
|
@ -0,0 +1,964 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "ixgbe.h"
|
||||
|
||||
/* This is the only thing that needs to be changed to adjust the
|
||||
* maximum number of ports that the driver can manage.
|
||||
*/
|
||||
|
||||
#define IXGBE_MAX_NIC 8
|
||||
|
||||
#define OPTION_UNSET -1
|
||||
#define OPTION_DISABLED 0
|
||||
#define OPTION_ENABLED 1
|
||||
|
||||
/* All parameters are treated the same, as an integer array of values.
|
||||
* This macro just reduces the need to repeat the same declaration code
|
||||
* over and over (plus this helps to avoid typo bugs).
|
||||
*/
|
||||
|
||||
#define IXGBE_PARAM_INIT { [0 ... IXGBE_MAX_NIC] = OPTION_UNSET }
|
||||
#ifndef module_param_array
|
||||
/* Module Parameters are always initialized to -1, so that the driver
|
||||
* can tell the difference between no user specified value or the
|
||||
* user asking for the default value.
|
||||
* The true default values are loaded in when ixgbe_check_options is called.
|
||||
*
|
||||
* This is a GCC extension to ANSI C.
|
||||
* See the item "Labeled Elements in Initializers" in the section
|
||||
* "Extensions to the C Language Family" of the GCC documentation.
|
||||
*/
|
||||
|
||||
#define IXGBE_PARAM(X, desc) \
|
||||
static const int __devinitdata X[IXGBE_MAX_NIC+1] = IXGBE_PARAM_INIT; \
|
||||
MODULE_PARM(X, "1-" __MODULE_STRING(IXGBE_MAX_NIC) "i"); \
|
||||
MODULE_PARM_DESC(X, desc);
|
||||
#else
|
||||
#define IXGBE_PARAM(X, desc) \
|
||||
static int __devinitdata X[IXGBE_MAX_NIC+1] = IXGBE_PARAM_INIT; \
|
||||
static unsigned int num_##X; \
|
||||
module_param_array_named(X, X, int, &num_##X, 0); \
|
||||
MODULE_PARM_DESC(X, desc);
|
||||
#endif
|
||||
|
||||
IXGBE_PARAM(RXKernel, "Disable(0) or enable(1) Linux TCP/IP stack RX, default 0");
|
||||
|
||||
/* Interrupt Type
|
||||
*
|
||||
* Valid Range: 0-2
|
||||
* - 0 - Legacy Interrupt
|
||||
* - 1 - MSI Interrupt
|
||||
* - 2 - MSI-X Interrupt(s)
|
||||
*
|
||||
* Default Value: 2
|
||||
*/
|
||||
IXGBE_PARAM(InterruptType, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2");
|
||||
#define IXGBE_INT_LEGACY 0
|
||||
#define IXGBE_INT_MSI 1
|
||||
#define IXGBE_INT_MSIX 2
|
||||
#define IXGBE_DEFAULT_INT IXGBE_INT_MSIX
|
||||
|
||||
#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
|
||||
/* DCA - Direct Cache Access (DCA) Control
|
||||
*
|
||||
* This option allows the device to hint to DCA enabled processors
|
||||
* which CPU should have its cache warmed with the data being
|
||||
* transferred over PCIe. This can increase performance by reducing
|
||||
* cache misses. ixgbe hardware supports DCA for:
|
||||
* tx descriptor writeback
|
||||
* rx descriptor writeback
|
||||
* rx data
|
||||
* rx data header only (in packet split mode)
|
||||
*
|
||||
* enabling option 2 can cause cache thrash in some tests, particularly
|
||||
* if the CPU is completely utilized
|
||||
*
|
||||
* Valid Range: 0 - 2
|
||||
* - 0 - disables DCA
|
||||
* - 1 - enables DCA
|
||||
* - 2 - enables DCA with rx data included
|
||||
*
|
||||
* Default Value: 2
|
||||
*/
|
||||
|
||||
#define IXGBE_MAX_DCA 0
|
||||
|
||||
IXGBE_PARAM(DCA, "Disable or enable Direct Cache Access, 0=disabled, 1=descriptor only, 2=descriptor and data");
|
||||
|
||||
#endif
|
||||
|
||||
/* RXQ - The number of RX queues with RSS distribution
|
||||
*
|
||||
* Valid Range: 0-16
|
||||
* - 0 - Default, min(16, num_online_cpus())
|
||||
* - 1-16 - sets the Desc. Q's to the specified value.
|
||||
*
|
||||
* Default Value: 0
|
||||
*/
|
||||
|
||||
IXGBE_PARAM(RXQ, "Number of RX queues, default 0=number of cpus");
|
||||
|
||||
/* TXQ - The number of TX queues
|
||||
*
|
||||
* Valid Range: 0-16
|
||||
* - 0 - Default, min(16, num_online_cpus())
|
||||
* - 1-16 - sets the Desc. Q's to the specified value.
|
||||
*
|
||||
* Default Value: 0
|
||||
*/
|
||||
|
||||
IXGBE_PARAM(TXQ, "Number of TX queues, default 0=number of cpus");
|
||||
|
||||
/* Interrupt Throttle Rate (interrupts/sec)
|
||||
*
|
||||
* Valid Range: 956-488281 (0=off, 1=dynamic)
|
||||
*
|
||||
 * Default Value: 956 (DEFAULT_ITR)
|
||||
*/
|
||||
#define DEFAULT_ITR 956
|
||||
IXGBE_PARAM(InterruptThrottleRate, "Maximum interrupts per second, per vector, (956-488281), default 8000");
|
||||
#define MAX_ITR IXGBE_MAX_INT_RATE
|
||||
#define MIN_ITR IXGBE_MIN_INT_RATE
|
||||
|
||||
#ifndef IXGBE_NO_LLI
|
||||
/* LLIPort (Low Latency Interrupt TCP Port)
|
||||
*
|
||||
* Valid Range: 0 - 65535
|
||||
*
|
||||
* Default Value: 0 (disabled)
|
||||
*/
|
||||
IXGBE_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535)");
|
||||
|
||||
#define DEFAULT_LLIPORT 0
|
||||
#define MAX_LLIPORT 0xFFFF
|
||||
#define MIN_LLIPORT 0
|
||||
|
||||
/* LLIPush (Low Latency Interrupt on TCP Push flag)
|
||||
*
|
||||
* Valid Range: 0,1
|
||||
*
|
||||
* Default Value: 0 (disabled)
|
||||
*/
|
||||
IXGBE_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1)");
|
||||
|
||||
#define DEFAULT_LLIPUSH 0
|
||||
#define MAX_LLIPUSH 1
|
||||
#define MIN_LLIPUSH 0
|
||||
|
||||
/* LLISize (Low Latency Interrupt on Packet Size)
|
||||
*
|
||||
* Valid Range: 0 - 1500
|
||||
*
|
||||
* Default Value: 0 (disabled)
|
||||
*/
|
||||
IXGBE_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500)");
|
||||
|
||||
#define DEFAULT_LLISIZE 0
|
||||
#define MAX_LLISIZE 1500
|
||||
#define MIN_LLISIZE 0
|
||||
|
||||
/* LLIEType (Low Latency Interrupt Ethernet Type)
|
||||
*
|
||||
* Valid Range: 0 - 0x8fff
|
||||
*
|
||||
* Default Value: 0 (disabled)
|
||||
*/
|
||||
IXGBE_PARAM(LLIEType, "Low Latency Interrupt Ethernet Protocol Type");
|
||||
|
||||
#define DEFAULT_LLIETYPE 0
|
||||
#define MAX_LLIETYPE 0x8fff
|
||||
#define MIN_LLIETYPE 0
|
||||
|
||||
/* LLIVLANP (Low Latency Interrupt on VLAN priority threshold)
|
||||
*
|
||||
* Valid Range: 0 - 7
|
||||
*
|
||||
* Default Value: 0 (disabled)
|
||||
*/
|
||||
IXGBE_PARAM(LLIVLANP, "Low Latency Interrupt on VLAN priority threshold");
|
||||
|
||||
#define DEFAULT_LLIVLANP 0
|
||||
#define MAX_LLIVLANP 7
|
||||
#define MIN_LLIVLANP 0
|
||||
|
||||
#endif /* IXGBE_NO_LLI */
|
||||
/* Rx buffer mode
|
||||
*
|
||||
* Valid Range: 0-2 0 = 1buf_mode_always, 1 = ps_mode_always and 2 = optimal
|
||||
*
|
||||
* Default Value: 2
|
||||
*/
|
||||
IXGBE_PARAM(RxBufferMode, "0=1 descriptor per packet,\n"
|
||||
"\t\t\t1=use packet split, multiple descriptors per jumbo frame\n"
|
||||
"\t\t\t2 (default)=use 1buf mode for 1500 mtu, packet split for jumbo");
|
||||
|
||||
#define IXGBE_RXBUFMODE_1BUF_ALWAYS 0
|
||||
#define IXGBE_RXBUFMODE_PS_ALWAYS 1
|
||||
#define IXGBE_RXBUFMODE_OPTIMAL 2
|
||||
#define IXGBE_DEFAULT_RXBUFMODE IXGBE_RXBUFMODE_1BUF_ALWAYS
|
||||
|
||||
/* Flow Director filtering mode
|
||||
*
|
||||
* Valid Range: 0-2 0 = off, 1 = Hashing (ATR), and 2 = perfect filters
|
||||
*
|
||||
 * Default Value: 0 (off)
|
||||
*/
|
||||
IXGBE_PARAM(FdirMode, "Flow Director filtering modes:\n"
|
||||
"\t\t\t0 = Filtering off\n"
|
||||
"\t\t\t1 = Signature Hashing filters (SW ATR)\n"
|
||||
"\t\t\t2 = Perfect Filters");
|
||||
|
||||
#define IXGBE_FDIR_FILTER_OFF 0
|
||||
#define IXGBE_FDIR_FILTER_HASH 1
|
||||
#define IXGBE_FDIR_FILTER_PERFECT 2
|
||||
/* #define IXGBE_DEFAULT_FDIR_FILTER IXGBE_FDIR_FILTER_HASH */
|
||||
#define IXGBE_DEFAULT_FDIR_FILTER IXGBE_FDIR_FILTER_OFF
|
||||
|
||||
/* Flow Director packet buffer allocation level
|
||||
*
|
||||
* Valid Range: 0-2 0 = 8k hash/2k perfect, 1 = 16k hash/4k perfect,
|
||||
* 2 = 32k hash/8k perfect
|
||||
*
|
||||
* Default Value: 0
|
||||
*/
|
||||
IXGBE_PARAM(FdirPballoc, "Flow Director packet buffer allocation level:\n"
|
||||
"\t\t\t0 = 8k hash filters or 2k perfect filters\n"
|
||||
"\t\t\t1 = 16k hash filters or 4k perfect filters\n"
|
||||
"\t\t\t2 = 32k hash filters or 8k perfect filters");
|
||||
|
||||
#define IXGBE_FDIR_PBALLOC_64K 0
|
||||
#define IXGBE_FDIR_PBALLOC_128K 1
|
||||
#define IXGBE_FDIR_PBALLOC_256K 2
|
||||
#define IXGBE_DEFAULT_FDIR_PBALLOC IXGBE_FDIR_PBALLOC_64K
|
||||
|
||||
/* Software ATR packet sample rate
|
||||
*
|
||||
* Valid Range: 0-100 0 = off, 1-100 = rate of Tx packet inspection
|
||||
*
|
||||
* Default Value: 20
|
||||
*/
|
||||
IXGBE_PARAM(AtrSampleRate, "Software ATR Tx packet sample rate");
|
||||
|
||||
#define IXGBE_MAX_ATR_SAMPLE_RATE 100
|
||||
#define IXGBE_MIN_ATR_SAMPLE_RATE 1
|
||||
#define IXGBE_ATR_SAMPLE_RATE_OFF 0
|
||||
#define IXGBE_DEFAULT_ATR_SAMPLE_RATE 20
|
||||
|
||||
/* One permitted value of a list_option, with the message logged on a match. */
struct ixgbe_opt_list {
	int i;		/* accepted parameter value */
	char *str;	/* text printed when matched; "" prints nothing */
};

/*
 * Describes how a single module parameter is validated by
 * ixgbe_validate_option().
 */
struct ixgbe_option {
	/* selects which validation rule (and which member of arg) applies */
	enum { enable_option, range_option, list_option } type;
	const char *name;	/* parameter name used in log messages */
	const char *err;	/* text appended to the "Invalid ..." message */
	int def;		/* value substituted when unset or invalid */
	union {
		/* range_option info: inclusive bounds */
		struct {
			int min;
			int max;
		} r;
		/* list_option info: table of nr accepted values */
		struct {
			int nr;
			const struct ixgbe_opt_list *p;
		} l;
	} arg;
};
|
||||
|
||||
/**
 * ixgbe_validate_option - check one parameter value against its descriptor
 * @value: in/out; the user supplied value, overwritten with opt->def when it
 *         is OPTION_UNSET or fails validation
 * @opt: descriptor giving the option type, name, default and limits
 *
 * Returns 0 when the value is accepted (or was unset and silently replaced
 * by the default), -1 when it was rejected and replaced by the default.
 * An unknown opt->type triggers BUG().
 **/
static int __devinit ixgbe_validate_option(unsigned int *value,
					    struct ixgbe_option *opt)
{
	int idx;

	/* Nothing given by the user: adopt the default without logging. */
	if (*value == OPTION_UNSET) {
		*value = opt->def;
		return 0;
	}

	if (opt->type == enable_option) {
		if (*value == OPTION_ENABLED) {
			printk(KERN_INFO "ixgbe: %s Enabled\n", opt->name);
			return 0;
		}
		if (*value == OPTION_DISABLED) {
			printk(KERN_INFO "ixgbe: %s Disabled\n", opt->name);
			return 0;
		}
	} else if (opt->type == range_option) {
		/* bounds are inclusive on both ends */
		if (!(*value < opt->arg.r.min || *value > opt->arg.r.max)) {
			printk(KERN_INFO "ixgbe: %s set to %d\n", opt->name, *value);
			return 0;
		}
	} else if (opt->type == list_option) {
		for (idx = 0; idx < opt->arg.l.nr; idx++) {
			const struct ixgbe_opt_list *entry = opt->arg.l.p + idx;

			if (*value != entry->i)
				continue;
			/* an empty string means "accept quietly" */
			if (entry->str[0] != '\0')
				printk(KERN_INFO "%s\n", entry->str);
			return 0;
		}
	} else {
		BUG();
	}

	/* Fell through every acceptance test: report and use the default. */
	printk(KERN_INFO "ixgbe: Invalid %s specified (%d), %s\n",
	       opt->name, *value, opt->err);
	*value = opt->def;
	return -1;
}
|
||||
|
||||
#define LIST_LEN(l) (sizeof(l) / sizeof(l[0]))
|
||||
|
||||
/**
|
||||
* ixgbe_check_options - Range Checking for Command Line Parameters
|
||||
* @adapter: board private structure
|
||||
*
|
||||
* This routine checks all command line parameters for valid user
|
||||
* input. If an invalid value is given, or if no user specified
|
||||
* value exists, a default value is used. The final value is stored
|
||||
* in a variable in the adapter structure.
|
||||
**/
|
||||
void __devinit ixgbe_check_options(struct ixgbe_adapter *adapter)
|
||||
{
|
||||
int bd = adapter->bd_number;
|
||||
u32 *aflags = &adapter->flags;
|
||||
struct ixgbe_ring_feature *feature = adapter->ring_feature;
|
||||
|
||||
if (bd >= IXGBE_MAX_NIC) {
|
||||
printk(KERN_NOTICE
|
||||
"Warning: no configuration for board #%d\n", bd);
|
||||
printk(KERN_NOTICE "Using defaults for all values\n");
|
||||
#ifndef module_param_array
|
||||
bd = IXGBE_MAX_NIC;
|
||||
#endif
|
||||
}
|
||||
|
||||
{ /* Linux RX Stack Support */
|
||||
static struct ixgbe_option opt = {
|
||||
.type = enable_option,
|
||||
.name = "Linux TCP/IP stack RX",
|
||||
.err = "defaulting to Disabled",
|
||||
.def = OPTION_DISABLED
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_RXKernel > bd) {
|
||||
#endif
|
||||
unsigned int tmp = RXKernel[bd];
|
||||
ixgbe_validate_option(&tmp, &opt);
|
||||
if (tmp)
|
||||
*aflags |= IXGBE_FLAG_RX_KERNEL_ENABLE;
|
||||
else
|
||||
*aflags &= ~IXGBE_FLAG_RX_KERNEL_ENABLE;
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
if (opt.def == OPTION_ENABLED)
|
||||
*aflags |= IXGBE_FLAG_RX_KERNEL_ENABLE;
|
||||
else
|
||||
*aflags &= ~IXGBE_FLAG_RX_KERNEL_ENABLE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
{ /* Interrupt Type */
|
||||
unsigned int i_type;
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Interrupt Type",
|
||||
.err =
|
||||
"using default of "__MODULE_STRING(IXGBE_DEFAULT_INT),
|
||||
.def = IXGBE_DEFAULT_INT,
|
||||
.arg = { .r = { .min = IXGBE_INT_LEGACY,
|
||||
.max = IXGBE_INT_MSIX}}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_InterruptType > bd) {
|
||||
#endif
|
||||
i_type = InterruptType[bd];
|
||||
ixgbe_validate_option(&i_type, &opt);
|
||||
switch (i_type) {
|
||||
case IXGBE_INT_MSIX:
|
||||
if (!(*aflags & IXGBE_FLAG_MSIX_CAPABLE))
|
||||
printk(KERN_INFO
|
||||
"Ignoring MSI-X setting; "
|
||||
"support unavailable\n");
|
||||
break;
|
||||
case IXGBE_INT_MSI:
|
||||
if (!(*aflags & IXGBE_FLAG_MSI_CAPABLE)) {
|
||||
printk(KERN_INFO
|
||||
"Ignoring MSI setting; "
|
||||
"support unavailable\n");
|
||||
} else {
|
||||
*aflags &= ~IXGBE_FLAG_MSIX_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_DCB_CAPABLE;
|
||||
}
|
||||
break;
|
||||
case IXGBE_INT_LEGACY:
|
||||
default:
|
||||
*aflags &= ~IXGBE_FLAG_MSIX_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_MSI_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_DCB_CAPABLE;
|
||||
break;
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
*aflags |= IXGBE_FLAG_MSIX_CAPABLE;
|
||||
*aflags |= IXGBE_FLAG_MSI_CAPABLE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE)
|
||||
{ /* Direct Cache Access (DCA) */
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Direct Cache Access (DCA)",
|
||||
.err = "defaulting to Enabled",
|
||||
.def = OPTION_DISABLED,
|
||||
.arg = { .r = { .min = OPTION_DISABLED,
|
||||
.max = IXGBE_MAX_DCA}}
|
||||
};
|
||||
unsigned int dca = opt.def;
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_DCA > bd) {
|
||||
#endif
|
||||
dca = DCA[bd];
|
||||
ixgbe_validate_option(&dca, &opt);
|
||||
if (!dca)
|
||||
*aflags &= ~IXGBE_FLAG_DCA_CAPABLE;
|
||||
|
||||
/* Check Interoperability */
|
||||
if (!(*aflags & IXGBE_FLAG_DCA_CAPABLE)) {
|
||||
DPRINTK(PROBE, INFO, "DCA is disabled\n");
|
||||
*aflags &= ~IXGBE_FLAG_DCA_ENABLED;
|
||||
}
|
||||
|
||||
if (dca == IXGBE_MAX_DCA) {
|
||||
DPRINTK(PROBE, INFO,
|
||||
"DCA enabled for rx data\n");
|
||||
adapter->flags |= IXGBE_FLAG_DCA_ENABLED_DATA;
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
/* make sure to clear the capability flag if the
|
||||
* option is disabled by default above */
|
||||
if (opt.def == OPTION_DISABLED)
|
||||
*aflags &= ~IXGBE_FLAG_DCA_CAPABLE;
|
||||
}
|
||||
#endif
|
||||
if (dca == IXGBE_MAX_DCA)
|
||||
adapter->flags |= IXGBE_FLAG_DCA_ENABLED_DATA;
|
||||
}
|
||||
#endif /* CONFIG_DCA or CONFIG_DCA_MODULE */
|
||||
{ /* # of RX queues with RSS (RXQ) */
	static struct ixgbe_option opt = {
		.type = range_option,
		.name = "RX queues (RXQ)",
		.err  = "using default.",
		.def  = 0,
		.arg  = { .r = { .min = 0,
				 .max = IXGBE_MAX_RSS_INDICES}}
	};
	/* 0 means "derive from the online CPU count"; resolved below */
	unsigned int rxq = 0;

	/*
	 * RXQ[] is only indexed after the num_RXQ guard (when that guard
	 * exists), so a board number beyond the parameter array is never
	 * used as an index.
	 */
#ifdef module_param_array
	if (num_RXQ > bd)
#endif
	{
		rxq = RXQ[bd];
		/* an explicit 0 is the "auto" request, not range-checked */
		if (rxq)
			ixgbe_validate_option(&rxq, &opt);
	}

	/*
	 * Auto-size for an explicit 0, a missing parameter, or a value
	 * that validation reset to opt.def (0): base it off
	 * num_online_cpus() with a hardware limit cap, so the queue
	 * count is never left at zero.
	 */
	if (!rxq)
		rxq = min(IXGBE_MAX_RSS_INDICES,
			  (int)num_online_cpus());

	feature[RING_F_RXQ].indices = rxq;
	*aflags |= IXGBE_FLAG_RSS_ENABLED;
}
|
||||
{ /* # of TX queues (TXQ) */
	static struct ixgbe_option opt = {
		.type = range_option,
		.name = "TX queues (TXQ)",
		.err  = "using default.",
		.def  = 0,
		.arg  = { .r = { .min = 0,
				 .max = IXGBE_MAX_RSS_INDICES}}
	};
	/* 0 means "derive from the online CPU count"; resolved below */
	unsigned int txq = 0;

	/*
	 * TXQ[] is only indexed after the num_TXQ guard (when that guard
	 * exists), so a board number beyond the parameter array is never
	 * used as an index.
	 */
#ifdef module_param_array
	if (num_TXQ > bd)
#endif
	{
		txq = TXQ[bd];
		/* an explicit 0 is the "auto" request, not range-checked */
		if (txq)
			ixgbe_validate_option(&txq, &opt);
	}

	/*
	 * Auto-size for an explicit 0, a missing parameter, or a value
	 * that validation reset to opt.def (0): base it off
	 * num_online_cpus() with a hardware limit cap, so the queue
	 * count is never left at zero.
	 */
	if (!txq)
		txq = min(IXGBE_MAX_RSS_INDICES,
			  (int)num_online_cpus());

	feature[RING_F_TXQ].indices = txq;
}
|
||||
{ /* Interrupt Throttling Rate */
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Interrupt Throttling Rate (ints/sec)",
|
||||
.err = "using default of "__MODULE_STRING(DEFAULT_ITR),
|
||||
.def = DEFAULT_ITR,
|
||||
.arg = { .r = { .min = MIN_ITR,
|
||||
.max = MAX_ITR }}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_InterruptThrottleRate > bd) {
|
||||
#endif
|
||||
u32 eitr = InterruptThrottleRate[bd];
|
||||
switch (eitr) {
|
||||
case 0:
|
||||
DPRINTK(PROBE, INFO, "%s turned off\n",
|
||||
opt.name);
|
||||
/*
|
||||
* zero is a special value, we don't want to
|
||||
* turn off ITR completely, just set it to an
|
||||
* insane interrupt rate
|
||||
*/
|
||||
adapter->eitr_param = IXGBE_MAX_INT_RATE;
|
||||
adapter->itr_setting = 0;
|
||||
break;
|
||||
case 1:
|
||||
DPRINTK(PROBE, INFO, "dynamic interrupt "
|
||||
"throttling enabled\n");
|
||||
adapter->eitr_param = 20000;
|
||||
adapter->itr_setting = 1;
|
||||
break;
|
||||
default:
|
||||
ixgbe_validate_option(&eitr, &opt);
|
||||
adapter->eitr_param = eitr;
|
||||
/* the first bit is used as control */
|
||||
adapter->itr_setting = eitr & ~1;
|
||||
break;
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
adapter->eitr_param = DEFAULT_ITR;
|
||||
adapter->itr_setting = DEFAULT_ITR;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#ifndef IXGBE_NO_LLI
|
||||
{ /* Low Latency Interrupt TCP Port*/
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Low Latency Interrupt TCP Port",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(DEFAULT_LLIPORT),
|
||||
.def = DEFAULT_LLIPORT,
|
||||
.arg = { .r = { .min = MIN_LLIPORT,
|
||||
.max = MAX_LLIPORT }}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_LLIPort > bd) {
|
||||
#endif
|
||||
adapter->lli_port = LLIPort[bd];
|
||||
if (adapter->lli_port) {
|
||||
ixgbe_validate_option(&adapter->lli_port, &opt);
|
||||
} else {
|
||||
DPRINTK(PROBE, INFO, "%s turned off\n",
|
||||
opt.name);
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
adapter->lli_port = opt.def;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
{ /* Low Latency Interrupt on Packet Size */
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Low Latency Interrupt on Packet Size",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(DEFAULT_LLISIZE),
|
||||
.def = DEFAULT_LLISIZE,
|
||||
.arg = { .r = { .min = MIN_LLISIZE,
|
||||
.max = MAX_LLISIZE }}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_LLISize > bd) {
|
||||
#endif
|
||||
adapter->lli_size = LLISize[bd];
|
||||
if (adapter->lli_size) {
|
||||
ixgbe_validate_option(&adapter->lli_size, &opt);
|
||||
} else {
|
||||
DPRINTK(PROBE, INFO, "%s turned off\n",
|
||||
opt.name);
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
adapter->lli_size = opt.def;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
{ /*Low Latency Interrupt on TCP Push flag*/
|
||||
static struct ixgbe_option opt = {
|
||||
.type = enable_option,
|
||||
.name = "Low Latency Interrupt on TCP Push flag",
|
||||
.err = "defaulting to Disabled",
|
||||
.def = OPTION_DISABLED
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_LLIPush > bd) {
|
||||
#endif
|
||||
unsigned int lli_push = LLIPush[bd];
|
||||
ixgbe_validate_option(&lli_push, &opt);
|
||||
if (lli_push)
|
||||
*aflags |= IXGBE_FLAG_LLI_PUSH;
|
||||
else
|
||||
*aflags &= ~IXGBE_FLAG_LLI_PUSH;
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
if (opt.def == OPTION_ENABLED)
|
||||
*aflags |= IXGBE_FLAG_LLI_PUSH;
|
||||
else
|
||||
*aflags &= ~IXGBE_FLAG_LLI_PUSH;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
{ /* Low Latency Interrupt EtherType*/
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Low Latency Interrupt on Ethernet Protocol Type",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(DEFAULT_LLIETYPE),
|
||||
.def = DEFAULT_LLIETYPE,
|
||||
.arg = { .r = { .min = MIN_LLIETYPE,
|
||||
.max = MAX_LLIETYPE }}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_LLIEType > bd) {
|
||||
#endif
|
||||
adapter->lli_etype = LLIEType[bd];
|
||||
if (adapter->lli_etype) {
|
||||
ixgbe_validate_option(&adapter->lli_etype, &opt);
|
||||
} else {
|
||||
DPRINTK(PROBE, INFO, "%s turned off\n",
|
||||
opt.name);
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
adapter->lli_etype = opt.def;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
{ /* LLI VLAN Priority */
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Low Latency Interrupt on VLAN priority threashold",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(DEFAULT_LLIVLANP),
|
||||
.def = DEFAULT_LLIVLANP,
|
||||
.arg = { .r = { .min = MIN_LLIVLANP,
|
||||
.max = MAX_LLIVLANP }}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_LLIVLANP > bd) {
|
||||
#endif
|
||||
adapter->lli_vlan_pri = LLIVLANP[bd];
|
||||
if (adapter->lli_vlan_pri) {
|
||||
ixgbe_validate_option(&adapter->lli_vlan_pri, &opt);
|
||||
} else {
|
||||
DPRINTK(PROBE, INFO, "%s turned off\n",
|
||||
opt.name);
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
adapter->lli_vlan_pri = opt.def;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif /* IXGBE_NO_LLI */
|
||||
{ /* Rx buffer mode */
|
||||
unsigned int rx_buf_mode;
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Rx buffer mode",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(IXGBE_DEFAULT_RXBUFMODE),
|
||||
.def = IXGBE_DEFAULT_RXBUFMODE,
|
||||
.arg = {.r = {.min = IXGBE_RXBUFMODE_1BUF_ALWAYS,
|
||||
.max = IXGBE_RXBUFMODE_OPTIMAL}}
|
||||
};
|
||||
|
||||
#ifdef module_param_array
|
||||
if (num_RxBufferMode > bd) {
|
||||
#endif
|
||||
rx_buf_mode = RxBufferMode[bd];
|
||||
ixgbe_validate_option(&rx_buf_mode, &opt);
|
||||
switch (rx_buf_mode) {
|
||||
case IXGBE_RXBUFMODE_OPTIMAL:
|
||||
*aflags |= IXGBE_FLAG_RX_1BUF_CAPABLE;
|
||||
*aflags |= IXGBE_FLAG_RX_PS_CAPABLE;
|
||||
break;
|
||||
case IXGBE_RXBUFMODE_PS_ALWAYS:
|
||||
*aflags |= IXGBE_FLAG_RX_PS_CAPABLE;
|
||||
break;
|
||||
case IXGBE_RXBUFMODE_1BUF_ALWAYS:
|
||||
*aflags |= IXGBE_FLAG_RX_1BUF_CAPABLE;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
*aflags |= IXGBE_FLAG_RX_1BUF_CAPABLE;
|
||||
*aflags |= IXGBE_FLAG_RX_PS_CAPABLE;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
{ /* Flow Director filtering mode */
|
||||
unsigned int fdir_filter_mode;
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Flow Director filtering mode",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(IXGBE_DEFAULT_FDIR_FILTER),
|
||||
.def = IXGBE_DEFAULT_FDIR_FILTER,
|
||||
.arg = {.r = {.min = IXGBE_FDIR_FILTER_OFF,
|
||||
.max = IXGBE_FDIR_FILTER_PERFECT}}
|
||||
};
|
||||
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
|
||||
if (adapter->hw.mac.type == ixgbe_mac_82598EB)
|
||||
goto no_flow_director;
|
||||
#ifdef module_param_array
|
||||
if (num_FdirMode > bd) {
|
||||
#endif
|
||||
#ifdef HAVE_TX_MQ
|
||||
fdir_filter_mode = FdirMode[bd];
|
||||
#else
|
||||
fdir_filter_mode = IXGBE_FDIR_FILTER_OFF;
|
||||
#endif /* HAVE_TX_MQ */
|
||||
ixgbe_validate_option(&fdir_filter_mode, &opt);
|
||||
|
||||
switch (fdir_filter_mode) {
|
||||
case IXGBE_FDIR_FILTER_OFF:
|
||||
DPRINTK(PROBE, INFO, "Flow Director disabled\n");
|
||||
break;
|
||||
case IXGBE_FDIR_FILTER_HASH:
|
||||
*aflags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
|
||||
feature[RING_F_FDIR].indices =
|
||||
IXGBE_MAX_FDIR_INDICES;
|
||||
DPRINTK(PROBE, INFO,
|
||||
"Flow Director hash filtering enabled\n");
|
||||
break;
|
||||
case IXGBE_FDIR_FILTER_PERFECT:
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
|
||||
*aflags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
|
||||
feature[RING_F_FDIR].indices =
|
||||
IXGBE_MAX_FDIR_INDICES;
|
||||
spin_lock_init(&adapter->fdir_perfect_lock);
|
||||
DPRINTK(PROBE, INFO,
|
||||
"Flow Director perfect filtering enabled\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
#ifdef HAVE_TX_MQ
|
||||
if (opt.def != IXGBE_FDIR_FILTER_OFF) {
|
||||
*aflags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
|
||||
feature[RING_F_FDIR].indices = IXGBE_MAX_FDIR_INDICES;
|
||||
DPRINTK(PROBE, INFO,
|
||||
"Flow Director hash filtering enabled\n");
|
||||
} else {
|
||||
#endif /* HAVE_TX_MQ */
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
|
||||
feature[RING_F_FDIR].indices = 0;
|
||||
DPRINTK(PROBE, INFO,
|
||||
"Flow Director hash filtering disabled\n");
|
||||
#ifdef HAVE_TX_MQ
|
||||
}
|
||||
#endif /* HAVE_TX_MQ */
|
||||
}
|
||||
/* Check interoperability */
|
||||
if ((*aflags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
|
||||
(*aflags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) {
|
||||
if (!(*aflags & IXGBE_FLAG_MQ_CAPABLE)) {
|
||||
DPRINTK(PROBE, INFO,
|
||||
"Flow Director is not supported "
|
||||
"while multiple queues are disabled. "
|
||||
"Disabling Flow Director\n");
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
|
||||
*aflags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
no_flow_director:
|
||||
/* empty code line with semi-colon */ ;
|
||||
}
|
||||
{ /* Flow Director packet buffer allocation */
	unsigned int fdir_pballoc_mode;
	static struct ixgbe_option opt = {
		.type = range_option,
		.name = "Flow Director packet buffer allocation",
		.err = "using default of "
			__MODULE_STRING(IXGBE_DEFAULT_FDIR_PBALLOC),
		.def = IXGBE_DEFAULT_FDIR_PBALLOC,
		.arg = {.r = {.min = IXGBE_FDIR_PBALLOC_64K,
			      .max = IXGBE_FDIR_PBALLOC_256K}}
	};
	/* human readable size for the log line; always set by the switch */
	const char *pstring;

	/* Nothing to allocate on 82598EB or when Flow Director is off. */
	if ((adapter->hw.mac.type == ixgbe_mac_82598EB) ||
	    (!(*aflags & (IXGBE_FLAG_FDIR_HASH_CAPABLE |
			  IXGBE_FLAG_FDIR_PERFECT_CAPABLE))))
		goto no_fdir_pballoc;
#ifdef module_param_array
	if (num_FdirPballoc > bd) {
#endif
		fdir_pballoc_mode = FdirPballoc[bd];
		ixgbe_validate_option(&fdir_pballoc_mode, &opt);
		switch (fdir_pballoc_mode) {
		case IXGBE_FDIR_PBALLOC_128K:
			adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_128K;
			pstring = "128kB";
			break;
		case IXGBE_FDIR_PBALLOC_256K:
			adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_256K;
			pstring = "256kB";
			break;
		case IXGBE_FDIR_PBALLOC_64K:
		default:
			/*
			 * Validation keeps the mode within the option's
			 * range; any other value is treated like the
			 * 64K level so pstring is never printed unset.
			 */
			adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K;
			pstring = "64kB";
			break;
		}
		DPRINTK(PROBE, INFO,
			"Flow Director allocated %s of packet buffer\n",
			pstring);

#ifdef module_param_array
	} else {
		adapter->fdir_pballoc = opt.def;
		DPRINTK(PROBE, INFO,
			"Flow Director allocated 64kB of packet buffer\n");

	}
#endif
no_fdir_pballoc:
	/* empty code line with semi-colon */ ;
}
|
||||
{ /* Flow Director ATR Tx sample packet rate */
|
||||
static struct ixgbe_option opt = {
|
||||
.type = range_option,
|
||||
.name = "Software ATR Tx packet sample rate",
|
||||
.err = "using default of "
|
||||
__MODULE_STRING(IXGBE_DEFAULT_ATR_SAMPLE_RATE),
|
||||
.def = IXGBE_DEFAULT_ATR_SAMPLE_RATE,
|
||||
.arg = {.r = {.min = IXGBE_ATR_SAMPLE_RATE_OFF,
|
||||
.max = IXGBE_MAX_ATR_SAMPLE_RATE}}
|
||||
};
|
||||
static const char atr_string[] =
|
||||
"ATR Tx Packet sample rate set to";
|
||||
|
||||
adapter->atr_sample_rate = IXGBE_ATR_SAMPLE_RATE_OFF;
|
||||
if (adapter->hw.mac.type == ixgbe_mac_82598EB)
|
||||
goto no_fdir_sample;
|
||||
|
||||
/* no sample rate for perfect filtering */
|
||||
if (*aflags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
|
||||
goto no_fdir_sample;
|
||||
#ifdef module_param_array
|
||||
if (num_AtrSampleRate > bd) {
|
||||
#endif
|
||||
/* Only enable the sample rate if hashing (ATR) is on */
|
||||
if (*aflags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
|
||||
adapter->atr_sample_rate = AtrSampleRate[bd];
|
||||
|
||||
if (adapter->atr_sample_rate) {
|
||||
ixgbe_validate_option(&adapter->atr_sample_rate,
|
||||
&opt);
|
||||
DPRINTK(PROBE, INFO, "%s %d\n", atr_string,
|
||||
adapter->atr_sample_rate);
|
||||
}
|
||||
#ifdef module_param_array
|
||||
} else {
|
||||
/* Only enable the sample rate if hashing (ATR) is on */
|
||||
if (*aflags & IXGBE_FLAG_FDIR_HASH_CAPABLE)
|
||||
adapter->atr_sample_rate = opt.def;
|
||||
|
||||
DPRINTK(PROBE, INFO, "%s default of %d\n", atr_string,
|
||||
adapter->atr_sample_rate);
|
||||
}
|
||||
#endif
|
||||
no_fdir_sample:
|
||||
/* empty code line with semi-colon */ ;
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,123 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _IXGBE_PHY_H_
|
||||
#define _IXGBE_PHY_H_
|
||||
|
||||
#include "ixgbe_type.h"
|
||||
#define IXGBE_I2C_EEPROM_DEV_ADDR 0xA0
|
||||
|
||||
/* EEPROM byte offsets */
|
||||
#define IXGBE_SFF_IDENTIFIER 0x0
|
||||
#define IXGBE_SFF_IDENTIFIER_SFP 0x3
|
||||
#define IXGBE_SFF_VENDOR_OUI_BYTE0 0x25
|
||||
#define IXGBE_SFF_VENDOR_OUI_BYTE1 0x26
|
||||
#define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27
|
||||
#define IXGBE_SFF_1GBE_COMP_CODES 0x6
|
||||
#define IXGBE_SFF_10GBE_COMP_CODES 0x3
|
||||
#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8
|
||||
|
||||
/* Bitmasks */
|
||||
#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4
|
||||
#define IXGBE_SFF_1GBASESX_CAPABLE 0x1
|
||||
#define IXGBE_SFF_1GBASELX_CAPABLE 0x2
|
||||
#define IXGBE_SFF_10GBASESR_CAPABLE 0x10
|
||||
#define IXGBE_SFF_10GBASELR_CAPABLE 0x20
|
||||
#define IXGBE_I2C_EEPROM_READ_MASK 0x100
|
||||
#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3
|
||||
#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0
|
||||
#define IXGBE_I2C_EEPROM_STATUS_PASS 0x1
|
||||
#define IXGBE_I2C_EEPROM_STATUS_FAIL 0x2
|
||||
#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3
|
||||
|
||||
/* Bit-shift macros */
|
||||
#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24
|
||||
#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16
|
||||
#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8
|
||||
|
||||
/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
|
||||
#define IXGBE_SFF_VENDOR_OUI_TYCO 0x00407600
|
||||
#define IXGBE_SFF_VENDOR_OUI_FTL 0x00906500
|
||||
#define IXGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00
|
||||
#define IXGBE_SFF_VENDOR_OUI_INTEL 0x001B2100
|
||||
|
||||
/* I2C SDA and SCL timing parameters for standard mode */
|
||||
#define IXGBE_I2C_T_HD_STA 4
|
||||
#define IXGBE_I2C_T_LOW 5
|
||||
#define IXGBE_I2C_T_HIGH 4
|
||||
#define IXGBE_I2C_T_SU_STA 5
|
||||
#define IXGBE_I2C_T_HD_DATA 5
|
||||
#define IXGBE_I2C_T_SU_DATA 1
|
||||
#define IXGBE_I2C_T_RISE 1
|
||||
#define IXGBE_I2C_T_FALL 1
|
||||
#define IXGBE_I2C_T_SU_STO 4
|
||||
#define IXGBE_I2C_T_BUF 5
|
||||
|
||||
|
||||
s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw);
|
||||
bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr);
|
||||
enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id);
|
||||
s32 ixgbe_get_phy_id(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
|
||||
u32 device_type, u16 *phy_data);
|
||||
s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
|
||||
u32 device_type, u16 phy_data);
|
||||
s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
|
||||
ixgbe_link_speed speed,
|
||||
bool autoneg,
|
||||
bool autoneg_wait_to_complete);
|
||||
s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
|
||||
ixgbe_link_speed *speed,
|
||||
bool *autoneg);
|
||||
|
||||
/* PHY specific */
|
||||
s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
|
||||
ixgbe_link_speed *speed,
|
||||
bool *link_up);
|
||||
s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw,
|
||||
u16 *firmware_version);
|
||||
s32 ixgbe_get_phy_firmware_version_aq(struct ixgbe_hw *hw,
|
||||
u16 *firmware_version);
|
||||
|
||||
s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
|
||||
s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
|
||||
u16 *list_offset,
|
||||
u16 *data_offset);
|
||||
s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
|
||||
u8 dev_addr, u8 *data);
|
||||
s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
|
||||
u8 dev_addr, u8 data);
|
||||
s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
|
||||
u8 *eeprom_data);
|
||||
s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
|
||||
u8 eeprom_data);
|
||||
#endif /* _IXGBE_PHY_H_ */
|
|
@ -0,0 +1,29 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "ixgbe.h"
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,596 @@
|
|||
/*******************************************************************************
|
||||
|
||||
Intel 10 Gigabit PCI Express Linux driver
|
||||
Copyright(c) 1999 - 2009 Intel Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms and conditions of the GNU General Public License,
|
||||
version 2, as published by the Free Software Foundation.
|
||||
|
||||
This program is distributed in the hope it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
The full GNU General Public License is included in this distribution in
|
||||
the file called "COPYING".
|
||||
|
||||
Contact Information:
|
||||
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
|
||||
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "ixgbe.h"
|
||||
#include "kcompat.h"
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) )
|
||||
struct sk_buff *
|
||||
_kc_skb_pad(struct sk_buff *skb, int pad)
|
||||
{
|
||||
struct sk_buff *nskb;
|
||||
|
||||
/* If the skbuff is non linear tailroom is always zero.. */
|
||||
if(skb_tailroom(skb) >= pad)
|
||||
{
|
||||
memset(skb->data+skb->len, 0, pad);
|
||||
return skb;
|
||||
}
|
||||
|
||||
nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
|
||||
kfree_skb(skb);
|
||||
if(nskb)
|
||||
memset(nskb->data+nskb->len, 0, pad);
|
||||
return nskb;
|
||||
}
|
||||
#endif /* < 2.4.21 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) )
|
||||
|
||||
/**************************************/
|
||||
/* PCI DMA MAPPING */
|
||||
|
||||
#if defined(CONFIG_HIGHMEM)
|
||||
|
||||
#ifndef PCI_DRAM_OFFSET
|
||||
#define PCI_DRAM_OFFSET 0
|
||||
#endif
|
||||
|
||||
u64
|
||||
_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
|
||||
size_t size, int direction)
|
||||
{
|
||||
return (((u64) (page - mem_map) << PAGE_SHIFT) + offset +
|
||||
PCI_DRAM_OFFSET);
|
||||
}
|
||||
|
||||
#else /* CONFIG_HIGHMEM */
|
||||
|
||||
u64
|
||||
_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset,
|
||||
size_t size, int direction)
|
||||
{
|
||||
return pci_map_single(dev, (void *)page_address(page) + offset, size,
|
||||
direction);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HIGHMEM */
|
||||
|
||||
void
|
||||
_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size,
|
||||
int direction)
|
||||
{
|
||||
return pci_unmap_single(dev, dma_addr, size, direction);
|
||||
}
|
||||
|
||||
#endif /* 2.4.13 => 2.4.3 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) )
|
||||
|
||||
/**************************************/
|
||||
/* PCI DRIVER API */
|
||||
|
||||
int
|
||||
_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask)
|
||||
{
|
||||
if (!pci_dma_supported(dev, mask))
|
||||
return -EIO;
|
||||
dev->dma_mask = mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
_kc_pci_request_regions(struct pci_dev *dev, char *res_name)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
if (pci_resource_len(dev, i) == 0)
|
||||
continue;
|
||||
|
||||
if (pci_resource_flags(dev, i) & IORESOURCE_IO) {
|
||||
if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
|
||||
pci_release_regions(dev);
|
||||
return -EBUSY;
|
||||
}
|
||||
} else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) {
|
||||
if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) {
|
||||
pci_release_regions(dev);
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
_kc_pci_release_regions(struct pci_dev *dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
if (pci_resource_len(dev, i) == 0)
|
||||
continue;
|
||||
|
||||
if (pci_resource_flags(dev, i) & IORESOURCE_IO)
|
||||
release_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
|
||||
|
||||
else if (pci_resource_flags(dev, i) & IORESOURCE_MEM)
|
||||
release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i));
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************/
|
||||
/* NETWORK DRIVER API */
|
||||
|
||||
struct net_device *
|
||||
_kc_alloc_etherdev(int sizeof_priv)
|
||||
{
|
||||
struct net_device *dev;
|
||||
int alloc_size;
|
||||
|
||||
alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31;
|
||||
dev = kmalloc(alloc_size, GFP_KERNEL);
|
||||
if (!dev)
|
||||
return NULL;
|
||||
memset(dev, 0, alloc_size);
|
||||
|
||||
if (sizeof_priv)
|
||||
dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31);
|
||||
dev->name[0] = '\0';
|
||||
ether_setup(dev);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
int
|
||||
_kc_is_valid_ether_addr(u8 *addr)
|
||||
{
|
||||
const char zaddr[6] = { 0, };
|
||||
|
||||
return !(addr[0] & 1) && memcmp(addr, zaddr, 6);
|
||||
}
|
||||
|
||||
#endif /* 2.4.3 => 2.4.0 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) )
|
||||
|
||||
/*
 * Stand-in for pci_set_power_state() on kernels older than 2.4.6.
 * Does nothing and always reports success (0).
 */
int
_kc_pci_set_power_state(struct pci_dev *dev, int state)
{
	(void)dev;
	(void)state;

	return 0;
}
|
||||
|
||||
int
|
||||
_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* 2.4.6 => 2.4.3 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) )
|
||||
void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page,
|
||||
int off, int size)
|
||||
{
|
||||
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
|
||||
frag->page = page;
|
||||
frag->page_offset = off;
|
||||
frag->size = size;
|
||||
skb_shinfo(skb)->nr_frags = i + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Original Copyright:
|
||||
* find_next_bit.c: fallback find next bit implementation
|
||||
*
|
||||
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
|
||||
* Written by David Howells (dhowells@redhat.com)
|
||||
*/
|
||||
|
||||
/**
|
||||
* find_next_bit - find the next set bit in a memory region
|
||||
* @addr: The address to base the search on
|
||||
* @offset: The bitnumber to start searching at
|
||||
* @size: The maximum size to search
|
||||
*/
|
||||
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
|
||||
unsigned long offset)
|
||||
{
|
||||
const unsigned long *p = addr + BITOP_WORD(offset);
|
||||
unsigned long result = offset & ~(BITS_PER_LONG-1);
|
||||
unsigned long tmp;
|
||||
|
||||
if (offset >= size)
|
||||
return size;
|
||||
size -= result;
|
||||
offset %= BITS_PER_LONG;
|
||||
if (offset) {
|
||||
tmp = *(p++);
|
||||
tmp &= (~0UL << offset);
|
||||
if (size < BITS_PER_LONG)
|
||||
goto found_first;
|
||||
if (tmp)
|
||||
goto found_middle;
|
||||
size -= BITS_PER_LONG;
|
||||
result += BITS_PER_LONG;
|
||||
}
|
||||
while (size & ~(BITS_PER_LONG-1)) {
|
||||
if ((tmp = *(p++)))
|
||||
goto found_middle;
|
||||
result += BITS_PER_LONG;
|
||||
size -= BITS_PER_LONG;
|
||||
}
|
||||
if (!size)
|
||||
return result;
|
||||
tmp = *p;
|
||||
|
||||
found_first:
|
||||
tmp &= (~0UL >> (BITS_PER_LONG - size));
|
||||
if (tmp == 0UL) /* Are any bits set? */
|
||||
return result + size; /* Nope. */
|
||||
found_middle:
|
||||
return result + ffs(tmp);
|
||||
}
|
||||
|
||||
#endif /* 2.6.0 => 2.4.6 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) )
|
||||
/*
 * Backport of kzalloc(): kmalloc() @size bytes with @flags and clear them.
 * Returns the zeroed buffer, or NULL when the allocation fails.
 */
void *_kc_kzalloc(size_t size, int flags)
{
	void *mem = kmalloc(size, flags);

	if (mem == NULL)
		return NULL;

	memset(mem, 0, size);
	return mem;
}
|
||||
#endif /* <= 2.6.13 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) )
|
||||
struct sk_buff *_kc_netdev_alloc_skb(struct net_device *dev,
|
||||
unsigned int length)
|
||||
{
|
||||
/* 16 == NET_PAD_SKB */
|
||||
struct sk_buff *skb;
|
||||
skb = alloc_skb(length + 16, GFP_ATOMIC);
|
||||
if (likely(skb != NULL)) {
|
||||
skb_reserve(skb, 16);
|
||||
skb->dev = dev;
|
||||
}
|
||||
return skb;
|
||||
}
|
||||
#endif /* <= 2.6.17 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) )
|
||||
int _kc_pci_save_state(struct pci_dev *pdev)
|
||||
{
|
||||
struct net_device *netdev = pci_get_drvdata(pdev);
|
||||
struct adapter_struct *adapter = netdev_priv(netdev);
|
||||
int size = PCI_CONFIG_SPACE_LEN, i;
|
||||
u16 pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
|
||||
u16 pcie_link_status;
|
||||
|
||||
if (pcie_cap_offset) {
|
||||
if (!pci_read_config_word(pdev,
|
||||
pcie_cap_offset + PCIE_LINK_STATUS,
|
||||
&pcie_link_status))
|
||||
size = PCIE_CONFIG_SPACE_LEN;
|
||||
}
|
||||
pci_config_space_ich8lan();
|
||||
#ifdef HAVE_PCI_ERS
|
||||
if (adapter->config_space == NULL)
|
||||
#else
|
||||
WARN_ON(adapter->config_space != NULL);
|
||||
#endif
|
||||
adapter->config_space = kmalloc(size, GFP_KERNEL);
|
||||
if (!adapter->config_space) {
|
||||
printk(KERN_ERR "Out of memory in pci_save_state\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
for (i = 0; i < (size / 4); i++)
|
||||
pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void _kc_pci_restore_state(struct pci_dev * pdev)
|
||||
{
|
||||
struct net_device *netdev = pci_get_drvdata(pdev);
|
||||
struct adapter_struct *adapter = netdev_priv(netdev);
|
||||
int size = PCI_CONFIG_SPACE_LEN, i;
|
||||
u16 pcie_cap_offset;
|
||||
u16 pcie_link_status;
|
||||
|
||||
if (adapter->config_space != NULL) {
|
||||
pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP);
|
||||
if (pcie_cap_offset &&
|
||||
!pci_read_config_word(pdev,
|
||||
pcie_cap_offset + PCIE_LINK_STATUS,
|
||||
&pcie_link_status))
|
||||
size = PCIE_CONFIG_SPACE_LEN;
|
||||
|
||||
pci_config_space_ich8lan();
|
||||
for (i = 0; i < (size / 4); i++)
|
||||
pci_write_config_dword(pdev, i * 4, adapter->config_space[i]);
|
||||
#ifndef HAVE_PCI_ERS
|
||||
kfree(adapter->config_space);
|
||||
adapter->config_space = NULL;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_PCI_ERS
|
||||
void _kc_free_netdev(struct net_device *netdev)
|
||||
{
|
||||
struct adapter_struct *adapter = netdev_priv(netdev);
|
||||
|
||||
if (adapter->config_space != NULL)
|
||||
kfree(adapter->config_space);
|
||||
#ifdef CONFIG_SYSFS
|
||||
if (netdev->reg_state == NETREG_UNINITIALIZED) {
|
||||
kfree((char *)netdev - netdev->padded);
|
||||
} else {
|
||||
BUG_ON(netdev->reg_state != NETREG_UNREGISTERED);
|
||||
netdev->reg_state = NETREG_RELEASED;
|
||||
class_device_put(&netdev->class_dev);
|
||||
}
|
||||
#else
|
||||
kfree((char *)netdev - netdev->padded);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif /* <= 2.6.18 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) )
|
||||
|
||||
/*
 * Stub used on kernels older than 2.6.23: performs no registration and
 * always returns 0.
 *
 * Declared with (void) so the definition is a real prototype and calls
 * with stray arguments are diagnosed.
 */
int ixgbe_dcb_netlink_register(void)
{
	return 0;
}
|
||||
|
||||
/*
 * Stub used on kernels older than 2.6.23: performs no unregistration and
 * always returns 0.
 *
 * Declared with (void) so the definition is a real prototype and calls
 * with stray arguments are diagnosed.
 */
int ixgbe_dcb_netlink_unregister(void)
{
	return 0;
}
|
||||
#endif /* < 2.6.23 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) )
|
||||
#ifdef NAPI
|
||||
struct net_device *napi_to_poll_dev(struct napi_struct *napi)
|
||||
{
|
||||
struct adapter_q_vector *q_vector = container_of(napi,
|
||||
struct adapter_q_vector,
|
||||
napi);
|
||||
return &q_vector->poll_dev;
|
||||
}
|
||||
|
||||
int __kc_adapter_clean(struct net_device *netdev, int *budget)
|
||||
{
|
||||
int work_done;
|
||||
int work_to_do = min(*budget, netdev->quota);
|
||||
/* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */
|
||||
struct napi_struct *napi = netdev->priv;
|
||||
work_done = napi->poll(napi, work_to_do);
|
||||
*budget -= work_done;
|
||||
netdev->quota -= work_done;
|
||||
return (work_done >= work_to_do) ? 1 : 0;
|
||||
}
|
||||
#endif /* NAPI */
|
||||
#endif /* <= 2.6.24 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) )
|
||||
#ifdef HAVE_TX_MQ
|
||||
void _kc_netif_tx_stop_all_queues(struct net_device *netdev)
|
||||
{
|
||||
struct adapter_struct *adapter = netdev_priv(netdev);
|
||||
int i;
|
||||
|
||||
netif_stop_queue(netdev);
|
||||
if (netif_is_multiqueue(netdev))
|
||||
for (i = 0; i < adapter->num_tx_queues; i++)
|
||||
netif_stop_subqueue(netdev, i);
|
||||
}
|
||||
void _kc_netif_tx_wake_all_queues(struct net_device *netdev)
|
||||
{
|
||||
struct adapter_struct *adapter = netdev_priv(netdev);
|
||||
int i;
|
||||
|
||||
netif_wake_queue(netdev);
|
||||
if (netif_is_multiqueue(netdev))
|
||||
for (i = 0; i < adapter->num_tx_queues; i++)
|
||||
netif_wake_subqueue(netdev, i);
|
||||
}
|
||||
void _kc_netif_tx_start_all_queues(struct net_device *netdev)
|
||||
{
|
||||
struct adapter_struct *adapter = netdev_priv(netdev);
|
||||
int i;
|
||||
|
||||
netif_start_queue(netdev);
|
||||
if (netif_is_multiqueue(netdev))
|
||||
for (i = 0; i < adapter->num_tx_queues; i++)
|
||||
netif_start_subqueue(netdev, i);
|
||||
}
|
||||
#endif /* HAVE_TX_MQ */
|
||||
#endif /* < 2.6.27 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) )
|
||||
|
||||
int
|
||||
_kc_pci_prepare_to_sleep(struct pci_dev *dev)
|
||||
{
|
||||
pci_power_t target_state;
|
||||
int error;
|
||||
|
||||
target_state = pci_choose_state(dev, PMSG_SUSPEND);
|
||||
|
||||
pci_enable_wake(dev, target_state, true);
|
||||
|
||||
error = pci_set_power_state(dev, target_state);
|
||||
|
||||
if (error)
|
||||
pci_enable_wake(dev, target_state, false);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int
|
||||
_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = pci_enable_wake(dev, PCI_D3cold, enable);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = pci_enable_wake(dev, PCI_D3hot, enable);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
#endif /* < 2.6.28 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) )
|
||||
void _kc_pci_disable_link_state(struct pci_dev *pdev, int state)
|
||||
{
|
||||
struct pci_dev *parent = pdev->bus->self;
|
||||
u16 link_state;
|
||||
int pos;
|
||||
|
||||
if (!parent)
|
||||
return;
|
||||
|
||||
pos = pci_find_capability(parent, PCI_CAP_ID_EXP);
|
||||
if (pos) {
|
||||
pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state);
|
||||
link_state &= ~state;
|
||||
pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state);
|
||||
}
|
||||
}
|
||||
#endif /* < 2.6.29 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) )
|
||||
#ifdef HAVE_NETDEV_SELECT_QUEUE
|
||||
#include <net/ip.h>
|
||||
static u32 _kc_simple_tx_hashrnd;
|
||||
static u32 _kc_simple_tx_hashrnd_initialized;
|
||||
|
||||
u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
|
||||
{
|
||||
u32 addr1, addr2, ports;
|
||||
u32 hash, ihl;
|
||||
u8 ip_proto = 0;
|
||||
|
||||
if (unlikely(!_kc_simple_tx_hashrnd_initialized)) {
|
||||
get_random_bytes(&_kc_simple_tx_hashrnd, 4);
|
||||
_kc_simple_tx_hashrnd_initialized = 1;
|
||||
}
|
||||
|
||||
switch (skb->protocol) {
|
||||
case htons(ETH_P_IP):
|
||||
if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
|
||||
ip_proto = ip_hdr(skb)->protocol;
|
||||
addr1 = ip_hdr(skb)->saddr;
|
||||
addr2 = ip_hdr(skb)->daddr;
|
||||
ihl = ip_hdr(skb)->ihl;
|
||||
break;
|
||||
case htons(ETH_P_IPV6):
|
||||
ip_proto = ipv6_hdr(skb)->nexthdr;
|
||||
addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
|
||||
addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
|
||||
ihl = (40 >> 2);
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
switch (ip_proto) {
|
||||
case IPPROTO_TCP:
|
||||
case IPPROTO_UDP:
|
||||
case IPPROTO_DCCP:
|
||||
case IPPROTO_ESP:
|
||||
case IPPROTO_AH:
|
||||
case IPPROTO_SCTP:
|
||||
case IPPROTO_UDPLITE:
|
||||
ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
|
||||
break;
|
||||
|
||||
default:
|
||||
ports = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd);
|
||||
|
||||
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
|
||||
}
|
||||
#endif /* HAVE_NETDEV_SELECT_QUEUE */
|
||||
#endif /* < 2.6.30 */
|
||||
|
||||
/*****************************************************************************/
|
||||
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) )
|
||||
/*
 * Backport of ethtool_op_set_flags().
 *
 * Rejects @data with -EINVAL when it contains any flag outside @supported.
 * Otherwise, for each of LRO, NTUPLE and RXHASH that this kernel defines,
 * the matching NETIF_F_* bit in dev->features is set or cleared to mirror
 * the ETH_FLAG_* bit in @data. Returns 0 on success.
 */
int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
{
	unsigned long features = dev->features;

	if (data & ~supported)
		return -EINVAL;

#ifdef NETIF_F_LRO
	if (data & ETH_FLAG_LRO)
		features |= NETIF_F_LRO;
	else
		features &= ~NETIF_F_LRO;
#endif
#ifdef NETIF_F_NTUPLE
	if (data & ETH_FLAG_NTUPLE)
		features |= NETIF_F_NTUPLE;
	else
		features &= ~NETIF_F_NTUPLE;
#endif
#ifdef NETIF_F_RXHASH
	if (data & ETH_FLAG_RXHASH)
		features |= NETIF_F_RXHASH;
	else
		features &= ~NETIF_F_RXHASH;
#endif

	dev->features = features;

	return 0;
}
|
||||
#endif /* < 2.6.36 */
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,269 @@
|
|||
#ifndef _PS_H_
|
||||
#define _PS_H_
|
||||
|
||||
#define MAX_DEVICES 16
|
||||
#define MAX_RINGS 64
|
||||
|
||||
/* IN: option for ps_wait(); */
|
||||
#define PS_CTL_IN 0x1 /* The associated queue is available to read */
|
||||
#define PS_CTL_OUT 0x2 /* The associated queue is available to write */
|
||||
/* The associated queue is available to write or read */
|
||||
#define PS_CTL_INOUT (PS_CTL_IN | PS_CTL_OUT)
|
||||
|
||||
/* OUT: return values for ps_wait() */
/*
 * NOTE(review): the per-flag comments below say "read" for the SEND flag
 * and "write" for the RECEIVE flag, which contradicts the flag names.
 * Either the names or the comments are swapped - confirm against the code
 * that produces these values before relying on either.
 */
|
||||
#define PS_SEND_AVAILABLE 0x1 /* The associated queue is available to read */
|
||||
#define PS_RECEIVE_AVAILABLE 0x2 /* The associated queue is available to write */
|
||||
/* The associated queue is available to read and write */
|
||||
#define PS_ALL_AVAILABLE (PS_SEND_AVAILABLE | PS_RECEIVE_AVAILABLE)
|
||||
|
||||
#define PS_SEND_MIN 256
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#define PS_MAJOR 1010
|
||||
#define PS_NAME "packet_shader"
|
||||
|
||||
#define MAX_BUFS (12*4)
|
||||
|
||||
/*
 * Kernel-only (__KERNEL__) bookkeeping structure, cache-line aligned.
 * Holds a semaphore and a wait queue, up to MAX_RINGS attached RX rings
 * with a count (num_attached) and a cursor (next_ring), a packet-info
 * array, and up to MAX_BUFS buffers tracked as parallel arrays: reference
 * count, kernel pointer and user-space pointer.
 * NOTE(review): presumably one instance per open handle - confirm.
 */
struct ____cacheline_aligned ps_context {
|
||||
struct semaphore sem;
|
||||
|
||||
wait_queue_head_t wq;
|
||||
|
||||
int num_attached;
|
||||
struct ixgbe_ring *rx_rings[MAX_RINGS];
|
||||
int next_ring;
|
||||
|
||||
struct ps_pkt_info *info;
|
||||
/* char *buf; */
|
||||
|
||||
int num_bufs;
|
||||
int buf_refcnt[MAX_BUFS];
|
||||
char *kbufs[MAX_BUFS];
|
||||
char __user *ubufs[MAX_BUFS];
|
||||
};
|
||||
|
||||
#else /* __KERNEL__ */
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define __user
|
||||
|
||||
#ifndef IFNAMSIZ
|
||||
#define IFNAMSIZ 16
|
||||
#endif
|
||||
|
||||
#ifndef ETH_ALEN
|
||||
#define ETH_ALEN 6
|
||||
#endif
|
||||
|
||||
#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1)
|
||||
#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
|
||||
|
||||
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
|
||||
{
|
||||
unsigned int sum;
|
||||
|
||||
asm(" movl (%1), %0\n"
|
||||
" subl $4, %2\n"
|
||||
" jbe 2f\n"
|
||||
" addl 4(%1), %0\n"
|
||||
" adcl 8(%1), %0\n"
|
||||
" adcl 12(%1), %0\n"
|
||||
"1: adcl 16(%1), %0\n"
|
||||
" lea 4(%1), %1\n"
|
||||
" decl %2\n"
|
||||
" jne 1b\n"
|
||||
" adcl $0, %0\n"
|
||||
" movl %0, %2\n"
|
||||
" shrl $16, %0\n"
|
||||
" addw %w2, %w0\n"
|
||||
" adcl $0, %0\n"
|
||||
" notl %0\n"
|
||||
"2:"
|
||||
/* Since the input registers which are loaded with iph and ih
|
||||
are modified, we must also specify them as outputs, or gcc
|
||||
will assume they contain their original values. */
|
||||
: "=r" (sum), "=r" (iph), "=r" (ihl)
|
||||
: "1" (iph), "2" (ihl)
|
||||
: "memory");
|
||||
return (__sum16)sum;
|
||||
}
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
/*
 * Description of one network interface. ps_list_devices() hands an array of
 * these to the PS_IOC_LIST_DEVICES ioctl.
 * NOTE(review): presumably the driver fills in every field - confirm.
 */
struct ps_device {
|
||||
char name[IFNAMSIZ];
|
||||
char dev_addr[ETH_ALEN];
|
||||
uint32_t ip_addr; /* network order */
|
||||
|
||||
/* NOTE: this is different from kernel's internal index */
|
||||
int ifindex;
|
||||
|
||||
/* This is kernel's ifindex. */
|
||||
int kifindex;
|
||||
|
||||
int num_rx_queues;
|
||||
int num_tx_queues;
|
||||
};
|
||||
|
||||
/*
 * Names one queue as an (interface index, queue index) pair. Passed to the
 * PS_IOC_ATTACH_RX_DEVICE / PS_IOC_DETACH_RX_DEVICE ioctls and embedded in
 * struct ps_chunk and struct ps_chunk_buf.
 * NOTE(review): ifindex presumably matches ps_device.ifindex, not the
 * kernel's ifindex - confirm.
 */
struct ps_queue {
|
||||
int ifindex;
|
||||
int qidx;
|
||||
};
|
||||
|
||||
#define MAX_PACKET_SIZE 2048
|
||||
#define MAX_CHUNK_SIZE 4096
|
||||
#define ENTRY_CNT 4096
|
||||
|
||||
#define PS_CHECKSUM_RX_UNKNOWN 0
|
||||
#define PS_CHECKSUM_RX_GOOD 1
|
||||
#define PS_CHECKSUM_RX_BAD 2
|
||||
|
||||
/*
 * Per-packet metadata entry; ps_alloc_chunk() allocates MAX_CHUNK_SIZE of
 * these for a chunk's info array.
 * NOTE(review): offset is presumably the packet's byte offset inside the
 * chunk buffer, len its length in bytes, and checksum_rx one of the
 * PS_CHECKSUM_RX_* values - confirm.
 */
struct ps_pkt_info {
|
||||
uint32_t offset;
|
||||
uint16_t len;
|
||||
uint8_t checksum_rx;
|
||||
};
|
||||
|
||||
/*
 * A batch of packets exchanged with the driver by ps_recv_chunk() and
 * ps_send_chunk(): a packet count, the queue involved, a per-packet info
 * array and a data buffer. ps_alloc_chunk() zeroes the structure and
 * allocates the info array.
 * NOTE(review): recv_blocking presumably selects blocking receive - confirm.
 */
struct ps_chunk {
|
||||
/* number of packets to send/recv */
|
||||
int cnt;
|
||||
int recv_blocking;
|
||||
|
||||
/*
|
||||
for RX: output (where did these packets come from?)
|
||||
for TX: input (which interface do you want to xmit?)
|
||||
*/
|
||||
struct ps_queue queue;
|
||||
|
||||
struct ps_pkt_info __user *info;
|
||||
char __user *buf;
|
||||
};
|
||||
|
||||
/*
 * Buffer descriptor used with ps_alloc_chunk_buf(), ps_assign_chunk_buf()
 * and ps_send_chunk_buf(): a packet count, two 16-bit cursors, a 32-bit
 * offset, the target queue, and user-space pointers to a lock, a
 * per-packet info array and the data buffer.
 * NOTE(review): next_to_use / next_to_send look like producer / consumer
 * positions and next_offset like the next free byte in buf - confirm.
 */
struct ps_chunk_buf {
|
||||
|
||||
uint16_t cnt;
|
||||
uint16_t next_to_use;
|
||||
uint16_t next_to_send;
|
||||
uint32_t next_offset;
|
||||
|
||||
struct ps_queue queue;
|
||||
void __user *lock;
|
||||
struct ps_pkt_info __user *info;
|
||||
char __user *buf;
|
||||
};
|
||||
|
||||
/*
 * A single packet: interface index, length and a user-space data pointer.
 * Argument type of ps_slowpath_packet().
 * NOTE(review): len is presumably in bytes - confirm.
 */
struct ps_packet {
|
||||
int ifindex;
|
||||
int len;
|
||||
char __user *buf;
|
||||
};
|
||||
|
||||
/*
 * Bit-set helpers for an interface-id set (see nids_set): bit <id> of the
 * set lvalue <isp> stands for interface id <id>.
 *
 * Every macro argument is parenthesized so that expressions such as
 * "x & 3" or "a | b" can be passed without being re-associated by operator
 * precedence inside the expansion.
 */
#define NID_ZERO(isp)		((isp) = 0)
#define NID_SET(id, isp)	((isp) |= 1 << (id))
#define NID_CLR(id, isp)	((isp) &= ~(1 << (id)))
#define NID_ISSET(id, isp)	((isp) & (1 << (id)))
|
||||
|
||||
// maximum number of interface descriptor is 16
|
||||
typedef uint16_t nids_set;
|
||||
/*
 * Argument of ps_select(): a timeout, a queue index and two interface-id
 * sets (type nids_set, a 16-bit mask).
 * NOTE(review): rx_nids / tx_nids are presumably manipulated with the
 * NID_* macros and report per-interface readiness; the unit of timeout is
 * not visible here - confirm both.
 */
struct ps_event {
|
||||
long timeout;
|
||||
int qidx;
|
||||
|
||||
nids_set rx_nids;
|
||||
nids_set tx_nids;
|
||||
};
|
||||
|
||||
/*
 * Issue an x86 "prefetcht0" instruction for the address @p.
 *
 * Spelled __asm__ __volatile__ rather than asm volatile so that the header
 * also compiles in ISO C modes (-std=c99, -std=c11), where the plain asm
 * keyword is not available.
 */
static inline void prefetcht0(void *p)
{
	__asm__ __volatile__("prefetcht0 (%0)\n\t"
			     :
			     : "r" (p)
			    );
}
|
||||
|
||||
/*
 * Issue an x86 "prefetchnta" instruction for the address @p.
 *
 * Spelled __asm__ __volatile__ rather than asm volatile so that the header
 * also compiles in ISO C modes (-std=c99, -std=c11), where the plain asm
 * keyword is not available.
 */
static inline void prefetchnta(void *p)
{
	__asm__ __volatile__("prefetchnta (%0)\n\t"
			     :
			     : "r" (p)
			    );
}
|
||||
|
||||
/*
 * Copy from @from to @to in whole 64-byte blocks.
 *
 * The number of bytes copied is @len rounded UP to a multiple of 64, and at
 * least 64 (one block is copied even when @len is 0). Both buffers must
 * therefore be valid for that rounded-up size.
 */
static inline void memcpy_aligned(void *to, const void *from, size_t len)
{
	uint8_t *dst = (uint8_t *)to;
	const uint8_t *src = (const uint8_t *)from;
	size_t blocks = (len <= 64) ? 1 : (len - 1) / 64 + 1;
	size_t i;

	for (i = 0; i < blocks; i++)
		memcpy(dst + i * 64, src + i * 64, 64);
}
|
||||
|
||||
#define PS_IOC_LIST_DEVICES 0
|
||||
#define PS_IOC_ATTACH_RX_DEVICE 1
|
||||
#define PS_IOC_DETACH_RX_DEVICE 2
|
||||
#define PS_IOC_RECV_CHUNK 3
|
||||
#define PS_IOC_SEND_CHUNK 4
|
||||
#define PS_IOC_SLOWPATH_PACKET 5
|
||||
#define PS_IOC_RECV_CHUNK_IFIDX 6
|
||||
#define PS_IOC_SEND_CHUNK_BUF 7
|
||||
#define PS_IOC_GET_TXENTRY 8
|
||||
#define PS_IOC_SELECT 9
|
||||
|
||||
#ifndef __KERNEL__
|
||||
|
||||
/*
 * User-space handle. ps_init_handle() zeroes the structure and stores the
 * descriptor returned by open("/dev/packet_shader") in fd;
 * ps_close_handle() closes it and sets fd to -1.
 * The counter arrays hold MAX_DEVICES entries each.
 * NOTE(review): the counters are presumably indexed by ps_device.ifindex
 * and updated by the recv/send calls - confirm.
 */
struct ps_handle {
|
||||
int fd;
|
||||
|
||||
uint64_t rx_chunks[MAX_DEVICES];
|
||||
uint64_t rx_packets[MAX_DEVICES];
|
||||
uint64_t rx_bytes[MAX_DEVICES];
|
||||
|
||||
uint64_t tx_chunks[MAX_DEVICES];
|
||||
uint64_t tx_packets[MAX_DEVICES];
|
||||
uint64_t tx_bytes[MAX_DEVICES];
|
||||
|
||||
void *priv;
|
||||
};
|
||||
|
||||
int ps_list_devices(struct ps_device *devices);
|
||||
int ps_init_handle(struct ps_handle *handle);
|
||||
void ps_close_handle(struct ps_handle *handle);
|
||||
int ps_attach_rx_device(struct ps_handle *handle, struct ps_queue *queue);
|
||||
int ps_detach_rx_device(struct ps_handle *handle, struct ps_queue *queue);
|
||||
int ps_alloc_chunk(struct ps_handle *handle, struct ps_chunk *chunk);
|
||||
void ps_free_chunk(struct ps_chunk *chunk);
|
||||
int ps_alloc_chunk_buf(struct ps_handle *handle,
|
||||
int ifidx, int qidx, struct ps_chunk_buf *c_buf);
|
||||
void ps_free_chunk_buf(struct ps_chunk_buf *c_buf);
|
||||
char* ps_assign_chunk_buf(struct ps_chunk_buf *c_buf, int len);
|
||||
int ps_recv_chunk(struct ps_handle *handle, struct ps_chunk *chunk);
|
||||
int ps_recv_chunk_ifidx(struct ps_handle *handle, struct ps_chunk *chunk, int ifidx);
|
||||
int ps_send_chunk(struct ps_handle *handle, struct ps_chunk *chunk);
|
||||
int ps_send_chunk_buf(struct ps_handle *handle, struct ps_chunk_buf *chunk);
|
||||
int ps_select(struct ps_handle *handle, struct ps_event * event);
|
||||
int ps_get_txentry(struct ps_handle *handle, struct ps_queue * queue);
|
||||
int ps_slowpath_packet(struct ps_handle *handle, struct ps_packet *packet);
|
||||
|
||||
void dump_packet(char *buf, int len);
|
||||
void dump_chunk(struct ps_chunk *chunk);
|
||||
|
||||
int get_num_cpus();
|
||||
int bind_cpu(int cpu);
|
||||
uint64_t rdtsc();
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _PS_H_ */
|
|
@ -0,0 +1,10 @@
|
|||
.PHONY: all clean pslib.a

all: libps.a

# Backward-compatible alias: the goal used to be called "pslib.a" although the
# archive that is actually produced (and linked with -lps) is libps.a.
pslib.a: libps.a

# Naming the real output file lets make skip the rebuild when nothing changed.
libps.a: pslib.o
	ar rcs $@ $^

pslib.o: pslib.c ../include/ps.h
	gcc -O3 -c -g -Wall -o $@ pslib.c

clean:
	rm -f *.o *.a
|
|
@ -0,0 +1,262 @@
|
|||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "../include/ps.h"
|
||||
|
||||
int ps_list_devices(struct ps_device *devices)
|
||||
{
|
||||
struct ps_handle handle;
|
||||
int ret;
|
||||
|
||||
if (ps_init_handle(&handle))
|
||||
return -1;
|
||||
|
||||
ret = ioctl(handle.fd, PS_IOC_LIST_DEVICES, devices);
|
||||
|
||||
ps_close_handle(&handle);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ps_init_handle(struct ps_handle *handle)
|
||||
{
|
||||
memset(handle, 0, sizeof(struct ps_handle));
|
||||
|
||||
handle->fd = open("/dev/packet_shader", O_RDWR);
|
||||
if (handle->fd == -1)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ps_close_handle(struct ps_handle *handle)
|
||||
{
|
||||
close(handle->fd);
|
||||
handle->fd = -1;
|
||||
}
|
||||
|
||||
int ps_attach_rx_device(struct ps_handle *handle, struct ps_queue *queue)
|
||||
{
|
||||
return ioctl(handle->fd, PS_IOC_ATTACH_RX_DEVICE, queue);
|
||||
}
|
||||
|
||||
int ps_detach_rx_device(struct ps_handle *handle, struct ps_queue *queue)
|
||||
{
|
||||
return ioctl(handle->fd, PS_IOC_DETACH_RX_DEVICE, queue);
|
||||
}
|
||||
|
||||
int ps_alloc_chunk(struct ps_handle *handle, struct ps_chunk *chunk)
|
||||
{
|
||||
memset(chunk, 0, sizeof(*chunk));
|
||||
|
||||
chunk->info = (struct ps_pkt_info *)malloc(
|
||||
sizeof(struct ps_pkt_info) * MAX_CHUNK_SIZE);
|
||||
if (!chunk->info)
|
||||
return -1;
|
||||
|
||||
chunk->buf = (char *)mmap(NULL, MAX_PACKET_SIZE * MAX_CHUNK_SIZE,
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
handle->fd, 0);
|
||||
if ((long)chunk->buf == -1)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ps_free_chunk(struct ps_chunk *chunk)
|
||||
{
|
||||
free(chunk->info);
|
||||
munmap(chunk->buf, MAX_PACKET_SIZE * MAX_CHUNK_SIZE);
|
||||
|
||||
chunk->info = NULL;
|
||||
chunk->buf = NULL;
|
||||
}
|
||||
|
||||
int ps_alloc_chunk_buf(struct ps_handle *handle,
|
||||
int ifidx, int qidx, struct ps_chunk_buf *c_buf)
|
||||
{
|
||||
memset(c_buf, 0, sizeof(*c_buf));
|
||||
|
||||
c_buf->info = (struct ps_pkt_info *)malloc(
|
||||
sizeof(struct ps_pkt_info) * ENTRY_CNT);
|
||||
if (!c_buf->info)
|
||||
return -1;
|
||||
|
||||
c_buf->buf = (char *)mmap(NULL, MAX_PACKET_SIZE * ENTRY_CNT,
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||
handle->fd, 0);
|
||||
if ((long)c_buf->buf == -1)
|
||||
return -1;
|
||||
|
||||
c_buf->lock = (pthread_mutex_t *) malloc(
|
||||
sizeof(pthread_mutex_t));
|
||||
|
||||
c_buf->queue.ifindex = ifidx;
|
||||
c_buf->queue.qidx = qidx;
|
||||
c_buf->cnt = 0;
|
||||
c_buf->next_to_use = 0;
|
||||
c_buf->next_to_send = 0;
|
||||
c_buf->next_offset = 0;
|
||||
|
||||
if (pthread_mutex_init(c_buf->lock, NULL)) {
|
||||
perror("pthread_mutex_init of c_buf->lock\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ps_free_chunk_buf(struct ps_chunk_buf *c_buf)
|
||||
{
|
||||
free(c_buf->info);
|
||||
munmap(c_buf->buf, MAX_PACKET_SIZE * ENTRY_CNT);
|
||||
|
||||
c_buf->info = NULL;
|
||||
c_buf->buf = NULL;
|
||||
}
|
||||
|
||||
char* ps_assign_chunk_buf(struct ps_chunk_buf *c_buf, int len) {
|
||||
|
||||
int w_idx;
|
||||
|
||||
if (c_buf->cnt >= ENTRY_CNT)
|
||||
return NULL;
|
||||
|
||||
pthread_mutex_lock(c_buf->lock);
|
||||
|
||||
w_idx = c_buf->next_to_use;
|
||||
|
||||
c_buf->cnt++;
|
||||
c_buf->info[w_idx].len = len;
|
||||
c_buf->info[w_idx].offset = c_buf->next_offset;
|
||||
c_buf->next_offset += (len + 63) / 64 * 64;
|
||||
|
||||
c_buf->next_to_use = (w_idx + 1) % ENTRY_CNT;
|
||||
|
||||
if(c_buf->next_to_use == 0)
|
||||
c_buf->next_offset = 0;
|
||||
|
||||
pthread_mutex_unlock(c_buf->lock);
|
||||
|
||||
return c_buf->buf + c_buf->info[w_idx].offset;
|
||||
}
|
||||
|
||||
int ps_recv_chunk(struct ps_handle *handle, struct ps_chunk *chunk)
|
||||
{
|
||||
int cnt;
|
||||
|
||||
cnt = ioctl(handle->fd, PS_IOC_RECV_CHUNK, chunk);
|
||||
if (cnt > 0) {
|
||||
int i;
|
||||
int ifindex = chunk->queue.ifindex;
|
||||
|
||||
handle->rx_chunks[ifindex]++;
|
||||
handle->rx_packets[ifindex] += cnt;
|
||||
|
||||
for (i = 0; i < cnt; i++)
|
||||
handle->rx_bytes[ifindex] += chunk->info[i].len;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
int ps_recv_chunk_ifidx(struct ps_handle *handle, struct ps_chunk *chunk, int ifidx)
|
||||
{
|
||||
int cnt;
|
||||
|
||||
chunk->queue.ifindex = ifidx;
|
||||
cnt = ioctl(handle->fd, PS_IOC_RECV_CHUNK_IFIDX, chunk);
|
||||
if (cnt > 0) {
|
||||
int i;
|
||||
int ifindex = chunk->queue.ifindex;
|
||||
|
||||
handle->rx_chunks[ifindex]++;
|
||||
handle->rx_packets[ifindex] += cnt;
|
||||
|
||||
for (i = 0; i < cnt; i++)
|
||||
handle->rx_bytes[ifindex] += chunk->info[i].len;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/* Send the given chunk to the modified driver. */
|
||||
int ps_send_chunk(struct ps_handle *handle, struct ps_chunk *chunk)
|
||||
{
|
||||
int cnt;
|
||||
|
||||
cnt = ioctl(handle->fd, PS_IOC_SEND_CHUNK, chunk);
|
||||
if (cnt > 0) {
|
||||
int i;
|
||||
int ifindex = chunk->queue.ifindex;
|
||||
|
||||
handle->tx_chunks[ifindex]++;
|
||||
handle->tx_packets[ifindex] += cnt;
|
||||
|
||||
for (i = 0; i < cnt; i++)
|
||||
handle->tx_bytes[ifindex] += chunk->info[i].len;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/* Send the given chunk to the modified driver. */
|
||||
int ps_send_chunk_buf(struct ps_handle *handle, struct ps_chunk_buf *c_buf)
|
||||
{
|
||||
int cnt;
|
||||
|
||||
if(c_buf->cnt <= 0)
|
||||
return 0;
|
||||
|
||||
pthread_mutex_lock(c_buf->lock);
|
||||
|
||||
cnt = ioctl(handle->fd, PS_IOC_SEND_CHUNK_BUF, c_buf);
|
||||
if (cnt > 0) {
|
||||
int i;
|
||||
int ifindex = c_buf->queue.ifindex;
|
||||
|
||||
handle->tx_chunks[ifindex]++;
|
||||
handle->tx_packets[ifindex] += cnt;
|
||||
|
||||
for (i = 0; i < cnt; i++)
|
||||
handle->tx_bytes[ifindex] += c_buf->info[i].len;
|
||||
|
||||
c_buf->cnt -= cnt;
|
||||
c_buf->next_to_send = (c_buf->next_to_send + cnt) % ENTRY_CNT;
|
||||
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(c_buf->lock);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
int ps_select(struct ps_handle *handle, struct ps_event * event)
|
||||
{
|
||||
return ioctl(handle->fd, PS_IOC_SELECT, event);
|
||||
}
|
||||
|
||||
/* Get the remain number of tx_entry in a tx_ring */
|
||||
int ps_get_txentry(struct ps_handle *handle, struct ps_queue *queue)
|
||||
{
|
||||
return ioctl(handle->fd, PS_IOC_GET_TXENTRY, queue);
|
||||
}
|
||||
|
||||
int ps_slowpath_packet(struct ps_handle *handle, struct ps_packet *packet)
|
||||
{
|
||||
return ioctl(handle->fd, PS_IOC_SLOWPATH_PACKET, packet);
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
|
||||
# None of these goals names a file; declare them all phony so a stray file
# called "default" or "clean-library" cannot silently disable them.
.PHONY: default clean clean-library

### TARGET ###
MTCP_LIB_DIR=../lib
MTCP_LIB=libmtcp.a

MTCP_HDR_DIR=../include
MTCP_HDR = mtcp_api.h mtcp_epoll.h


### GCC ###
GCC = gcc
GCC_OPT = -m64 -Wall

#DBG_OPT = -DDBGMSG -DDBGFUNC -DSTREAM -DSTATE -DTSTAT -DAPP -DEPOLL
#DBG_OPT = -DDBGMSG -DDBGFUNC -DSTREAM -DSTATE
#DBG_OPT += -DPKTDUMP
#DBG_OPT += -DDUMP_STREAM
#GCC_OPT += -g -DNETSTAT -DINFO -DDBGERR -DDBGCERR
GCC_OPT += -DNDEBUG -O3 -DNETSTAT -DINFO -DDBGERR -DDBGCERR
GCC_OPT += $(DBG_OPT)


### LIBRARIES AND INCLUDES ###
INC_DIR=./include
INC= -I$(INC_DIR)

LIBS = -lnuma -lpthread -lrt
LIBS += -lps

# PacketShader LIBRARY and HEADER
PS_DIR=../../../io_engine/io_engine-2.0.38.2
LIB_DIR = -L$(PS_DIR)/lib
INC += -I$(PS_DIR)/include

# Define HUGEPAGE only when libhugetlbfs is installed.
ifeq ($(wildcard /usr/lib/libhugetlbfs.so),)
else
	GCC_OPT += -DHUGEPAGE
endif


### SOURCE CODE ###
SRCS = core.c tcp_stream.c config.c api.c eventpoll.c socket.c pipe.c \
	   tcp_util.c eth_in.c ip_in.c tcp_in.c eth_out.c ip_out.c tcp_out.c \
	   arp.c timer.c cpu.c rss.c addr_pool.c fhash.c memory_mgt.c logger.c debug.c \
	   tcp_rb_frag_queue.c tcp_ring_buffer.c tcp_send_buffer.c tcp_sb_queue.c tcp_stream_queue.c

OBJS = $(patsubst %.c,%.o,$(SRCS))
DEPS = $(patsubst %.c,.%.d,$(SRCS))


### GOALS ###
# Build every object, publish the public headers, then archive the library.
default: $(OBJS) $(MTCP_HDR)
	mkdir -p $(MTCP_LIB_DIR)
	ar rvs $(MTCP_LIB_DIR)/$(MTCP_LIB) $(OBJS)

$(OBJS): %.o: %.c Makefile
	$(GCC) $(GCC_OPT) $(INC) -c $< -o $@
$(DEPS): .%.d: %.c Makefile
	$(GCC) $(GCC_OPT) $(INC) -MM $(CFLAGS) $< > $@

-include $(DEPS)

# Copy each public header from ./include to the exported include directory.
$(MTCP_HDR):
	mkdir -p $(MTCP_HDR_DIR)
	cp $(INC_DIR)/$@ $(MTCP_HDR_DIR)/$@

clean: clean-library
	rm -f *.o *~ core
	rm -f .*.d

clean-library:
	rm -f $(MTCP_LIB_DIR)/*
	rm -f $(MTCP_HDR_DIR)/*
|
|
@ -0,0 +1,302 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <pthread.h>
|
||||
#include "addr_pool.h"
|
||||
#include "rss.h"
|
||||
#include "debug.h"
|
||||
|
||||
#define MIN_PORT (1025)
|
||||
#define MAX_PORT (65535 + 1)
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* One (IP address, port) pair managed by the pool. */
struct addr_entry
{
	struct sockaddr_in addr;			/* address and port, network order */
	TAILQ_ENTRY(addr_entry) addr_link;	/* linkage in the free or used list */
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct addr_map
|
||||
{
|
||||
struct addr_entry *addrmap[MAX_PORT];
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Pool of local (address, port) pairs with free/used bookkeeping. */
struct addr_pool
{
	struct addr_entry *pool;		/* address pool */
	struct addr_map *mapper;		/* address map, one table per address */

	uint32_t addr_base;				/* in host order */
	int num_addr;					/* number of addresses in use */

	int num_entry;					/* entries created in the pool */
	int num_free;					/* entries currently on free_list */
	int num_used;					/* entries currently on used_list */

	pthread_mutex_t lock;			/* guards the lists and counters */
	TAILQ_HEAD(, addr_entry) free_list;
	TAILQ_HEAD(, addr_entry) used_list;
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
addr_pool_t
|
||||
CreateAddressPool(in_addr_t addr_base, int num_addr)
|
||||
{
|
||||
struct addr_pool *ap;
|
||||
int num_entry;
|
||||
int i, j, cnt;
|
||||
in_addr_t addr;
|
||||
uint32_t addr_h;
|
||||
|
||||
ap = (addr_pool_t)calloc(1, sizeof(struct addr_pool));
|
||||
if (!ap)
|
||||
return NULL;
|
||||
|
||||
/* initialize address pool */
|
||||
num_entry = num_addr * (MAX_PORT - MIN_PORT);
|
||||
ap->pool = (struct addr_entry *)calloc(num_entry, sizeof(struct addr_entry));
|
||||
if (!ap->pool) {
|
||||
free(ap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize address map */
|
||||
ap->mapper = (struct addr_map *)calloc(num_addr, sizeof(struct addr_map));
|
||||
if (!ap->mapper) {
|
||||
free(ap->pool);
|
||||
free(ap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
TAILQ_INIT(&ap->free_list);
|
||||
TAILQ_INIT(&ap->used_list);
|
||||
|
||||
if (pthread_mutex_init(&ap->lock, NULL)) {
|
||||
free(ap->pool);
|
||||
free(ap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&ap->lock);
|
||||
|
||||
ap->addr_base = ntohl(addr_base);
|
||||
ap->num_addr = num_addr;
|
||||
|
||||
cnt = 0;
|
||||
for (i = 0; i < num_addr; i++) {
|
||||
addr_h = ap->addr_base + i;
|
||||
addr = htonl(addr_h);
|
||||
for (j = MIN_PORT; j < MAX_PORT; j++) {
|
||||
ap->pool[cnt].addr.sin_addr.s_addr = addr;
|
||||
ap->pool[cnt].addr.sin_port = htons(j);
|
||||
ap->mapper[i].addrmap[j] = &ap->pool[cnt];
|
||||
|
||||
TAILQ_INSERT_TAIL(&ap->free_list, &ap->pool[cnt], addr_link);
|
||||
|
||||
if ((++cnt) >= num_entry)
|
||||
break;
|
||||
}
|
||||
}
|
||||
ap->num_entry = cnt;
|
||||
ap->num_free = cnt;
|
||||
ap->num_used = 0;
|
||||
|
||||
pthread_mutex_unlock(&ap->lock);
|
||||
|
||||
return ap;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
addr_pool_t
|
||||
CreateAddressPoolPerCore(int core, int num_queues,
|
||||
in_addr_t saddr_base, int num_addr, in_addr_t daddr, in_port_t dport)
|
||||
{
|
||||
struct addr_pool *ap;
|
||||
int num_entry;
|
||||
int i, j, cnt;
|
||||
in_addr_t saddr;
|
||||
uint32_t saddr_h, daddr_h;
|
||||
uint16_t sport_h, dport_h;
|
||||
int rss_core;
|
||||
|
||||
ap = (addr_pool_t)calloc(1, sizeof(struct addr_pool));
|
||||
if (!ap)
|
||||
return NULL;
|
||||
|
||||
/* initialize address pool */
|
||||
num_entry = (num_addr * (MAX_PORT - MIN_PORT)) / num_queues;
|
||||
ap->pool = (struct addr_entry *)calloc(num_entry, sizeof(struct addr_entry));
|
||||
if (!ap->pool) {
|
||||
free(ap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* initialize address map */
|
||||
ap->mapper = (struct addr_map *)calloc(num_addr, sizeof(struct addr_map));
|
||||
if (!ap->mapper) {
|
||||
free(ap->pool);
|
||||
free(ap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
TAILQ_INIT(&ap->free_list);
|
||||
TAILQ_INIT(&ap->used_list);
|
||||
|
||||
if (pthread_mutex_init(&ap->lock, NULL)) {
|
||||
free(ap->pool);
|
||||
free(ap);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&ap->lock);
|
||||
|
||||
ap->addr_base = ntohl(saddr_base);
|
||||
ap->num_addr = num_addr;
|
||||
daddr_h = ntohl(daddr);
|
||||
dport_h = ntohs(dport);
|
||||
|
||||
/* search address space to get RSS-friendly addresses */
|
||||
cnt = 0;
|
||||
for (i = 0; i < num_addr; i++) {
|
||||
saddr_h = ap->addr_base + i;
|
||||
saddr = htonl(saddr_h);
|
||||
for (j = MIN_PORT; j < MAX_PORT; j++) {
|
||||
if (cnt >= num_entry)
|
||||
break;
|
||||
|
||||
sport_h = j;
|
||||
rss_core = GetRSSCPUCore(daddr_h, saddr_h, dport_h, sport_h, num_queues);
|
||||
if (rss_core != core)
|
||||
continue;
|
||||
|
||||
ap->pool[cnt].addr.sin_addr.s_addr = saddr;
|
||||
ap->pool[cnt].addr.sin_port = htons(sport_h);
|
||||
ap->mapper[i].addrmap[j] = &ap->pool[cnt];
|
||||
TAILQ_INSERT_TAIL(&ap->free_list, &ap->pool[cnt], addr_link);
|
||||
cnt++;
|
||||
}
|
||||
}
|
||||
|
||||
ap->num_entry = cnt;
|
||||
ap->num_free = cnt;
|
||||
ap->num_used = 0;
|
||||
//fprintf(stderr, "CPU %d: Created %d address entries.\n", core, cnt);
|
||||
if (ap->num_entry < CONFIG.max_concurrency) {
|
||||
fprintf(stderr, "[WARINING] Available # addresses (%d) is smaller than"
|
||||
" the max concurrency (%d).\n",
|
||||
ap->num_entry, CONFIG.max_concurrency);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&ap->lock);
|
||||
|
||||
return ap;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyAddressPool(addr_pool_t ap)
|
||||
{
|
||||
if (!ap)
|
||||
return;
|
||||
|
||||
if (ap->pool) {
|
||||
free(ap->pool);
|
||||
ap->pool = NULL;
|
||||
}
|
||||
|
||||
if (ap->mapper) {
|
||||
free(ap->mapper);
|
||||
ap->mapper = NULL;
|
||||
}
|
||||
|
||||
pthread_mutex_destroy(&ap->lock);
|
||||
|
||||
free(ap);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
FetchAddress(addr_pool_t ap, int core, int num_queues,
|
||||
const struct sockaddr_in *daddr, struct sockaddr_in *saddr)
|
||||
{
|
||||
struct addr_entry *walk, *next;
|
||||
int rss_core;
|
||||
int ret = -1;
|
||||
|
||||
if (!ap || !daddr || !saddr)
|
||||
return -1;
|
||||
|
||||
pthread_mutex_lock(&ap->lock);
|
||||
|
||||
walk = TAILQ_FIRST(&ap->free_list);
|
||||
while (walk) {
|
||||
next = TAILQ_NEXT(walk, addr_link);
|
||||
|
||||
rss_core = GetRSSCPUCore(ntohl(walk->addr.sin_addr.s_addr),
|
||||
ntohl(daddr->sin_addr.s_addr), ntohs(walk->addr.sin_port),
|
||||
ntohs(daddr->sin_port), num_queues);
|
||||
|
||||
if (core == rss_core)
|
||||
break;
|
||||
|
||||
walk = next;
|
||||
}
|
||||
|
||||
if (walk) {
|
||||
*saddr = walk->addr;
|
||||
TAILQ_REMOVE(&ap->free_list, walk, addr_link);
|
||||
TAILQ_INSERT_TAIL(&ap->used_list, walk, addr_link);
|
||||
ap->num_free--;
|
||||
ap->num_used++;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&ap->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
FreeAddress(addr_pool_t ap, const struct sockaddr_in *addr)
|
||||
{
|
||||
struct addr_entry *walk, *next;
|
||||
int ret = -1;
|
||||
|
||||
if (!ap || !addr)
|
||||
return -1;
|
||||
|
||||
pthread_mutex_lock(&ap->lock);
|
||||
|
||||
if (ap->mapper) {
|
||||
uint32_t addr_h = ntohl(addr->sin_addr.s_addr);
|
||||
uint16_t port_h = ntohs(addr->sin_port);
|
||||
int index = addr_h - ap->addr_base;
|
||||
|
||||
if (index >= 0 || index < ap->num_addr) {
|
||||
walk = ap->mapper[addr_h - ap->addr_base].addrmap[port_h];
|
||||
} else {
|
||||
walk = NULL;
|
||||
}
|
||||
|
||||
} else {
|
||||
walk = TAILQ_FIRST(&ap->used_list);
|
||||
while (walk) {
|
||||
next = TAILQ_NEXT(walk, addr_link);
|
||||
if (addr->sin_port == walk->addr.sin_port &&
|
||||
addr->sin_addr.s_addr == walk->addr.sin_addr.s_addr) {
|
||||
break;
|
||||
}
|
||||
|
||||
walk = next;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (walk) {
|
||||
TAILQ_REMOVE(&ap->used_list, walk, addr_link);
|
||||
TAILQ_INSERT_TAIL(&ap->free_list, walk, addr_link);
|
||||
ap->num_free++;
|
||||
ap->num_used--;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&ap->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,336 @@
|
|||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "arp.h"
|
||||
#include "eth_out.h"
|
||||
#include "debug.h"
|
||||
|
||||
#define ARP_LEN 28
|
||||
#define ARP_HEAD_LEN 8
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Values for the ARP hardware-type field (ar_hrd). */
enum arp_hrd_format
{
	arp_hrd_ethernet = 1
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Values for the ARP opcode field (ar_op). */
enum arp_opcode
{
	arp_op_request = 1,
	arp_op_reply = 2,
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct arphdr
|
||||
{
|
||||
uint16_t ar_hrd; /* hardware address format */
|
||||
uint16_t ar_pro; /* protocol address format */
|
||||
uint8_t ar_hln; /* hardware address length */
|
||||
uint8_t ar_pln; /* protocol address length */
|
||||
uint16_t ar_op; /* arp opcode */
|
||||
|
||||
uint8_t ar_sha[ETH_ALEN]; /* sender hardware address */
|
||||
uint32_t ar_sip; /* sender ip address */
|
||||
uint8_t ar_tha[ETH_ALEN]; /* targe hardware address */
|
||||
uint32_t ar_tip; /* target ip address */
|
||||
} __attribute__ ((packed));
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* One outstanding ARP request, queued until a reply is processed. */
struct arp_queue_entry
{
	uint32_t ip;		/* address being resolved */
	int nif_out;		/* interface the request was sent on */
	uint32_t ts_out;	/* timestamp recorded when the request was queued */

	TAILQ_ENTRY(arp_queue_entry) arp_link;
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Queue of outstanding ARP requests. */
struct arp_manager
{
	TAILQ_HEAD (, arp_queue_entry) list;	/* pending requests */
	int cnt;								/* not updated by the code in this file section */
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct arp_manager arpm;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DumpARPPacket(struct arphdr *arph);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
InitARPTable()
|
||||
{
|
||||
CONFIG.arp.entries = 0;
|
||||
|
||||
CONFIG.arp.entry = (struct arp_entry *)
|
||||
calloc(MAX_ARPENTRY, sizeof(struct arp_entry));
|
||||
if (CONFIG.arp.entry == NULL) {
|
||||
perror("calloc");
|
||||
return -1;
|
||||
}
|
||||
|
||||
TAILQ_INIT(&arpm.list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
unsigned char *
|
||||
GetHWaddr(uint32_t ip)
|
||||
{
|
||||
int i;
|
||||
unsigned char *haddr = NULL;
|
||||
for (i = 0; i < CONFIG.eths_num; i++) {
|
||||
if (ip == CONFIG.eths[i].ip_addr) {
|
||||
haddr = CONFIG.eths[i].haddr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return haddr;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
unsigned char *
|
||||
GetDestinationHWaddr(uint32_t dip)
|
||||
{
|
||||
unsigned char *d_haddr = NULL;
|
||||
int prefix = 0;
|
||||
int i;
|
||||
|
||||
/* Longest prefix matching */
|
||||
for (i = 0; i < CONFIG.arp.entries; i++) {
|
||||
if (CONFIG.arp.entry[i].prefix == 1) {
|
||||
if (CONFIG.arp.entry[i].ip == dip) {
|
||||
d_haddr = CONFIG.arp.entry[i].haddr;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if ((dip & CONFIG.arp.entry[i].ip_mask) ==
|
||||
CONFIG.arp.entry[i].ip_masked) {
|
||||
|
||||
if (CONFIG.arp.entry[i].prefix > prefix) {
|
||||
d_haddr = CONFIG.arp.entry[i].haddr;
|
||||
prefix = CONFIG.arp.entry[i].prefix;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return d_haddr;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
ARPOutput(struct mtcp_manager *mtcp, int nif, int opcode,
|
||||
uint32_t dst_ip, unsigned char *dst_haddr)
|
||||
{
|
||||
if (!dst_haddr)
|
||||
return -1;
|
||||
|
||||
/* Allocate a buffer */
|
||||
struct arphdr *arph = (struct arphdr *)EthernetOutput(mtcp,
|
||||
ETH_P_ARP, nif, dst_haddr, sizeof(struct arphdr));
|
||||
if (!arph) {
|
||||
return -1;
|
||||
}
|
||||
/* Fill arp header */
|
||||
arph->ar_hrd = htons(arp_hrd_ethernet);
|
||||
arph->ar_pro = htons(ETH_P_IP);
|
||||
arph->ar_hln = ETH_ALEN;
|
||||
arph->ar_pln = 4;
|
||||
arph->ar_op = htons(opcode);
|
||||
|
||||
/* Fill arp body */
|
||||
arph->ar_sip = CONFIG.eths[nif].ip_addr;
|
||||
arph->ar_tip = dst_ip;
|
||||
|
||||
memcpy(arph->ar_sha, CONFIG.eths[nif].haddr, arph->ar_hln);
|
||||
memcpy(arph->ar_tha, dst_haddr, arph->ar_hln);
|
||||
|
||||
#if DBGMSG
|
||||
DumpARPPacket(arph);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
RegisterARPEntry(uint32_t ip, const unsigned char *haddr)
|
||||
{
|
||||
int idx = CONFIG.arp.entries;
|
||||
|
||||
CONFIG.arp.entry[idx].prefix = 32;
|
||||
CONFIG.arp.entry[idx].ip = ip;
|
||||
memcpy(CONFIG.arp.entry[idx].haddr, haddr, ETH_ALEN);
|
||||
CONFIG.arp.entry[idx].ip_mask = -1;
|
||||
CONFIG.arp.entry[idx].ip_masked = ip;
|
||||
|
||||
CONFIG.arp.entries = idx + 1;
|
||||
|
||||
TRACE_CONFIG("Learned new arp entry.\n");
|
||||
PrintARPTable();
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
RequestARP(mtcp_manager_t mtcp, uint32_t ip, int nif, uint32_t cur_ts)
|
||||
{
|
||||
struct arp_queue_entry *ent;
|
||||
unsigned char haddr[ETH_ALEN];
|
||||
|
||||
/* if the arp request is in progress, return */
|
||||
TAILQ_FOREACH(ent, &arpm.list, arp_link) {
|
||||
if (ent->ip == ip)
|
||||
return;
|
||||
}
|
||||
|
||||
ent = (struct arp_queue_entry *)calloc(1, sizeof(struct arp_queue_entry));
|
||||
ent->ip = ip;
|
||||
ent->nif_out = nif;
|
||||
ent->ts_out = cur_ts;
|
||||
TAILQ_INSERT_TAIL(&arpm.list, ent, arp_link);
|
||||
|
||||
/* else, broadcast arp request */
|
||||
memset(haddr, 0xFF, ETH_ALEN);
|
||||
ARPOutput(mtcp, nif, arp_op_request, ip, haddr);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
ProcessARPRequest(mtcp_manager_t mtcp,
|
||||
struct arphdr *arph, int nif, uint32_t cur_ts)
|
||||
{
|
||||
unsigned char *temp;
|
||||
|
||||
/* register the arp entry if not exist */
|
||||
temp = GetDestinationHWaddr(arph->ar_sip);
|
||||
if (!temp) {
|
||||
RegisterARPEntry(arph->ar_sip, arph->ar_sha);
|
||||
}
|
||||
|
||||
/* send arp reply */
|
||||
ARPOutput(mtcp, nif, arp_op_reply, arph->ar_sip, arph->ar_sha);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
ProcessARPReply(mtcp_manager_t mtcp, struct arphdr *arph, uint32_t cur_ts)
|
||||
{
|
||||
unsigned char *temp;
|
||||
struct arp_queue_entry *ent;
|
||||
|
||||
/* register the arp entry if not exist */
|
||||
temp = GetDestinationHWaddr(arph->ar_sip);
|
||||
if (!temp) {
|
||||
RegisterARPEntry(arph->ar_sip, arph->ar_sha);
|
||||
}
|
||||
|
||||
/* remove from the arp request queue */
|
||||
TAILQ_FOREACH(ent, &arpm.list, arp_link) {
|
||||
if (ent->ip == arph->ar_tip) {
|
||||
TAILQ_REMOVE(&arpm.list, ent, arp_link);
|
||||
free(ent);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
ProcessARPPacket(mtcp_manager_t mtcp, uint32_t cur_ts,
|
||||
const int ifidx, unsigned char *pkt_data, int len)
|
||||
{
|
||||
struct arphdr *arph = (struct arphdr *)(pkt_data + sizeof(struct ethhdr));
|
||||
int i;
|
||||
int to_me = FALSE;
|
||||
|
||||
/* process the arp messages destined to me */
|
||||
for (i = 0; i < CONFIG.eths_num; i++) {
|
||||
if (arph->ar_tip == CONFIG.eths[i].ip_addr) {
|
||||
to_me = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (!to_me)
|
||||
return TRUE;
|
||||
|
||||
#if DBGMSG
|
||||
DumpARPPacket(arph);
|
||||
#endif
|
||||
|
||||
switch (ntohs(arph->ar_op)) {
|
||||
case arp_op_request:
|
||||
ProcessARPRequest(mtcp, arph, ifidx, cur_ts);
|
||||
break;
|
||||
|
||||
case arp_op_reply:
|
||||
ProcessARPReply(mtcp, arph, cur_ts);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
// Publish my address
|
||||
void
|
||||
PublishARP(mtcp_manager_t mtcp)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < CONFIG.eths_num; i++) {
|
||||
ARPOutput(mtcp, CONFIG.eths[i].ifindex, arp_op_request, 0, NULL);
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
PrintARPTable()
|
||||
{
|
||||
int i;
|
||||
|
||||
/* print out process start information */
|
||||
TRACE_CONFIG("ARP Table:\n");
|
||||
for (i = 0; i < CONFIG.arp.entries; i++) {
|
||||
|
||||
uint8_t *da = (uint8_t *)&CONFIG.arp.entry[i].ip;
|
||||
|
||||
TRACE_CONFIG("IP addr: %u.%u.%u.%u, "
|
||||
"dst_hwaddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
|
||||
da[0], da[1], da[2], da[3],
|
||||
CONFIG.arp.entry[i].haddr[0],
|
||||
CONFIG.arp.entry[i].haddr[1],
|
||||
CONFIG.arp.entry[i].haddr[2],
|
||||
CONFIG.arp.entry[i].haddr[3],
|
||||
CONFIG.arp.entry[i].haddr[4],
|
||||
CONFIG.arp.entry[i].haddr[5]);
|
||||
}
|
||||
if (CONFIG.arp.entries == 0)
|
||||
TRACE_CONFIG("(blank)\n");
|
||||
|
||||
TRACE_CONFIG("----------------------------------------------------------"
|
||||
"-----------------------\n");
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DumpARPPacket(struct arphdr *arph)
|
||||
{
|
||||
uint8_t *t;
|
||||
|
||||
fprintf(stderr, "ARP header: \n");
|
||||
fprintf(stderr, "Hareware type: %d (len: %d), "
|
||||
"protocol type: %d (len: %d), opcode: %d\n",
|
||||
ntohs(arph->ar_hrd), arph->ar_hln,
|
||||
ntohs(arph->ar_pro), arph->ar_pln, ntohs(arph->ar_op));
|
||||
t = (uint8_t *)&arph->ar_sip;
|
||||
fprintf(stderr, "Sender IP: %u.%u.%u.%u, "
|
||||
"haddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
|
||||
t[0], t[1], t[2], t[3],
|
||||
arph->ar_sha[0], arph->ar_sha[1], arph->ar_sha[2],
|
||||
arph->ar_sha[3], arph->ar_sha[4], arph->ar_sha[5]);
|
||||
t = (uint8_t *)&arph->ar_tip;
|
||||
fprintf(stderr, "Target IP: %u.%u.%u.%u, "
|
||||
"haddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
|
||||
t[0], t[1], t[2], t[3],
|
||||
arph->ar_tha[0], arph->ar_tha[1], arph->ar_tha[2],
|
||||
arph->ar_tha[3], arph->ar_tha[4], arph->ar_tha[5]);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,686 @@
|
|||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <net/if.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netdb.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "config.h"
|
||||
#include "tcp_in.h"
|
||||
#include "arp.h"
|
||||
#include "debug.h"
|
||||
|
||||
#define MAX_OPTLINE_LEN 1024
|
||||
#define MAX_PROCLINE_LEN 1024
|
||||
|
||||
static const char *route_file = "config/route.conf";
|
||||
static const char *arp_file = "config/arp.conf";
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Parse a base-10 integer from `value`.
 *
 * Returns the parsed value, or -1 when strtol() reports EINVAL or ERANGE.
 * errno is cleared first so that an error left over from an earlier,
 * unrelated call is not mistaken for a parse failure.
 */
static int
GetIntValue(char* value)
{
	int ret = 0;

	errno = 0;
	ret = strtol(value, (char**)NULL, 10);
	if (errno == EINVAL || errno == ERANGE)
		return -1;
	return ret;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Build a netmask from a prefix length.
 *
 * The mask is assembled byte by byte in memory order (first byte = most
 * significant octet of the address), so the returned value is in network
 * byte order. Prefixes above 32 yield an all-ones mask; 0 or negative
 * prefixes yield 0.
 */
static inline uint32_t
MaskFromPrefix(int prefix)
{
	uint32_t mask = 0;
	uint8_t *octet = (uint8_t *)&mask;
	int byte_idx;

	for (byte_idx = 0; byte_idx < 4 && byte_idx <= prefix / 8; byte_idx++) {
		/* number of leading one-bits that fall into this octet */
		int bits = prefix - byte_idx * 8;
		int bit;

		if (bits > 8)
			bits = 8;

		for (bit = 0; bit < bits; bit++)
			octet[byte_idx] |= (1 << (7 - bit));
	}

	return mask;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static void
|
||||
EnrollRouteTableEntry(char *optstr)
|
||||
{
|
||||
char *daddr_s;
|
||||
char *prefix;
|
||||
char *dev;
|
||||
int ifidx;
|
||||
int ridx;
|
||||
int i;
|
||||
|
||||
daddr_s = strtok(optstr, "/");
|
||||
prefix = strtok(NULL, " ");
|
||||
dev = strtok(NULL, "\n");
|
||||
|
||||
assert(daddr_s != NULL);
|
||||
assert(prefix != NULL);
|
||||
assert(dev != NULL);
|
||||
|
||||
ifidx = -1;
|
||||
for (i = 0; i < num_devices; i++) {
|
||||
if (strcmp(dev, devices[i].name) != 0)
|
||||
continue;
|
||||
|
||||
ifidx = devices[i].ifindex;
|
||||
break;
|
||||
}
|
||||
if (ifidx == -1) {
|
||||
TRACE_CONFIG("Interface %s does not exist!\n", dev);
|
||||
exit(4);
|
||||
}
|
||||
|
||||
ridx = CONFIG.routes++;
|
||||
CONFIG.rtable[ridx].daddr = inet_addr(daddr_s);
|
||||
CONFIG.rtable[ridx].prefix = atoi(prefix);
|
||||
if (CONFIG.rtable[ridx].prefix > 32 || CONFIG.rtable[ridx].prefix < 0) {
|
||||
TRACE_CONFIG("Prefix length should be between 0 - 32.\n");
|
||||
exit(4);
|
||||
}
|
||||
|
||||
CONFIG.rtable[ridx].mask = MaskFromPrefix(CONFIG.rtable[ridx].prefix);
|
||||
CONFIG.rtable[ridx].masked =
|
||||
CONFIG.rtable[ridx].daddr & CONFIG.rtable[ridx].mask;
|
||||
CONFIG.rtable[ridx].nif = ifidx;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
SetRoutingTableFromFile()
|
||||
{
|
||||
#define ROUTES "ROUTES"
|
||||
|
||||
FILE *fc;
|
||||
char optstr[MAX_OPTLINE_LEN];
|
||||
int i;
|
||||
|
||||
TRACE_CONFIG("Loading routing configurations from : %s\n", route_file);
|
||||
|
||||
fc = fopen(route_file, "r");
|
||||
if (fc == NULL) {
|
||||
perror("fopen");
|
||||
TRACE_CONFIG("Skip loading static routing table\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
char *iscomment;
|
||||
int num;
|
||||
|
||||
if (fgets(optstr, MAX_OPTLINE_LEN, fc) == NULL)
|
||||
break;
|
||||
|
||||
//skip comment
|
||||
iscomment = strchr(optstr, '#');
|
||||
if (iscomment == optstr)
|
||||
continue;
|
||||
if (iscomment != NULL)
|
||||
*iscomment = 0;
|
||||
|
||||
if (!strncmp(optstr, ROUTES, sizeof(ROUTES) - 1)) {
|
||||
num = GetIntValue(optstr + sizeof(ROUTES));
|
||||
if (num <= 0)
|
||||
break;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (fgets(optstr, MAX_OPTLINE_LEN, fc) == NULL)
|
||||
break;
|
||||
|
||||
if (*optstr == '#') {
|
||||
i -= 1;
|
||||
continue;
|
||||
}
|
||||
EnrollRouteTableEntry(optstr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fc);
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
PrintRoutingTable()
|
||||
{
|
||||
int i;
|
||||
uint8_t *da;
|
||||
uint8_t *m;
|
||||
uint8_t *md;
|
||||
|
||||
/* print out process start information */
|
||||
TRACE_CONFIG("Routes:\n");
|
||||
for (i = 0; i < CONFIG.routes; i++) {
|
||||
da = (uint8_t *)&CONFIG.rtable[i].daddr;
|
||||
m = (uint8_t *)&CONFIG.rtable[i].mask;
|
||||
md = (uint8_t *)&CONFIG.rtable[i].masked;
|
||||
TRACE_CONFIG("Destination: %u.%u.%u.%u/%d, Mask: %u.%u.%u.%u, "
|
||||
"Masked: %u.%u.%u.%u, Route: xge%d\n",
|
||||
da[0], da[1], da[2], da[3], CONFIG.rtable[i].prefix,
|
||||
m[0], m[1], m[2], m[3], md[0], md[1], md[2], md[3],
|
||||
CONFIG.rtable[i].nif);
|
||||
}
|
||||
if (CONFIG.routes == 0)
|
||||
TRACE_CONFIG("(blank)\n");
|
||||
|
||||
TRACE_CONFIG("----------------------------------------------------------"
|
||||
"-----------------------\n");
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static void
|
||||
ParseMACAddress(unsigned char *haddr, char *haddr_str)
|
||||
{
|
||||
int i;
|
||||
char *str;
|
||||
unsigned int temp;
|
||||
|
||||
str = strtok(haddr_str, ":");
|
||||
i = 0;
|
||||
while (str != NULL) {
|
||||
if (i >= ETH_ALEN) {
|
||||
TRACE_CONFIG("MAC address length exceeds %d!\n", ETH_ALEN);
|
||||
exit(4);
|
||||
}
|
||||
sscanf(str, "%x", &temp);
|
||||
haddr[i++] = temp;
|
||||
str = strtok(NULL, ":");
|
||||
}
|
||||
if (i < ETH_ALEN) {
|
||||
TRACE_CONFIG("MAC address length is less than %d!\n", ETH_ALEN);
|
||||
exit(4);
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Convert a dotted IPv4 string with inet_addr() and store the result in
 * *ip_addr.
 *
 * Returns 0 on success. On a NULL string, or when inet_addr() reports
 * INADDR_NONE, *ip_addr is set to 0 and -1 is returned.
 */
static int
ParseIPAddress(uint32_t *ip_addr, char *ip_str)
{
	uint32_t parsed;

	if (!ip_str) {
		*ip_addr = 0;
		return -1;
	}

	parsed = inet_addr(ip_str);
	if (parsed != INADDR_NONE) {
		*ip_addr = parsed;
		return 0;
	}

	TRACE_CONFIG("IP address is not valid %s\n", ip_str);
	*ip_addr = 0;
	return -1;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
SetRoutingTable()
|
||||
{
|
||||
int i, ridx;
|
||||
unsigned int c;
|
||||
|
||||
CONFIG.routes = 0;
|
||||
|
||||
CONFIG.rtable = (struct route_table *)
|
||||
calloc(MAX_DEVICES, sizeof(struct route_table));
|
||||
if (!CONFIG.rtable)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
/* set default routing table */
|
||||
for (i = 0; i < CONFIG.eths_num; i ++) {
|
||||
|
||||
ridx = CONFIG.routes++;
|
||||
CONFIG.rtable[ridx].daddr = CONFIG.eths[i].ip_addr & CONFIG.eths[i].netmask;
|
||||
|
||||
CONFIG.rtable[ridx].prefix = 0;
|
||||
c = CONFIG.eths[i].netmask;
|
||||
while ((c = (c >> 1))){
|
||||
CONFIG.rtable[ridx].prefix++;
|
||||
}
|
||||
CONFIG.rtable[ridx].prefix++;
|
||||
|
||||
CONFIG.rtable[ridx].mask = CONFIG.eths[i].netmask;
|
||||
CONFIG.rtable[ridx].masked = CONFIG.rtable[ridx].daddr;
|
||||
CONFIG.rtable[ridx].nif = devices[i].ifindex;
|
||||
}
|
||||
|
||||
/* set additional routing table */
|
||||
SetRoutingTableFromFile();
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
GetNumQueues()
|
||||
{
|
||||
FILE *fp;
|
||||
char buf[MAX_PROCLINE_LEN];
|
||||
int queue_cnt;
|
||||
|
||||
fp = fopen("/proc/interrupts", "r");
|
||||
if (!fp) {
|
||||
TRACE_CONFIG("Failed to read data from /proc/interrupts!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* count number of NIC queues from /proc/interrupts */
|
||||
queue_cnt = 0;
|
||||
while (!feof(fp)) {
|
||||
if (fgets(buf, MAX_PROCLINE_LEN, fp) == NULL)
|
||||
break;
|
||||
|
||||
/* "xge0-rx" is the keyword for counting queues */
|
||||
if (strstr(buf, "xge0-rx")) {
|
||||
queue_cnt++;
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
return queue_cnt;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
SetInterfaceInfo()
|
||||
{
|
||||
struct ifreq ifr;
|
||||
int eidx = 0;
|
||||
int i, j;
|
||||
|
||||
TRACE_CONFIG("Loading interface setting\n");
|
||||
|
||||
CONFIG.eths = (struct eth_table *)
|
||||
calloc(MAX_DEVICES, sizeof(struct eth_table));
|
||||
if (!CONFIG.eths)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
// Create socket
|
||||
int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
|
||||
if (sock == -1) {
|
||||
perror("socket");
|
||||
}
|
||||
|
||||
for (i = 0; i < num_devices; i++) {
|
||||
strcpy(ifr.ifr_name, devices[i].name);
|
||||
|
||||
//getting interface information
|
||||
if (ioctl(sock, SIOCGIFFLAGS, &ifr) == 0) {
|
||||
|
||||
// Setting informations
|
||||
eidx = CONFIG.eths_num++;
|
||||
strcpy(CONFIG.eths[eidx].dev_name, ifr.ifr_name);
|
||||
CONFIG.eths[eidx].ifindex = devices[i].ifindex;
|
||||
|
||||
//geting address
|
||||
if (ioctl(sock, SIOCGIFADDR, &ifr) == 0 ) {
|
||||
struct in_addr sin = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr;
|
||||
CONFIG.eths[eidx].ip_addr = *(uint32_t *)&sin;
|
||||
}
|
||||
|
||||
if (ioctl(sock, SIOCGIFHWADDR, &ifr) == 0 ) {
|
||||
for (j = 0; j < 6; j ++) {
|
||||
CONFIG.eths[eidx].haddr[j] = ifr.ifr_addr.sa_data[j];
|
||||
}
|
||||
}
|
||||
|
||||
/* Net MASK */
|
||||
if (ioctl(sock, SIOCGIFNETMASK, &ifr) == 0) {
|
||||
struct in_addr sin = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr;
|
||||
CONFIG.eths[eidx].netmask = *(uint32_t *)&sin;
|
||||
}
|
||||
|
||||
// add to attached devices
|
||||
for (j = 0; j < num_devices_attached; j++) {
|
||||
if (devices_attached[j] == devices[i].ifindex) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
devices_attached[num_devices_attached] = devices[i].ifindex;
|
||||
num_devices_attached++;
|
||||
|
||||
} else {
|
||||
perror("SIOCGIFFLAGS");
|
||||
}
|
||||
}
|
||||
|
||||
num_queues = GetNumQueues();
|
||||
if (num_queues <= 0) {
|
||||
TRACE_CONFIG("Failed to find NIC queues!\n");
|
||||
return -1;
|
||||
}
|
||||
if (num_queues > num_cpus) {
|
||||
TRACE_CONFIG("Too many NIC queues available.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
PrintInterfaceInfo()
|
||||
{
|
||||
int i;
|
||||
|
||||
/* print out process start information */
|
||||
TRACE_CONFIG("Interfaces:\n");
|
||||
for (i = 0; i < CONFIG.eths_num; i++) {
|
||||
|
||||
uint8_t *da = (uint8_t *)&CONFIG.eths[i].ip_addr;
|
||||
uint8_t *nm = (uint8_t *)&CONFIG.eths[i].netmask;
|
||||
|
||||
TRACE_CONFIG("name: %s, ifindex: %d, "
|
||||
"hwaddr: %02X:%02X:%02X:%02X:%02X:%02X, "
|
||||
"ipaddr: %u.%u.%u.%u, "
|
||||
"netmask: %u.%u.%u.%u\n",
|
||||
CONFIG.eths[i].dev_name,
|
||||
CONFIG.eths[i].ifindex,
|
||||
CONFIG.eths[i].haddr[0],
|
||||
CONFIG.eths[i].haddr[1],
|
||||
CONFIG.eths[i].haddr[2],
|
||||
CONFIG.eths[i].haddr[3],
|
||||
CONFIG.eths[i].haddr[4],
|
||||
CONFIG.eths[i].haddr[5],
|
||||
da[0], da[1], da[2], da[3],
|
||||
nm[0], nm[1], nm[2], nm[3]);
|
||||
}
|
||||
TRACE_CONFIG("Number of NIC queues: %d\n", num_queues);
|
||||
TRACE_CONFIG("----------------------------------------------------------"
|
||||
"-----------------------\n");
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static void
|
||||
EnrollARPTableEntry(char *optstr)
|
||||
{
|
||||
char *dip_s; /* destination IP string */
|
||||
char *prefix_s; /* IP prefix string */
|
||||
char *daddr_s; /* destination MAC string */
|
||||
|
||||
int prefix;
|
||||
uint32_t dip_mask;
|
||||
int idx;
|
||||
|
||||
dip_s = strtok(optstr, "/");
|
||||
prefix_s = strtok(NULL, " ");
|
||||
daddr_s = strtok(NULL, "\n");
|
||||
|
||||
assert(dip_s != NULL);
|
||||
assert(prefix_s != NULL);
|
||||
assert(daddr_s != NULL);
|
||||
|
||||
prefix = atoi(prefix_s);
|
||||
|
||||
if (prefix > 32 || prefix < 0) {
|
||||
TRACE_CONFIG("Prefix length should be between 0 - 32.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
idx = CONFIG.arp.entries++;
|
||||
CONFIG.arp.entry[idx].prefix = prefix;
|
||||
ParseIPAddress(&CONFIG.arp.entry[idx].ip, dip_s);
|
||||
ParseMACAddress(CONFIG.arp.entry[idx].haddr, daddr_s);
|
||||
|
||||
dip_mask = MaskFromPrefix(prefix);
|
||||
CONFIG.arp.entry[idx].ip_mask = dip_mask;
|
||||
CONFIG.arp.entry[idx].ip_masked = CONFIG.arp.entry[idx].ip & dip_mask;
|
||||
|
||||
/*
|
||||
int i, cnt;
|
||||
cnt = 1;
|
||||
cnt = cnt << (32 - prefix);
|
||||
|
||||
for (i = 0; i < cnt; i++) {
|
||||
idx = CONFIG.arp.entries++;
|
||||
CONFIG.arp.entry[idx].ip = htonl(ntohl(ip) + i);
|
||||
memcpy(CONFIG.arp.entry[idx].haddr, haddr, ETH_ALEN);
|
||||
}
|
||||
*/
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
LoadARPTable()
|
||||
{
|
||||
#define ARP_ENTRY "ARP_ENTRY"
|
||||
|
||||
FILE *fc;
|
||||
char optstr[MAX_OPTLINE_LEN];
|
||||
int numEntry = 0;
|
||||
int hasNumEntry = 0;
|
||||
|
||||
TRACE_CONFIG("Loading ARP table from : %s\n", arp_file);
|
||||
|
||||
InitARPTable();
|
||||
|
||||
fc = fopen(arp_file, "r");
|
||||
if (fc == NULL) {
|
||||
perror("fopen");
|
||||
TRACE_CONFIG("Skip loading static ARP table\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
char *p;
|
||||
char *temp;
|
||||
|
||||
if (fgets(optstr, MAX_OPTLINE_LEN, fc) == NULL)
|
||||
break;
|
||||
|
||||
p = optstr;
|
||||
|
||||
// skip comment
|
||||
if ((temp = strchr(p, '#')) != NULL)
|
||||
*temp = 0;
|
||||
// remove front and tailing spaces
|
||||
while (*p && isspace((int)*p))
|
||||
p++;
|
||||
temp = p + strlen(p) - 1;
|
||||
while (temp >= p && isspace((int)*temp))
|
||||
*temp = 0;
|
||||
if (*p == 0) /* nothing more to process? */
|
||||
continue;
|
||||
|
||||
if (!hasNumEntry && strncmp(p, ARP_ENTRY, sizeof(ARP_ENTRY)-1) == 0) {
|
||||
numEntry = GetIntValue(p + sizeof(ARP_ENTRY));
|
||||
if (numEntry <= 0) {
|
||||
fprintf(stderr, "Wrong entry in arp.conf: %s\n", p);
|
||||
exit(-1);
|
||||
}
|
||||
#if 0
|
||||
CONFIG.arp.entry = (struct arp_entry *)
|
||||
calloc(numEntry + MAX_ARPENTRY, sizeof(struct arp_entry));
|
||||
if (CONFIG.arp.entry == NULL) {
|
||||
fprintf(stderr, "Wrong entry in arp.conf: %s\n", p);
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
hasNumEntry = 1;
|
||||
} else {
|
||||
if (numEntry <= 0) {
|
||||
fprintf(stderr,
|
||||
"Error in arp.conf: more entries than "
|
||||
"are specifed, entry=%s\n", p);
|
||||
exit(-1);
|
||||
}
|
||||
EnrollARPTableEntry(p);
|
||||
numEntry--;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(fc);
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
ParseConfiguration(char *line)
|
||||
{
|
||||
char optstr[MAX_OPTLINE_LEN];
|
||||
char *p, *q;
|
||||
|
||||
strncpy(optstr, line, MAX_OPTLINE_LEN - 1);
|
||||
|
||||
p = strtok(optstr, " \t=");
|
||||
if (p == NULL) {
|
||||
TRACE_CONFIG("No option name found for the line: %s\n", line);
|
||||
return -1;
|
||||
}
|
||||
|
||||
q = strtok(NULL, " \t=");
|
||||
if (q == NULL) {
|
||||
TRACE_CONFIG("No option value found for the line: %s\n", line);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strcmp(p, "num_cores") == 0) {
|
||||
CONFIG.num_cores = atoi(q);
|
||||
if (CONFIG.num_cores <= 0) {
|
||||
TRACE_CONFIG("Number of cores should be larger than 0.\n");
|
||||
return -1;
|
||||
}
|
||||
if (CONFIG.num_cores > num_cpus) {
|
||||
TRACE_CONFIG("Number of cores should be smaller than "
|
||||
"# physical CPU cores.\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(p, "max_concurrency") == 0) {
|
||||
CONFIG.max_concurrency = atoi(q);
|
||||
if (CONFIG.max_concurrency < 0) {
|
||||
TRACE_CONFIG("The maximum concurrency should be larger than 0.\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(p, "max_num_buffers") == 0) {
|
||||
CONFIG.max_num_buffers = atoi(q);
|
||||
if (CONFIG.max_num_buffers < 0) {
|
||||
TRACE_CONFIG("The maximum # buffers should be larger than 0.\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(p, "rcvbuf") == 0) {
|
||||
CONFIG.rcvbuf_size = atoi(q);
|
||||
if (CONFIG.rcvbuf_size < 64) {
|
||||
TRACE_CONFIG("Receive buffer size should be larger than 64.\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(p, "sndbuf") == 0) {
|
||||
CONFIG.sndbuf_size = atoi(q);
|
||||
if (CONFIG.sndbuf_size < 64) {
|
||||
TRACE_CONFIG("Send buffer size should be larger than 64.\n");
|
||||
return -1;
|
||||
}
|
||||
} else if (strcmp(p, "tcp_timeout") == 0) {
|
||||
CONFIG.tcp_timeout = atoi(q);
|
||||
if (CONFIG.tcp_timeout > 0) {
|
||||
CONFIG.tcp_timeout = SEC_TO_USEC(CONFIG.tcp_timeout) / TIME_TICK;
|
||||
}
|
||||
} else if (strcmp(p, "tcp_timewait") == 0) {
|
||||
CONFIG.tcp_timewait = atoi(q);
|
||||
if (CONFIG.tcp_timewait > 0) {
|
||||
CONFIG.tcp_timewait = SEC_TO_USEC(CONFIG.tcp_timewait) / TIME_TICK;
|
||||
}
|
||||
} else if (strcmp(p, "stat_print") == 0) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < CONFIG.eths_num; i++) {
|
||||
if (strcmp(CONFIG.eths[i].dev_name, q) == 0) {
|
||||
CONFIG.eths[i].stat_print = TRUE;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
TRACE_CONFIG("Unknown option type: %s\n", line);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
LoadConfiguration(char *fname)
|
||||
{
|
||||
FILE *fp;
|
||||
char optstr[MAX_OPTLINE_LEN];
|
||||
|
||||
TRACE_CONFIG("----------------------------------------------------------"
|
||||
"-----------------------\n");
|
||||
TRACE_CONFIG("Loading mtcp configuration from : %s\n", fname);
|
||||
|
||||
fp = fopen(fname, "r");
|
||||
if (fp == NULL) {
|
||||
perror("fopen");
|
||||
TRACE_CONFIG("Failed to load configuration file: %s\n", fname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* set default configuration */
|
||||
CONFIG.num_cores = num_cpus;
|
||||
CONFIG.max_concurrency = 100000;
|
||||
CONFIG.max_num_buffers = 100000;
|
||||
CONFIG.rcvbuf_size = 8192;
|
||||
CONFIG.sndbuf_size = 8192;
|
||||
CONFIG.tcp_timeout = TCP_TIMEOUT;
|
||||
CONFIG.tcp_timewait = TCP_TIMEWAIT;
|
||||
|
||||
while (1) {
|
||||
char *p;
|
||||
char *temp;
|
||||
|
||||
if (fgets(optstr, MAX_OPTLINE_LEN, fp) == NULL)
|
||||
break;
|
||||
|
||||
p = optstr;
|
||||
|
||||
// skip comment
|
||||
if ((temp = strchr(p, '#')) != NULL)
|
||||
*temp = 0;
|
||||
// remove front and tailing spaces
|
||||
while (*p && isspace((int)*p))
|
||||
p++;
|
||||
temp = p + strlen(p) - 1;
|
||||
while (temp >= p && isspace((int)*temp))
|
||||
*temp = 0;
|
||||
if (*p == 0) /* nothing more to process? */
|
||||
continue;
|
||||
|
||||
if (ParseConfiguration(p) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
PrintConfiguration()
|
||||
{
|
||||
int i;
|
||||
|
||||
TRACE_CONFIG("Configurations:\n");
|
||||
TRACE_CONFIG("Number of CPU cores available: %d\n", num_cpus);
|
||||
TRACE_CONFIG("Number of CPU cores to use: %d\n", CONFIG.num_cores);
|
||||
TRACE_CONFIG("Maximum number of concurrency per core: %d\n",
|
||||
CONFIG.max_concurrency);
|
||||
|
||||
TRACE_CONFIG("Maximum number of preallocated buffers per core: %d\n",
|
||||
CONFIG.max_num_buffers);
|
||||
TRACE_CONFIG("Receive buffer size: %d\n", CONFIG.rcvbuf_size);
|
||||
TRACE_CONFIG("Send buffer size: %d\n", CONFIG.sndbuf_size);
|
||||
|
||||
if (CONFIG.tcp_timeout > 0) {
|
||||
TRACE_CONFIG("TCP timeout seconds: %d\n",
|
||||
USEC_TO_SEC(CONFIG.tcp_timeout * TIME_TICK));
|
||||
} else {
|
||||
TRACE_CONFIG("TCP timeout check disabled.\n");
|
||||
}
|
||||
TRACE_CONFIG("TCP timewait seconds: %d\n",
|
||||
USEC_TO_SEC(CONFIG.tcp_timewait * TIME_TICK));
|
||||
TRACE_CONFIG("NICs to print statistics:");
|
||||
for (i = 0; i < CONFIG.eths_num; i++) {
|
||||
if (CONFIG.eths[i].stat_print) {
|
||||
TRACE_CONFIG(" %s", CONFIG.eths[i].dev_name);
|
||||
}
|
||||
}
|
||||
TRACE_CONFIG("\n");
|
||||
TRACE_CONFIG("----------------------------------------------------------"
|
||||
"-----------------------\n");
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,76 @@
|
|||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <numa.h>
|
||||
#include <sched.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define MAX_FILE_NAME 1024
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Return the value sysconf() reports for _SC_NPROCESSORS_ONLN
 * (processors currently online), converted to int.
 */
int
GetNumCPUs()
{
	long online = sysconf(_SC_NPROCESSORS_ONLN);

	return online;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Return the calling thread's kernel thread ID, obtained with the raw
 * gettid system call.
 */
pid_t
Gettid()
{
	long tid = syscall(__NR_gettid);

	return tid;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_core_affinitize(int cpu)
|
||||
{
|
||||
cpu_set_t cpus;
|
||||
struct bitmask *bmask;
|
||||
FILE *fp;
|
||||
char sysfname[MAX_FILE_NAME];
|
||||
int phy_id;
|
||||
size_t n;
|
||||
int ret;
|
||||
|
||||
n = GetNumCPUs();
|
||||
|
||||
if (cpu < 0 || cpu >= (int) n) {
|
||||
errno = -EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
CPU_ZERO(&cpus);
|
||||
CPU_SET((unsigned)cpu, &cpus);
|
||||
|
||||
ret = sched_setaffinity(Gettid(), sizeof(cpus), &cpus);
|
||||
|
||||
if (numa_max_node() == 0)
|
||||
return ret;
|
||||
|
||||
bmask = numa_bitmask_alloc(n);
|
||||
assert(bmask);
|
||||
|
||||
/* read physical id of the core from sys information */
|
||||
snprintf(sysfname, MAX_FILE_NAME - 1,
|
||||
"/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
|
||||
fp = fopen(sysfname, "r");
|
||||
if (!fp) {
|
||||
perror(sysfname);
|
||||
errno = EFAULT;
|
||||
return -1;
|
||||
}
|
||||
fscanf(fp, "%d", &phy_id);
|
||||
|
||||
numa_bitmask_setbit(bmask, phy_id);
|
||||
numa_set_membind(bmask);
|
||||
numa_bitmask_free(bmask);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,269 @@
|
|||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <stdarg.h>
|
||||
#include "debug.h"
|
||||
#include "tcp_in.h"
|
||||
#include "logger.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Hand the current write buffer, if any, to the logger's job queue and
 * write one byte to mtcp->sp_fd. A failed write is only reported.
 */
void flush_log_data(mtcp_manager_t mtcp)
{
	int written;

	if (!mtcp->w_buffer)
		return;

	EnqueueJobBuffer(mtcp->logger, mtcp->w_buffer);

	written = write(mtcp->sp_fd, "A", 1);
	if (written == 1)
		return;

	TRACE_INFO("Failed to flush logs in the buffer.\n");
	perror("write() for pipe");
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
thread_printf(mtcp_manager_t mtcp, FILE* f_idx, const char* _Format, ...)
|
||||
{
|
||||
va_list argptr;
|
||||
va_start(argptr, _Format);
|
||||
|
||||
#define PRINT_LIMIT 4096
|
||||
int len;
|
||||
log_buff *wbuf;
|
||||
|
||||
assert(f_idx != NULL);
|
||||
|
||||
pthread_mutex_lock(&mtcp->logger->mutex);
|
||||
wbuf = mtcp->w_buffer;
|
||||
if (wbuf && (wbuf->buff_len + PRINT_LIMIT > LOG_BUFF_SIZE)) {
|
||||
flush_log_data(mtcp);
|
||||
wbuf = NULL;
|
||||
}
|
||||
|
||||
if (!wbuf) {
|
||||
wbuf = DequeueFreeBuffer(mtcp->logger);
|
||||
assert(wbuf);
|
||||
wbuf->buff_len = 0;
|
||||
wbuf->tid = mtcp->ctx->cpu;
|
||||
wbuf->fid = f_idx;
|
||||
mtcp->w_buffer = wbuf;
|
||||
}
|
||||
|
||||
len = vsnprintf(wbuf->buff + wbuf->buff_len, PRINT_LIMIT, _Format, argptr);
|
||||
wbuf->buff_len += len;
|
||||
pthread_mutex_unlock(&mtcp->logger->mutex);
|
||||
|
||||
va_end(argptr);
|
||||
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DumpPacket(mtcp_manager_t mtcp, char *buf, int len, char *step, int ifindex)
|
||||
{
|
||||
struct ethhdr *ethh;
|
||||
struct iphdr *iph;
|
||||
struct udphdr *udph;
|
||||
struct tcphdr *tcph;
|
||||
uint8_t *t;
|
||||
|
||||
if (ifindex >= 0)
|
||||
thread_printf(mtcp, mtcp->log_fp, "%s %d %u", step, ifindex, mtcp->cur_ts);
|
||||
else
|
||||
thread_printf(mtcp, mtcp->log_fp, "%s ? %u", step, mtcp->cur_ts);
|
||||
|
||||
ethh = (struct ethhdr *)buf;
|
||||
if (ntohs(ethh->h_proto) != ETH_P_IP) {
|
||||
thread_printf(mtcp, mtcp->log_fp, "%02X:%02X:%02X:%02X:%02X:%02X -> %02X:%02X:%02X:%02X:%02X:%02X ",
|
||||
ethh->h_source[0],
|
||||
ethh->h_source[1],
|
||||
ethh->h_source[2],
|
||||
ethh->h_source[3],
|
||||
ethh->h_source[4],
|
||||
ethh->h_source[5],
|
||||
ethh->h_dest[0],
|
||||
ethh->h_dest[1],
|
||||
ethh->h_dest[2],
|
||||
ethh->h_dest[3],
|
||||
ethh->h_dest[4],
|
||||
ethh->h_dest[5]);
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, "protocol %04hx ", ntohs(ethh->h_proto));
|
||||
goto done;
|
||||
}
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, " ");
|
||||
|
||||
iph = (struct iphdr *)(ethh + 1);
|
||||
udph = (struct udphdr *)((uint32_t *)iph + iph->ihl);
|
||||
tcph = (struct tcphdr *)((uint32_t *)iph + iph->ihl);
|
||||
|
||||
t = (uint8_t *)&iph->saddr;
|
||||
thread_printf(mtcp, mtcp->log_fp, "%u.%u.%u.%u", t[0], t[1], t[2], t[3]);
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
|
||||
thread_printf(mtcp, mtcp->log_fp, "(%d)", ntohs(udph->source));
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, " -> ");
|
||||
|
||||
t = (uint8_t *)&iph->daddr;
|
||||
thread_printf(mtcp, mtcp->log_fp, "%u.%u.%u.%u", t[0], t[1], t[2], t[3]);
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
|
||||
thread_printf(mtcp, mtcp->log_fp, "(%d)", ntohs(udph->dest));
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, " IP_ID=%d", ntohs(iph->id));
|
||||
thread_printf(mtcp, mtcp->log_fp, " TTL=%d ", iph->ttl);
|
||||
|
||||
if (ip_fast_csum(iph, iph->ihl)) {
|
||||
__sum16 org_csum, correct_csum;
|
||||
|
||||
org_csum = iph->check;
|
||||
iph->check = 0;
|
||||
correct_csum = ip_fast_csum(iph, iph->ihl);
|
||||
thread_printf(mtcp, mtcp->log_fp, "(bad checksum %04x should be %04x) ",
|
||||
ntohs(org_csum), ntohs(correct_csum));
|
||||
iph->check = org_csum;
|
||||
}
|
||||
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
thread_printf(mtcp, mtcp->log_fp, "TCP ");
|
||||
|
||||
if (tcph->syn)
|
||||
thread_printf(mtcp, mtcp->log_fp, "S ");
|
||||
if (tcph->fin)
|
||||
thread_printf(mtcp, mtcp->log_fp, "F ");
|
||||
if (tcph->ack)
|
||||
thread_printf(mtcp, mtcp->log_fp, "A ");
|
||||
if (tcph->rst)
|
||||
thread_printf(mtcp, mtcp->log_fp, "R ");
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, "seq %u ", ntohl(tcph->seq));
|
||||
if (tcph->ack)
|
||||
thread_printf(mtcp, mtcp->log_fp, "ack %u ", ntohl(tcph->ack_seq));
|
||||
thread_printf(mtcp, mtcp->log_fp, "WDW=%u ", ntohs(tcph->window));
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
thread_printf(mtcp, mtcp->log_fp, "UDP ");
|
||||
break;
|
||||
default:
|
||||
thread_printf(mtcp, mtcp->log_fp, "protocol %d ", iph->protocol);
|
||||
goto done;
|
||||
}
|
||||
done:
|
||||
thread_printf(mtcp, mtcp->log_fp, "len=%d\n", len);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DumpIPPacket(mtcp_manager_t mtcp, const struct iphdr *iph, int len)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
struct tcphdr *tcph;
|
||||
uint8_t *t;
|
||||
|
||||
udph = (struct udphdr *)((uint32_t *)iph + iph->ihl);
|
||||
tcph = (struct tcphdr *)((uint32_t *)iph + iph->ihl);
|
||||
|
||||
t = (uint8_t *)&iph->saddr;
|
||||
thread_printf(mtcp, mtcp->log_fp, "%u.%u.%u.%u", t[0], t[1], t[2], t[3]);
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
|
||||
thread_printf(mtcp, mtcp->log_fp, "(%d)", ntohs(udph->source));
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, " -> ");
|
||||
|
||||
t = (uint8_t *)&iph->daddr;
|
||||
thread_printf(mtcp, mtcp->log_fp, "%u.%u.%u.%u", t[0], t[1], t[2], t[3]);
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
|
||||
thread_printf(mtcp, mtcp->log_fp, "(%d)", ntohs(udph->dest));
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, " IP_ID=%d", ntohs(iph->id));
|
||||
thread_printf(mtcp, mtcp->log_fp, " TTL=%d ", iph->ttl);
|
||||
|
||||
if (ip_fast_csum(iph, iph->ihl)) {
|
||||
thread_printf(mtcp, mtcp->log_fp, "(bad checksum) ");
|
||||
}
|
||||
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
thread_printf(mtcp, mtcp->log_fp, "TCP ");
|
||||
|
||||
if (tcph->syn)
|
||||
thread_printf(mtcp, mtcp->log_fp, "S ");
|
||||
if (tcph->fin)
|
||||
thread_printf(mtcp, mtcp->log_fp, "F ");
|
||||
if (tcph->ack)
|
||||
thread_printf(mtcp, mtcp->log_fp, "A ");
|
||||
if (tcph->rst)
|
||||
thread_printf(mtcp, mtcp->log_fp, "R ");
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, "seq %u ", ntohl(tcph->seq));
|
||||
if (tcph->ack)
|
||||
thread_printf(mtcp, mtcp->log_fp, "ack %u ", ntohl(tcph->ack_seq));
|
||||
thread_printf(mtcp, mtcp->log_fp, "WDW=%u ", ntohs(tcph->window));
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
thread_printf(mtcp, mtcp->log_fp, "UDP ");
|
||||
break;
|
||||
default:
|
||||
thread_printf(mtcp, mtcp->log_fp, "protocol %d ", iph->protocol);
|
||||
goto done;
|
||||
}
|
||||
done:
|
||||
thread_printf(mtcp, mtcp->log_fp, "len=%d\n", len);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DumpIPPacketToFile(FILE *fout, const struct iphdr *iph, int len)
|
||||
{
|
||||
struct udphdr *udph;
|
||||
struct tcphdr *tcph;
|
||||
uint8_t *t;
|
||||
|
||||
udph = (struct udphdr *)((uint32_t *)iph + iph->ihl);
|
||||
tcph = (struct tcphdr *)((uint32_t *)iph + iph->ihl);
|
||||
|
||||
t = (uint8_t *)&iph->saddr;
|
||||
fprintf(fout, "%u.%u.%u.%u", t[0], t[1], t[2], t[3]);
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
|
||||
fprintf(fout, "(%d)", ntohs(udph->source));
|
||||
|
||||
fprintf(fout, " -> ");
|
||||
|
||||
t = (uint8_t *)&iph->daddr;
|
||||
fprintf(fout, "%u.%u.%u.%u", t[0], t[1], t[2], t[3]);
|
||||
if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
|
||||
fprintf(fout, "(%d)", ntohs(udph->dest));
|
||||
|
||||
fprintf(fout, " IP_ID=%d", ntohs(iph->id));
|
||||
fprintf(fout, " TTL=%d ", iph->ttl);
|
||||
|
||||
if (ip_fast_csum(iph, iph->ihl)) {
|
||||
fprintf(fout, "(bad checksum) ");
|
||||
}
|
||||
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
fprintf(fout, "TCP ");
|
||||
|
||||
if (tcph->syn)
|
||||
fprintf(fout, "S ");
|
||||
if (tcph->fin)
|
||||
fprintf(fout, "F ");
|
||||
if (tcph->ack)
|
||||
fprintf(fout, "A ");
|
||||
if (tcph->rst)
|
||||
fprintf(fout, "R ");
|
||||
|
||||
fprintf(fout, "seq %u ", ntohl(tcph->seq));
|
||||
if (tcph->ack)
|
||||
fprintf(fout, "ack %u ", ntohl(tcph->ack_seq));
|
||||
fprintf(fout, "WDW=%u ", ntohs(tcph->window));
|
||||
break;
|
||||
case IPPROTO_UDP:
|
||||
fprintf(fout, "UDP ");
|
||||
break;
|
||||
default:
|
||||
fprintf(fout, "protocol %d ", iph->protocol);
|
||||
goto done;
|
||||
}
|
||||
done:
|
||||
fprintf(fout, "len=%d\n", len);
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
#include "ps.h"
|
||||
#include "ip_in.h"
|
||||
#include "eth_in.h"
|
||||
#include "arp.h"
|
||||
#include "debug.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
ProcessPacket(mtcp_manager_t mtcp, const int ifidx,
|
||||
uint32_t cur_ts, unsigned char *pkt_data, int len)
|
||||
{
|
||||
struct ethhdr *ethh = (struct ethhdr *)pkt_data;
|
||||
u_short ip_proto = ntohs(ethh->h_proto);
|
||||
int ret;
|
||||
|
||||
#ifdef PKTDUMP
|
||||
DumpPacket(mtcp, (char *)pkt_data, len, "IN", ifidx);
|
||||
#endif
|
||||
|
||||
#ifdef NETSTAT
|
||||
mtcp->nstat.rx_packets[ifidx]++;
|
||||
mtcp->nstat.rx_bytes[ifidx] += len + 24;
|
||||
#endif /* NETSTAT */
|
||||
|
||||
#if 0
|
||||
/* ignore mac address which is not for current interface */
|
||||
int i;
|
||||
for (i = 0; i < 6; i ++) {
|
||||
if (ethh->h_dest[i] != CONFIG.eths[ifidx].haddr[i]) {
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ip_proto == ETH_P_IP) {
|
||||
/* process ipv4 packet */
|
||||
ret = ProcessIPv4Packet(mtcp, cur_ts, ifidx, pkt_data, len);
|
||||
|
||||
} else if (ip_proto == ETH_P_ARP) {
|
||||
ProcessARPPacket(mtcp, cur_ts, ifidx, pkt_data, len);
|
||||
return TRUE;
|
||||
|
||||
} else {
|
||||
//DumpPacket(mtcp, (char *)pkt_data, len, "??", ifidx);
|
||||
struct ps_packet packet;
|
||||
packet.ifindex = ifidx;
|
||||
packet.len = len;
|
||||
packet.buf = (char *)pkt_data;
|
||||
ps_slowpath_packet(mtcp->ctx->handle, &packet);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#ifdef NETSTAT
|
||||
if (ret < 0) {
|
||||
mtcp->nstat.rx_errors[ifidx]++;
|
||||
}
|
||||
#endif
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,263 @@
|
|||
#include <stdio.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/tcp.h>
|
||||
#include <netinet/ip.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "arp.h"
|
||||
#include "eth_out.h"
|
||||
#include "debug.h"
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE (1)
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE (0)
|
||||
#endif
|
||||
|
||||
#ifndef ERROR
|
||||
#define ERROR (-1)
|
||||
#endif
|
||||
|
||||
#define MAX(a, b) ((a)>(b)?(a):(b))
|
||||
#define MIN(a, b) ((a)<(b)?(a):(b))
|
||||
|
||||
#define MAX_WINDOW_SIZE 65535
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
enum ETH_BUFFER_RETURN {BUF_RET_MAYBE, BUF_RET_ALWAYS};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#if !(E_PSIO || USE_CHUNK_BUF)
|
||||
inline void
|
||||
InitWriteChunks(struct ps_handle* handle, struct ps_chunk *w_chunk)
|
||||
{
|
||||
int i, ret;
|
||||
for (i = 0; i < ETH_NUM; i++)
|
||||
{
|
||||
ret = ps_alloc_chunk(handle, &w_chunk[i]);
|
||||
if (ret != 0)
|
||||
{
|
||||
perror("ps_alloc_chunk");
|
||||
exit(1);
|
||||
}
|
||||
w_chunk[i].queue.ifindex = i;
|
||||
w_chunk[i].recv_blocking = 0;
|
||||
w_chunk[i].cnt = 0;
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
FlushWriteBuffer(struct mtcp_thread_context* ctx, int ifidx)
|
||||
{
|
||||
int ret = 0;
|
||||
struct ps_chunk* w_chunk = ctx->w_chunk;
|
||||
mtcp_manager_t mtcp = ctx->mtcp_manager;
|
||||
int i;
|
||||
int drop = 0;
|
||||
assert(ctx != NULL);
|
||||
assert(w_chunk != NULL);
|
||||
|
||||
if (w_chunk[ifidx].cnt > 0) {
|
||||
|
||||
STAT_COUNT(mtcp->runstat.rounds_tx_try);
|
||||
|
||||
ret = ps_send_chunk(ctx->handle, &w_chunk[ifidx]);
|
||||
drop = ctx->w_chunk[ifidx].cnt - ret;
|
||||
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("ps_send_chunk failed to send chunks, %d:%d\n",
|
||||
ifidx, w_chunk[ifidx].cnt);
|
||||
return ret;
|
||||
} else {
|
||||
#ifdef NETSTAT
|
||||
mtcp->nstat.tx_packets[ifidx] += ret;
|
||||
#endif /* NETSTAT */
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
#ifdef PKTDUMP
|
||||
DumpPacket(mtcp,
|
||||
w_chunk[ifidx].buf + w_chunk[ifidx].info[i].offset,
|
||||
w_chunk[ifidx].info[i].len, "OUT", ifidx);
|
||||
#endif /* PKTDUMP */
|
||||
|
||||
#ifdef NETSTAT
|
||||
mtcp->nstat.tx_bytes[ifidx] += w_chunk[ifidx].info[i].len + 24;
|
||||
#endif /* NETSTAT */
|
||||
}
|
||||
|
||||
#ifdef NETSTAT
|
||||
if (ret != w_chunk[ifidx].cnt) {
|
||||
mtcp->nstat.tx_drops[ifidx] += (w_chunk[ifidx].cnt - ret);
|
||||
}
|
||||
#endif /* NETSTAT */
|
||||
|
||||
if (ret == 0) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef PKTDUMP
|
||||
thread_printf(mtcp, mtcp->log_fp, "sent chunks, ret: %d (tries: %d)\n",
|
||||
ret, w_chunk[ifidx].cnt);
|
||||
thread_printf(mtcp, mtcp->log_fp, "======================================"
|
||||
"======================================================"
|
||||
"====================\n\n");
|
||||
#endif /* PKTDUMP */
|
||||
|
||||
if (drop > 0) {
|
||||
ctx->w_chunk[ifidx].cnt = drop;
|
||||
for (i = 0; i < drop; i++) {
|
||||
ctx->w_chunk[ifidx].info[i].len =
|
||||
ctx->w_chunk[ifidx].info[ret + i].len;
|
||||
ctx->w_chunk[ifidx].info[i].offset =
|
||||
ctx->w_chunk[ifidx].info[ret + i].offset;
|
||||
}
|
||||
ctx->w_off[ifidx] = ctx->w_chunk[ifidx].info[drop - 1].offset +
|
||||
(ctx->w_chunk[ifidx].info[drop - 1].len + 63) / 64 * 64;
|
||||
ctx->w_cur_idx[ifidx] += ret;
|
||||
} else {
|
||||
ctx->w_chunk[ifidx].cnt = 0;
|
||||
ctx->w_off[ifidx] = 0;
|
||||
ctx->w_cur_idx[ifidx] = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline char *
|
||||
GetWriteBuffer(struct mtcp_thread_context *ctx, int method, int ifidx, int len)
|
||||
{
|
||||
struct ps_chunk *w_chunk = ctx->w_chunk;
|
||||
uint32_t *w_off = ctx->w_off;
|
||||
int w_idx;
|
||||
|
||||
assert(w_chunk != NULL);
|
||||
assert(w_off != NULL);
|
||||
|
||||
if (ifidx < 0 || ifidx >= CONFIG.eths_num )
|
||||
return NULL;
|
||||
|
||||
//pthread_mutex_lock(&ctx->send_lock);
|
||||
|
||||
if (ctx->w_cur_idx[ifidx] + w_chunk[ifidx].cnt >= MAX_SEND_PCK_CHUNK) {
|
||||
if (method == BUF_RET_MAYBE) {
|
||||
return NULL;
|
||||
} else if (method == BUF_RET_ALWAYS) {
|
||||
if (FlushWriteBuffer(ctx, ifidx) <= 0)
|
||||
return NULL;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
assert(ctx->w_cur_idx[ifidx] + w_chunk[ifidx].cnt < MAX_SEND_PCK_CHUNK);
|
||||
assert(w_off[ifidx] < MAX_PACKET_SIZE * MAX_CHUNK_SIZE);
|
||||
|
||||
w_idx = w_chunk[ifidx].cnt++;
|
||||
w_chunk[ifidx].info[w_idx].len = len;
|
||||
w_chunk[ifidx].info[w_idx].offset = w_off[ifidx];
|
||||
w_off[ifidx] += (len + 63) / 64 * 64;
|
||||
|
||||
//pthread_mutex_unlock(&ctx->send_lock);
|
||||
|
||||
return (w_chunk[ifidx].buf + w_chunk[ifidx].info[w_idx].offset);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#else /* E_PSIO */
|
||||
int
|
||||
FlushSendChunkBuf(mtcp_manager_t mtcp, int nif)
|
||||
{
|
||||
struct ps_chunk_buf *c_buf;
|
||||
int send_cnt, to_send_cnt = 0;
|
||||
int start_idx;
|
||||
int i;
|
||||
|
||||
c_buf = &mtcp->ctx->w_chunk_buf[nif];
|
||||
if (!c_buf)
|
||||
return -1;
|
||||
|
||||
to_send_cnt = c_buf->cnt;
|
||||
if (to_send_cnt > 0) {
|
||||
STAT_COUNT(mtcp->runstat.rounds_tx_try);
|
||||
start_idx = c_buf->next_to_send;
|
||||
send_cnt = ps_send_chunk_buf(mtcp->ctx->handle, c_buf);
|
||||
|
||||
for (i = 0; i < send_cnt; i++) {
|
||||
#ifdef NETSTAT
|
||||
mtcp->nstat.tx_bytes[nif] += c_buf->info[start_idx].len + 24;
|
||||
#endif
|
||||
#if PKTDUMP
|
||||
DumpPacket(mtcp, c_buf->buf + c_buf->info[start_idx].offset,
|
||||
c_buf->info[start_idx].len, "OUT", nif);
|
||||
|
||||
#endif
|
||||
start_idx = (start_idx + 1) % ENTRY_CNT;
|
||||
}
|
||||
if (send_cnt < 0) {
|
||||
TRACE_ERROR("ps_send_chunk_buf failed. "
|
||||
"ret: %d, error: %s\n", send_cnt, strerror(errno));
|
||||
#ifdef NETSTAT
|
||||
} else {
|
||||
mtcp->nstat.tx_packets[nif] += send_cnt;
|
||||
#endif
|
||||
}
|
||||
|
||||
return send_cnt;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* E_PSIO */
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint8_t *
|
||||
EthernetOutput(struct mtcp_manager *mtcp, uint16_t h_proto,
|
||||
int nif, unsigned char* dst_haddr, uint16_t iplen)
|
||||
{
|
||||
char *buf;
|
||||
struct ethhdr *ethh;
|
||||
int i;
|
||||
|
||||
#if E_PSIO || USE_CHUNK_BUF
|
||||
struct ps_chunk_buf *c_buf = &mtcp->ctx->w_chunk_buf[nif];
|
||||
|
||||
buf = ps_assign_chunk_buf(c_buf, iplen + ETHERNET_HEADER_LEN);
|
||||
#else
|
||||
buf = GetWriteBuffer(mtcp->ctx,
|
||||
BUF_RET_MAYBE, nif, iplen + ETHERNET_HEADER_LEN);
|
||||
#endif
|
||||
if (!buf) {
|
||||
//TRACE_DBG("Failed to get available write buffer\n");
|
||||
return NULL;
|
||||
}
|
||||
//memset(buf, 0, ETHERNET_HEADER_LEN + iplen);
|
||||
|
||||
// if (!stream->sndvar->d_haddr) {
|
||||
// stream->sndvar->d_haddr = GetDestinationHWaddr(stream->daddr);
|
||||
// }
|
||||
|
||||
#if 0
|
||||
TRACE_DBG("dst_hwaddr: %02X:%02X:%02X:%02X:%02X:%02X\n",
|
||||
stream->sndvar->d_haddr[0], stream->sndvar->d_haddr[1],
|
||||
stream->sndvar->d_haddr[2], stream->sndvar->d_haddr[3],
|
||||
stream->sndvar->d_haddr[4], stream->sndvar->d_haddr[5]);
|
||||
#endif
|
||||
|
||||
ethh = (struct ethhdr *)buf;
|
||||
for (i = 0; i < ETH_ALEN; i++) {
|
||||
ethh->h_source[i] = CONFIG.eths[nif].haddr[i];
|
||||
ethh->h_dest[i] = dst_haddr[i];
|
||||
}
|
||||
ethh->h_proto = htons(h_proto);
|
||||
|
||||
return (uint8_t *)(ethh + 1);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,580 @@
|
|||
#include <sys/queue.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <signal.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "tcp_stream.h"
|
||||
#include "eventpoll.h"
|
||||
#include "tcp_in.h"
|
||||
#include "pipe.h"
|
||||
#include "debug.h"
|
||||
|
||||
#define MAX(a, b) ((a)>(b)?(a):(b))
|
||||
#define MIN(a, b) ((a)<(b)?(a):(b))
|
||||
|
||||
#define SPIN_BEFORE_SLEEP FALSE
|
||||
#define SPIN_THRESH 10000000
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
char *event_str[] = {"NONE", "IN", "PRI", "OUT", "ERR", "HUP", "RDHUP"};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
char *
|
||||
EventToString(uint32_t event)
|
||||
{
|
||||
switch (event) {
|
||||
case MTCP_EPOLLNONE:
|
||||
return event_str[0];
|
||||
break;
|
||||
case MTCP_EPOLLIN:
|
||||
return event_str[1];
|
||||
break;
|
||||
case MTCP_EPOLLPRI:
|
||||
return event_str[2];
|
||||
break;
|
||||
case MTCP_EPOLLOUT:
|
||||
return event_str[3];
|
||||
break;
|
||||
case MTCP_EPOLLERR:
|
||||
return event_str[4];
|
||||
break;
|
||||
case MTCP_EPOLLHUP:
|
||||
return event_str[5];
|
||||
break;
|
||||
case MTCP_EPOLLRDHUP:
|
||||
return event_str[6];
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct event_queue *
|
||||
CreateEventQueue(int size)
|
||||
{
|
||||
struct event_queue *eq;
|
||||
|
||||
eq = (struct event_queue *)calloc(1, sizeof(struct event_queue));
|
||||
if (!eq)
|
||||
return NULL;
|
||||
|
||||
eq->start = 0;
|
||||
eq->end = 0;
|
||||
eq->size = size;
|
||||
eq->events = (struct mtcp_epoll_event_int *)
|
||||
calloc(size, sizeof(struct mtcp_epoll_event_int));
|
||||
if (!eq->events) {
|
||||
free(eq);
|
||||
return NULL;
|
||||
}
|
||||
eq->num_events = 0;
|
||||
|
||||
return eq;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyEventQueue(struct event_queue *eq)
|
||||
{
|
||||
if (eq->events)
|
||||
free(eq->events);
|
||||
|
||||
free(eq);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_epoll_create(mctx_t mctx, int size)
|
||||
{
|
||||
mtcp_manager_t mtcp = g_mtcp[mctx->cpu];
|
||||
struct mtcp_epoll *ep;
|
||||
socket_map_t epsocket;
|
||||
|
||||
if (size <= 0) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
epsocket = AllocateSocket(mctx, MTCP_SOCK_EPOLL, FALSE);
|
||||
if (!epsocket) {
|
||||
errno = ENFILE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ep = (struct mtcp_epoll *)calloc(1, sizeof(struct mtcp_epoll));
|
||||
if (!ep) {
|
||||
FreeSocket(mctx, epsocket->id, FALSE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* create event queues */
|
||||
ep->usr_queue = CreateEventQueue(size);
|
||||
if (!ep->usr_queue)
|
||||
return -1;
|
||||
|
||||
ep->usr_shadow_queue = CreateEventQueue(size);
|
||||
if (!ep->usr_shadow_queue) {
|
||||
DestroyEventQueue(ep->usr_queue);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ep->mtcp_queue = CreateEventQueue(size);
|
||||
if (!ep->mtcp_queue) {
|
||||
DestroyEventQueue(ep->usr_queue);
|
||||
DestroyEventQueue(ep->usr_shadow_queue);
|
||||
return -1;
|
||||
}
|
||||
|
||||
TRACE_EPOLL("epoll structure of size %d created.\n", ep->size);
|
||||
|
||||
mtcp->ep = ep;
|
||||
epsocket->ep = ep;
|
||||
|
||||
if (pthread_mutex_init(&ep->epoll_lock, NULL)) {
|
||||
return -1;
|
||||
}
|
||||
if (pthread_cond_init(&ep->epoll_cond, NULL)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return epsocket->id;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
CloseEpollSocket(mctx_t mctx, int epid)
|
||||
{
|
||||
mtcp_manager_t mtcp;
|
||||
struct mtcp_epoll *ep;
|
||||
|
||||
mtcp = GetMTCPManager(mctx);
|
||||
if (!mtcp) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
ep = mtcp->smap[epid].ep;
|
||||
if (!ep) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
DestroyEventQueue(ep->usr_queue);
|
||||
DestroyEventQueue(ep->usr_shadow_queue);
|
||||
DestroyEventQueue(ep->mtcp_queue);
|
||||
free(ep);
|
||||
|
||||
pthread_mutex_lock(&ep->epoll_lock);
|
||||
mtcp->ep = NULL;
|
||||
mtcp->smap[epid].ep = NULL;
|
||||
pthread_cond_signal(&ep->epoll_cond);
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
|
||||
pthread_cond_destroy(&ep->epoll_cond);
|
||||
pthread_mutex_destroy(&ep->epoll_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
RaisePendingStreamEvents(mtcp_manager_t mtcp,
|
||||
struct mtcp_epoll *ep, socket_map_t socket)
|
||||
{
|
||||
tcp_stream *stream = socket->stream;
|
||||
|
||||
if (!stream)
|
||||
return -1;
|
||||
if (stream->state < TCP_ST_ESTABLISHED)
|
||||
return -1;
|
||||
|
||||
TRACE_EPOLL("Stream %d at state %s\n",
|
||||
stream->id, TCPStateToString(stream));
|
||||
/* if there are payloads already read before epoll registration */
|
||||
/* generate read event */
|
||||
if (socket->epoll & MTCP_EPOLLIN) {
|
||||
struct tcp_recv_vars *rcvvar = stream->rcvvar;
|
||||
if (rcvvar->rcvbuf && rcvvar->rcvbuf->merged_len > 0) {
|
||||
TRACE_EPOLL("Socket %d: Has existing payloads\n", socket->id);
|
||||
AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
|
||||
} else if (stream->state == TCP_ST_CLOSE_WAIT) {
|
||||
TRACE_EPOLL("Socket %d: Waiting for close\n", socket->id);
|
||||
AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
|
||||
}
|
||||
}
|
||||
|
||||
/* same thing to the write event */
|
||||
if (socket->epoll & MTCP_EPOLLOUT) {
|
||||
struct tcp_send_vars *sndvar = stream->sndvar;
|
||||
if (!sndvar->sndbuf ||
|
||||
(sndvar->sndbuf && sndvar->sndbuf->len < sndvar->snd_wnd)) {
|
||||
if (!(socket->events & MTCP_EPOLLOUT)) {
|
||||
TRACE_EPOLL("Socket %d: Adding write event\n", socket->id);
|
||||
AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLOUT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_epoll_ctl(mctx_t mctx, int epid,
|
||||
int op, int sockid, struct mtcp_epoll_event *event)
|
||||
{
|
||||
mtcp_manager_t mtcp;
|
||||
struct mtcp_epoll *ep;
|
||||
socket_map_t socket;
|
||||
uint32_t events;
|
||||
|
||||
mtcp = GetMTCPManager(mctx);
|
||||
if (!mtcp) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (epid < 0 || epid >= CONFIG.max_concurrency) {
|
||||
TRACE_API("Epoll id %d out of range.\n", epid);
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (sockid < 0 || sockid >= CONFIG.max_concurrency) {
|
||||
TRACE_API("Socket id %d out of range.\n", sockid);
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mtcp->smap[epid].socktype == MTCP_SOCK_UNUSED) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mtcp->smap[epid].socktype != MTCP_SOCK_EPOLL) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ep = mtcp->smap[epid].ep;
|
||||
if (!ep || (!event && op != MTCP_EPOLL_CTL_DEL)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
socket = &mtcp->smap[sockid];
|
||||
|
||||
if (op == MTCP_EPOLL_CTL_ADD) {
|
||||
if (socket->epoll) {
|
||||
errno = EEXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* EPOLLERR and EPOLLHUP are registered as default */
|
||||
events = event->events;
|
||||
events |= (MTCP_EPOLLERR | MTCP_EPOLLHUP);
|
||||
socket->ep_data = event->data;
|
||||
socket->epoll = events;
|
||||
|
||||
TRACE_EPOLL("Adding epoll socket %d(type %d) ET: %u, IN: %u, OUT: %u\n",
|
||||
socket->id, socket->socktype, socket->epoll & MTCP_EPOLLET,
|
||||
socket->epoll & MTCP_EPOLLIN, socket->epoll & MTCP_EPOLLOUT);
|
||||
|
||||
if (socket->socktype == MTCP_SOCK_STREAM) {
|
||||
RaisePendingStreamEvents(mtcp, ep, socket);
|
||||
} else if (socket->socktype == MTCP_SOCK_PIPE) {
|
||||
RaisePendingPipeEvents(mctx, epid, sockid);
|
||||
}
|
||||
|
||||
} else if (op == MTCP_EPOLL_CTL_MOD) {
|
||||
if (!socket->epoll) {
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
events = event->events;
|
||||
events |= (MTCP_EPOLLERR | MTCP_EPOLLHUP);
|
||||
socket->ep_data = event->data;
|
||||
socket->epoll = events;
|
||||
|
||||
if (socket->socktype == MTCP_SOCK_STREAM) {
|
||||
RaisePendingStreamEvents(mtcp, ep, socket);
|
||||
} else if (socket->socktype == MTCP_SOCK_PIPE) {
|
||||
RaisePendingPipeEvents(mctx, epid, sockid);
|
||||
}
|
||||
|
||||
} else if (op == MTCP_EPOLL_CTL_DEL) {
|
||||
if (!socket->epoll) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
socket->epoll = MTCP_EPOLLNONE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_epoll_wait(mctx_t mctx, int epid,
|
||||
struct mtcp_epoll_event *events, int maxevents, int timeout)
|
||||
{
|
||||
mtcp_manager_t mtcp;
|
||||
struct mtcp_epoll *ep;
|
||||
struct event_queue *eq;
|
||||
struct event_queue *eq_shadow;
|
||||
socket_map_t event_socket;
|
||||
int validity;
|
||||
int i, cnt, ret;
|
||||
int num_events;
|
||||
|
||||
mtcp = GetMTCPManager(mctx);
|
||||
if (!mtcp) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (epid < 0 || epid >= CONFIG.max_concurrency) {
|
||||
TRACE_API("Epoll id %d out of range.\n", epid);
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mtcp->smap[epid].socktype == MTCP_SOCK_UNUSED) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (mtcp->smap[epid].socktype != MTCP_SOCK_EPOLL) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ep = mtcp->smap[epid].ep;
|
||||
if (!ep || !events || maxevents <= 0) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
ep->stat.calls++;
|
||||
|
||||
#if SPIN_BEFORE_SLEEP
|
||||
int spin = 0;
|
||||
while (ep->num_events == 0 && spin < SPIN_THRESH) {
|
||||
spin++;
|
||||
}
|
||||
#endif /* SPIN_BEFORE_SLEEP */
|
||||
|
||||
if (pthread_mutex_lock(&ep->epoll_lock)) {
|
||||
if (errno == EDEADLK)
|
||||
perror("mtcp_epoll_wait: epoll_lock blocked\n");
|
||||
assert(0);
|
||||
}
|
||||
|
||||
wait:
|
||||
eq = ep->usr_queue;
|
||||
eq_shadow = ep->usr_shadow_queue;
|
||||
|
||||
/* wait until event occurs */
|
||||
while (eq->num_events == 0 && eq_shadow->num_events == 0 && timeout != 0) {
|
||||
|
||||
#if INTR_SLEEPING_MTCP
|
||||
/* signal to mtcp thread if it is sleeping */
|
||||
if (mtcp->wakeup_flag && mtcp->is_sleeping) {
|
||||
pthread_kill(mtcp->ctx->thread, SIGUSR1);
|
||||
}
|
||||
#endif
|
||||
ep->stat.waits++;
|
||||
ep->waiting = TRUE;
|
||||
if (timeout > 0) {
|
||||
struct timespec deadline;
|
||||
|
||||
clock_gettime(CLOCK_REALTIME, &deadline);
|
||||
if (timeout > 1000) {
|
||||
int sec;
|
||||
sec = timeout / 1000;
|
||||
deadline.tv_sec += sec;
|
||||
timeout -= sec * 1000;
|
||||
}
|
||||
|
||||
if (deadline.tv_nsec >= 1000000000) {
|
||||
deadline.tv_sec++;
|
||||
deadline.tv_nsec -= 1000000000;
|
||||
}
|
||||
|
||||
//deadline.tv_sec = mtcp->cur_tv.tv_sec;
|
||||
//deadline.tv_nsec = (mtcp->cur_tv.tv_usec + timeout * 1000) * 1000;
|
||||
ret = pthread_cond_timedwait(&ep->epoll_cond,
|
||||
&ep->epoll_lock, &deadline);
|
||||
if (ret && ret != ETIMEDOUT) {
|
||||
/* errno set by pthread_cond_timedwait() */
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
TRACE_ERROR("pthread_cond_timedwait failed. ret: %d, error: %s\n",
|
||||
ret, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
timeout = 0;
|
||||
} else if (timeout < 0) {
|
||||
ret = pthread_cond_wait(&ep->epoll_cond, &ep->epoll_lock);
|
||||
if (ret) {
|
||||
/* errno set by pthread_cond_wait() */
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
TRACE_ERROR("pthread_cond_wait failed. ret: %d, error: %s\n",
|
||||
ret, strerror(errno));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ep->waiting = FALSE;
|
||||
|
||||
if (mtcp->ctx->done || mtcp->ctx->exit || mtcp->ctx->interrupt) {
|
||||
mtcp->ctx->interrupt = FALSE;
|
||||
//ret = pthread_cond_signal(&ep->epoll_cond);
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
errno = EINTR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* fetch events from the user event queue */
|
||||
cnt = 0;
|
||||
num_events = eq->num_events;
|
||||
for (i = 0; i < num_events && cnt < maxevents; i++) {
|
||||
event_socket = &mtcp->smap[eq->events[eq->start].sockid];
|
||||
validity = TRUE;
|
||||
if (event_socket->socktype == MTCP_SOCK_UNUSED)
|
||||
validity = FALSE;
|
||||
if (!(event_socket->epoll & eq->events[eq->start].ev.events))
|
||||
validity = FALSE;
|
||||
if (!(event_socket->events & eq->events[eq->start].ev.events))
|
||||
validity = FALSE;
|
||||
|
||||
if (validity) {
|
||||
events[cnt++] = eq->events[eq->start].ev;
|
||||
assert(eq->events[eq->start].sockid >= 0);
|
||||
|
||||
TRACE_EPOLL("Socket %d: Handled event. event: %s, "
|
||||
"start: %u, end: %u, num: %u\n",
|
||||
event_socket->id,
|
||||
EventToString(eq->events[eq->start].ev.events),
|
||||
eq->start, eq->end, eq->num_events);
|
||||
ep->stat.handled++;
|
||||
} else {
|
||||
TRACE_EPOLL("Socket %d: event %s invalidated.\n",
|
||||
eq->events[eq->start].sockid,
|
||||
EventToString(eq->events[eq->start].ev.events));
|
||||
ep->stat.invalidated++;
|
||||
}
|
||||
event_socket->events &= (~eq->events[eq->start].ev.events);
|
||||
|
||||
eq->start++;
|
||||
eq->num_events--;
|
||||
if (eq->start >= eq->size) {
|
||||
eq->start = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* fetch eventes from user shadow event queue */
|
||||
eq = ep->usr_shadow_queue;
|
||||
num_events = eq->num_events;
|
||||
for (i = 0; i < num_events && cnt < maxevents; i++) {
|
||||
event_socket = &mtcp->smap[eq->events[eq->start].sockid];
|
||||
validity = TRUE;
|
||||
if (event_socket->socktype == MTCP_SOCK_UNUSED)
|
||||
validity = FALSE;
|
||||
if (!(event_socket->epoll & eq->events[eq->start].ev.events))
|
||||
validity = FALSE;
|
||||
if (!(event_socket->events & eq->events[eq->start].ev.events))
|
||||
validity = FALSE;
|
||||
|
||||
if (validity) {
|
||||
events[cnt++] = eq->events[eq->start].ev;
|
||||
assert(eq->events[eq->start].sockid >= 0);
|
||||
|
||||
TRACE_EPOLL("Socket %d: Handled event. event: %s, "
|
||||
"start: %u, end: %u, num: %u\n",
|
||||
event_socket->id,
|
||||
EventToString(eq->events[eq->start].ev.events),
|
||||
eq->start, eq->end, eq->num_events);
|
||||
ep->stat.handled++;
|
||||
} else {
|
||||
TRACE_EPOLL("Socket %d: event %s invalidated.\n",
|
||||
eq->events[eq->start].sockid,
|
||||
EventToString(eq->events[eq->start].ev.events));
|
||||
ep->stat.invalidated++;
|
||||
}
|
||||
event_socket->events &= (~eq->events[eq->start].ev.events);
|
||||
|
||||
eq->start++;
|
||||
eq->num_events--;
|
||||
if (eq->start >= eq->size) {
|
||||
eq->start = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (cnt == 0 && timeout != 0)
|
||||
goto wait;
|
||||
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
AddEpollEvent(struct mtcp_epoll *ep,
|
||||
int queue_type, socket_map_t socket, uint32_t event)
|
||||
{
|
||||
struct event_queue *eq;
|
||||
int index;
|
||||
|
||||
if (!ep || !socket || !event)
|
||||
return -1;
|
||||
|
||||
ep->stat.issued++;
|
||||
|
||||
if (socket->events & event) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (queue_type == MTCP_EVENT_QUEUE) {
|
||||
eq = ep->mtcp_queue;
|
||||
} else if (queue_type == USR_EVENT_QUEUE) {
|
||||
eq = ep->usr_queue;
|
||||
pthread_mutex_lock(&ep->epoll_lock);
|
||||
} else if (queue_type == USR_SHADOW_EVENT_QUEUE) {
|
||||
eq = ep->usr_shadow_queue;
|
||||
} else {
|
||||
TRACE_ERROR("Non-existing event queue type!\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (eq->num_events >= eq->size) {
|
||||
TRACE_ERROR("Exceeded epoll event queue! num_events: %d, size: %d\n",
|
||||
eq->num_events, eq->size);
|
||||
if (queue_type == USR_EVENT_QUEUE)
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
index = eq->end++;
|
||||
|
||||
socket->events |= event;
|
||||
eq->events[index].sockid = socket->id;
|
||||
eq->events[index].ev.events = event;
|
||||
eq->events[index].ev.data = socket->ep_data;
|
||||
|
||||
if (eq->end >= eq->size) {
|
||||
eq->end = 0;
|
||||
}
|
||||
eq->num_events++;
|
||||
|
||||
#if 0
|
||||
TRACE_EPOLL("Socket %d New event: %s, start: %u, end: %u, num: %u\n",
|
||||
ep->events[index].sockid,
|
||||
EventToString(ep->events[index].ev.events),
|
||||
ep->start, ep->end, ep->num_events);
|
||||
#endif
|
||||
|
||||
if (queue_type == USR_EVENT_QUEUE)
|
||||
pthread_mutex_unlock(&ep->epoll_lock);
|
||||
|
||||
ep->stat.registered++;
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/queue.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "fhash.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct hashtable *
|
||||
CreateHashtable(unsigned int (*hashfn) (const tcp_stream *), // key function
|
||||
int (*eqfn) (const tcp_stream*,
|
||||
const tcp_stream *)) // equality
|
||||
{
|
||||
int i;
|
||||
struct hashtable* ht = calloc(1, sizeof(struct hashtable));
|
||||
if (!ht){
|
||||
TRACE_ERROR("calloc: CreateHashtable");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ht->hashfn = hashfn;
|
||||
ht->eqfn = eqfn;
|
||||
|
||||
/* init the tables */
|
||||
for (i = 0; i < NUM_BINS; i++)
|
||||
TAILQ_INIT(&ht->ht_table[i]);
|
||||
return ht;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Release the table structure itself.
 * The streams linked into the table are not touched by this function.
 */
void
DestroyHashtable(struct hashtable *ht)
{
	free(ht);
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
HTInsert(struct hashtable *ht, tcp_stream *item)
|
||||
{
|
||||
/* create an entry*/
|
||||
int idx;
|
||||
|
||||
assert(ht);
|
||||
assert(ht->ht_count <= 65535); // uint16_t ht_count
|
||||
|
||||
idx = ht->hashfn(item);
|
||||
assert(idx >=0 && idx < NUM_BINS);
|
||||
|
||||
#if STATIC_TABLE
|
||||
for (i = 0; i < TCP_AR_CNT; i++) {
|
||||
// insert into empty array slot
|
||||
if (!ht->ht_array[idx][i]) {
|
||||
ht->ht_array[idx][i] = item;
|
||||
item->ht_idx = i;
|
||||
ht->ht_count++;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_INFO("[WARNING] HTSearch() cnt: %d!!\n", TCP_AR_CNT);
|
||||
#endif
|
||||
|
||||
TAILQ_INSERT_TAIL(&ht->ht_table[idx], item, rcvvar->he_link);
|
||||
item->ht_idx = TCP_AR_CNT;
|
||||
ht->ht_count++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void*
|
||||
HTRemove(struct hashtable *ht, tcp_stream *item)
|
||||
{
|
||||
hash_bucket_head *head;
|
||||
int idx = ht->hashfn(item);
|
||||
|
||||
#if STATIC_TABLE
|
||||
if (item->ht_idx < TCP_AR_CNT) {
|
||||
assert(ht_array[idx][item->ht_idx]);
|
||||
ht->ht_array[idx][item->ht_idx] = NULL;
|
||||
} else {
|
||||
#endif
|
||||
head = &ht->ht_table[idx];
|
||||
TAILQ_REMOVE(head, item, rcvvar->he_link);
|
||||
#if STATIC_TABLE
|
||||
}
|
||||
#endif
|
||||
|
||||
ht->ht_count--;
|
||||
return (item);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
tcp_stream*
|
||||
HTSearch(struct hashtable *ht, const tcp_stream *item)
|
||||
{
|
||||
int idx;
|
||||
tcp_stream *walk;
|
||||
hash_bucket_head *head;
|
||||
|
||||
idx = ht->hashfn(item);
|
||||
|
||||
#if STATIC_TABLE
|
||||
for (i = 0; i < TCP_AR_CNT; i++) {
|
||||
if (ht->ht_array[idx][i]) {
|
||||
if (ht->eqfn(ht->ht_array[idx][i], item))
|
||||
return ht->ht_array[idx][i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
head = &ht->ht_table[ht->hashfn(item)];
|
||||
TAILQ_FOREACH(walk, head, rcvvar->he_link) {
|
||||
if (ht->eqfn(walk, item))
|
||||
return walk;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,37 @@
|
|||
#ifndef __ADDR_POOL_H_
#define __ADDR_POOL_H_

#include <netinet/in.h>
#include <sys/queue.h>

/* Opaque handle to an address pool. */
typedef struct addr_pool *addr_pool_t;

/*
 * Create an address pool for the given address range.
 *   addr_base: the base address, in network byte order
 *   num_addr:  number of addresses to use as source IP
 */
addr_pool_t
CreateAddressPool(in_addr_t addr_base, int num_addr);

/*
 * Create an address pool only for the given core number.
 * All addresses and port numbers should be in network byte order.
 */
addr_pool_t
CreateAddressPoolPerCore(int core, int num_queues,
		in_addr_t saddr_base, int num_addr, in_addr_t daddr, in_port_t dport);

/* Destroy an address pool. */
void
DestroyAddressPool(addr_pool_t ap);

/* NOTE(review): presumably stores a source address for `daddr` in `saddr` - confirm */
int
FetchAddress(addr_pool_t ap, int core, int num_queues,
		const struct sockaddr_in *daddr, struct sockaddr_in *saddr);

/* NOTE(review): presumably hands `addr` back to the pool - confirm */
int
FreeAddress(addr_pool_t ap, const struct sockaddr_in *addr);

#endif /* __ADDR_POOL_H_ */
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef __ARP_H_
|
||||
#define __ARP_H_
|
||||
|
||||
#define MAX_ARPENTRY 1024
|
||||
|
||||
int
|
||||
InitARPTable();
|
||||
|
||||
unsigned char *
|
||||
GetHWaddr(uint32_t ip);
|
||||
|
||||
unsigned char *
|
||||
GetDestinationHWaddr(uint32_t dip);
|
||||
|
||||
void
|
||||
RequestARP(mtcp_manager_t mtcp, uint32_t ip, int nif, uint32_t cur_ts);
|
||||
|
||||
int
|
||||
ProcessARPPacket(mtcp_manager_t mtcp, uint32_t cur_ts,
|
||||
const int ifidx, unsigned char* pkt_data, int len);
|
||||
|
||||
void
|
||||
PublishARP(mtcp_manager_t mtcp);
|
||||
|
||||
void
|
||||
PrintARPTable();
|
||||
|
||||
#endif /* __ARP_H_ */
|
|
@ -0,0 +1,43 @@
|
|||
#ifndef __CONFIG_H_
#define __CONFIG_H_

#include <stdint.h>

#include "ps.h"

/*
 * NOTE(review): the objects below are defined, not just declared, in this
 * header. Every translation unit that includes it gets its own tentative
 * definition, which only links when the toolchain merges common symbols
 * (e.g. it fails with -fno-common). Consider `extern` declarations here plus
 * a single definition in one .c file.
 */
int num_cpus;
int num_queues;
int num_devices;
struct ps_device devices[MAX_DEVICES];

int num_devices_attached;
int devices_attached[MAX_DEVICES];

int
LoadConfiguration(char *fname);

/* set configurations from the configured interface information */
int
SetInterfaceInfo(void);

/* set configurations from the files */
int
SetRoutingTable(void);

int
LoadARPTable(void);

/* print the current configuration */
void
PrintConfiguration(void);

void
PrintInterfaceInfo(void);

void
PrintRoutingTable(void);

/* set socket modes */
int
SetSocketMode(int8_t socket_mode);

#endif /* __CONFIG_H_ */
|
|
@ -0,0 +1,6 @@
|
|||
#ifndef __CPU_H_
#define __CPU_H_

/* (void) makes this a real prototype, so calls with arguments are rejected */
int GetNumCPUs(void);

#endif /* __CPU_H_ */
|
|
@ -0,0 +1,228 @@
|
|||
#ifndef __DEBUG_H_
|
||||
#define __DEBUG_H_
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "mtcp.h"
|
||||
#include "tcp_in.h"
|
||||
|
||||
#ifdef DBGTEMP
|
||||
|
||||
#define TRACE_TEMP(f, m...) { \
|
||||
fprintf(stderr, "[CPU %d][%10s:%4d] " f, mtcp->ctx->cpu, \
|
||||
__FUNCTION__, __LINE__, ##m); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define TRACE_TEMP(f, m...) (void)0
|
||||
|
||||
#endif /* DBGTEMP*/
|
||||
|
||||
#ifdef DBGERR
|
||||
|
||||
#define TRACE_ERROR(f, m...) { \
|
||||
fprintf(stderr, "[%10s:%4d] " f, __FUNCTION__, __LINE__, ##m); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define TRACE_ERROR(f, m...) (void)0
|
||||
|
||||
#endif /* DBGERR */
|
||||
|
||||
#ifdef DBGCERR
|
||||
|
||||
#define CTRACE_ERROR(f, m...) { \
|
||||
fprintf(stderr, "[CPU %d][%10s:%4d] " f, mtcp->ctx->cpu, __FUNCTION__, __LINE__, ##m); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define CTRACE_ERROR(f, m...) (void)0
|
||||
|
||||
#endif /* DBGCERR */
|
||||
|
||||
#ifdef DBGMSG
|
||||
|
||||
#define TRACE_DBG(f, m...) {\
|
||||
thread_printf(mtcp, mtcp->log_fp, "[%10s:%4d] " \
|
||||
f, __FUNCTION__, __LINE__, ##m); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define TRACE_DBG(f, m...) (void)0
|
||||
|
||||
#endif /* DBGMSG */
|
||||
|
||||
#ifdef INFO
|
||||
|
||||
#define TRACE_INFO(f, m...) { \
|
||||
fprintf(stderr, "[%10s:%4d] " f,__FUNCTION__, __LINE__, ##m); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define TRACE_INFO(f, m...) (void)0
|
||||
|
||||
#endif /* INFO */
|
||||
|
||||
#define TRACE_CONFIG(f, m...) fprintf(stderr, f, ##m)
|
||||
|
||||
#ifdef DBGLOG
|
||||
#define TRACE_LOG(f, m...) TRACE_INFO(f, ##m)
|
||||
#else
|
||||
#define TRACE_LOG(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef STREAM
|
||||
#define TRACE_STREAM(f, m...) TRACE_FUNC("STREAM", f, ##m)
|
||||
#else
|
||||
#define TRACE_STREAM(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef STATE
|
||||
#define TRACE_STATE(f, m...) TRACE_FUNC("STATE", f, ##m)
|
||||
#else
|
||||
#define TRACE_STATE(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef SNDBUF
|
||||
#define TRACE_SNDBUF(f, m...) TRACE_FUNC("SNDBUF", f, ##m)
|
||||
#else
|
||||
#define TRACE_SNDBUF(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef RCVBUF
|
||||
#define TRACE_RCVBUF(f, m...) TRACE_FUNC("RCVBUF", f, ##m)
|
||||
#else
|
||||
#define TRACE_RCVBUF(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef CLWND
|
||||
#define TRACE_CLWND(f, m...) TRACE_FUNC("CLWND", f, ##m)
|
||||
#else
|
||||
#define TRACE_CLWND(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef LOSS
|
||||
#define TRACE_LOSS(f, m...) TRACE_FUNC("LOSS", f, ##m)
|
||||
#else
|
||||
#define TRACE_LOSS(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef SACK
|
||||
#define TRACE_SACK(f, m...) TRACE_FUNC("SACK", f, ##m)
|
||||
#else
|
||||
#define TRACE_SACK(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef TSTAMP
|
||||
#define TRACE_TSTAMP(f, m...) TRACE_FUNC("TSTAMP", f, ##m)
|
||||
#else
|
||||
#define TRACE_TSTAMP(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef RTT
|
||||
#define TRACE_RTT(f, m...) TRACE_FUNC("RTT", f, ##m)
|
||||
#else
|
||||
#define TRACE_RTT(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef RTO
|
||||
#define TRACE_RTO(f, m...) TRACE_FUNC("RTO", f, ##m)
|
||||
#else
|
||||
#define TRACE_RTO(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef CONG
|
||||
#define TRACE_CONG(f, m...) TRACE_FUNC("CONG", f, ##m)
|
||||
#else
|
||||
#define TRACE_CONG(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef EPOLL
|
||||
#define TRACE_EPOLL(f, m...) TRACE_FUNC("EPOLL", f, ##m)
|
||||
#else
|
||||
#define TRACE_EPOLL(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef FSTAT
|
||||
#define TRACE_FSTAT(f, m...) TRACE_FUNC("FSTAT", f, ##m)
|
||||
#else
|
||||
#define TRACE_FSTAT(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef APP
|
||||
#define TRACE_APP(f, m...) TRACE_FUNC("APP", f, ##m)
|
||||
#else
|
||||
#define TRACE_APP(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef DBGFIN
|
||||
#define TRACE_FIN(f, m...) TRACE_FUNC("FIN", f, ##m)
|
||||
#else
|
||||
#define TRACE_FIN(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef TSTAT
|
||||
#define TRACE_TSTAT(f, m...) TRACE_FUNC("TSTAT", f, ##m)
|
||||
#else
|
||||
#define TRACE_TSTAT(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef LOOP
|
||||
#define TRACE_LOOP(f, m...) TRACE_FUNC("LOOP", "ts: %u, "f, cur_ts, ##m)
|
||||
#else
|
||||
#define TRACE_LOOP(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef ROUND
|
||||
#define TRACE_ROUND(f, m...) TRACE_FUNC("ROUND", f, ##m)
|
||||
#else
|
||||
#define TRACE_ROUND(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef SELECT
|
||||
#define TRACE_SELECT(f, m...) TRACE_FUNC("SELECT", f, ##m)
|
||||
#else
|
||||
#define TRACE_SELECT(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
#ifdef API
|
||||
#define TRACE_API(f, m...) TRACE_FUNC("API", f, ##m)
|
||||
#else
|
||||
#define TRACE_API(f, m...) (void)0
|
||||
#endif
|
||||
|
||||
/*
 * TRACE_FUNC(n, f, m...): common back end of the per-category TRACE_xxx
 * macros.  n is the category label, f the printf-style format, m the
 * optional arguments.  When DBGFUNC is defined the message goes through
 * thread_printf() to mtcp->log_fp with a "[label: function:line]" prefix
 * (a variable named `mtcp` must be in scope at the call site); otherwise
 * the macro is a no-op.
 *
 * Both variants take the same parameter list, and the enabled body is
 * wrapped in do { } while (0) so the macro is a single statement.
 */
#ifdef DBGFUNC

#define TRACE_FUNC(n, f, m...) do { \
	thread_printf(mtcp, mtcp->log_fp, "[%6s: %10s:%4d] " \
			f, n, __FUNCTION__, __LINE__, ##m); \
} while (0)

#else

#define TRACE_FUNC(n, f, m...)	(void)0

#endif /* DBGFUNC */
|
||||
|
||||
void
|
||||
DumpPacket(mtcp_manager_t mtcp, char *buf, int len, char *step, int ifindex);
|
||||
|
||||
void
|
||||
DumpIPPacket(mtcp_manager_t mtcp, const struct iphdr *iph, int len);
|
||||
|
||||
void
|
||||
DumpIPPacketToFile(FILE *fout, const struct iphdr *iph, int len);
|
||||
|
||||
void
|
||||
flush_log_data(mtcp_manager_t mtcp);
|
||||
|
||||
void
|
||||
thread_printf(mtcp_manager_t mtcp, FILE* f_idx, const char* _Format, ...);
|
||||
|
||||
#endif /* __DEBUG_H_ */
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef __ETH_IN_H_
|
||||
#define __ETH_IN_H_
|
||||
|
||||
#include "mtcp.h"
|
||||
|
||||
int
|
||||
ProcessPacket(mtcp_manager_t mtcp, const int ifidx,
|
||||
uint32_t cur_ts, unsigned char *pkt_data, int len);
|
||||
|
||||
#endif /* __ETH_IN_H_ */
|
|
@ -0,0 +1,30 @@
|
|||
#ifndef __ETH_OUT_H_
|
||||
#define __ETH_OUT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "tcp_stream.h"
|
||||
#include "ps.h"
|
||||
|
||||
#define MAX_SEND_PCK_CHUNK 64
|
||||
|
||||
/*
 * Transmit-path helpers.  Which set is declared depends on the build-time
 * switches E_PSIO and USE_CHUNK_BUF: when neither is enabled the per-chunk
 * write API is exposed, otherwise the chunk-buffer flush API is.
 */
#if !(E_PSIO || USE_CHUNK_BUF)

/*
 * Declared without `inline`: there is no definition in this header, and an
 * `inline` prototype with external linkage but no visible definition is not
 * portable across C89-GNU and C99 inline semantics.
 */
void
InitWriteChunks(struct ps_handle* handle, struct ps_chunk *w_chunk);

int
FlushWriteBuffer(struct mtcp_thread_context *ctx, int ifidx);

#else

int
FlushSendChunkBuf(mtcp_manager_t mtcp, int nif);

#endif
|
||||
|
||||
uint8_t *
|
||||
EthernetOutput(struct mtcp_manager *mtcp, uint16_t h_proto,
|
||||
int nif, unsigned char* dst_haddr, uint16_t iplen);
|
||||
|
||||
#endif /* __ETH_OUT_H_ */
|
|
@ -0,0 +1,60 @@
|
|||
#ifndef __EVENTPOLL_H_
|
||||
#define __EVENTPOLL_H_
|
||||
|
||||
#include "mtcp_api.h"
|
||||
#include "mtcp_epoll.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_epoll_stat
|
||||
{
|
||||
uint64_t calls;
|
||||
uint64_t waits;
|
||||
uint64_t wakes;
|
||||
|
||||
uint64_t issued;
|
||||
uint64_t registered;
|
||||
uint64_t invalidated;
|
||||
uint64_t handled;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_epoll_event_int
|
||||
{
|
||||
struct mtcp_epoll_event ev;
|
||||
int sockid;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
enum event_queue_type
|
||||
{
|
||||
USR_EVENT_QUEUE = 0,
|
||||
USR_SHADOW_EVENT_QUEUE = 1,
|
||||
MTCP_EVENT_QUEUE = 2
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Array-backed queue of internal epoll events.
 * NOTE(review): the start/end/size/num_events fields suggest a ring buffer;
 * confirm the wrap-around rules against the implementation.
 * NOTE(review): this header names pthread types further down but does not
 * include <pthread.h> itself; it relies on the includer's include order.
 */
struct event_queue
|
||||
{
|
||||
struct mtcp_epoll_event_int *events;
|
||||
int start; // starting index
|
||||
int end; // ending index
|
||||
|
||||
int size; // max size
|
||||
int num_events; // number of events
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_epoll
|
||||
{
|
||||
struct event_queue *usr_queue;
|
||||
struct event_queue *usr_shadow_queue;
|
||||
struct event_queue *mtcp_queue;
|
||||
|
||||
uint8_t waiting;
|
||||
struct mtcp_epoll_stat stat;
|
||||
|
||||
pthread_cond_t epoll_cond;
|
||||
pthread_mutex_t epoll_lock;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
int
|
||||
CloseEpollSocket(mctx_t mctx, int epid);
|
||||
|
||||
#endif /* __EVENTPOLL_H_ */
|
|
@ -0,0 +1,42 @@
|
|||
#ifndef __FHASH_H_
|
||||
#define __FHASH_H_
|
||||
|
||||
#include <sys/queue.h>
|
||||
#include "tcp_stream.h"
|
||||
|
||||
#define NUM_BINS (131072) /* 128 K (2^17) bins per thread */
|
||||
#define TCP_AR_CNT (3)
|
||||
|
||||
#define STATIC_TABLE FALSE
|
||||
|
||||
typedef struct hash_bucket_head {
|
||||
tcp_stream *tqh_first;
|
||||
tcp_stream **tqh_last;
|
||||
} hash_bucket_head;
|
||||
|
||||
/* hashtable structure */
|
||||
struct hashtable {
|
||||
uint8_t ht_count ; // count for # entry
|
||||
|
||||
#if STATIC_TABLE
|
||||
tcp_stream* ht_array[NUM_BINS][TCP_AR_CNT];
|
||||
#endif
|
||||
hash_bucket_head ht_table[NUM_BINS];
|
||||
|
||||
// functions
|
||||
unsigned int (*hashfn) (const tcp_stream *);
|
||||
int (*eqfn) (const tcp_stream *, const tcp_stream *);
|
||||
};
|
||||
|
||||
/*functions for hashtable*/
|
||||
struct hashtable *CreateHashtable(unsigned int (*hashfn) (const tcp_stream*),
|
||||
int (*eqfn) (const tcp_stream*,
|
||||
const tcp_stream *));
|
||||
void DestroyHashtable(struct hashtable *ht);
|
||||
|
||||
|
||||
int HTInsert(struct hashtable *ht, tcp_stream *);
|
||||
void* HTRemove(struct hashtable *ht, tcp_stream *);
|
||||
tcp_stream* HTSearch(struct hashtable *ht, const tcp_stream *);
|
||||
|
||||
#endif /* __FHASH_H_ */
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef __IP_IN_H_
|
||||
#define __IP_IN_H_
|
||||
|
||||
#include "mtcp.h"
|
||||
|
||||
int
|
||||
ProcessIPv4Packet(mtcp_manager_t mtcp, uint32_t cur_ts,
|
||||
const int ifidx, unsigned char* pkt_data, int len);
|
||||
|
||||
#endif /* __IP_IN_H_ */
|
|
@ -0,0 +1,20 @@
|
|||
#ifndef __IP_OUT_H_
|
||||
#define __IP_OUT_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "tcp_stream.h"
|
||||
|
||||
/*
 * Takes a destination IPv4 address and returns an int.
 * NOTE(review): presumably the index of the output interface and daddr in
 * network byte order -- confirm against the definition.
 *
 * Declared without `inline`: the header carries no definition, and a plain
 * prototype guarantees the defining translation unit emits an external
 * definition under both GNU89 and C99 inline rules.
 */
int
GetOutputInterface(uint32_t daddr);
|
||||
|
||||
void
|
||||
ForwardIPv4Packet(mtcp_manager_t mtcp, int nif_in, char *buf, int len);
|
||||
|
||||
uint8_t *
|
||||
IPOutputStandalone(struct mtcp_manager *mtcp,
|
||||
uint16_t ip_id, uint32_t saddr, uint32_t daddr, uint16_t tcplen);
|
||||
|
||||
uint8_t *
|
||||
IPOutput(struct mtcp_manager *mtcp, tcp_stream *stream, uint16_t tcplen);
|
||||
|
||||
#endif /* __IP_OUT_H_ */
|
|
@ -0,0 +1,47 @@
|
|||
#ifndef __LOGGER_H_
|
||||
#define __LOGGER_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define LOG_BUFF_SIZE (256*1024)
|
||||
#define NUM_LOG_BUFF (100)
|
||||
|
||||
/*
 * States of the logging thread.
 *
 * Declared as an enum tag rather than `enum { ... } log_thread_state;`,
 * which defined a global variable in every translation unit including this
 * header (a multiple-definition link error under -fno-common).
 */
enum log_thread_state {
	IDLE_LOGT,
	ACTIVE_LOGT
};
|
||||
|
||||
typedef struct log_buff
|
||||
{
|
||||
int tid;
|
||||
FILE* fid;
|
||||
int buff_len;
|
||||
char buff[LOG_BUFF_SIZE];
|
||||
TAILQ_ENTRY(log_buff) buff_link;
|
||||
} log_buff;
|
||||
|
||||
typedef struct log_thread_context {
|
||||
pthread_t thread;
|
||||
int cpu;
|
||||
int done;
|
||||
int sp_fd;
|
||||
int pair_sp_fd;
|
||||
int free_buff_cnt;
|
||||
int job_buff_cnt;
|
||||
|
||||
uint8_t state;
|
||||
|
||||
pthread_mutex_t mutex;
|
||||
pthread_mutex_t free_mutex;
|
||||
|
||||
TAILQ_HEAD(, log_buff) working_queue;
|
||||
TAILQ_HEAD(, log_buff) free_queue;
|
||||
|
||||
} log_thread_context;
|
||||
|
||||
log_buff* DequeueFreeBuffer (log_thread_context *ctx);
|
||||
void EnqueueJobBuffer(log_thread_context *ctx, log_buff* working_bp);
|
||||
void InitLogThreadContext (log_thread_context *ctx, int cpu);
|
||||
void *ThreadLogMain(void* arg);
|
||||
|
||||
#endif /* __LOGGER_H_ */
|
|
@ -0,0 +1,23 @@
|
|||
#ifndef __MEMORY_MGT_H_
|
||||
#define __MEMORY_MGT_H_
|
||||
|
||||
struct mem_pool;
|
||||
typedef struct mem_pool* mem_pool_t;
|
||||
|
||||
/* create a memory pool with a chunk size and total size
|
||||
and return the pointer to the memory pool */
|
||||
mem_pool_t MPCreate(int chunk_size, size_t total_size, int is_hugepage);
|
||||
|
||||
/* allocate one chunk */
|
||||
void *MPAllocateChunk(mem_pool_t mp);
|
||||
|
||||
/* free one chunk */
|
||||
void MPFreeChunk(mem_pool_t mp, void *p);
|
||||
|
||||
/* destroy the memory pool */
|
||||
void MPDestroy(mem_pool_t mp);
|
||||
|
||||
/* return the number of free chunks */
|
||||
int MPGetFreeChunks(mem_pool_t mp);
|
||||
|
||||
#endif /* __MEMORY_MGT_H_ */
|
|
@ -0,0 +1,306 @@
|
|||
#ifndef __MTCP_H_
|
||||
#define __MTCP_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/queue.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "memory_mgt.h"
|
||||
#include "tcp_ring_buffer.h"
|
||||
#include "tcp_send_buffer.h"
|
||||
#include "tcp_stream_queue.h"
|
||||
#include "socket.h"
|
||||
#include "mtcp_api.h"
|
||||
#include "eventpoll.h"
|
||||
#include "addr_pool.h"
|
||||
#include "ps.h"
|
||||
#include "logger.h"
|
||||
#include "stat.h"
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE (1)
|
||||
#endif
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE (0)
|
||||
#endif
|
||||
|
||||
#ifndef ERROR
|
||||
#define ERROR (-1)
|
||||
#endif
|
||||
|
||||
#define MAX_CPUS 16
|
||||
|
||||
#define ETHERNET_HEADER_LEN 14 // sizeof(struct ethhdr)
|
||||
#define IP_HEADER_LEN 20 // sizeof(struct iphdr)
|
||||
#define TCP_HEADER_LEN 20 // sizeof(struct tcphdr)
|
||||
#define TOTAL_TCP_HEADER_LEN 54 // total header length
|
||||
|
||||
/* configurations */
|
||||
#define BACKLOG_SIZE (10*1024)
|
||||
#define MAX_PKT_SIZE (2*1024)
|
||||
#define ETH_NUM 4
|
||||
|
||||
#define TCP_OPT_TIMESTAMP_ENABLED TRUE
|
||||
#define TCP_OPT_SACK_ENABLED FALSE
|
||||
|
||||
#define E_PSIO TRUE
|
||||
#define USE_CHUNK_BUF FALSE
|
||||
#define LOCK_STREAM_QUEUE FALSE
|
||||
#define USE_SPIN_LOCK TRUE
|
||||
#define INTR_SLEEPING_MTCP TRUE
|
||||
#define PROMISCUOUS_MODE TRUE
|
||||
|
||||
#define BLOCKING_SUPPORT FALSE
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Statistics */
|
||||
#ifdef NETSTAT
|
||||
#define NETSTAT_PERTHREAD TRUE
|
||||
#define NETSTAT_TOTAL TRUE
|
||||
#endif /* NETSTAT */
|
||||
#define RTM_STAT FALSE
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Lock definitions for socket buffer */
|
||||
/*
 * Socket-buffer lock wrappers.  USE_SPIN_LOCK selects pthread spinlocks,
 * otherwise pthread mutexes are used.
 *
 *   SBUF_LOCK_INIT(lock, errmsg, action)
 *       Initialise *lock.  On failure, report via perror() -- errmsg must be
 *       a string literal, it is pasted onto the primitive's name -- and then
 *       execute `action` (e.g. `return -1`).
 *   SBUF_LOCK_DESTROY / SBUF_LOCK / SBUF_UNLOCK
 *       Thin aliases returning the underlying pthread call's result.
 *
 * The SBUF_LOCK_INIT parameter list must not be followed by ';': that
 * semicolon would become the first token of the expansion, so a guarded use
 * such as `if (c) SBUF_LOCK_INIT(...)` would guard only an empty statement.
 */
#if USE_SPIN_LOCK
#define SBUF_LOCK_INIT(lock, errmsg, action) \
	if (pthread_spin_init(lock, PTHREAD_PROCESS_PRIVATE)) { \
		perror("pthread_spin_init" errmsg); \
		action; \
	}
#define SBUF_LOCK_DESTROY(lock)	pthread_spin_destroy(lock)
#define SBUF_LOCK(lock)			pthread_spin_lock(lock)
#define SBUF_UNLOCK(lock)		pthread_spin_unlock(lock)
#else
#define SBUF_LOCK_INIT(lock, errmsg, action) \
	if (pthread_mutex_init(lock, NULL)) { \
		perror("pthread_mutex_init" errmsg); \
		action; \
	}
#define SBUF_LOCK_DESTROY(lock)	pthread_mutex_destroy(lock)
#define SBUF_LOCK(lock)			pthread_mutex_lock(lock)
#define SBUF_UNLOCK(lock)		pthread_mutex_unlock(lock)
#endif /* USE_SPIN_LOCK */
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct eth_table
|
||||
{
|
||||
char dev_name[128];
|
||||
int ifindex;
|
||||
int stat_print;
|
||||
unsigned char haddr[ETH_ALEN];
|
||||
uint32_t netmask;
|
||||
// unsigned char dst_haddr[ETH_ALEN];
|
||||
uint32_t ip_addr;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct route_table
|
||||
{
|
||||
uint32_t daddr;
|
||||
uint32_t mask;
|
||||
uint32_t masked;
|
||||
int prefix;
|
||||
int nif;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct arp_entry
|
||||
{
|
||||
uint32_t ip;
|
||||
int8_t prefix;
|
||||
uint32_t ip_mask;
|
||||
uint32_t ip_masked;
|
||||
unsigned char haddr[ETH_ALEN];
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct arp_table
|
||||
{
|
||||
struct arp_entry *entry;
|
||||
int entries;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_config
|
||||
{
|
||||
/* socket mode */
|
||||
int8_t socket_mode;
|
||||
|
||||
/* network interface config */
|
||||
struct eth_table *eths;
|
||||
int eths_num;
|
||||
|
||||
/* route config */
|
||||
struct route_table *rtable; // routing table
|
||||
int routes; // # of entries
|
||||
|
||||
/* arp config */
|
||||
struct arp_table arp;
|
||||
|
||||
int num_cores;
|
||||
int max_concurrency;
|
||||
|
||||
int max_num_buffers;
|
||||
int rcvbuf_size;
|
||||
int sndbuf_size;
|
||||
|
||||
int tcp_timewait;
|
||||
int tcp_timeout;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_context
|
||||
{
|
||||
int cpu;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_sender
|
||||
{
|
||||
int ifidx;
|
||||
|
||||
TAILQ_HEAD (control_head, tcp_stream) control_list;
|
||||
TAILQ_HEAD (send_head, tcp_stream) send_list;
|
||||
TAILQ_HEAD (ack_head, tcp_stream) ack_list;
|
||||
|
||||
int control_list_cnt;
|
||||
int send_list_cnt;
|
||||
int ack_list_cnt;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_manager
|
||||
{
|
||||
mem_pool_t flow_pool; /* memory pool for tcp_stream */
|
||||
mem_pool_t rv_pool; /* memory pool for recv variables */
|
||||
mem_pool_t sv_pool; /* memory pool for send variables */
|
||||
mem_pool_t mv_pool; /* memory pool for monitor variables */
|
||||
|
||||
//mem_pool_t socket_pool;
|
||||
sb_manager_t rbm_snd;
|
||||
rb_manager_t rbm_rcv;
|
||||
struct hashtable *tcp_flow_table;
|
||||
|
||||
uint32_t s_index:24; /* stream index */
|
||||
socket_map_t smap;
|
||||
TAILQ_HEAD (, socket_map) free_smap;
|
||||
|
||||
addr_pool_t ap; /* address pool */
|
||||
|
||||
uint32_t g_id; /* id space in a thread */
|
||||
uint32_t flow_cnt; /* number of concurrent flows */
|
||||
|
||||
struct mtcp_thread_context* ctx;
|
||||
|
||||
/* variables related to logger */
|
||||
int sp_fd;
|
||||
log_thread_context* logger;
|
||||
log_buff* w_buffer;
|
||||
FILE *log_fp;
|
||||
|
||||
/* variables related to event */
|
||||
struct mtcp_epoll *ep;
|
||||
uint32_t ts_last_event;
|
||||
|
||||
struct tcp_listener *listener;
|
||||
|
||||
stream_queue_t connectq; /* streams need to connect */
|
||||
stream_queue_t sendq; /* streams need to send data */
|
||||
stream_queue_t ackq; /* streams need to send ack */
|
||||
|
||||
stream_queue_t closeq; /* streams need to close */
|
||||
stream_queue_int *closeq_int; /* internally maintained closeq */
|
||||
stream_queue_t resetq; /* streams need to reset */
|
||||
stream_queue_int *resetq_int; /* internally maintained resetq */
|
||||
|
||||
stream_queue_t destroyq; /* streams need to be destroyed */
|
||||
|
||||
struct mtcp_sender *g_sender;
|
||||
struct mtcp_sender *n_sender[ETH_NUM];
|
||||
|
||||
/* lists related to timeout */
|
||||
struct rto_hashstore* rto_store;
|
||||
TAILQ_HEAD (timewait_head, tcp_stream) timewait_list;
|
||||
TAILQ_HEAD (timeout_head, tcp_stream) timeout_list;
|
||||
|
||||
int rto_list_cnt;
|
||||
int timewait_list_cnt;
|
||||
int timeout_list_cnt;
|
||||
|
||||
#if BLOCKING_SUPPORT
|
||||
TAILQ_HEAD (rcv_br_head, tcp_stream) rcv_br_list;
|
||||
TAILQ_HEAD (snd_br_head, tcp_stream) snd_br_list;
|
||||
int rcv_br_list_cnt;
|
||||
int snd_br_list_cnt;
|
||||
#endif
|
||||
|
||||
uint32_t cur_ts;
|
||||
|
||||
int wakeup_flag;
|
||||
int is_sleeping;
|
||||
|
||||
/* statistics */
|
||||
struct bcast_stat bstat;
|
||||
struct timeout_stat tstat;
|
||||
#ifdef NETSTAT
|
||||
struct net_stat nstat;
|
||||
struct net_stat p_nstat;
|
||||
uint32_t p_nstat_ts;
|
||||
|
||||
struct run_stat runstat;
|
||||
struct run_stat p_runstat;
|
||||
|
||||
struct time_stat rtstat;
|
||||
#endif /* NETSTAT */
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef struct mtcp_manager* mtcp_manager_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
mtcp_manager_t
|
||||
GetMTCPManager(mctx_t mctx);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_thread_context
|
||||
{
|
||||
int cpu;
|
||||
pthread_t thread;
|
||||
uint8_t done:1,
|
||||
exit:1,
|
||||
interrupt:1;
|
||||
|
||||
struct ps_handle *handle;
|
||||
struct mtcp_manager* mtcp_manager;
|
||||
|
||||
#if E_PSIO || USE_CHUNK_BUF
|
||||
struct ps_chunk_buf w_chunk_buf[ETH_NUM];
|
||||
#else
|
||||
struct ps_chunk w_chunk[ETH_NUM];
|
||||
uint32_t w_off[ETH_NUM];
|
||||
int16_t w_cur_idx[ETH_NUM];
|
||||
#endif
|
||||
|
||||
pthread_mutex_t smap_lock;
|
||||
pthread_mutex_t flow_pool_lock;
|
||||
pthread_mutex_t socket_pool_lock;
|
||||
|
||||
#if LOCK_STREAM_QUEUE
|
||||
#if USE_SPIN_LOCK
|
||||
pthread_spinlock_t connect_lock;
|
||||
pthread_spinlock_t close_lock;
|
||||
pthread_spinlock_t reset_lock;
|
||||
pthread_spinlock_t sendq_lock;
|
||||
pthread_spinlock_t ackq_lock;
|
||||
pthread_spinlock_t destroyq_lock;
|
||||
#else
|
||||
pthread_mutex_t connect_lock;
|
||||
pthread_mutex_t close_lock;
|
||||
pthread_mutex_t reset_lock;
|
||||
pthread_mutex_t sendq_lock;
|
||||
pthread_mutex_t ackq_lock;
|
||||
pthread_mutex_t destroyq_lock;
|
||||
#endif /* USE_SPIN_LOCK */
|
||||
#endif /* LOCK_STREAM_QUEUE */
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef struct mtcp_thread_context* mtcp_thread_context_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Global state: an array of manager pointers with MAX_CPUS slots, the
 * configuration, and an address pool.
 * NOTE(review): these are variable definitions (not `extern` declarations)
 * in a header, so every translation unit that includes it emits its own
 * tentative definition.  That links only when common symbols are enabled
 * (-fcommon).  Consider `extern` here plus one definition in a .c file.
 */
struct mtcp_manager *g_mtcp[MAX_CPUS];
|
||||
struct mtcp_config CONFIG;
|
||||
addr_pool_t ap;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* __MTCP_H_ */
|
|
@ -0,0 +1,132 @@
|
|||
#ifndef __MTCP_API_H_
|
||||
#define __MTCP_API_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <netinet/in.h>
|
||||
#include <sys/uio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum socket_type
|
||||
{
|
||||
MTCP_SOCK_UNUSED,
|
||||
MTCP_SOCK_STREAM,
|
||||
MTCP_SOCK_PROXY,
|
||||
MTCP_SOCK_LISTENER,
|
||||
MTCP_SOCK_EPOLL,
|
||||
MTCP_SOCK_PIPE,
|
||||
};
|
||||
|
||||
struct mtcp_conf
|
||||
{
|
||||
int num_cores;
|
||||
int max_concurrency;
|
||||
|
||||
int max_num_buffers;
|
||||
int rcvbuf_size;
|
||||
int sndbuf_size;
|
||||
|
||||
int tcp_timewait;
|
||||
int tcp_timeout;
|
||||
};
|
||||
|
||||
typedef struct mtcp_context *mctx_t;
|
||||
|
||||
int
|
||||
mtcp_init(char *config_file);
|
||||
|
||||
/*
 * Takes no arguments.  Declared with (void): an empty parameter list in C
 * leaves the parameters unspecified, so calls passing arguments would not
 * be diagnosed.
 */
void
mtcp_destroy(void);
|
||||
|
||||
int
|
||||
mtcp_getconf(struct mtcp_conf *conf);
|
||||
|
||||
int
|
||||
mtcp_setconf(const struct mtcp_conf *conf);
|
||||
|
||||
int
|
||||
mtcp_core_affinitize(int cpu);
|
||||
|
||||
mctx_t
|
||||
mtcp_create_context(int cpu);
|
||||
|
||||
void
|
||||
mtcp_destroy_context(mctx_t mctx);
|
||||
|
||||
typedef void (*mtcp_sighandler_t)(int);
|
||||
|
||||
mtcp_sighandler_t
|
||||
mtcp_register_signal(int signum, mtcp_sighandler_t handler);
|
||||
|
||||
int
|
||||
mtcp_pipe(mctx_t mctx, int pipeid[2]);
|
||||
|
||||
int
|
||||
mtcp_getsockopt(mctx_t mctx, int sockid, int level,
|
||||
int optname, void *optval, socklen_t *optlen);
|
||||
|
||||
int
|
||||
mtcp_setsockopt(mctx_t mctx, int sockid, int level,
|
||||
int optname, const void *optval, socklen_t optlen);
|
||||
|
||||
int
|
||||
mtcp_setsock_nonblock(mctx_t mctx, int sockid);
|
||||
|
||||
/* mtcp_socket_ioctl: similar to ioctl,
|
||||
but only FIONREAD is supported currently */
|
||||
int
|
||||
mtcp_socket_ioctl(mctx_t mctx, int sockid, int request, void *argp);
|
||||
|
||||
int
|
||||
mtcp_socket(mctx_t mctx, int domain, int type, int protocol);
|
||||
|
||||
int
|
||||
mtcp_bind(mctx_t mctx, int sockid,
|
||||
const struct sockaddr *addr, socklen_t addrlen);
|
||||
|
||||
int
|
||||
mtcp_listen(mctx_t mctx, int sockid, int backlog);
|
||||
|
||||
int
|
||||
mtcp_accept(mctx_t mctx, int sockid, struct sockaddr *addr, socklen_t *addrlen);
|
||||
|
||||
/*
 * NOTE(review): dport is typed in_addr_t (an address type) although the name
 * suggests a port; in_port_t may have been intended.  Changing it would alter
 * the prototype, so confirm against the definition and callers first.
 * NOTE(review): byte order of the address/port arguments is not stated here.
 */
int
|
||||
mtcp_init_rss(mctx_t mctx, in_addr_t saddr_base, int num_addr,
|
||||
in_addr_t daddr, in_addr_t dport);
|
||||
|
||||
int
|
||||
mtcp_connect(mctx_t mctx, int sockid,
|
||||
const struct sockaddr *addr, socklen_t addrlen);
|
||||
|
||||
int
|
||||
mtcp_close(mctx_t mctx, int sockid);
|
||||
|
||||
int
|
||||
mtcp_abort(mctx_t mctx, int sockid);
|
||||
|
||||
int
|
||||
mtcp_read(mctx_t mctx, int sockid, char *buf, int len);
|
||||
|
||||
/* readv should work in atomic */
|
||||
int
|
||||
mtcp_readv(mctx_t mctx, int sockid, struct iovec *iov, int numIOV);
|
||||
|
||||
int
|
||||
mtcp_write(mctx_t mctx, int sockid, char *buf, int len);
|
||||
|
||||
/* writev should work in atomic */
|
||||
int
|
||||
mtcp_writev(mctx_t mctx, int sockid, struct iovec *iov, int numIOV);
|
||||
|
||||
#if 0
|
||||
int
|
||||
mtcp_delete(mctx_t mctx, int sockid, int len);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* __MTCP_API_H_ */
|
|
@ -0,0 +1,69 @@
|
|||
#ifndef __MTCP_EPOLL_H_
|
||||
#define __MTCP_EPOLL_H_
|
||||
|
||||
#include "mtcp_api.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
enum mtcp_epoll_op
|
||||
{
|
||||
MTCP_EPOLL_CTL_ADD = 1,
|
||||
MTCP_EPOLL_CTL_DEL = 2,
|
||||
MTCP_EPOLL_CTL_MOD = 3,
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Event bits used in struct mtcp_epoll_event.events (a uint32_t).
 */
enum mtcp_event_type
{
	MTCP_EPOLLNONE		= 0x000,
	MTCP_EPOLLIN		= 0x001,
	MTCP_EPOLLPRI		= 0x002,
	MTCP_EPOLLOUT		= 0x004,
	MTCP_EPOLLRDNORM	= 0x040,
	MTCP_EPOLLRDBAND	= 0x080,
	MTCP_EPOLLWRNORM	= 0x100,
	MTCP_EPOLLWRBAND	= 0x200,
	MTCP_EPOLLMSG		= 0x400,
	MTCP_EPOLLERR		= 0x008,
	MTCP_EPOLLHUP		= 0x010,
	MTCP_EPOLLRDHUP		= 0x2000,
	MTCP_EPOLLONESHOT	= (1 << 30),
	/*
	 * Unsigned shift: `1 << 31` overflows a signed int (undefined
	 * behaviour) and yields a negative constant that sign-extends when
	 * widened to 64 bits.
	 */
	MTCP_EPOLLET		= (1u << 31)
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef union mtcp_epoll_data
|
||||
{
|
||||
void *ptr;
|
||||
int sockid;
|
||||
uint32_t u32;
|
||||
uint64_t u64;
|
||||
} mtcp_epoll_data_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct mtcp_epoll_event
|
||||
{
|
||||
uint32_t events;
|
||||
mtcp_epoll_data_t data;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_epoll_create(mctx_t mctx, int size);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_epoll_ctl(mctx_t mctx, int epid,
|
||||
int op, int sockid, struct mtcp_epoll_event *event);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_epoll_wait(mctx_t mctx, int epid,
|
||||
struct mtcp_epoll_event *events, int maxevents, int timeout);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
char *
|
||||
EventToString(uint32_t event);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
};
|
||||
#endif
|
||||
|
||||
#endif /* __MTCP_EPOLL_H_ */
|
|
@ -0,0 +1,18 @@
|
|||
#ifndef __MTCP_PIPE_H_
|
||||
#define __MTCP_PIPE_H_
|
||||
|
||||
#include <mtcp_api.h>
|
||||
|
||||
int
|
||||
PipeRead(mctx_t mctx, int pipeid, char *buf, int len);
|
||||
|
||||
int
|
||||
PipeWrite(mctx_t mctx, int pipeid, char *buf, int len);
|
||||
|
||||
int
|
||||
RaisePendingPipeEvents(mctx_t mctx, int epid, int pipeid);
|
||||
|
||||
int
|
||||
PipeClose(mctx_t mctx, int pipeid);
|
||||
|
||||
#endif /* __MTCP_PIPE_H_ */
|
|
@ -0,0 +1,10 @@
|
|||
#ifndef __RSS_H_
|
||||
#define __RSS_H_
|
||||
|
||||
#include <netinet/in.h>
|
||||
|
||||
/* sip, dip, sp, dp: in network byte order */
|
||||
int GetRSSCPUCore(in_addr_t sip, in_addr_t dip,
|
||||
in_port_t sp, in_port_t dp, int num_queues);
|
||||
|
||||
#endif /* __RSS_H_ */
|
|
@ -0,0 +1,61 @@
|
|||
#ifndef __SOCKET_H_
|
||||
#define __SOCKET_H_
|
||||
|
||||
#include "mtcp_api.h"
|
||||
#include "mtcp_epoll.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
enum socket_opts
|
||||
{
|
||||
MTCP_NONBLOCK = 0x01,
|
||||
MTCP_ADDR_BIND = 0x02,
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct socket_map
|
||||
{
|
||||
int id;
|
||||
int socktype;
|
||||
uint32_t opts;
|
||||
|
||||
struct sockaddr_in saddr;
|
||||
|
||||
union {
|
||||
struct tcp_stream *stream;
|
||||
struct tcp_listener *listener;
|
||||
struct mtcp_epoll *ep;
|
||||
struct pipe *pp;
|
||||
};
|
||||
|
||||
uint32_t epoll; /* registered events */
|
||||
uint32_t events; /* available events */
|
||||
mtcp_epoll_data_t ep_data;
|
||||
|
||||
TAILQ_ENTRY (socket_map) free_smap_link;
|
||||
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef struct socket_map * socket_map_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
socket_map_t
|
||||
AllocateSocket(mctx_t mctx, int socktype, int need_lock);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
FreeSocket(mctx_t mctx, int sockid, int need_lock);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
socket_map_t
|
||||
GetSocket(mctx_t mctx, int sockid);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_listener
|
||||
{
|
||||
int sockid;
|
||||
socket_map_t socket;
|
||||
|
||||
int backlog;
|
||||
stream_queue_t acceptq;
|
||||
|
||||
pthread_mutex_t accept_lock;
|
||||
pthread_cond_t accept_cond;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* __SOCKET_H_ */
|
|
@ -0,0 +1,81 @@
|
|||
#ifndef __STAT_H_
|
||||
#define __STAT_H_
|
||||
|
||||
struct run_stat
|
||||
{
|
||||
uint64_t rounds;
|
||||
uint64_t rounds_rx;
|
||||
uint64_t rounds_rx_try;
|
||||
uint64_t rounds_tx;
|
||||
uint64_t rounds_tx_try;
|
||||
uint64_t rounds_select;
|
||||
uint64_t rounds_select_rx;
|
||||
uint64_t rounds_select_tx;
|
||||
uint64_t rounds_select_intr;
|
||||
|
||||
uint64_t rounds_accept;
|
||||
uint64_t rounds_read;
|
||||
uint64_t rounds_write;
|
||||
uint64_t rounds_epoll;
|
||||
uint64_t rounds_wndadv;
|
||||
|
||||
uint64_t rounds_rtocheck;
|
||||
uint64_t rounds_twcheck;
|
||||
uint64_t rounds_tocheck;
|
||||
};
|
||||
|
||||
struct stat_counter
|
||||
{
|
||||
uint64_t cnt;
|
||||
uint64_t sum;
|
||||
uint64_t max;
|
||||
uint64_t min;
|
||||
};
|
||||
|
||||
struct time_stat
|
||||
{
|
||||
struct stat_counter round;
|
||||
struct stat_counter processing;
|
||||
struct stat_counter tcheck;
|
||||
struct stat_counter epoll;
|
||||
struct stat_counter handle;
|
||||
struct stat_counter xmit;
|
||||
struct stat_counter select;
|
||||
};
|
||||
|
||||
struct net_stat
|
||||
{
|
||||
uint64_t tx_packets[MAX_DEVICES];
|
||||
uint64_t tx_bytes[MAX_DEVICES];
|
||||
uint64_t tx_drops[MAX_DEVICES];
|
||||
uint64_t rx_packets[MAX_DEVICES];
|
||||
uint64_t rx_bytes[MAX_DEVICES];
|
||||
uint64_t rx_errors[MAX_DEVICES];
|
||||
};
|
||||
|
||||
struct bcast_stat
|
||||
{
|
||||
uint64_t cycles;
|
||||
uint64_t write;
|
||||
uint64_t read;
|
||||
uint64_t epoll;
|
||||
uint64_t wnd_adv;
|
||||
uint64_t ack;
|
||||
};
|
||||
|
||||
struct timeout_stat
|
||||
{
|
||||
uint64_t cycles;
|
||||
uint64_t rto_try;
|
||||
uint64_t rto;
|
||||
uint64_t timewait_try;
|
||||
uint64_t timewait;
|
||||
};
|
||||
|
||||
/*
 * STAT_COUNT(stat): increment a statistics counter when NETSTAT is defined;
 * expands to nothing otherwise, so the argument is not evaluated.
 *
 * The argument is parenthesised so that an lvalue expression such as *p
 * increments the counter itself rather than the pointer.
 */
#ifdef NETSTAT
#define STAT_COUNT(stat) ((stat)++)
#else
#define STAT_COUNT(stat)
#endif
|
||||
|
||||
#endif /* __STAT_H_ */
|
|
@ -0,0 +1,123 @@
|
|||
#ifndef __TCP_IN_H_
|
||||
#define __TCP_IN_H_
|
||||
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/tcp.h>
|
||||
#include <linux/udp.h>
|
||||
#include <netinet/ip.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "fhash.h"
|
||||
|
||||
#define TCP_FLAG_FIN 0x01 // 0000 0001
|
||||
#define TCP_FLAG_SYN 0x02 // 0000 0010
|
||||
#define TCP_FLAG_RST 0x04 // 0000 0100
|
||||
#define TCP_FLAG_PSH 0x08 // 0000 1000
|
||||
#define TCP_FLAG_ACK 0x10 // 0001 0000
|
||||
#define TCP_FLAG_URG 0x20 // 0010 0000
|
||||
#define TCP_FLAG_SACK 0x40 // 0100 0000
|
||||
#define TCP_FLAG_WACK 0x80 // 1000 0000
|
||||
|
||||
#define TCP_OPT_FLAG_MSS 0x02 // 0000 0010
|
||||
#define TCP_OPT_FLAG_WSCALE 0x04 // 0000 0100
|
||||
#define TCP_OPT_FLAG_SACK_PERMIT 0x08 // 0000 1000
|
||||
#define TCP_OPT_FLAG_SACK 0x10 // 0001 0000
|
||||
#define TCP_OPT_FLAG_TIMESTAMP 0x20 // 0010 0000
|
||||
|
||||
#define TCP_OPT_MSS_LEN 4
|
||||
#define TCP_OPT_WSCALE_LEN 3
|
||||
#define TCP_OPT_SACK_PERMIT_LEN 2
|
||||
#define TCP_OPT_SACK_LEN 10
|
||||
#define TCP_OPT_TIMESTAMP_LEN 10
|
||||
|
||||
#define TCP_DEFAULT_MSS 1460
|
||||
#define TCP_DEFAULT_WSCALE 7
|
||||
#define TCP_INITIAL_WINDOW 14600 // initial window size
|
||||
|
||||
#define TCP_SEQ_LT(a,b) ((int32_t)((a)-(b)) < 0)
|
||||
#define TCP_SEQ_LEQ(a,b) ((int32_t)((a)-(b)) <= 0)
|
||||
#define TCP_SEQ_GT(a,b) ((int32_t)((a)-(b)) > 0)
|
||||
#define TCP_SEQ_GEQ(a,b) ((int32_t)((a)-(b)) >= 0)
|
||||
#define TCP_SEQ_BETWEEN(a,b,c) (TCP_SEQ_GEQ(a,b) && TCP_SEQ_LEQ(a,c))
|
||||
|
||||
/*
 * Timestamp helpers.
 *
 * One timestamp tick lasts TIME_TICK microseconds; with HZ = 1000 that is
 * 1000 us, i.e. timestamps have a precision of 1 ms.
 */
#define HZ						1000
#define TIME_TICK				(1000000/HZ)		// in us

/* convert a pointer to struct timeval into a 32-bit timestamp (truncated to
 * uint32_t); the whole expansion is parenthesized so it is a single operand */
#define TIMEVAL_TO_TS(t)		((uint32_t)((t)->tv_sec * HZ + \
								((t)->tv_usec / TIME_TICK)))

/* timestamp ticks <-> microseconds / milliseconds */
#define TS_TO_USEC(t)			((t) * TIME_TICK)
#define TS_TO_MSEC(t)			(TS_TO_USEC(t) / 1000)

#define USEC_TO_TS(t)			((t) / TIME_TICK)
#define MSEC_TO_TS(t)			(USEC_TO_TS((t) * 1000))

/* plain unit conversions */
#define SEC_TO_USEC(t)			((t) * 1000000)
#define SEC_TO_MSEC(t)			((t) * 1000)
#define MSEC_TO_USEC(t)			((t) * 1000)
#define USEC_TO_SEC(t)			((t) / 1000000)
|
||||
//#define TCP_TIMEWAIT (MSEC_TO_USEC(5000) / TIME_TICK) // 5s
|
||||
#define TCP_TIMEWAIT 0
|
||||
#define TCP_INITIAL_RTO (MSEC_TO_USEC(500) / TIME_TICK) // 500ms
|
||||
#define TCP_FIN_RTO (MSEC_TO_USEC(500) / TIME_TICK) // 500ms
|
||||
#define TCP_TIMEOUT (MSEC_TO_USEC(30000) / TIME_TICK) // 30s
|
||||
|
||||
#define TCP_MAX_RTX 16
|
||||
#define TCP_MAX_SYN_RETRY 7
|
||||
#define TCP_MAX_BACKOFF 7
|
||||
|
||||
enum tcp_state
|
||||
{
|
||||
TCP_ST_CLOSED = 0,
|
||||
TCP_ST_LISTEN = 1,
|
||||
TCP_ST_SYN_SENT = 2,
|
||||
TCP_ST_SYN_RCVD = 3,
|
||||
TCP_ST_ESTABLISHED = 4,
|
||||
TCP_ST_FIN_WAIT_1 = 5,
|
||||
TCP_ST_FIN_WAIT_2 = 6,
|
||||
TCP_ST_CLOSE_WAIT = 7,
|
||||
TCP_ST_CLOSING = 8,
|
||||
TCP_ST_LAST_ACK = 9,
|
||||
TCP_ST_TIME_WAIT = 10
|
||||
};
|
||||
|
||||
enum tcp_option
|
||||
{
|
||||
TCP_OPT_END = 0,
|
||||
TCP_OPT_NOP = 1,
|
||||
TCP_OPT_MSS = 2,
|
||||
TCP_OPT_WSCALE = 3,
|
||||
TCP_OPT_SACK_PERMIT = 4,
|
||||
TCP_OPT_SACK = 5,
|
||||
TCP_OPT_TIMESTAMP = 8
|
||||
};
|
||||
|
||||
enum tcp_close_reason
|
||||
{
|
||||
TCP_NOT_CLOSED = 0,
|
||||
TCP_ACTIVE_CLOSE = 1,
|
||||
TCP_PASSIVE_CLOSE = 2,
|
||||
TCP_CONN_FAIL = 3,
|
||||
TCP_CONN_LOST = 4,
|
||||
TCP_RESET = 5,
|
||||
TCP_NO_MEM = 6,
|
||||
TCP_NOT_ACCEPTED = 7,
|
||||
TCP_TIMEDOUT = 8
|
||||
};
|
||||
|
||||
void
|
||||
ParseTCPOptions(tcp_stream *cur_stream,
|
||||
uint32_t cur_ts, uint8_t *tcpopt, int len);
|
||||
|
||||
inline int
|
||||
ProcessTCPUplink(mtcp_manager_t mtcp, uint32_t cur_ts, tcp_stream *cur_stream,
|
||||
const struct tcphdr *tcph, uint32_t seq, uint32_t ack_seq,
|
||||
uint8_t *payload, int payloadlen, uint32_t window);
|
||||
|
||||
int
|
||||
ProcessTCPPacket(struct mtcp_manager *mtcp, uint32_t cur_ts,
|
||||
const struct iphdr* iph, int ip_len);
|
||||
uint16_t
|
||||
TCPCalcChecksum(uint16_t *buf, uint16_t len, uint32_t saddr, uint32_t daddr);
|
||||
|
||||
#endif /* __TCP_IN_H_ */
|
|
@ -0,0 +1,59 @@
|
|||
#ifndef __TCP_OUT_H_
|
||||
#define __TCP_OUT_H_
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "tcp_stream.h"
|
||||
|
||||
enum ack_opt
|
||||
{
|
||||
ACK_OPT_NOW,
|
||||
ACK_OPT_AGGREGATE,
|
||||
ACK_OPT_WACK
|
||||
};
|
||||
|
||||
int
|
||||
SendTCPPacketStandalone(struct mtcp_manager *mtcp,
|
||||
uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
|
||||
uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
|
||||
uint8_t *payload, uint16_t payloadlen,
|
||||
uint32_t cur_ts, uint32_t echo_ts);
|
||||
|
||||
int
|
||||
SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
|
||||
uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen);
|
||||
|
||||
inline int
|
||||
WriteTCPControlList(mtcp_manager_t mtcp,
|
||||
struct mtcp_sender *sender, uint32_t cur_ts, int thresh);
|
||||
|
||||
inline int
|
||||
WriteTCPDataList(mtcp_manager_t mtcp,
|
||||
struct mtcp_sender *sender, uint32_t cur_ts, int thresh);
|
||||
|
||||
inline int
|
||||
WriteTCPACKList(mtcp_manager_t mtcp,
|
||||
struct mtcp_sender *sender, uint32_t cur_ts, int thresh);
|
||||
|
||||
inline void
|
||||
AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts);
|
||||
|
||||
inline void
|
||||
AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
EnqueueACK(mtcp_manager_t mtcp,
|
||||
tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt);
|
||||
|
||||
inline void
|
||||
DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender);
|
||||
|
||||
#endif /* __TCP_OUT_H_ */
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef __TCP_RB_FRAG_QUEUE_
|
||||
#define __TCP_RB_FRAG_QUEUE_
|
||||
|
||||
#include "tcp_ring_buffer.h"
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
typedef struct rb_frag_queue* rb_frag_queue_t;
|
||||
/*---------------------------------------------------------------------------*/
|
||||
rb_frag_queue_t
|
||||
CreateRBFragQueue(int capacity);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyRBFragQueue(rb_frag_queue_t rb_fragq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
RBFragEnqueue(rb_frag_queue_t rb_fragq, struct fragment_ctx *frag);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct fragment_ctx *
|
||||
RBFragDequeue(rb_frag_queue_t rb_fragq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* __TCP_RB_FRAG_QUEUE_ */
|
|
@ -0,0 +1,77 @@
|
|||
|
||||
/*
|
||||
* 2010.12.10 Shinae Woo
|
||||
* Ring buffer structure for managing dynamically allocating ring buffer
|
||||
*
|
||||
* put data to the tail
|
||||
* get/pop/remove data from the head
|
||||
*
|
||||
* always guarantees physically contiguous, ready in-memory data from data_offset to data_offset+len
|
||||
* automatically increase total buffer size when buffer is full
|
||||
* for efficiently managing packet payload and chunking
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __NRE_RING_BUFFER_
|
||||
#define __NRE_RING_BUFFER_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
enum rb_caller
|
||||
{
|
||||
AT_APP,
|
||||
AT_MTCP
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef struct rb_manager* rb_manager_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct fragment_ctx
|
||||
{
|
||||
uint32_t seq;
|
||||
uint32_t len : 31;
|
||||
uint32_t is_calloc : 1;
|
||||
struct fragment_ctx *next;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_ring_buffer
|
||||
{
|
||||
u_char* data; /* buffered data */
|
||||
u_char* head; /* pointer to the head */
|
||||
|
||||
uint32_t head_offset; /* offset for the head (head - data) */
|
||||
uint32_t tail_offset; /* offset fot the last byte (null byte) */
|
||||
|
||||
int merged_len; /* contiguously merged length */
|
||||
uint64_t cum_len; /* cummulatively merged length */
|
||||
int last_len; /* currently saved data length */
|
||||
int size; /* total ring buffer size */
|
||||
|
||||
/* TCP payload features */
|
||||
uint32_t head_seq;
|
||||
uint32_t init_seq;
|
||||
|
||||
struct fragment_ctx* fctx;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint32_t RBGetCurnum(rb_manager_t rbm);
|
||||
void RBPrintInfo(struct tcp_ring_buffer* buff);
|
||||
void RBPrintStr(struct tcp_ring_buffer* buff);
|
||||
void RBPrintHex(struct tcp_ring_buffer* buff);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
rb_manager_t RBManagerCreate(size_t chunk_size, uint32_t cnum);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_ring_buffer* RBInit(rb_manager_t rbm, uint32_t init_seq);
|
||||
void RBFree(rb_manager_t rbm, struct tcp_ring_buffer* buff);
|
||||
uint32_t RBIsDanger(rb_manager_t rbm);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* data manipulation functions */
|
||||
int RBPut(rb_manager_t rbm, struct tcp_ring_buffer* buff,
|
||||
void* data, uint32_t len , uint32_t seq);
|
||||
size_t RBGet(rb_manager_t rbm, struct tcp_ring_buffer* buff, size_t len);
|
||||
size_t RBRemove(rb_manager_t rbm, struct tcp_ring_buffer* buff,
|
||||
size_t len, int option);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#endif
|
|
@ -0,0 +1,22 @@
|
|||
#ifndef __TCP_SB_QUEUE_
|
||||
#define __TCP_SB_QUEUE_
|
||||
|
||||
#include "tcp_send_buffer.h"
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
typedef struct sb_queue* sb_queue_t;
|
||||
/*---------------------------------------------------------------------------*/
|
||||
sb_queue_t
|
||||
CreateSBQueue(int capacity);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroySBQueue(sb_queue_t sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
SBEnqueue(sb_queue_t sq, struct tcp_send_buffer *buf);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct tcp_send_buffer *
|
||||
SBDequeue(sb_queue_t sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* __TCP_SB_QUEUE_ */
|
|
@ -0,0 +1,44 @@
|
|||
#ifndef __TCP_SEND_BUFFER_H_
|
||||
#define __TCP_SEND_BUFFER_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef struct sb_manager* sb_manager_t;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_send_buffer
|
||||
{
|
||||
unsigned char *data;
|
||||
unsigned char *head;
|
||||
|
||||
uint32_t head_off;
|
||||
uint32_t tail_off;
|
||||
uint32_t len;
|
||||
uint64_t cum_len;
|
||||
uint32_t size;
|
||||
|
||||
uint32_t head_seq;
|
||||
uint32_t init_seq;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint32_t
|
||||
SBGetCurnum(sb_manager_t sbm);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
sb_manager_t
|
||||
SBManagerCreate(size_t chunk_size, uint32_t cnum);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_send_buffer *
|
||||
SBInit(sb_manager_t sbm, uint32_t init_seq);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
SBFree(sb_manager_t sbm, struct tcp_send_buffer *buf);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
size_t
|
||||
SBPut(sb_manager_t sbm, struct tcp_send_buffer *buf, void *data, size_t len);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
size_t
|
||||
SBRemove(sb_manager_t sbm, struct tcp_send_buffer *buf, size_t len);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* __TCP_SEND_BUFFER_H_ */
|
|
@ -0,0 +1,230 @@
|
|||
#ifndef __TCP_STREAM_H_
|
||||
#define __TCP_STREAM_H_
|
||||
|
||||
#include <netinet/ip.h>
|
||||
#include <linux/tcp.h>
|
||||
#include <sys/queue.h>
|
||||
|
||||
#include "mtcp.h"
|
||||
|
||||
struct rtm_stat
|
||||
{
|
||||
uint32_t tdp_ack_cnt;
|
||||
uint32_t tdp_ack_bytes;
|
||||
uint32_t ack_upd_cnt;
|
||||
uint32_t ack_upd_bytes;
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
uint32_t sack_cnt;
|
||||
uint32_t sack_bytes;
|
||||
uint32_t tdp_sack_cnt;
|
||||
uint32_t tdp_sack_bytes;
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
uint32_t rto_cnt;
|
||||
uint32_t rto_bytes;
|
||||
};
|
||||
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
struct sack_entry
|
||||
{
|
||||
uint32_t left_edge;
|
||||
uint32_t right_edge;
|
||||
uint32_t expire;
|
||||
};
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
|
||||
struct tcp_recv_vars
|
||||
{
|
||||
/* receiver variables */
|
||||
uint32_t rcv_wnd; /* receive window (unscaled) */
|
||||
//uint32_t rcv_up; /* receive urgent pointer */
|
||||
uint32_t irs; /* initial receiving sequence */
|
||||
uint32_t snd_wl1; /* segment seq number for last window update */
|
||||
uint32_t snd_wl2; /* segment ack number for last window update */
|
||||
|
||||
/* variables for fast retransmission */
|
||||
uint32_t last_ack_seq; /* highest ackd seq */
|
||||
uint8_t dup_acks; /* number of duplicated acks */
|
||||
|
||||
/* timestamps */
|
||||
uint32_t ts_recent; /* recent peer timestamp */
|
||||
uint32_t ts_lastack_rcvd; /* last ack rcvd time */
|
||||
uint32_t ts_last_ts_upd; /* last peer ts update time */
|
||||
uint32_t ts_tw_expire; // timestamp for timewait expire
|
||||
|
||||
/* RTT estimation variables */
|
||||
uint32_t srtt; /* smoothed round trip time << 3 (scaled) */
|
||||
uint32_t mdev; /* medium deviation */
|
||||
uint32_t mdev_max; /* maximal mdev ffor the last rtt period */
|
||||
uint32_t rttvar; /* smoothed mdev_max */
|
||||
uint32_t rtt_seq; /* sequence number to update rttvar */
|
||||
|
||||
#if TCP_OPT_SACK_ENABLED /* currently not used */
|
||||
#define MAX_SACK_ENTRY 8
|
||||
struct sack_entry sack_table[MAX_SACK_ENTRY];
|
||||
uint8_t sacks:3;
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
|
||||
struct tcp_ring_buffer *rcvbuf;
|
||||
#if USE_SPIN_LOCK
|
||||
pthread_spinlock_t read_lock;
|
||||
#else
|
||||
pthread_mutex_t read_lock;
|
||||
#endif
|
||||
|
||||
TAILQ_ENTRY(tcp_stream) he_link; /* hash table entry link */
|
||||
|
||||
#if BLOCKING_SUPPORT
|
||||
TAILQ_ENTRY(tcp_stream) rcv_br_link;
|
||||
pthread_cond_t read_cond;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct tcp_send_vars
|
||||
{
|
||||
/* IP-level information */
|
||||
uint16_t ip_id;
|
||||
|
||||
uint16_t mss; /* maximum segment size */
|
||||
uint16_t eff_mss; /* effective segment size (excluding tcp option) */
|
||||
|
||||
uint8_t wscale; /* window scale */
|
||||
int8_t nif_out; /* cached output network interface */
|
||||
unsigned char *d_haddr; /* cached destination MAC address */
|
||||
|
||||
/* send sequence variables */
|
||||
uint32_t snd_una; /* send unacknoledged */
|
||||
uint32_t snd_wnd; /* send window (unscaled) */
|
||||
uint32_t peer_wnd; /* client window size */
|
||||
//uint32_t snd_up; /* send urgent pointer (not used) */
|
||||
uint32_t iss; /* initial sending sequence */
|
||||
uint32_t fss; /* final sending sequence */
|
||||
|
||||
/* retransmission timeout variables */
|
||||
uint8_t nrtx; /* number of retransmission */
|
||||
uint8_t max_nrtx; /* max number of retransmission */
|
||||
uint32_t rto; /* retransmission timeout */
|
||||
uint32_t ts_rto; /* timestamp for retransmission timeout */
|
||||
|
||||
/* congestion control variables */
|
||||
uint32_t cwnd; /* congestion window */
|
||||
uint32_t ssthresh; /* slow start threshold */
|
||||
|
||||
/* timestamp */
|
||||
uint32_t ts_lastack_sent; /* last ack sent time */
|
||||
|
||||
uint8_t is_wack:1, /* is ack for window adertisement? */
|
||||
ack_cnt:6; /* number of acks to send. max 64 */
|
||||
|
||||
uint8_t on_control_list;
|
||||
uint8_t on_send_list;
|
||||
uint8_t on_ack_list;
|
||||
uint8_t on_sendq;
|
||||
uint8_t on_ackq;
|
||||
uint8_t on_closeq;
|
||||
uint8_t on_resetq;
|
||||
|
||||
uint8_t on_closeq_int:1,
|
||||
on_resetq_int:1,
|
||||
is_fin_sent:1,
|
||||
is_fin_ackd:1;
|
||||
|
||||
TAILQ_ENTRY(tcp_stream) control_link;
|
||||
TAILQ_ENTRY(tcp_stream) send_link;
|
||||
TAILQ_ENTRY(tcp_stream) ack_link;
|
||||
|
||||
TAILQ_ENTRY(tcp_stream) timer_link; /* timer link (rto list, tw list) */
|
||||
TAILQ_ENTRY(tcp_stream) timeout_link; /* connection timeout link */
|
||||
|
||||
struct tcp_send_buffer *sndbuf;
|
||||
#if USE_SPIN_LOCK
|
||||
pthread_spinlock_t write_lock;
|
||||
#else
|
||||
pthread_mutex_t write_lock;
|
||||
#endif
|
||||
|
||||
#if RTM_STAT
|
||||
struct rtm_stat rstat; /* retransmission statistics */
|
||||
#endif
|
||||
|
||||
#if BLOCKING_SUPPORT
|
||||
TAILQ_ENTRY(tcp_stream) snd_br_link;
|
||||
pthread_cond_t write_cond;
|
||||
#endif
|
||||
};
|
||||
|
||||
typedef struct tcp_stream
|
||||
{
|
||||
socket_map_t socket;
|
||||
|
||||
uint32_t id:24,
|
||||
stream_type:8;
|
||||
|
||||
uint32_t saddr; /* in network order */
|
||||
uint32_t daddr; /* in network order */
|
||||
uint16_t sport; /* in network order */
|
||||
uint16_t dport; /* in network order */
|
||||
|
||||
uint8_t state; /* tcp state */
|
||||
uint8_t close_reason; /* close reason */
|
||||
uint8_t on_hash_table;
|
||||
uint8_t on_timewait_list;
|
||||
uint8_t ht_idx;
|
||||
uint8_t closed;
|
||||
uint8_t is_bound_addr;
|
||||
uint8_t need_wnd_adv;
|
||||
int16_t on_rto_idx;
|
||||
|
||||
uint16_t on_timeout_list:1,
|
||||
on_rcv_br_list:1,
|
||||
on_snd_br_list:1,
|
||||
saw_timestamp:1, /* whether peer sends timestamp */
|
||||
sack_permit:1, /* whether peer permits SACK */
|
||||
control_list_waiting:1,
|
||||
have_reset:1;
|
||||
|
||||
uint32_t snd_nxt; /* send next */
|
||||
uint32_t rcv_nxt; /* receive next */
|
||||
|
||||
struct tcp_recv_vars *rcvvar;
|
||||
struct tcp_send_vars *sndvar;
|
||||
|
||||
uint32_t last_active_ts; /* ts_last_ack_sent or ts_last_ts_upd */
|
||||
|
||||
} tcp_stream;
|
||||
|
||||
inline char *
|
||||
TCPStateToString(const tcp_stream *cur_stream);
|
||||
|
||||
unsigned int
|
||||
HashFlow(const tcp_stream *flow);
|
||||
|
||||
int
|
||||
EqualFlow(const tcp_stream *flow1, const tcp_stream *flow2);
|
||||
|
||||
inline int
|
||||
AddEpollEvent(struct mtcp_epoll *ep,
|
||||
int queue_type, socket_map_t socket, uint32_t event);
|
||||
|
||||
inline void
|
||||
RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream);
|
||||
|
||||
inline void
|
||||
RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream);
|
||||
|
||||
inline void
|
||||
RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream);
|
||||
|
||||
inline void
|
||||
RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream);
|
||||
|
||||
tcp_stream *
|
||||
CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type,
|
||||
uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport);
|
||||
|
||||
void
|
||||
DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream);
|
||||
|
||||
void
|
||||
DumpStream(mtcp_manager_t mtcp, tcp_stream *stream);
|
||||
|
||||
#endif /* __TCP_STREAM_H_ */
|
|
@ -0,0 +1,78 @@
|
|||
#ifndef __TCP_STREAM_QUEUE_
|
||||
#define __TCP_STREAM_QUEUE_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Lock definitions for stream queue.
 *
 * When LOCK_STREAM_QUEUE is enabled the SQ_* macros map onto pthread spin
 * locks (USE_SPIN_LOCK) or pthread mutexes; otherwise they are no-ops.
 *
 * SQ_LOCK_INIT(lock, errmsg, action):
 *   lock   - pointer to the lock object to initialize
 *   errmsg - string LITERAL appended to the perror() prefix
 *   action - statement executed when initialization fails
 *            (e.g. `return NULL`)
 * It is wrapped in do { } while (0) so that it behaves as one statement
 * after `if`/`else`, and it no longer carries a stray `;` in its expansion.
 * Every SQ_* macro must be followed by a semicolon at the call site.
 */
#if LOCK_STREAM_QUEUE

#if USE_SPIN_LOCK
#define SQ_LOCK_INIT(lock, errmsg, action) \
	do { \
		if (pthread_spin_init(lock, PTHREAD_PROCESS_PRIVATE)) { \
			perror("pthread_spin_init" errmsg); \
			action; \
		} \
	} while (0)
#define SQ_LOCK_DESTROY(lock)	pthread_spin_destroy(lock)
#define SQ_LOCK(lock)			pthread_spin_lock(lock)
#define SQ_UNLOCK(lock)			pthread_spin_unlock(lock)
#else
#define SQ_LOCK_INIT(lock, errmsg, action) \
	do { \
		if (pthread_mutex_init(lock, NULL)) { \
			perror("pthread_mutex_init" errmsg); \
			action; \
		} \
	} while (0)
#define SQ_LOCK_DESTROY(lock)	pthread_mutex_destroy(lock)
#define SQ_LOCK(lock)			pthread_mutex_lock(lock)
#define SQ_UNLOCK(lock)			pthread_mutex_unlock(lock)
#endif /* USE_SPIN_LOCK */

#else /* LOCK_STREAM_QUEUE */
#define SQ_LOCK_INIT(lock, errmsg, action)	(void) 0
#define SQ_LOCK_DESTROY(lock)	(void) 0
#define SQ_LOCK(lock)			(void) 0
#define SQ_UNLOCK(lock)			(void) 0
#endif /* LOCK_STREAM_QUEUE */
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
typedef struct stream_queue* stream_queue_t;
|
||||
/*---------------------------------------------------------------------------*/
|
||||
typedef struct stream_queue_int
|
||||
{
|
||||
struct tcp_stream **array;
|
||||
int size;
|
||||
|
||||
int first;
|
||||
int last;
|
||||
int count;
|
||||
|
||||
} stream_queue_int;
|
||||
/*---------------------------------------------------------------------------*/
|
||||
stream_queue_int *
|
||||
CreateInternalStreamQueue(int size);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyInternalStreamQueue(stream_queue_int *sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
StreamInternalEnqueue(stream_queue_int *sq, struct tcp_stream *stream);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct tcp_stream *
|
||||
StreamInternalDequeue(stream_queue_int *sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
stream_queue_t
|
||||
CreateStreamQueue(int size);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyStreamQueue(stream_queue_t sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
StreamEnqueue(stream_queue_t sq, struct tcp_stream *stream);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct tcp_stream *
|
||||
StreamDequeue(stream_queue_t sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
StreamQueueIsEmpty(stream_queue_t sq);
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
#endif /* __TCP_STREAM_QUEUE_ */
|
|
@ -0,0 +1,32 @@
|
|||
#ifndef __TCP_UTIL_H_
|
||||
#define __TCP_UTIL_H_
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "tcp_stream.h"
|
||||
|
||||
struct tcp_timestamp
|
||||
{
|
||||
uint32_t ts_val;
|
||||
uint32_t ts_ref;
|
||||
};
|
||||
|
||||
void ParseTCPOptions(tcp_stream *cur_stream,
|
||||
uint32_t cur_ts, uint8_t *tcpopt, int len);
|
||||
|
||||
inline int
|
||||
ParseTCPTimestamp(tcp_stream *cur_stream,
|
||||
struct tcp_timestamp *ts, uint8_t *tcpopt, int len);
|
||||
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
void
|
||||
ParseSACKOption(tcp_stream *cur_stream,
|
||||
uint32_t ack_seq, uint8_t *tcpopt, int len);
|
||||
#endif
|
||||
|
||||
uint16_t
|
||||
TCPCalcChecksum(uint16_t *buf, uint16_t len, uint32_t saddr, uint32_t daddr);
|
||||
|
||||
void
|
||||
PrintTCPOptions(uint8_t *tcpopt, int len);
|
||||
|
||||
#endif /* __TCP_UTIL_H_ */
|
|
@ -0,0 +1,54 @@
|
|||
#ifndef __TIMER_H_
|
||||
#define __TIMER_H_
|
||||
|
||||
#include "mtcp.h"
|
||||
#include "tcp_stream.h"
|
||||
|
||||
#define RTO_HASH 3000
|
||||
|
||||
struct rto_hashstore
|
||||
{
|
||||
uint32_t rto_now_idx; // pointing the hs_table_s index
|
||||
uint32_t rto_now_ts; //
|
||||
|
||||
TAILQ_HEAD(rto_head , tcp_stream) rto_list[RTO_HASH+1];
|
||||
};
|
||||
|
||||
struct rto_hashstore*
|
||||
InitRTOHashstore();
|
||||
|
||||
inline void
|
||||
AddtoRTOList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
RemoveFromRTOList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
AddtoTimewaitList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts);
|
||||
|
||||
inline void
|
||||
RemoveFromTimewaitList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
AddtoTimeoutList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
RemoveFromTimeoutList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
UpdateTimeoutList(mtcp_manager_t mtcp, tcp_stream *cur_stream);
|
||||
|
||||
inline void
|
||||
UpdateRetransmissionTimer(mtcp_manager_t mtcp,
|
||||
tcp_stream *cur_stream, uint32_t cur_ts);
|
||||
|
||||
void
|
||||
CheckRtmTimeout(mtcp_manager_t mtcp, uint32_t cur_ts, int thresh);
|
||||
|
||||
void
|
||||
CheckTimewaitExpire(mtcp_manager_t mtcp, uint32_t cur_ts, int thresh);
|
||||
|
||||
void
|
||||
CheckConnectionTimeout(mtcp_manager_t mtcp, uint32_t cur_ts, int thresh);
|
||||
|
||||
#endif /* __TIMER_H_ */
|
|
@ -0,0 +1,56 @@
|
|||
#include <string.h>
|
||||
#include <netinet/ip.h>
|
||||
|
||||
#include "ip_in.h"
|
||||
#include "tcp_in.h"
|
||||
#include "mtcp_api.h"
|
||||
#include "ps.h"
|
||||
#include "debug.h"
|
||||
|
||||
#define ETH_P_IP_FRAG 0xF800
|
||||
#define ETH_P_IPV6_FRAG 0xF6DD
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
ProcessIPv4Packet(mtcp_manager_t mtcp, uint32_t cur_ts,
|
||||
const int ifidx, unsigned char* pkt_data, int len)
|
||||
{
|
||||
/* check and process IPv4 packets */
|
||||
struct iphdr* iph = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
|
||||
int ip_len = ntohs(iph->tot_len);
|
||||
|
||||
/* drop the packet shorter than ip header */
|
||||
if (ip_len < sizeof(struct iphdr))
|
||||
return ERROR;
|
||||
|
||||
if (ip_fast_csum(iph, iph->ihl))
|
||||
return ERROR;
|
||||
|
||||
#if !PROMISCUOUS_MODE
|
||||
/* if not promiscuous mode, drop if the destination is not myself */
|
||||
if (iph->daddr != CONFIG.eths[ifidx].ip_addr)
|
||||
//DumpIPPacketToFile(stderr, iph, ip_len);
|
||||
return TRUE;
|
||||
#endif
|
||||
|
||||
// see if the version is correct
|
||||
if (iph->version != 0x4 ) {
|
||||
struct ps_packet packet;
|
||||
packet.ifindex = ifidx;
|
||||
packet.len = len;
|
||||
packet.buf = (char *)pkt_data;
|
||||
ps_slowpath_packet(mtcp->ctx->handle, &packet);
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
switch (iph->protocol) {
|
||||
case IPPROTO_TCP:
|
||||
return ProcessTCPPacket(mtcp, cur_ts, iph, ip_len);
|
||||
default:
|
||||
/* currently drop other protocols */
|
||||
return FALSE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,129 @@
|
|||
#include "ip_out.h"
|
||||
#include "ip_in.h"
|
||||
#include "eth_out.h"
|
||||
#include "arp.h"
|
||||
#include "debug.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
GetOutputInterface(uint32_t daddr)
|
||||
{
|
||||
int nif = -1;
|
||||
int i;
|
||||
int prefix = 0;
|
||||
|
||||
/* Longest prefix matching */
|
||||
for (i = 0; i < CONFIG.routes; i++) {
|
||||
if ((daddr & CONFIG.rtable[i].mask) == CONFIG.rtable[i].masked) {
|
||||
if (CONFIG.rtable[i].prefix > prefix) {
|
||||
nif = CONFIG.rtable[i].nif;
|
||||
prefix = CONFIG.rtable[i].prefix;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nif < 0) {
|
||||
uint8_t *da = (uint8_t *)&daddr;
|
||||
TRACE_ERROR("[WARNING] No route to %u.%u.%u.%u\n",
|
||||
da[0], da[1], da[2], da[3]);
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return nif;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint8_t *
|
||||
IPOutputStandalone(struct mtcp_manager *mtcp,
|
||||
uint16_t ip_id, uint32_t saddr, uint32_t daddr, uint16_t tcplen)
|
||||
{
|
||||
struct iphdr *iph;
|
||||
int nif;
|
||||
unsigned char * haddr;
|
||||
|
||||
nif = GetOutputInterface(daddr);
|
||||
if (nif < 0)
|
||||
return NULL;
|
||||
|
||||
haddr = GetDestinationHWaddr(daddr);
|
||||
if (!haddr) {
|
||||
#if 0
|
||||
uint8_t *da = (uint8_t *)&daddr;
|
||||
TRACE_INFO("[WARNING] The destination IP %u.%u.%u.%u "
|
||||
"is not in ARP table!\n",
|
||||
da[0], da[1], da[2], da[3]);
|
||||
#endif
|
||||
RequestARP(mtcp, daddr, nif, mtcp->cur_ts);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iph = (struct iphdr *)EthernetOutput(mtcp,
|
||||
ETH_P_IP, nif, haddr, tcplen + IP_HEADER_LEN);
|
||||
if (!iph) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iph->ihl = IP_HEADER_LEN >> 2;
|
||||
iph->version = 4;
|
||||
iph->tos = 0;
|
||||
iph->tot_len = htons(IP_HEADER_LEN + tcplen);
|
||||
iph->id = htons(ip_id);
|
||||
iph->frag_off = htons(0x4000); // no fragmentation
|
||||
iph->ttl = 64;
|
||||
iph->protocol = IPPROTO_TCP;
|
||||
iph->saddr = saddr;
|
||||
iph->daddr = daddr;
|
||||
iph->check = 0;
|
||||
iph->check = ip_fast_csum(iph, iph->ihl);
|
||||
|
||||
return (uint8_t *)(iph + 1);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint8_t *
|
||||
IPOutput(struct mtcp_manager *mtcp, tcp_stream *stream, uint16_t tcplen)
|
||||
{
|
||||
struct iphdr *iph;
|
||||
int nif;
|
||||
unsigned char *haddr;
|
||||
|
||||
if (stream->sndvar->nif_out >= 0) {
|
||||
nif = stream->sndvar->nif_out;
|
||||
} else {
|
||||
nif = GetOutputInterface(stream->daddr);
|
||||
stream->sndvar->nif_out = nif;
|
||||
}
|
||||
|
||||
haddr = GetDestinationHWaddr(stream->daddr);
|
||||
if (!haddr) {
|
||||
#if 0
|
||||
uint8_t *da = (uint8_t *)&stream->daddr;
|
||||
TRACE_INFO("[WARNING] The destination IP %u.%u.%u.%u "
|
||||
"is not in ARP table!\n",
|
||||
da[0], da[1], da[2], da[3]);
|
||||
#endif
|
||||
/* if not found in the arp table, send arp request and return NULL */
|
||||
/* tcp will retry sending the packet later */
|
||||
RequestARP(mtcp, stream->daddr, stream->sndvar->nif_out, mtcp->cur_ts);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iph = (struct iphdr *)EthernetOutput(mtcp, ETH_P_IP,
|
||||
stream->sndvar->nif_out, haddr, tcplen + IP_HEADER_LEN);
|
||||
if (!iph) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iph->ihl = IP_HEADER_LEN >> 2;
|
||||
iph->version = 4;
|
||||
iph->tos = 0;
|
||||
iph->tot_len = htons(IP_HEADER_LEN + tcplen);
|
||||
iph->id = htons(stream->sndvar->ip_id++);
|
||||
iph->frag_off = htons(0x4000); // no fragmentation
|
||||
iph->ttl = 64;
|
||||
iph->protocol = IPPROTO_TCP;
|
||||
iph->saddr = stream->saddr;
|
||||
iph->daddr = stream->daddr;
|
||||
iph->check = 0;
|
||||
iph->check = ip_fast_csum(iph, iph->ihl);
|
||||
|
||||
return (uint8_t *)(iph + 1);
|
||||
}
|
|
@ -0,0 +1,170 @@
|
|||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sys/queue.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <pthread.h>
|
||||
#include "cpu.h"
|
||||
#include "debug.h"
|
||||
#include "logger.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static void
|
||||
EnqueueFreeBuffer(log_thread_context *ctx, log_buff *free_bp)
|
||||
{
|
||||
pthread_mutex_lock(&ctx->free_mutex);
|
||||
TAILQ_INSERT_TAIL(&ctx->free_queue, free_bp, buff_link);
|
||||
ctx->free_buff_cnt++;
|
||||
|
||||
assert(ctx->free_buff_cnt <= NUM_LOG_BUFF);
|
||||
assert(ctx->free_buff_cnt + ctx->job_buff_cnt <= NUM_LOG_BUFF);
|
||||
pthread_mutex_unlock(&ctx->free_mutex);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
log_buff*
|
||||
DequeueFreeBuffer(log_thread_context *ctx)
|
||||
{
|
||||
pthread_mutex_lock(&ctx->free_mutex);
|
||||
log_buff *free_bp = TAILQ_FIRST(&ctx->free_queue);
|
||||
if (free_bp) {
|
||||
TAILQ_REMOVE(&ctx->free_queue, free_bp, buff_link);
|
||||
ctx->free_buff_cnt--;
|
||||
}
|
||||
|
||||
assert(ctx->free_buff_cnt >= 0);
|
||||
assert(ctx->free_buff_cnt + ctx->job_buff_cnt <= NUM_LOG_BUFF);
|
||||
pthread_mutex_unlock(&ctx->free_mutex);
|
||||
return (free_bp);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
EnqueueJobBuffer(log_thread_context *ctx, log_buff *working_bp)
|
||||
{
|
||||
TAILQ_INSERT_TAIL(&ctx->working_queue, working_bp, buff_link);
|
||||
ctx->job_buff_cnt++;
|
||||
ctx->state = ACTIVE_LOGT;
|
||||
assert(ctx->job_buff_cnt <= NUM_LOG_BUFF);
|
||||
if (ctx->free_buff_cnt + ctx->job_buff_cnt > NUM_LOG_BUFF) {
|
||||
TRACE_ERROR("free_buff_cnt(%d) + job_buff_cnt(%d) > NUM_LOG_BUFF(%d)\n",
|
||||
ctx->free_buff_cnt, ctx->job_buff_cnt, NUM_LOG_BUFF);
|
||||
}
|
||||
assert(ctx->free_buff_cnt + ctx->job_buff_cnt <= NUM_LOG_BUFF);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static log_buff*
|
||||
DequeueJobBuffer(log_thread_context *ctx)
|
||||
{
|
||||
pthread_mutex_lock(&ctx->mutex);
|
||||
log_buff *working_bp = TAILQ_FIRST(&ctx->working_queue);
|
||||
if (working_bp) {
|
||||
TAILQ_REMOVE(&ctx->working_queue, working_bp, buff_link);
|
||||
ctx->job_buff_cnt--;
|
||||
} else {
|
||||
ctx->state = IDLE_LOGT;
|
||||
}
|
||||
|
||||
assert(ctx->job_buff_cnt >= 0);
|
||||
assert(ctx->free_buff_cnt + ctx->job_buff_cnt <= NUM_LOG_BUFF);
|
||||
pthread_mutex_unlock(&ctx->mutex);
|
||||
return (working_bp);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
InitLogThreadContext(struct log_thread_context *ctx, int cpu)
|
||||
{
|
||||
int i;
|
||||
int sv[2];
|
||||
|
||||
/* initialize log_thread_context */
|
||||
memset(ctx, 0, sizeof(struct log_thread_context));
|
||||
ctx->cpu = cpu;
|
||||
ctx->state = IDLE_LOGT;
|
||||
ctx->done = 0;
|
||||
|
||||
if (pipe(sv)) {
|
||||
fprintf(stderr, "pipe() failed, errno=%d, errstr=%s\n",
|
||||
errno, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
ctx->sp_fd = sv[0];
|
||||
ctx->pair_sp_fd = sv[1];
|
||||
|
||||
pthread_mutex_init(&ctx->mutex, NULL);
|
||||
pthread_mutex_init(&ctx->free_mutex, NULL);
|
||||
|
||||
TAILQ_INIT(&ctx->working_queue);
|
||||
TAILQ_INIT(&ctx->free_queue);
|
||||
|
||||
/* initialize free log_buff */
|
||||
log_buff *w_buff = malloc(sizeof(log_buff) * NUM_LOG_BUFF);
|
||||
assert(w_buff);
|
||||
for (i = 0; i < NUM_LOG_BUFF; i++) {
|
||||
EnqueueFreeBuffer(ctx, &w_buff[i]);
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void *
|
||||
ThreadLogMain(void* arg)
|
||||
{
|
||||
size_t len;
|
||||
log_thread_context* ctx = (log_thread_context *) arg;
|
||||
log_buff* w_buff;
|
||||
int cnt;
|
||||
|
||||
mtcp_core_affinitize(ctx->cpu);
|
||||
//fprintf(stderr, "[CPU %d] Log thread created. thread: %lu\n",
|
||||
// ctx->cpu, pthread_self());
|
||||
|
||||
TRACE_LOG("Log thread %d is starting.\n", ctx->cpu);
|
||||
|
||||
while (!ctx->done) {
|
||||
/* handle every jobs in job buffer*/
|
||||
cnt = 0;
|
||||
while ((w_buff = DequeueJobBuffer(ctx))){
|
||||
if (++cnt > NUM_LOG_BUFF) {
|
||||
TRACE_ERROR("CPU %d: Exceed NUM_LOG_BUFF %d.\n",
|
||||
ctx->cpu, cnt);
|
||||
break;
|
||||
}
|
||||
len = fwrite(w_buff->buff, 1, w_buff->buff_len, w_buff->fid);
|
||||
if (len != w_buff->buff_len) {
|
||||
TRACE_ERROR("CPU %d: Tried to write %d, but only write %ld\n",
|
||||
ctx->cpu, w_buff->buff_len, len);
|
||||
}
|
||||
//assert(len == w_buff->buff_len);
|
||||
EnqueueFreeBuffer(ctx, w_buff);
|
||||
}
|
||||
|
||||
/* */
|
||||
while (ctx->state == IDLE_LOGT && !ctx->done) {
|
||||
char temp[1];
|
||||
int ret = read(ctx->sp_fd, temp, 1);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
TRACE_LOG("Log thread %d out of first loop.\n", ctx->cpu);
|
||||
/* handle every jobs in job buffer*/
|
||||
cnt = 0;
|
||||
while ((w_buff = DequeueJobBuffer(ctx))){
|
||||
if (++cnt > NUM_LOG_BUFF) {
|
||||
TRACE_ERROR("CPU %d: "
|
||||
"Exceed NUM_LOG_BUFF %d in final loop.\n", ctx->cpu, cnt);
|
||||
break;
|
||||
}
|
||||
len = fwrite(w_buff->buff, 1, w_buff->buff_len, w_buff->fid);
|
||||
assert(len == w_buff->buff_len);
|
||||
EnqueueFreeBuffer(ctx, w_buff);
|
||||
}
|
||||
|
||||
TRACE_LOG("Log thread %d finished.\n", ctx->cpu);
|
||||
pthread_exit(NULL);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,187 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#ifdef HUGETABLE
|
||||
#include <hugetlbfs.h>
|
||||
#endif
|
||||
#include "debug.h"
|
||||
#include "memory_mgt.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/*
 * Header stored at the start of the first chunk of every free run.
 * The pool's free list is a singly linked list of these headers.
 */
typedef struct tag_mem_chunk
{
	int mc_free_chunks;             /* free chunks in the run starting here */
	struct tag_mem_chunk *mc_next;  /* next free run, or NULL */
} mem_chunk;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* handle type used for entries of the free list */
typedef mem_chunk *mem_chunk_t;
#ifdef HUGETABLE
/*
 * Backing-memory kinds stored in mem_pool.mp_type.
 * Declared as a plain enum: the former "typedef enum {...};" named no
 * type, so the typedef keyword had no effect.
 */
enum { MEM_NORMAL, MEM_HUGEPAGE };
#endif
|
||||
/*----------------------------------------------------------------------------*/
|
||||
typedef struct mem_pool
|
||||
{
|
||||
u_char *mp_startptr; /* start pointer */
|
||||
mem_chunk_t mp_freeptr; /* pointer to the start memory chunk */
|
||||
int mp_free_chunks; /* number of total free chunks */
|
||||
int mp_total_chunks; /* number of total free chunks */
|
||||
int mp_chunk_size; /* chunk size in bytes */
|
||||
int mp_type;
|
||||
|
||||
} mem_pool;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
mem_pool *
|
||||
MPCreate(int chunk_size, size_t total_size, int is_hugepage)
|
||||
{
|
||||
int res;
|
||||
mem_pool_t mp;
|
||||
|
||||
if (chunk_size < sizeof(mem_chunk)) {
|
||||
TRACE_ERROR("The chunk size should be larger than %lu. current: %d\n",
|
||||
sizeof(mem_chunk), chunk_size);
|
||||
return NULL;
|
||||
}
|
||||
if (chunk_size % 4 != 0) {
|
||||
TRACE_ERROR("The chunk size should be multiply of 4!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//assert(chunk_size <= 2*1024*1024);
|
||||
|
||||
if ((mp = calloc(1, sizeof(mem_pool))) == NULL) {
|
||||
perror("calloc failed");
|
||||
exit(0);
|
||||
}
|
||||
mp->mp_type = is_hugepage;
|
||||
mp->mp_chunk_size = chunk_size;
|
||||
mp->mp_free_chunks = ((total_size + (chunk_size -1))/chunk_size);
|
||||
mp->mp_total_chunks = mp->mp_free_chunks;
|
||||
total_size = chunk_size * ((size_t)mp->mp_free_chunks);
|
||||
|
||||
|
||||
/* allocate the big memory chunk */
|
||||
#ifdef HUGETABLE
|
||||
if (is_hugepage == MEM_HUGEPAGE) {
|
||||
mp->mp_startptr = get_huge_pages(total_size, NULL);
|
||||
if (!mp->mp_startptr) {
|
||||
TRACE_ERROR("posix_memalign failed, size=%ld\n", total_size);
|
||||
assert(0);
|
||||
if (mp) free(mp);
|
||||
return (NULL);
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
res = posix_memalign((void **)&mp->mp_startptr, getpagesize(), total_size);
|
||||
if (res != 0) {
|
||||
TRACE_ERROR("posix_memalign failed, size=%ld\n", total_size);
|
||||
assert(0);
|
||||
if (mp) free(mp);
|
||||
return (NULL);
|
||||
}
|
||||
#ifdef HUGETABLE
|
||||
}
|
||||
#endif
|
||||
|
||||
/* try mlock only for superuser */
|
||||
if (geteuid() == 0) {
|
||||
if (mlock(mp->mp_startptr, total_size) < 0)
|
||||
TRACE_ERROR("m_lock failed, size=%ld\n", total_size);
|
||||
}
|
||||
|
||||
mp->mp_freeptr = (mem_chunk_t)mp->mp_startptr;
|
||||
mp->mp_freeptr->mc_free_chunks = mp->mp_free_chunks;
|
||||
mp->mp_freeptr->mc_next = NULL;
|
||||
|
||||
return mp;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void *
|
||||
MPAllocateChunk(mem_pool_t mp)
|
||||
{
|
||||
mem_chunk_t p = mp->mp_freeptr;
|
||||
|
||||
if (mp->mp_free_chunks == 0)
|
||||
return (NULL);
|
||||
assert(p->mc_free_chunks > 0 && p->mc_free_chunks <= p->mc_free_chunks);
|
||||
|
||||
p->mc_free_chunks--;
|
||||
mp->mp_free_chunks--;
|
||||
if (p->mc_free_chunks) {
|
||||
/* move right by one chunk */
|
||||
mp->mp_freeptr = (mem_chunk_t)((u_char *)p + mp->mp_chunk_size);
|
||||
mp->mp_freeptr->mc_free_chunks = p->mc_free_chunks;
|
||||
mp->mp_freeptr->mc_next = p->mc_next;
|
||||
}
|
||||
else {
|
||||
mp->mp_freeptr = p->mc_next;
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
MPFreeChunk(mem_pool_t mp, void *p)
|
||||
{
|
||||
mem_chunk_t mcp = (mem_chunk_t)p;
|
||||
|
||||
// assert((u_char*)p >= mp->mp_startptr &&
|
||||
// (u_char *)p < mp->mp_startptr + mp->mp_total_size);
|
||||
assert(((u_char *)p - mp->mp_startptr) % mp->mp_chunk_size == 0);
|
||||
// assert(*((u_char *)p + (mp->mp_chunk_size-1)) == 'a');
|
||||
// *((u_char *)p + (mp->mp_chunk_size-1)) = 'f';
|
||||
|
||||
mcp->mc_free_chunks = 1;
|
||||
mcp->mc_next = mp->mp_freeptr;
|
||||
mp->mp_freeptr = mcp;
|
||||
mp->mp_free_chunks++;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
MPDestroy(mem_pool_t mp)
|
||||
{
|
||||
#ifdef HUGETABLE
|
||||
if(mp->mp_type == MEM_HUGEPAGE) {
|
||||
free_huge_pages(mp->mp_startptr);
|
||||
} else {
|
||||
#endif
|
||||
free(mp->mp_startptr);
|
||||
#ifdef HUGETABLE
|
||||
}
|
||||
#endif
|
||||
free(mp);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
MPGetFreeChunks(mem_pool_t mp)
|
||||
{
|
||||
return mp->mp_free_chunks;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint32_t
|
||||
MPIsDanger(mem_pool_t mp)
|
||||
{
|
||||
#define DANGER_THREASHOLD 0.95
|
||||
#define SAFE_THREASHOLD 0.90
|
||||
uint32_t danger_num = mp->mp_total_chunks * DANGER_THREASHOLD;
|
||||
uint32_t safe_num = mp->mp_total_chunks * SAFE_THREASHOLD;
|
||||
if (danger_num < mp->mp_total_chunks - mp->mp_free_chunks) {
|
||||
return mp->mp_total_chunks - mp->mp_free_chunks - safe_num;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint32_t
|
||||
MPIsOverSafeline(mem_pool_t mp)
|
||||
{
|
||||
#define SAFELINE 0.90
|
||||
uint32_t safe_num = mp->mp_total_chunks * SAFELINE;
|
||||
if (safe_num < mp->mp_total_chunks - mp->mp_free_chunks) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,418 @@
|
|||
#include <pthread.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "pipe.h"
|
||||
#include "eventpoll.h"
|
||||
#include "tcp_stream.h"
|
||||
#include "mtcp.h"
|
||||
#include "debug.h"
|
||||
|
||||
/* capacity in bytes of the ring buffer behind each pipe */
#define PIPE_BUF_SIZE 10240

/* note: both macros may evaluate an argument twice */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/* life cycle of a pipe as tracked in struct pipe.state */
enum pipe_state
{
	PIPE_CLOSED = 0,      /* value of a zero-initialized pipe */
	PIPE_ACTIVE = 1,      /* set by mtcp_pipe() once the pipe is usable */
	PIPE_CLOSE_WAIT = 2,  /* set by the first PipeClose() on the pipe */
};
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct pipe
|
||||
{
|
||||
int state;
|
||||
socket_map_t socket[2];
|
||||
|
||||
char *buf;
|
||||
int buf_off;
|
||||
int buf_tail;
|
||||
int buf_len;
|
||||
int buf_size;
|
||||
|
||||
pthread_mutex_t pipe_lock;
|
||||
pthread_cond_t pipe_cond;
|
||||
};
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
mtcp_pipe(mctx_t mctx, int pipeid[2])
|
||||
{
|
||||
socket_map_t socket[2];
|
||||
struct pipe *pp;
|
||||
int ret;
|
||||
|
||||
socket[0] = AllocateSocket(mctx, MTCP_SOCK_PIPE, FALSE);
|
||||
if (!socket[0]) {
|
||||
errno = ENFILE;
|
||||
return -1;
|
||||
}
|
||||
socket[1] = AllocateSocket(mctx, MTCP_SOCK_PIPE, FALSE);
|
||||
if (!socket[1]) {
|
||||
FreeSocket(mctx, socket[0]->id, FALSE);
|
||||
errno = ENFILE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pp = (struct pipe *)calloc(1, sizeof(struct pipe));
|
||||
if (!pp) {
|
||||
/* errno set by calloc() */
|
||||
FreeSocket(mctx, socket[0]->id, FALSE);
|
||||
FreeSocket(mctx, socket[1]->id, FALSE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pp->buf_size = PIPE_BUF_SIZE;
|
||||
pp->buf = (char *)malloc(pp->buf_size);
|
||||
if (!pp->buf) {
|
||||
/* errno set by malloc() */
|
||||
FreeSocket(mctx, socket[0]->id, FALSE);
|
||||
FreeSocket(mctx, socket[1]->id, FALSE);
|
||||
free(pp);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = pthread_mutex_init(&pp->pipe_lock, NULL);
|
||||
if (ret) {
|
||||
/* errno set by pthread_mutex_init() */
|
||||
FreeSocket(mctx, socket[0]->id, FALSE);
|
||||
FreeSocket(mctx, socket[1]->id, FALSE);
|
||||
free(pp->buf);
|
||||
free(pp);
|
||||
return -1;
|
||||
|
||||
}
|
||||
ret = pthread_cond_init(&pp->pipe_cond, NULL);
|
||||
if (ret) {
|
||||
/* errno set by pthread_cond_init() */
|
||||
FreeSocket(mctx, socket[0]->id, FALSE);
|
||||
FreeSocket(mctx, socket[1]->id, FALSE);
|
||||
free(pp->buf);
|
||||
free(pp);
|
||||
pthread_mutex_destroy(&pp->pipe_lock);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pp->state = PIPE_ACTIVE;
|
||||
pp->socket[0] = socket[0];
|
||||
pp->socket[1] = socket[1];
|
||||
socket[0]->pp = pp;
|
||||
socket[1]->pp = pp;
|
||||
|
||||
pipeid[0] = socket[0]->id;
|
||||
pipeid[1] = socket[1]->id;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static void
|
||||
RaiseEventToPair(mtcp_manager_t mtcp, socket_map_t socket, uint32_t event)
|
||||
{
|
||||
struct pipe *pp = socket->pp;
|
||||
socket_map_t pair_socket;
|
||||
|
||||
if (pp->socket[0] == socket)
|
||||
pair_socket = pp->socket[1];
|
||||
else
|
||||
pair_socket = pp->socket[0];
|
||||
|
||||
if (pair_socket->opts & MTCP_NONBLOCK) {
|
||||
if (pair_socket->epoll) {
|
||||
AddEpollEvent(mtcp->ep, USR_EVENT_QUEUE, pair_socket, event);
|
||||
}
|
||||
} else {
|
||||
pthread_cond_signal(&pp->pipe_cond);
|
||||
}
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
PipeRead(mctx_t mctx, int pipeid, char *buf, int len)
|
||||
{
|
||||
mtcp_manager_t mtcp;
|
||||
socket_map_t socket;
|
||||
struct pipe *pp;
|
||||
int to_read;
|
||||
int to_notify;
|
||||
int ret;
|
||||
|
||||
mtcp = GetMTCPManager(mctx);
|
||||
if (!mtcp) {
|
||||
return -1;
|
||||
}
|
||||
socket = GetSocket(mctx, pipeid);
|
||||
if (!socket) {
|
||||
return -1;
|
||||
}
|
||||
if (socket->socktype != MTCP_SOCK_PIPE) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
pp = socket->pp;
|
||||
if (!pp) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
if (pp->state == PIPE_CLOSED) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (pp->state == PIPE_CLOSE_WAIT && pp->buf_len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (len <= 0) {
|
||||
if (socket->opts & MTCP_NONBLOCK) {
|
||||
errno = EAGAIN;
|
||||
return -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&pp->pipe_lock);
|
||||
if (!(socket->opts & MTCP_NONBLOCK)) {
|
||||
while (pp->buf_len == 0) {
|
||||
ret = pthread_cond_wait(&pp->pipe_cond, &pp->pipe_lock);
|
||||
if (ret) {
|
||||
/* errno set by pthread_cond_wait() */
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
to_read = MIN(len, pp->buf_len);
|
||||
if (to_read <= 0) {
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
if (pp->state == PIPE_ACTIVE) {
|
||||
errno = EAGAIN;
|
||||
return -1;
|
||||
} else if (pp->state == PIPE_CLOSE_WAIT) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* if the buffer was full, notify the write event to the pair socket */
|
||||
to_notify = FALSE;
|
||||
if (pp->buf_len == pp->buf_size)
|
||||
to_notify = TRUE;
|
||||
|
||||
if (pp->buf_off + to_read < pp->buf_size) {
|
||||
memcpy(buf, pp->buf + pp->buf_off, to_read);
|
||||
pp->buf_off += to_read;
|
||||
} else {
|
||||
int temp_read = pp->buf_size - pp->buf_off;
|
||||
memcpy(buf, pp->buf + pp->buf_off, temp_read);
|
||||
memcpy(buf + temp_read, pp->buf, to_read - temp_read);
|
||||
pp->buf_off = to_read - temp_read;
|
||||
}
|
||||
pp->buf_len -= to_read;
|
||||
|
||||
/* notify to the pair socket for new buffer space */
|
||||
if (to_notify) {
|
||||
RaiseEventToPair(mtcp, socket, MTCP_EPOLLOUT);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
|
||||
/* if level triggered, raise event for remainig buffer */
|
||||
if (pp->buf_len > 0) {
|
||||
if ((socket->epoll & MTCP_EPOLLIN) && !(socket->epoll & MTCP_EPOLLET)) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
|
||||
}
|
||||
} else if (pp->state == PIPE_CLOSE_WAIT && pp->buf_len == 0) {
|
||||
AddEpollEvent(mtcp->ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
|
||||
}
|
||||
|
||||
return to_read;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
PipeWrite(mctx_t mctx, int pipeid, char *buf, int len)
|
||||
{
|
||||
mtcp_manager_t mtcp;
|
||||
socket_map_t socket;
|
||||
struct pipe *pp;
|
||||
int to_write;
|
||||
int to_notify;
|
||||
int ret;
|
||||
|
||||
mtcp = GetMTCPManager(mctx);
|
||||
if (!mtcp) {
|
||||
return -1;
|
||||
}
|
||||
socket = GetSocket(mctx, pipeid);
|
||||
if (!socket) {
|
||||
return -1;
|
||||
}
|
||||
if (socket->socktype != MTCP_SOCK_PIPE) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
pp = socket->pp;
|
||||
if (!pp) {
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
if (pp->state == PIPE_CLOSED) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (pp->state == PIPE_CLOSE_WAIT) {
|
||||
errno = EPIPE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (len <= 0) {
|
||||
if (socket->opts & MTCP_NONBLOCK) {
|
||||
errno = EAGAIN;
|
||||
return -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&pp->pipe_lock);
|
||||
if (!(socket->opts & MTCP_NONBLOCK)) {
|
||||
while (pp->buf_len == pp->buf_size) {
|
||||
ret = pthread_cond_wait(&pp->pipe_cond, &pp->pipe_lock);
|
||||
if (ret) {
|
||||
/* errno set by pthread_cond_wait() */
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
to_write = MIN(len, pp->buf_size - pp->buf_len);
|
||||
if (to_write <= 0) {
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
errno = EAGAIN;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* if the buffer was empty, notify read event to the pair socket */
|
||||
to_notify = FALSE;
|
||||
if (pp->buf_len == 0)
|
||||
to_notify = TRUE;
|
||||
|
||||
if (pp->buf_tail + to_write < pp->buf_size) {
|
||||
/* if the data fit into the buffer, copy it */
|
||||
memcpy(pp->buf + pp->buf_tail, buf, to_write);
|
||||
pp->buf_tail += to_write;
|
||||
} else {
|
||||
/* if the data overflow the buffer, wrap around the buffer */
|
||||
int temp_write = pp->buf_size - pp->buf_tail;
|
||||
memcpy(pp->buf + pp->buf_tail, buf, temp_write);
|
||||
memcpy(pp->buf, buf + temp_write, to_write - temp_write);
|
||||
pp->buf_tail = to_write - temp_write;
|
||||
}
|
||||
pp->buf_len += to_write;
|
||||
|
||||
/* notify to the pair socket for the new buffers */
|
||||
if (to_notify) {
|
||||
RaiseEventToPair(mtcp, socket, MTCP_EPOLLIN);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
|
||||
/* if level triggered, raise event for remainig buffer */
|
||||
if (pp->buf_len < pp->buf_size) {
|
||||
if ((socket->epoll & MTCP_EPOLLOUT) && !(socket->epoll & MTCP_EPOLLET)) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLOUT);
|
||||
}
|
||||
}
|
||||
|
||||
return to_write;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
RaisePendingPipeEvents(mctx_t mctx, int epid, int pipeid)
|
||||
{
|
||||
struct mtcp_epoll *ep = GetSocket(mctx, epid)->ep;
|
||||
socket_map_t socket = GetSocket(mctx, pipeid);
|
||||
struct pipe *pp = socket->pp;
|
||||
|
||||
if (!pp)
|
||||
return -1;
|
||||
if (pp->state < PIPE_ACTIVE)
|
||||
return -1;
|
||||
|
||||
/* if there are payloads already read before epoll registration */
|
||||
/* generate read event */
|
||||
if (socket->epoll & MTCP_EPOLLIN) {
|
||||
if (pp->buf_len > 0) {
|
||||
AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
|
||||
} else if (pp->state == PIPE_CLOSE_WAIT) {
|
||||
AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
|
||||
}
|
||||
}
|
||||
|
||||
/* same thing to the write event */
|
||||
if (socket->epoll & MTCP_EPOLLOUT) {
|
||||
if (pp->buf_len < pp->buf_size) {
|
||||
AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLOUT);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
PipeClose(mctx_t mctx, int pipeid)
|
||||
{
|
||||
mtcp_manager_t mtcp;
|
||||
socket_map_t socket;
|
||||
struct pipe *pp;
|
||||
|
||||
mtcp = GetMTCPManager(mctx);
|
||||
if (!mtcp) {
|
||||
return -1;
|
||||
}
|
||||
socket = GetSocket(mctx, pipeid);
|
||||
if (!socket) {
|
||||
return -1;
|
||||
}
|
||||
if (socket->socktype != MTCP_SOCK_PIPE) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
pp = socket->pp;
|
||||
if (!pp) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pp->state == PIPE_CLOSED) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&pp->pipe_lock);
|
||||
if (pp->state == PIPE_ACTIVE) {
|
||||
pp->state = PIPE_CLOSE_WAIT;
|
||||
RaiseEventToPair(mtcp, socket, MTCP_EPOLLIN);
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* control reaches here only when PIPE_CLOSE_WAIT */
|
||||
|
||||
if (pp->socket[0])
|
||||
pp->socket[0]->pp = NULL;
|
||||
if (pp->socket[1])
|
||||
pp->socket[1]->pp = NULL;
|
||||
|
||||
pthread_mutex_unlock(&pp->pipe_lock);
|
||||
|
||||
pthread_mutex_destroy(&pp->pipe_lock);
|
||||
pthread_cond_destroy(&pp->pipe_cond);
|
||||
|
||||
free(pp->buf);
|
||||
|
||||
free(pp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
|
@ -0,0 +1,102 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "rss.h"
|
||||
|
||||
/*-------------------------------------------------------------*/
|
||||
/*
 * Fill cache[0..cache_len-1] with successive 32-bit windows of the hash
 * key: cache[i] holds key bits i .. i+31, most significant bit first.
 */
static void
BuildKeyCache(uint32_t *cache, int cache_len)
{
#define NBBY 8 /* number of bits per byte */

	/* Keys for system testing */
	static const uint8_t key[] = {
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
		0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05
	};
	uint32_t window;
	uint32_t bitpos;
	int n;

	/* the first window is the first four key bytes, big-endian */
	window = ((uint32_t)key[0] << 24) | ((uint32_t)key[1] << 16) |
			((uint32_t)key[2] << 8) | (uint32_t)key[3];

	/* bitpos is the index of the next key bit to shift in */
	bitpos = 32;
	n = 0;
	while (n < cache_len) {
		const uint8_t shift = (bitpos % NBBY);

		cache[n] = window;

		/* slide the window left by one bit and append the next key bit */
		window <<= 1;
		if ((key[bitpos / NBBY] << shift) & 0x80)
			window |= 1;

		n++;
		bitpos++;
	}
}
|
||||
/*-------------------------------------------------------------*/
|
||||
/*
 * Compute the hash of a 4-tuple by XOR-ing together the cached key window
 * of every set input bit. Bits are consumed most significant first, in
 * the order sip (32), dip (32), sp (16), dp (16).
 * The key cache is built on the first call.
 */
static uint32_t
GetRSSHash(in_addr_t sip, in_addr_t dip, in_port_t sp, in_port_t dp)
{
#define MSB32 0x80000000
#define MSB16 0x8000
#define KEY_CACHE_LEN 96

	static uint32_t key_cache[KEY_CACHE_LEN] = {0};
	static int first = 1;
	const uint32_t *win = key_cache;
	uint32_t hash = 0;
	int bit;

	if (first) {
		BuildKeyCache(key_cache, KEY_CACHE_LEN);
		first = 0;
	}

	/* source address: windows 0..31 */
	for (bit = 0; bit < 32; bit++, sip <<= 1) {
		if (sip & MSB32)
			hash ^= win[bit];
	}
	win += 32;

	/* destination address: windows 32..63 */
	for (bit = 0; bit < 32; bit++, dip <<= 1) {
		if (dip & MSB32)
			hash ^= win[bit];
	}
	win += 32;

	/* source port: windows 64..79 */
	for (bit = 0; bit < 16; bit++, sp <<= 1) {
		if (sp & MSB16)
			hash ^= win[bit];
	}
	win += 16;

	/* destination port: windows 80..95 */
	for (bit = 0; bit < 16; bit++, dp <<= 1) {
		if (dp & MSB16)
			hash ^= win[bit];
	}

	return hash;
}
|
||||
/*-------------------------------------------------------------------*/
|
||||
/* RSS redirection table is in the little endian byte order (intel) */
|
||||
/* */
|
||||
/* idx: 0 1 2 3 | 4 5 6 7 | 8 9 10 11 | 12 13 14 15 | 16 17 18 19 ...*/
|
||||
/* val: 3 2 1 0 | 7 6 5 4 | 11 10 9 8 | 15 14 13 12 | 19 18 17 16 ...*/
|
||||
/* qid = val % num_queues */
|
||||
/*-------------------------------------------------------------------*/
|
||||
/*
 * Map a 4-tuple to a queue index.
 * The low 7 bits of the hash select a redirection-table index. Within
 * each group of four the index is mirrored (0<->3, 1<->2) and the result
 * is reduced modulo num_queues.
 */
int
GetRSSCPUCore(in_addr_t sip, in_addr_t dip,
		in_port_t sp, in_port_t dp, int num_queues)
{
#define RSS_BIT_MASK 0x0000007F

	uint32_t slot = GetRSSHash(sip, dip, sp, dp) & RSS_BIT_MASK;

	/* flipping the two low bits mirrors the position inside its group */
	slot ^= 0x3;

	return (slot % num_queues);
}
|
||||
/*-------------------------------------------------------------------*/
|
|
@ -0,0 +1,85 @@
|
|||
#include "mtcp.h"
|
||||
#include "socket.h"
|
||||
#include "debug.h"
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
socket_map_t
|
||||
AllocateSocket(mctx_t mctx, int socktype, int need_lock)
|
||||
{
|
||||
mtcp_manager_t mtcp = g_mtcp[mctx->cpu];
|
||||
socket_map_t socket = NULL;
|
||||
|
||||
if (need_lock)
|
||||
pthread_mutex_lock(&mtcp->ctx->smap_lock);
|
||||
|
||||
while (socket == NULL) {
|
||||
socket = TAILQ_FIRST(&mtcp->free_smap);
|
||||
if (!socket) {
|
||||
if (need_lock)
|
||||
pthread_mutex_unlock(&mtcp->ctx->smap_lock);
|
||||
|
||||
TRACE_ERROR("The concurrent sockets are at maximum.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
TAILQ_REMOVE(&mtcp->free_smap, socket, free_smap_link);
|
||||
|
||||
/* if there is not invalidated events, insert the socket to the end */
|
||||
/* and find another socket in the free smap list */
|
||||
if (socket->events) {
|
||||
TRACE_INFO("There are still not invalidate events remaining.\n");
|
||||
TRACE_DBG("There are still not invalidate events remaining.\n");
|
||||
TAILQ_INSERT_TAIL(&mtcp->free_smap, socket, free_smap_link);
|
||||
socket = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_lock)
|
||||
pthread_mutex_unlock(&mtcp->ctx->smap_lock);
|
||||
|
||||
socket->socktype = socktype;
|
||||
socket->opts = 0;
|
||||
socket->stream = NULL;
|
||||
socket->epoll = 0;
|
||||
socket->events = 0;
|
||||
|
||||
//memset(&socket->saddr, 0, sizeof(struct sockaddr_in));
|
||||
memset(&socket->ep_data, 0, sizeof(mtcp_epoll_data_t));
|
||||
|
||||
return socket;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
FreeSocket(mctx_t mctx, int sockid, int need_lock)
|
||||
{
|
||||
mtcp_manager_t mtcp = g_mtcp[mctx->cpu];
|
||||
socket_map_t socket = &mtcp->smap[sockid];
|
||||
|
||||
if (socket->socktype == MTCP_SOCK_UNUSED) {
|
||||
return;
|
||||
}
|
||||
|
||||
socket->socktype = MTCP_SOCK_UNUSED;
|
||||
socket->epoll = MTCP_EPOLLNONE;
|
||||
|
||||
if (need_lock)
|
||||
pthread_mutex_lock(&mtcp->ctx->smap_lock);
|
||||
|
||||
/* insert into free stream map */
|
||||
mtcp->smap[sockid].stream = NULL;
|
||||
TAILQ_INSERT_TAIL(&mtcp->free_smap, socket, free_smap_link);
|
||||
|
||||
if (need_lock)
|
||||
pthread_mutex_unlock(&mtcp->ctx->smap_lock);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
socket_map_t
|
||||
GetSocket(mctx_t mctx, int sockid)
|
||||
{
|
||||
if (sockid < 0 || sockid >= CONFIG.max_concurrency) {
|
||||
errno = EBADF;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return &g_mtcp[mctx->cpu]->smap[sockid];
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,917 @@
|
|||
#include <unistd.h>
|
||||
#include "tcp_out.h"
|
||||
#include "mtcp.h"
|
||||
#include "ip_out.h"
|
||||
#include "tcp_in.h"
|
||||
#include "tcp_stream.h"
|
||||
#include "eventpoll.h"
|
||||
#include "timer.h"
|
||||
#include "debug.h"
|
||||
|
||||
/* compile-time switches for the TCP output path */
#define TCP_CALCULATE_CHECKSUM TRUE
#define ACK_PIGGYBACK TRUE
#define TRY_SEND_BEFORE_QUEUE FALSE

/* upper bound applied to the window value written into the header */
#define TCP_MAX_WINDOW 65535

/* note: both macros may evaluate an argument twice */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline uint16_t
|
||||
CalculateOptionLength(uint8_t flags)
|
||||
{
|
||||
uint16_t optlen = 0;
|
||||
|
||||
if (flags & TCP_FLAG_SYN) {
|
||||
optlen += TCP_OPT_MSS_LEN;
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
optlen += TCP_OPT_SACK_PERMIT_LEN;
|
||||
#if !TCP_OPT_TIMESTAMP_ENABLED
|
||||
optlen += 2; // insert NOP padding
|
||||
#endif /* TCP_OPT_TIMESTAMP_ENABLED */
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
|
||||
#if TCP_OPT_TIMESTAMP_ENABLED
|
||||
optlen += TCP_OPT_TIMESTAMP_LEN;
|
||||
#if !TCP_OPT_SACK_ENABLED
|
||||
optlen += 2; // insert NOP padding
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
#endif /* TCP_OPT_TIMESTAMP_ENABLED */
|
||||
|
||||
optlen += TCP_OPT_WSCALE_LEN + 1;
|
||||
|
||||
} else {
|
||||
|
||||
#if TCP_OPT_TIMESTAMP_ENABLED
|
||||
optlen += TCP_OPT_TIMESTAMP_LEN + 2;
|
||||
#endif
|
||||
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
if (flags & TCP_FLAG_SACK) {
|
||||
optlen += TCP_OPT_SACK_LEN + 2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
assert(optlen % 4 == 0);
|
||||
|
||||
return optlen;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
GenerateTCPTimestamp(tcp_stream *cur_stream, uint8_t *tcpopt, uint32_t cur_ts)
|
||||
{
|
||||
uint32_t *ts = (uint32_t *)(tcpopt + 2);
|
||||
|
||||
tcpopt[0] = TCP_OPT_TIMESTAMP;
|
||||
tcpopt[1] = TCP_OPT_TIMESTAMP_LEN;
|
||||
ts[0] = htonl(cur_ts);
|
||||
ts[1] = htonl(cur_stream->rcvvar->ts_recent);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
GenerateTCPOptions(tcp_stream *cur_stream, uint32_t cur_ts,
|
||||
uint8_t flags, uint8_t *tcpopt, uint16_t optlen)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
if (flags & TCP_FLAG_SYN) {
|
||||
uint16_t mss;
|
||||
|
||||
/* MSS option */
|
||||
mss = cur_stream->sndvar->mss;
|
||||
tcpopt[i++] = TCP_OPT_MSS;
|
||||
tcpopt[i++] = TCP_OPT_MSS_LEN;
|
||||
tcpopt[i++] = mss >> 8;
|
||||
tcpopt[i++] = mss % 256;
|
||||
|
||||
/* SACK permit */
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
#if !TCP_OPT_TIMESTAMP_ENABLED
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
#endif /* TCP_OPT_TIMESTAMP_ENABLED */
|
||||
tcpopt[i++] = TCP_OPT_SACK_PERMIT;
|
||||
tcpopt[i++] = TCP_OPT_SACK_PERMIT_LEN;
|
||||
TRACE_SACK("Local SACK permited.\n");
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
|
||||
/* Timestamp */
|
||||
#if TCP_OPT_TIMESTAMP_ENABLED
|
||||
#if !TCP_OPT_SACK_ENABLED
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
|
||||
i += TCP_OPT_TIMESTAMP_LEN;
|
||||
#endif /* TCP_OPT_TIMESTAMP_ENABLED */
|
||||
|
||||
/* Window scale */
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
tcpopt[i++] = TCP_OPT_WSCALE;
|
||||
tcpopt[i++] = TCP_OPT_WSCALE_LEN;
|
||||
tcpopt[i++] = cur_stream->sndvar->wscale;
|
||||
|
||||
} else {
|
||||
|
||||
#if TCP_OPT_TIMESTAMP_ENABLED
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
tcpopt[i++] = TCP_OPT_NOP;
|
||||
GenerateTCPTimestamp(cur_stream, tcpopt + i, cur_ts);
|
||||
i += TCP_OPT_TIMESTAMP_LEN;
|
||||
#endif
|
||||
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
if (flags & TCP_OPT_SACK) {
|
||||
// TODO: implement SACK support
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
assert (i == optlen);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
SendTCPPacketStandalone(struct mtcp_manager *mtcp,
|
||||
uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport,
|
||||
uint32_t seq, uint32_t ack_seq, uint16_t window, uint8_t flags,
|
||||
uint8_t *payload, uint16_t payloadlen,
|
||||
uint32_t cur_ts, uint32_t echo_ts)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
uint8_t *tcpopt;
|
||||
uint32_t *ts;
|
||||
uint16_t optlen;
|
||||
|
||||
optlen = CalculateOptionLength(flags);
|
||||
if (payloadlen > TCP_DEFAULT_MSS + optlen) {
|
||||
TRACE_ERROR("Payload size exceeds MSS.\n");
|
||||
assert(0);
|
||||
return ERROR;
|
||||
}
|
||||
|
||||
tcph = (struct tcphdr *)IPOutputStandalone(mtcp, 0,
|
||||
saddr, daddr, TCP_HEADER_LEN + optlen + payloadlen);
|
||||
if (tcph == NULL) {
|
||||
return ERROR;
|
||||
}
|
||||
memset(tcph, 0, TCP_HEADER_LEN + optlen);
|
||||
|
||||
tcph->source = sport;
|
||||
tcph->dest = dport;
|
||||
|
||||
if (flags & TCP_FLAG_SYN)
|
||||
tcph->syn = TRUE;
|
||||
if (flags & TCP_FLAG_FIN)
|
||||
tcph->fin = TRUE;
|
||||
if (flags & TCP_FLAG_RST)
|
||||
tcph->rst = TRUE;
|
||||
if (flags & TCP_FLAG_PSH)
|
||||
tcph->psh = TRUE;
|
||||
|
||||
tcph->seq = htonl(seq);
|
||||
if (flags & TCP_FLAG_ACK) {
|
||||
tcph->ack = TRUE;
|
||||
tcph->ack_seq = htonl(ack_seq);
|
||||
}
|
||||
|
||||
tcph->window = htons(MIN(window, TCP_MAX_WINDOW));
|
||||
|
||||
tcpopt = (uint8_t *)tcph + TCP_HEADER_LEN;
|
||||
ts = (uint32_t *)(tcpopt + 4);
|
||||
|
||||
tcpopt[0] = TCP_OPT_NOP;
|
||||
tcpopt[1] = TCP_OPT_NOP;
|
||||
tcpopt[2] = TCP_OPT_TIMESTAMP;
|
||||
tcpopt[3] = TCP_OPT_TIMESTAMP_LEN;
|
||||
ts[0] = htonl(cur_ts);
|
||||
ts[1] = htonl(echo_ts);
|
||||
|
||||
tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
|
||||
// copy payload if exist
|
||||
if (payloadlen > 0) {
|
||||
memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
|
||||
}
|
||||
|
||||
#if TCP_CALCULATE_CHECKSUM
|
||||
tcph->check = TCPCalcChecksum((uint16_t *)tcph,
|
||||
TCP_HEADER_LEN + optlen + payloadlen, saddr, daddr);
|
||||
#endif
|
||||
|
||||
if (tcph->syn || tcph->fin) {
|
||||
payloadlen++;
|
||||
}
|
||||
|
||||
return payloadlen;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
SendTCPPacket(struct mtcp_manager *mtcp, tcp_stream *cur_stream,
|
||||
uint32_t cur_ts, uint8_t flags, uint8_t *payload, uint16_t payloadlen)
|
||||
{
|
||||
struct tcphdr *tcph;
|
||||
uint16_t optlen;
|
||||
uint8_t wscale = 0;
|
||||
uint32_t window32 = 0;
|
||||
|
||||
optlen = CalculateOptionLength(flags);
|
||||
if (payloadlen > cur_stream->sndvar->mss + optlen) {
|
||||
TRACE_ERROR("Payload size exceeds MSS\n");
|
||||
return ERROR;
|
||||
}
|
||||
|
||||
tcph = (struct tcphdr *)IPOutput(mtcp, cur_stream,
|
||||
TCP_HEADER_LEN + optlen + payloadlen);
|
||||
if (tcph == NULL) {
|
||||
return -2;
|
||||
}
|
||||
memset(tcph, 0, TCP_HEADER_LEN + optlen);
|
||||
|
||||
tcph->source = cur_stream->sport;
|
||||
tcph->dest = cur_stream->dport;
|
||||
|
||||
if (flags & TCP_FLAG_SYN) {
|
||||
tcph->syn = TRUE;
|
||||
if (cur_stream->snd_nxt != cur_stream->sndvar->iss) {
|
||||
TRACE_DBG("Stream %d: weird SYN sequence. "
|
||||
"snd_nxt: %u, iss: %u\n", cur_stream->id,
|
||||
cur_stream->snd_nxt, cur_stream->sndvar->iss);
|
||||
}
|
||||
#if 0
|
||||
TRACE_FIN("Stream %d: Sending SYN. seq: %u, ack_seq: %u\n",
|
||||
cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
|
||||
#endif
|
||||
}
|
||||
if (flags & TCP_FLAG_RST) {
|
||||
TRACE_FIN("Stream %d: Sending RST.\n", cur_stream->id);
|
||||
tcph->rst = TRUE;
|
||||
}
|
||||
if (flags & TCP_FLAG_PSH)
|
||||
tcph->psh = TRUE;
|
||||
|
||||
if (flags & TCP_FLAG_WACK) {
|
||||
tcph->seq = htonl(cur_stream->snd_nxt - 1);
|
||||
TRACE_CLWND("%u Sending ACK to get new window advertisement. "
|
||||
"seq: %u, peer_wnd: %u, snd_nxt - snd_una: %u\n",
|
||||
cur_stream->id,
|
||||
cur_stream->snd_nxt - 1, cur_stream->sndvar->peer_wnd,
|
||||
cur_stream->snd_nxt - cur_stream->sndvar->snd_una);
|
||||
} else if (flags & TCP_FLAG_FIN) {
|
||||
tcph->fin = TRUE;
|
||||
|
||||
if (cur_stream->sndvar->fss == 0) {
|
||||
TRACE_ERROR("Stream %u: not fss set. closed: %u\n",
|
||||
cur_stream->id, cur_stream->closed);
|
||||
}
|
||||
tcph->seq = htonl(cur_stream->sndvar->fss);
|
||||
cur_stream->sndvar->is_fin_sent = TRUE;
|
||||
TRACE_FIN("Stream %d: Sending FIN. seq: %u, ack_seq: %u\n",
|
||||
cur_stream->id, cur_stream->snd_nxt, cur_stream->rcv_nxt);
|
||||
} else {
|
||||
tcph->seq = htonl(cur_stream->snd_nxt);
|
||||
}
|
||||
|
||||
if (flags & TCP_FLAG_ACK) {
|
||||
tcph->ack = TRUE;
|
||||
tcph->ack_seq = htonl(cur_stream->rcv_nxt);
|
||||
cur_stream->sndvar->ts_lastack_sent = cur_ts;
|
||||
cur_stream->last_active_ts = cur_ts;
|
||||
UpdateTimeoutList(mtcp, cur_stream);
|
||||
}
|
||||
|
||||
if (flags & TCP_FLAG_SYN) {
|
||||
wscale = 0;
|
||||
} else {
|
||||
wscale = cur_stream->sndvar->wscale;
|
||||
}
|
||||
|
||||
window32 = cur_stream->rcvvar->rcv_wnd >> wscale;
|
||||
tcph->window = htons(MIN((uint16_t)window32, TCP_MAX_WINDOW));
|
||||
/* if the advertised window is 0, we need to advertise again later */
|
||||
if (window32 == 0) {
|
||||
cur_stream->need_wnd_adv = TRUE;
|
||||
}
|
||||
|
||||
GenerateTCPOptions(cur_stream, cur_ts, flags,
|
||||
(uint8_t *)tcph + TCP_HEADER_LEN, optlen);
|
||||
|
||||
tcph->doff = (TCP_HEADER_LEN + optlen) >> 2;
|
||||
// copy payload if exist
|
||||
if (payloadlen > 0) {
|
||||
memcpy((uint8_t *)tcph + TCP_HEADER_LEN + optlen, payload, payloadlen);
|
||||
}
|
||||
|
||||
#if TCP_CALCULATE_CHECKSUM
|
||||
tcph->check = TCPCalcChecksum((uint16_t *)tcph,
|
||||
TCP_HEADER_LEN + optlen + payloadlen,
|
||||
cur_stream->saddr, cur_stream->daddr);
|
||||
#endif
|
||||
|
||||
cur_stream->snd_nxt += payloadlen;
|
||||
|
||||
if (tcph->syn || tcph->fin) {
|
||||
cur_stream->snd_nxt++;
|
||||
payloadlen++;
|
||||
}
|
||||
|
||||
if (payloadlen > 0) {
|
||||
if (cur_stream->state > TCP_ST_ESTABLISHED) {
|
||||
TRACE_FIN("Payload after ESTABLISHED: length: %d, snd_nxt: %u\n",
|
||||
payloadlen, cur_stream->snd_nxt);
|
||||
}
|
||||
|
||||
/* update retransmission timer if have payload */
|
||||
cur_stream->sndvar->ts_rto = cur_ts + cur_stream->sndvar->rto;
|
||||
TRACE_RTO("Updating retransmission timer. "
|
||||
"cur_ts: %u, rto: %u, ts_rto: %u\n",
|
||||
cur_ts, cur_stream->sndvar->rto, cur_stream->sndvar->ts_rto);
|
||||
AddtoRTOList(mtcp, cur_stream);
|
||||
}
|
||||
|
||||
return payloadlen;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static int
|
||||
FlushTCPSendingBuffer(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
|
||||
{
|
||||
struct tcp_send_vars *sndvar = cur_stream->sndvar;
|
||||
const uint32_t maxlen = sndvar->mss - CalculateOptionLength(TCP_FLAG_ACK);
|
||||
uint8_t *data;
|
||||
uint32_t buffered_len;
|
||||
uint32_t seq;
|
||||
uint16_t len;
|
||||
int16_t sndlen;
|
||||
uint32_t window;
|
||||
int packets = 0;
|
||||
|
||||
if (!sndvar->sndbuf) {
|
||||
TRACE_ERROR("Stream %d: No send buffer available.\n", cur_stream->id);
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (sndvar->sndbuf->len == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
window = MIN(sndvar->cwnd, sndvar->peer_wnd);
|
||||
|
||||
while (1) {
|
||||
seq = cur_stream->snd_nxt;
|
||||
|
||||
if (TCP_SEQ_LT(seq, sndvar->sndbuf->head_seq)) {
|
||||
TRACE_ERROR("Stream %d: Invalid sequence to send. "
|
||||
"state: %s, seq: %u, head_seq: %u.\n",
|
||||
cur_stream->id, TCPStateToString(cur_stream),
|
||||
seq, sndvar->sndbuf->head_seq);
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
buffered_len = sndvar->sndbuf->head_seq + sndvar->sndbuf->len - seq;
|
||||
if (cur_stream->state > TCP_ST_ESTABLISHED) {
|
||||
TRACE_FIN("head_seq: %u, len: %u, seq: %u, "
|
||||
"buffered_len: %u\n", sndvar->sndbuf->head_seq,
|
||||
sndvar->sndbuf->len, seq, buffered_len);
|
||||
}
|
||||
if (buffered_len == 0)
|
||||
break;
|
||||
|
||||
data = sndvar->sndbuf->head +
|
||||
(seq - sndvar->sndbuf->head_seq);
|
||||
|
||||
if (buffered_len > maxlen) {
|
||||
len = maxlen;
|
||||
} else {
|
||||
len = buffered_len;
|
||||
}
|
||||
|
||||
if (len <= 0)
|
||||
break;
|
||||
|
||||
if (cur_stream->state > TCP_ST_ESTABLISHED) {
|
||||
TRACE_FIN("Flushing after ESTABLISHED: seq: %u, len: %u, "
|
||||
"buffered_len: %u\n", seq, len, buffered_len);
|
||||
}
|
||||
|
||||
if (seq - sndvar->snd_una + len > window) {
|
||||
/* Ask for new window advertisement to peer */
|
||||
if (seq - sndvar->snd_una + len > sndvar->peer_wnd) {
|
||||
#if 0
|
||||
TRACE_CLWND("Full peer window. "
|
||||
"peer_wnd: %u, (snd_nxt-snd_una): %u\n",
|
||||
sndvar->peer_wnd, seq - sndvar->snd_una);
|
||||
#endif
|
||||
if (TS_TO_MSEC(cur_ts - sndvar->ts_lastack_sent) > 500) {
|
||||
EnqueueACK(mtcp, cur_stream, cur_ts, ACK_OPT_WACK);
|
||||
}
|
||||
}
|
||||
return -3;
|
||||
}
|
||||
|
||||
sndlen = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_ACK, data, len);
|
||||
if (sndlen < 0) {
|
||||
return sndlen;
|
||||
}
|
||||
packets++;
|
||||
}
|
||||
|
||||
return packets;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline int
|
||||
SendControlPacket(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
|
||||
{
|
||||
struct tcp_send_vars *sndvar = cur_stream->sndvar;
|
||||
int ret = 0;
|
||||
|
||||
if (cur_stream->state == TCP_ST_SYN_SENT) {
|
||||
/* Send SYN here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_SYN, NULL, 0);
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_SYN_RCVD) {
|
||||
/* Send SYN/ACK here */
|
||||
cur_stream->snd_nxt = sndvar->iss;
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_SYN | TCP_FLAG_ACK, NULL, 0);
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_ESTABLISHED) {
|
||||
/* Send ACK here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_ACK, NULL, 0);
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_CLOSE_WAIT) {
|
||||
/* Send ACK for the FIN here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_ACK, NULL, 0);
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_LAST_ACK) {
|
||||
/* if it is on ack_list, send it after sending ack */
|
||||
if (sndvar->on_send_list || sndvar->on_ack_list) {
|
||||
ret = -1;
|
||||
} else {
|
||||
/* Send FIN/ACK here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_FIN | TCP_FLAG_ACK, NULL, 0);
|
||||
}
|
||||
} else if (cur_stream->state == TCP_ST_FIN_WAIT_1) {
|
||||
/* if it is on ack_list, send it after sending ack */
|
||||
if (sndvar->on_send_list || sndvar->on_ack_list) {
|
||||
ret = -1;
|
||||
} else {
|
||||
/* Send FIN/ACK here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_FIN | TCP_FLAG_ACK, NULL, 0);
|
||||
}
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_FIN_WAIT_2) {
|
||||
/* Send ACK here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_ACK, NULL, 0);
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_CLOSING) {
|
||||
if (sndvar->is_fin_sent) {
|
||||
/* if the sequence is for FIN, send FIN */
|
||||
if (cur_stream->snd_nxt == sndvar->fss) {
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_FIN | TCP_FLAG_ACK, NULL, 0);
|
||||
} else {
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_ACK, NULL, 0);
|
||||
}
|
||||
} else {
|
||||
/* if FIN is not sent, send fin with ack */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts,
|
||||
TCP_FLAG_FIN | TCP_FLAG_ACK, NULL, 0);
|
||||
}
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_TIME_WAIT) {
|
||||
/* Send ACK here */
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_ACK, NULL, 0);
|
||||
|
||||
} else if (cur_stream->state == TCP_ST_CLOSED) {
|
||||
/* Send RST here */
|
||||
TRACE_DBG("Stream %d: Try sending RST (TCP_ST_CLOSED)\n",
|
||||
cur_stream->id);
|
||||
/* first flush the data and ack */
|
||||
if (sndvar->on_send_list || sndvar->on_ack_list) {
|
||||
ret = -1;
|
||||
} else {
|
||||
ret = SendTCPPacket(mtcp, cur_stream, cur_ts, TCP_FLAG_RST, NULL, 0);
|
||||
if (ret >= 0) {
|
||||
DestroyTCPStream(mtcp, cur_stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
WriteTCPControlList(mtcp_manager_t mtcp,
|
||||
struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
|
||||
{
|
||||
tcp_stream *cur_stream;
|
||||
tcp_stream *next, *last;
|
||||
int cnt = 0;
|
||||
int ret;
|
||||
|
||||
thresh = MIN(thresh, sender->control_list_cnt);
|
||||
|
||||
/* Send TCP control messages */
|
||||
cnt = 0;
|
||||
cur_stream = TAILQ_FIRST(&sender->control_list);
|
||||
last = TAILQ_LAST(&sender->control_list, control_head);
|
||||
while (cur_stream) {
|
||||
if (++cnt > thresh)
|
||||
break;
|
||||
|
||||
TRACE_LOOP("Inside control loop. cnt: %u, stream: %d\n",
|
||||
cnt, cur_stream->id);
|
||||
next = TAILQ_NEXT(cur_stream, sndvar->control_link);
|
||||
|
||||
TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
|
||||
sender->control_list_cnt--;
|
||||
|
||||
if (cur_stream->sndvar->on_control_list) {
|
||||
cur_stream->sndvar->on_control_list = FALSE;
|
||||
//TRACE_DBG("Stream %u: Sending control packet\n", cur_stream->id);
|
||||
ret = SendControlPacket(mtcp, cur_stream, cur_ts);
|
||||
if (ret < 0) {
|
||||
TAILQ_INSERT_HEAD(&sender->control_list,
|
||||
cur_stream, sndvar->control_link);
|
||||
cur_stream->sndvar->on_control_list = TRUE;
|
||||
sender->control_list_cnt++;
|
||||
/* since there is no available write buffer, break */
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
TRACE_ERROR("Stream %d: not on control list.\n", cur_stream->id);
|
||||
}
|
||||
|
||||
if (cur_stream == last)
|
||||
break;
|
||||
cur_stream = next;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
WriteTCPDataList(mtcp_manager_t mtcp,
|
||||
struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
|
||||
{
|
||||
tcp_stream *cur_stream;
|
||||
tcp_stream *next, *last;
|
||||
int cnt = 0;
|
||||
int ret;
|
||||
|
||||
/* Send data */
|
||||
cnt = 0;
|
||||
cur_stream = TAILQ_FIRST(&sender->send_list);
|
||||
last = TAILQ_LAST(&sender->send_list, send_head);
|
||||
while (cur_stream) {
|
||||
if (++cnt > thresh)
|
||||
break;
|
||||
|
||||
TRACE_LOOP("Inside send loop. cnt: %u, stream: %d\n",
|
||||
cnt, cur_stream->id);
|
||||
next = TAILQ_NEXT(cur_stream, sndvar->send_link);
|
||||
|
||||
TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
|
||||
if (cur_stream->sndvar->on_send_list) {
|
||||
ret = 0;
|
||||
|
||||
/* Send data here */
|
||||
/* Only can send data when ESTABLISHED or CLOSE_WAIT */
|
||||
if (cur_stream->state == TCP_ST_ESTABLISHED) {
|
||||
if (cur_stream->sndvar->on_control_list) {
|
||||
/* delay sending data after until on_control_list becomes off */
|
||||
//TRACE_DBG("Stream %u: delay sending data.\n", cur_stream->id);
|
||||
ret = -1;
|
||||
} else {
|
||||
ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
|
||||
}
|
||||
} else if (cur_stream->state == TCP_ST_CLOSE_WAIT ||
|
||||
cur_stream->state == TCP_ST_FIN_WAIT_1 ||
|
||||
cur_stream->state == TCP_ST_LAST_ACK) {
|
||||
ret = FlushTCPSendingBuffer(mtcp, cur_stream, cur_ts);
|
||||
} else {
|
||||
TRACE_DBG("Stream %d: on_send_list at state %s\n",
|
||||
cur_stream->id, TCPStateToString(cur_stream));
|
||||
#if DUMP_STREAM
|
||||
DumpStream(mtcp, cur_stream);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
|
||||
/* since there is no available write buffer, break */
|
||||
break;
|
||||
|
||||
} else {
|
||||
cur_stream->sndvar->on_send_list = FALSE;
|
||||
sender->send_list_cnt--;
|
||||
/* the ret value is the number of packets sent. */
|
||||
/* decrease ack_cnt for the piggybacked acks */
|
||||
#if ACK_PIGGYBACK
|
||||
if (cur_stream->sndvar->ack_cnt > 0) {
|
||||
if (cur_stream->sndvar->ack_cnt > ret) {
|
||||
cur_stream->sndvar->ack_cnt -= ret;
|
||||
} else {
|
||||
cur_stream->sndvar->ack_cnt = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if 1
|
||||
if (cur_stream->control_list_waiting) {
|
||||
if (!cur_stream->sndvar->on_ack_list) {
|
||||
cur_stream->control_list_waiting = FALSE;
|
||||
AddtoControlList(mtcp, cur_stream, cur_ts);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
TRACE_ERROR("Stream %d: not on send list.\n", cur_stream->id);
|
||||
#ifdef DUMP_STREAM
|
||||
DumpStream(mtcp, cur_stream);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (cur_stream == last)
|
||||
break;
|
||||
cur_stream = next;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline int
|
||||
WriteTCPACKList(mtcp_manager_t mtcp,
|
||||
struct mtcp_sender *sender, uint32_t cur_ts, int thresh)
|
||||
{
|
||||
tcp_stream *cur_stream;
|
||||
tcp_stream *next, *last;
|
||||
int to_ack;
|
||||
int cnt = 0;
|
||||
int ret;
|
||||
|
||||
/* Send aggregated acks */
|
||||
cnt = 0;
|
||||
cur_stream = TAILQ_FIRST(&sender->ack_list);
|
||||
last = TAILQ_LAST(&sender->ack_list, ack_head);
|
||||
while (cur_stream) {
|
||||
if (++cnt > thresh)
|
||||
break;
|
||||
|
||||
TRACE_LOOP("Inside ack loop. cnt: %u\n", cnt);
|
||||
next = TAILQ_NEXT(cur_stream, sndvar->ack_link);
|
||||
|
||||
if (cur_stream->sndvar->on_ack_list) {
|
||||
/* this list is only to ack the data packets */
|
||||
/* if the ack is not data ack, then it will not process here */
|
||||
to_ack = FALSE;
|
||||
if (cur_stream->state == TCP_ST_ESTABLISHED ||
|
||||
cur_stream->state == TCP_ST_CLOSE_WAIT ||
|
||||
cur_stream->state == TCP_ST_FIN_WAIT_1 ||
|
||||
cur_stream->state == TCP_ST_FIN_WAIT_2 ||
|
||||
cur_stream->state == TCP_ST_TIME_WAIT) {
|
||||
/* TIMEWAIT is possible since the ack is queued
|
||||
at FIN_WAIT_2 */
|
||||
if (cur_stream->rcvvar->rcvbuf) {
|
||||
if (TCP_SEQ_LEQ(cur_stream->rcv_nxt,
|
||||
cur_stream->rcvvar->rcvbuf->head_seq +
|
||||
cur_stream->rcvvar->rcvbuf->merged_len)) {
|
||||
to_ack = TRUE;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
TRACE_DBG("Stream %u (%s): "
|
||||
"Try sending ack at not proper state. "
|
||||
"seq: %u, ack_seq: %u, on_control_list: %u\n",
|
||||
cur_stream->id, TCPStateToString(cur_stream),
|
||||
cur_stream->snd_nxt, cur_stream->rcv_nxt,
|
||||
cur_stream->sndvar->on_control_list);
|
||||
#ifdef DUMP_STREAM
|
||||
DumpStream(mtcp, cur_stream);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (to_ack) {
|
||||
/* send the queued ack packets */
|
||||
while (cur_stream->sndvar->ack_cnt > 0) {
|
||||
ret = SendTCPPacket(mtcp, cur_stream,
|
||||
cur_ts, TCP_FLAG_ACK, NULL, 0);
|
||||
if (ret < 0) {
|
||||
/* since there is no available write buffer, break */
|
||||
break;
|
||||
}
|
||||
cur_stream->sndvar->ack_cnt--;
|
||||
}
|
||||
|
||||
/* if is_wack is set, send packet to get window advertisement */
|
||||
if (cur_stream->sndvar->is_wack) {
|
||||
cur_stream->sndvar->is_wack = FALSE;
|
||||
ret = SendTCPPacket(mtcp, cur_stream,
|
||||
cur_ts, TCP_FLAG_ACK | TCP_FLAG_WACK, NULL, 0);
|
||||
if (ret < 0) {
|
||||
/* since there is no available write buffer, break */
|
||||
cur_stream->sndvar->is_wack = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(cur_stream->sndvar->ack_cnt || cur_stream->sndvar->is_wack)) {
|
||||
cur_stream->sndvar->on_ack_list = FALSE;
|
||||
TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
|
||||
sender->ack_list_cnt--;
|
||||
}
|
||||
} else {
|
||||
cur_stream->sndvar->on_ack_list = FALSE;
|
||||
cur_stream->sndvar->ack_cnt = 0;
|
||||
cur_stream->sndvar->is_wack = 0;
|
||||
TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
|
||||
sender->ack_list_cnt--;
|
||||
}
|
||||
|
||||
if (cur_stream->control_list_waiting) {
|
||||
if (!cur_stream->sndvar->on_send_list) {
|
||||
cur_stream->control_list_waiting = FALSE;
|
||||
AddtoControlList(mtcp, cur_stream, cur_ts);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
TRACE_ERROR("Stream %d: not on ack list.\n", cur_stream->id);
|
||||
TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
|
||||
sender->ack_list_cnt--;
|
||||
#ifdef DUMP_STREAM
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"Stream %u: not on ack list.\n", cur_stream->id);
|
||||
DumpStream(mtcp, cur_stream);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (cur_stream == last)
|
||||
break;
|
||||
cur_stream = next;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline struct mtcp_sender *
|
||||
GetSender(mtcp_manager_t mtcp, tcp_stream *cur_stream)
|
||||
{
|
||||
if (cur_stream->sndvar->nif_out < 0) {
|
||||
return mtcp->g_sender;
|
||||
|
||||
} else if (cur_stream->sndvar->nif_out >= CONFIG.eths_num) {
|
||||
TRACE_ERROR("(NEVER HAPPEN) Failed to find appropriate sender.\n");
|
||||
return NULL;
|
||||
|
||||
} else {
|
||||
return mtcp->n_sender[cur_stream->sndvar->nif_out];
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
AddtoControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t cur_ts)
|
||||
{
|
||||
#if TRY_SEND_BEFORE_QUEUE
|
||||
int ret;
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
ret = SendControlPacket(mtcp, cur_stream, cur_ts);
|
||||
if (ret < 0) {
|
||||
#endif
|
||||
if (!cur_stream->sndvar->on_control_list) {
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
cur_stream->sndvar->on_control_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&sender->control_list, cur_stream, sndvar->control_link);
|
||||
sender->control_list_cnt++;
|
||||
//TRACE_DBG("Stream %u: added to control list (cnt: %d)\n",
|
||||
// cur_stream->id, sender->control_list_cnt);
|
||||
}
|
||||
#if TRY_SEND_BEFORE_QUEUE
|
||||
} else {
|
||||
if (cur_stream->sndvar->on_control_list) {
|
||||
cur_stream->sndvar->on_control_list = FALSE;
|
||||
TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
|
||||
sender->control_list_cnt--;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
AddtoSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
|
||||
{
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
if(!cur_stream->sndvar->sndbuf) {
|
||||
TRACE_ERROR("[%d] Stream %d: No send buffer available.\n",
|
||||
mtcp->ctx->cpu,
|
||||
cur_stream->id);
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cur_stream->sndvar->on_send_list) {
|
||||
cur_stream->sndvar->on_send_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&sender->send_list, cur_stream, sndvar->send_link);
|
||||
sender->send_list_cnt++;
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
AddtoACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
|
||||
{
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
if (!cur_stream->sndvar->on_ack_list) {
|
||||
cur_stream->sndvar->on_ack_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&sender->ack_list, cur_stream, sndvar->ack_link);
|
||||
sender->ack_list_cnt++;
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RemoveFromControlList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
|
||||
{
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
if (cur_stream->sndvar->on_control_list) {
|
||||
cur_stream->sndvar->on_control_list = FALSE;
|
||||
TAILQ_REMOVE(&sender->control_list, cur_stream, sndvar->control_link);
|
||||
sender->control_list_cnt--;
|
||||
//TRACE_DBG("Stream %u: Removed from control list (cnt: %d)\n",
|
||||
// cur_stream->id, sender->control_list_cnt);
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RemoveFromSendList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
|
||||
{
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
if (cur_stream->sndvar->on_send_list) {
|
||||
cur_stream->sndvar->on_send_list = FALSE;
|
||||
TAILQ_REMOVE(&sender->send_list, cur_stream, sndvar->send_link);
|
||||
sender->send_list_cnt--;
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RemoveFromACKList(mtcp_manager_t mtcp, tcp_stream *cur_stream)
|
||||
{
|
||||
struct mtcp_sender *sender = GetSender(mtcp, cur_stream);
|
||||
assert(sender != NULL);
|
||||
|
||||
if (cur_stream->sndvar->on_ack_list) {
|
||||
cur_stream->sndvar->on_ack_list = FALSE;
|
||||
TAILQ_REMOVE(&sender->ack_list, cur_stream, sndvar->ack_link);
|
||||
sender->ack_list_cnt--;
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
EnqueueACK(mtcp_manager_t mtcp,
|
||||
tcp_stream *cur_stream, uint32_t cur_ts, uint8_t opt)
|
||||
{
|
||||
if (!(cur_stream->state == TCP_ST_ESTABLISHED ||
|
||||
cur_stream->state == TCP_ST_CLOSE_WAIT ||
|
||||
cur_stream->state == TCP_ST_FIN_WAIT_1 ||
|
||||
cur_stream->state == TCP_ST_FIN_WAIT_2)) {
|
||||
TRACE_DBG("Stream %u: Enqueueing ack at state %s\n",
|
||||
cur_stream->id, TCPStateToString(cur_stream));
|
||||
}
|
||||
|
||||
if (opt == ACK_OPT_NOW) {
|
||||
if (cur_stream->sndvar->ack_cnt < cur_stream->sndvar->ack_cnt + 1) {
|
||||
cur_stream->sndvar->ack_cnt++;
|
||||
}
|
||||
} else if (opt == ACK_OPT_AGGREGATE) {
|
||||
if (cur_stream->sndvar->ack_cnt == 0) {
|
||||
cur_stream->sndvar->ack_cnt = 1;
|
||||
}
|
||||
} else if (opt == ACK_OPT_WACK) {
|
||||
cur_stream->sndvar->is_wack = TRUE;
|
||||
}
|
||||
AddtoACKList(mtcp, cur_stream);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
inline void
|
||||
DumpControlList(mtcp_manager_t mtcp, struct mtcp_sender *sender)
|
||||
{
|
||||
tcp_stream *stream;
|
||||
|
||||
TRACE_DBG("Dumping control list (count: %d):\n", sender->control_list_cnt);
|
||||
TAILQ_FOREACH(stream, &sender->control_list, sndvar->control_link) {
|
||||
TRACE_DBG("Stream id: %u in control list\n", stream->id);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* TCP free fragment queue for ring buffer - tcp_rb_frag_queue.c/h
|
||||
*
|
||||
* EunYoung Jeong
|
||||
*
|
||||
* Part of this code borrows Click's simple queue implementation
|
||||
*
|
||||
* ============================== Click License =============================
|
||||
*
|
||||
* Copyright (c) 1999-2000 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, subject to the conditions
|
||||
* listed in the Click LICENSE file. These conditions include: you must
|
||||
* preserve this copyright notice, and you cannot mention the copyright
|
||||
* holders in advertising related to the Software without their permission.
|
||||
* The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
|
||||
* notice is a summary of the Click LICENSE file; the license in that file is
|
||||
* legally binding.
|
||||
*/
|
||||
|
||||
#include "tcp_rb_frag_queue.h"
|
||||
#include "debug.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#ifndef _INDEX_TYPE_
#define _INDEX_TYPE_
/* queue index types; guarded so that only one definition is ever seen */
typedef int32_t signed_index_type;	/* signed counterpart of index_type */
typedef uint32_t index_type;		/* slot index into the queue array */
#endif
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct rb_frag_queue
|
||||
{
|
||||
index_type _capacity;
|
||||
volatile index_type _head;
|
||||
volatile index_type _tail;
|
||||
|
||||
struct fragment_ctx * volatile * _q;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline index_type
|
||||
NextIndex(rb_frag_queue_t rb_fragq, index_type i)
|
||||
{
|
||||
return (i != rb_fragq->_capacity ? i + 1: 0);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline index_type
|
||||
PrevIndex(rb_frag_queue_t rb_fragq, index_type i)
|
||||
{
|
||||
return (i != 0 ? i - 1: rb_fragq->_capacity);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
RBFragMemoryBarrier(struct fragment_ctx * volatile frag, volatile index_type index)
|
||||
{
|
||||
__asm__ volatile("" : : "m" (frag), "m" (index));
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
rb_frag_queue_t
|
||||
CreateRBFragQueue(int capacity)
|
||||
{
|
||||
rb_frag_queue_t rb_fragq;
|
||||
|
||||
rb_fragq = (rb_frag_queue_t)calloc(1, sizeof(struct rb_frag_queue));
|
||||
if (!rb_fragq)
|
||||
return NULL;
|
||||
|
||||
rb_fragq->_q = (struct fragment_ctx **)
|
||||
calloc(capacity + 1, sizeof(struct fragment_ctx *));
|
||||
if (!rb_fragq->_q) {
|
||||
free(rb_fragq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rb_fragq->_capacity = capacity;
|
||||
rb_fragq->_head = rb_fragq->_tail = 0;
|
||||
|
||||
return rb_fragq;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyRBFragQueue(rb_frag_queue_t rb_fragq)
|
||||
{
|
||||
if (!rb_fragq)
|
||||
return;
|
||||
|
||||
if (rb_fragq->_q) {
|
||||
free((void *)rb_fragq->_q);
|
||||
rb_fragq->_q = NULL;
|
||||
}
|
||||
|
||||
free(rb_fragq);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
RBFragEnqueue(rb_frag_queue_t rb_fragq, struct fragment_ctx *frag)
|
||||
{
|
||||
index_type h = rb_fragq->_head;
|
||||
index_type t = rb_fragq->_tail;
|
||||
index_type nt = NextIndex(rb_fragq, t);
|
||||
|
||||
if (nt != h) {
|
||||
rb_fragq->_q[t] = frag;
|
||||
RBFragMemoryBarrier(rb_fragq->_q[t], rb_fragq->_tail);
|
||||
rb_fragq->_tail = nt;
|
||||
return 0;
|
||||
}
|
||||
|
||||
TRACE_ERROR("Exceed capacity of frag queue!\n");
|
||||
return -1;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct fragment_ctx *
|
||||
RBFragDequeue(rb_frag_queue_t rb_fragq)
|
||||
{
|
||||
index_type h = rb_fragq->_head;
|
||||
index_type t = rb_fragq->_tail;
|
||||
|
||||
if (h != t) {
|
||||
struct fragment_ctx *frag = rb_fragq->_q[h];
|
||||
RBFragMemoryBarrier(rb_fragq->_q[h], rb_fragq->_head);
|
||||
rb_fragq->_head = NextIndex(rb_fragq, h);
|
||||
assert(frag);
|
||||
|
||||
return frag;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
|
@ -0,0 +1,401 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "tcp_ring_buffer.h"
|
||||
#include "tcp_rb_frag_queue.h"
|
||||
#include "memory_mgt.h"
|
||||
#include "debug.h"
|
||||
|
||||
/* 16 MiB size constant for ring buffers */
#define MAX_RB_SIZE (16 * 1024 * 1024)
/* NOTE: both macros evaluate their arguments more than once */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct rb_manager
|
||||
{
|
||||
size_t chunk_size;
|
||||
uint32_t cur_num;
|
||||
uint32_t cnum;
|
||||
|
||||
mem_pool_t mp;
|
||||
mem_pool_t frag_mp;
|
||||
|
||||
rb_frag_queue_t free_fragq; /* free fragment queue (for app thread) */
|
||||
rb_frag_queue_t free_fragq_int; /* free fragment quuee (only for mtcp) */
|
||||
|
||||
} rb_manager;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint32_t
|
||||
RBGetCurnum(rb_manager_t rbm)
|
||||
{
|
||||
return rbm->cur_num;
|
||||
}
|
||||
/*-----------------------------------------------------------------------------*/
|
||||
void
|
||||
RBPrintInfo(struct tcp_ring_buffer* buff)
|
||||
{
|
||||
printf("buff_data %p, buff_size %d, buff_mlen %d, "
|
||||
"buff_clen %lu, buff_head %p (%d), buff_tail (%d)\n",
|
||||
buff->data, buff->size, buff->merged_len, buff->cum_len,
|
||||
buff->head, buff->head_offset, buff->tail_offset);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
RBPrintStr(struct tcp_ring_buffer* buff)
|
||||
{
|
||||
RBPrintInfo(buff);
|
||||
printf("%s\n", buff->head);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
RBPrintHex(struct tcp_ring_buffer* buff)
|
||||
{
|
||||
int i;
|
||||
|
||||
RBPrintInfo(buff);
|
||||
|
||||
for (i = 0; i < buff->merged_len; i++) {
|
||||
if (i != 0 && i % 16 == 0)
|
||||
printf("\n");
|
||||
printf("%0x ", *( (unsigned char*) buff->head + i));
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
rb_manager_t
|
||||
RBManagerCreate(size_t chunk_size, uint32_t cnum)
|
||||
{
|
||||
rb_manager_t rbm = (rb_manager_t) calloc(1, sizeof(rb_manager));
|
||||
|
||||
if (!rbm) {
|
||||
perror("rbm_create calloc");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rbm->chunk_size = chunk_size;
|
||||
rbm->cnum = cnum;
|
||||
rbm->mp = (mem_pool_t)MPCreate(chunk_size, (uint64_t)chunk_size * cnum, 0);
|
||||
if (!rbm->mp) {
|
||||
TRACE_ERROR("Failed to allocate mp pool.\n");
|
||||
free(rbm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rbm->frag_mp = (mem_pool_t)MPCreate(sizeof(struct fragment_ctx),
|
||||
sizeof(struct fragment_ctx) * cnum, 0);
|
||||
if (!rbm->frag_mp) {
|
||||
TRACE_ERROR("Failed to allocate frag_mp pool.\n");
|
||||
MPDestroy(rbm->mp);
|
||||
free(rbm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rbm->free_fragq = CreateRBFragQueue(cnum);
|
||||
if (!rbm->free_fragq) {
|
||||
TRACE_ERROR("Failed to create free fragment queue.\n");
|
||||
MPDestroy(rbm->mp);
|
||||
MPDestroy(rbm->frag_mp);
|
||||
free(rbm);
|
||||
return NULL;
|
||||
}
|
||||
rbm->free_fragq_int = CreateRBFragQueue(cnum);
|
||||
if (!rbm->free_fragq_int) {
|
||||
TRACE_ERROR("Failed to create internal free fragment queue.\n");
|
||||
MPDestroy(rbm->mp);
|
||||
MPDestroy(rbm->frag_mp);
|
||||
DestroyRBFragQueue(rbm->free_fragq);
|
||||
free(rbm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return rbm;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
FreeFragmentContextSingle(rb_manager_t rbm, struct fragment_ctx* frag)
|
||||
{
|
||||
if (frag->is_calloc)
|
||||
free(frag);
|
||||
else
|
||||
MPFreeChunk(rbm->frag_mp, frag);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
FreeFragmentContext(rb_manager_t rbm, struct fragment_ctx* fctx)
|
||||
{
|
||||
struct fragment_ctx *remove;
|
||||
|
||||
assert(fctx);
|
||||
if (fctx == NULL)
|
||||
return;
|
||||
|
||||
while (fctx) {
|
||||
remove = fctx;
|
||||
fctx = fctx->next;
|
||||
FreeFragmentContextSingle(rbm, remove);
|
||||
}
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static struct fragment_ctx *
|
||||
AllocateFragmentContext(rb_manager_t rbm)
|
||||
{
|
||||
/* this function should be called only in mtcp thread */
|
||||
struct fragment_ctx *frag;
|
||||
|
||||
/* first try deqeue the fragment in free fragment queue */
|
||||
frag = RBFragDequeue(rbm->free_fragq);
|
||||
if (!frag) {
|
||||
frag = RBFragDequeue(rbm->free_fragq_int);
|
||||
if (!frag) {
|
||||
/* next fall back to fetching from mempool */
|
||||
frag = MPAllocateChunk(rbm->frag_mp);
|
||||
if (!frag) {
|
||||
TRACE_ERROR("fragments depleted, fall back to calloc\n");
|
||||
frag = calloc(1, sizeof(struct fragment_ctx));
|
||||
if (frag == NULL) {
|
||||
TRACE_ERROR("calloc failed\n");
|
||||
exit(-1);
|
||||
}
|
||||
frag->is_calloc = 1; /* mark it as allocated by calloc */
|
||||
}
|
||||
}
|
||||
}
|
||||
memset(frag, 0, sizeof(*frag));
|
||||
return frag;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_ring_buffer*
|
||||
RBInit(rb_manager_t rbm, uint32_t init_seq)
|
||||
{
|
||||
struct tcp_ring_buffer* buff =
|
||||
(struct tcp_ring_buffer*)calloc(1, sizeof(struct tcp_ring_buffer));
|
||||
|
||||
if (buff == NULL){
|
||||
perror("rb_init buff");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
buff->data = MPAllocateChunk(rbm->mp);
|
||||
if(!buff->data){
|
||||
perror("rb_init MPAllocateChunk");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//memset(buff->data, 0, rbm->chunk_size);
|
||||
|
||||
buff->size = rbm->chunk_size;
|
||||
buff->head = buff->data;
|
||||
buff->head_seq = init_seq;
|
||||
buff->init_seq = init_seq;
|
||||
|
||||
rbm->cur_num++;
|
||||
|
||||
return buff;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
RBFree(rb_manager_t rbm, struct tcp_ring_buffer* buff)
|
||||
{
|
||||
assert(buff);
|
||||
if (buff->fctx) {
|
||||
FreeFragmentContext(rbm, buff->fctx);
|
||||
buff->fctx = NULL;
|
||||
}
|
||||
|
||||
if (buff->data) {
|
||||
MPFreeChunk(rbm->mp, buff->data);
|
||||
}
|
||||
|
||||
rbm->cur_num--;
|
||||
|
||||
free(buff);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#define MAXSEQ               ((uint32_t)(0xFFFFFFFF))
/*----------------------------------------------------------------------------*/
/*
 * Return whichever of two 32-bit sequence numbers comes first, treating the
 * sequence space as circular: when the numeric distance between them exceeds
 * MAXSEQ/2, the numerically larger value is taken to be the earlier one.
 */
static inline uint32_t
GetMinSeq(uint32_t a, uint32_t b)
{
	uint32_t lo, hi;

	if (a == b)
		return a;

	/* order the pair numerically first */
	if (a < b) {
		lo = a;
		hi = b;
	} else {
		lo = b;
		hi = a;
	}

	/* a distance above half the space means the pair straddles the wrap */
	return (hi - lo <= MAXSEQ/2) ? lo : hi;
}
/*----------------------------------------------------------------------------*/
/*
 * Return whichever of two 32-bit sequence numbers comes last, using the same
 * circular ordering as GetMinSeq().
 */
static inline uint32_t
GetMaxSeq(uint32_t a, uint32_t b)
{
	uint32_t lo, hi;

	if (a == b)
		return a;

	if (a < b) {
		lo = a;
		hi = b;
	} else {
		lo = b;
		hi = a;
	}

	return (hi - lo <= MAXSEQ/2) ? hi : lo;
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline int
|
||||
CanMerge(const struct fragment_ctx *a, const struct fragment_ctx *b)
|
||||
{
|
||||
uint32_t a_end = a->seq + a->len + 1;
|
||||
uint32_t b_end = b->seq + b->len + 1;
|
||||
|
||||
if (GetMinSeq(a_end, b->seq) == a_end ||
|
||||
GetMinSeq(b_end, a->seq) == b_end)
|
||||
return 0;
|
||||
return (1);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
MergeFragments(struct fragment_ctx *a, struct fragment_ctx *b)
|
||||
{
|
||||
/* merge a into b */
|
||||
uint32_t min_seq, max_seq;
|
||||
|
||||
min_seq = GetMinSeq(a->seq, b->seq);
|
||||
max_seq = GetMaxSeq(a->seq + a->len, b->seq + b->len);
|
||||
b->seq = min_seq;
|
||||
b->len = max_seq - min_seq;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
RBPut(rb_manager_t rbm, struct tcp_ring_buffer* buff,
|
||||
void* data, uint32_t len, uint32_t cur_seq)
|
||||
{
|
||||
int putx, end_off;
|
||||
struct fragment_ctx *new_ctx;
|
||||
struct fragment_ctx* iter;
|
||||
struct fragment_ctx* prev, *pprev;
|
||||
int merged = 0;
|
||||
|
||||
if (len <= 0)
|
||||
return 0;
|
||||
|
||||
// if data offset is smaller than head sequence, then drop
|
||||
if (GetMinSeq(buff->head_seq, cur_seq) != buff->head_seq)
|
||||
return 0;
|
||||
|
||||
putx = cur_seq - buff->head_seq;
|
||||
end_off = putx + len;
|
||||
if (buff->size <= end_off) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
// if buffer is at tail, move the data to the first of head
|
||||
if (buff->size <= (buff->head_offset + end_off)) {
|
||||
memmove(buff->data, buff->head, buff->last_len + 1);
|
||||
buff->tail_offset -= buff->head_offset;
|
||||
buff->head_offset = 0;
|
||||
buff->head = buff->data;
|
||||
}
|
||||
//copy data to buffer
|
||||
memcpy(buff->head + putx, data, len);
|
||||
if (buff->tail_offset < buff->head_offset + end_off)
|
||||
buff->tail_offset = buff->head_offset + end_off;
|
||||
buff->last_len = buff->tail_offset - buff->head_offset;
|
||||
buff->head[buff->last_len] = 0; /* null termination */
|
||||
|
||||
// create fragmentation context blocks
|
||||
new_ctx = AllocateFragmentContext(rbm);
|
||||
if (!new_ctx) {
|
||||
perror("allocating new_ctx failed");
|
||||
return 0;
|
||||
}
|
||||
new_ctx->seq = cur_seq;
|
||||
new_ctx->len = len;
|
||||
new_ctx->next = NULL;
|
||||
|
||||
// traverse the fragment list, and merge the new fragment if possible
|
||||
for (iter = buff->fctx, prev = NULL, pprev = NULL;
|
||||
iter != NULL;
|
||||
pprev = prev, prev = iter, iter = iter->next) {
|
||||
|
||||
if (CanMerge(new_ctx, iter)) {
|
||||
/* merge the first fragment into the second fragment */
|
||||
MergeFragments(new_ctx, iter);
|
||||
|
||||
/* remove the first fragment */
|
||||
if (prev == new_ctx) {
|
||||
if (pprev)
|
||||
pprev->next = iter;
|
||||
else
|
||||
buff->fctx = iter;
|
||||
prev = pprev;
|
||||
}
|
||||
FreeFragmentContextSingle(rbm, new_ctx);
|
||||
new_ctx = iter;
|
||||
merged = 1;
|
||||
}
|
||||
else if (merged ||
|
||||
GetMaxSeq(cur_seq + len, iter->seq) == iter->seq) {
|
||||
/* merged at some point, but no more mergeable
|
||||
then stop it now */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!merged) {
|
||||
if (buff->fctx == NULL) {
|
||||
buff->fctx = new_ctx;
|
||||
} else if (GetMinSeq(cur_seq, buff->fctx->seq) == cur_seq) {
|
||||
/* if the new packet's seqnum is before the existing fragments */
|
||||
new_ctx->next = buff->fctx;
|
||||
buff->fctx = new_ctx;
|
||||
} else {
|
||||
/* if the seqnum is in-between the fragments or
|
||||
at the last */
|
||||
assert(GetMinSeq(cur_seq, prev->seq + prev->len) ==
|
||||
prev->seq + prev->len);
|
||||
prev->next = new_ctx;
|
||||
new_ctx->next = iter;
|
||||
}
|
||||
}
|
||||
if (buff->head_seq == buff->fctx->seq) {
|
||||
buff->cum_len += buff->fctx->len - buff->merged_len;
|
||||
buff->merged_len = buff->fctx->len;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
size_t
|
||||
RBRemove(rb_manager_t rbm, struct tcp_ring_buffer* buff, size_t len, int option)
|
||||
{
|
||||
/* this function should be called only in application thread */
|
||||
|
||||
if (buff->merged_len < len)
|
||||
len = buff->merged_len;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
buff->head_offset += len;
|
||||
buff->head = buff->data + buff->head_offset;
|
||||
buff->head_seq += len;
|
||||
|
||||
buff->merged_len -= len;
|
||||
buff->last_len -= len;
|
||||
|
||||
// modify fragementation chunks
|
||||
if (len == buff->fctx->len) {
|
||||
struct fragment_ctx* remove = buff->fctx;
|
||||
buff->fctx = buff->fctx->next;
|
||||
if (option == AT_APP) {
|
||||
RBFragEnqueue(rbm->free_fragq, remove);
|
||||
} else if (option == AT_MTCP) {
|
||||
RBFragEnqueue(rbm->free_fragq_int, remove);
|
||||
}
|
||||
}
|
||||
else if (len < buff->fctx->len) {
|
||||
buff->fctx->seq += len;
|
||||
buff->fctx->len -= len;
|
||||
}
|
||||
else {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* TCP free send buffer queue - tcp_sb_queue.c/h
|
||||
*
|
||||
* EunYoung Jeong
|
||||
*
|
||||
* Part of this code borrows Click's simple queue implementation
|
||||
*
|
||||
* ============================== Click License =============================
|
||||
*
|
||||
* Copyright (c) 1999-2000 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, subject to the conditions
|
||||
* listed in the Click LICENSE file. These conditions include: you must
|
||||
* preserve this copyright notice, and you cannot mention the copyright
|
||||
* holders in advertising related to the Software without their permission.
|
||||
* The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
|
||||
* notice is a summary of the Click LICENSE file; the license in that file is
|
||||
* legally binding.
|
||||
*/
|
||||
|
||||
#include "tcp_sb_queue.h"
|
||||
#include "debug.h"
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#ifndef _INDEX_TYPE_
|
||||
#define _INDEX_TYPE_
|
||||
typedef uint32_t index_type;
|
||||
typedef int32_t signed_index_type;
|
||||
#endif
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct sb_queue
|
||||
{
|
||||
index_type _capacity;
|
||||
volatile index_type _head;
|
||||
volatile index_type _tail;
|
||||
|
||||
struct tcp_send_buffer * volatile * _q;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
static inline index_type
|
||||
NextIndex(sb_queue_t sq, index_type i)
|
||||
{
|
||||
return (i != sq->_capacity ? i + 1: 0);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline index_type
|
||||
PrevIndex(sb_queue_t sq, index_type i)
|
||||
{
|
||||
return (i != 0 ? i - 1: sq->_capacity);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
SBMemoryBarrier(struct tcp_send_buffer * volatile buf, volatile index_type index)
|
||||
{
|
||||
__asm__ volatile("" : : "m" (buf), "m" (index));
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
sb_queue_t
|
||||
CreateSBQueue(int capacity)
|
||||
{
|
||||
sb_queue_t sq;
|
||||
|
||||
sq = (sb_queue_t)calloc(1, sizeof(struct sb_queue));
|
||||
if (!sq)
|
||||
return NULL;
|
||||
|
||||
sq->_q = (struct tcp_send_buffer **)
|
||||
calloc(capacity + 1, sizeof(struct tcp_send_buffer *));
|
||||
if (!sq->_q) {
|
||||
free(sq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sq->_capacity = capacity;
|
||||
sq->_head = sq->_tail = 0;
|
||||
|
||||
return sq;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroySBQueue(sb_queue_t sq)
|
||||
{
|
||||
if (!sq)
|
||||
return;
|
||||
|
||||
if (sq->_q) {
|
||||
free((void *)sq->_q);
|
||||
sq->_q = NULL;
|
||||
}
|
||||
|
||||
free(sq);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
SBEnqueue(sb_queue_t sq, struct tcp_send_buffer *buf)
|
||||
{
|
||||
index_type h = sq->_head;
|
||||
index_type t = sq->_tail;
|
||||
index_type nt = NextIndex(sq, t);
|
||||
|
||||
if (nt != h) {
|
||||
sq->_q[t] = buf;
|
||||
SBMemoryBarrier(sq->_q[t], sq->_tail);
|
||||
sq->_tail = nt;
|
||||
return 0;
|
||||
}
|
||||
|
||||
TRACE_ERROR("Exceed capacity of buf queue!\n");
|
||||
return -1;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct tcp_send_buffer *
|
||||
SBDequeue(sb_queue_t sq)
|
||||
{
|
||||
index_type h = sq->_head;
|
||||
index_type t = sq->_tail;
|
||||
|
||||
if (h != t) {
|
||||
struct tcp_send_buffer *buf = sq->_q[h];
|
||||
SBMemoryBarrier(sq->_q[h], sq->_head);
|
||||
sq->_head = NextIndex(sq, h);
|
||||
assert(buf);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
|
@ -0,0 +1,171 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "memory_mgt.h"
|
||||
#include "debug.h"
|
||||
#include "tcp_send_buffer.h"
|
||||
#include "tcp_sb_queue.h"
|
||||
|
||||
#define MAX(a, b) ((a)>(b)?(a):(b))
|
||||
#define MIN(a, b) ((a)<(b)?(a):(b))
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct sb_manager
|
||||
{
|
||||
size_t chunk_size;
|
||||
uint32_t cur_num;
|
||||
uint32_t cnum;
|
||||
mem_pool_t mp;
|
||||
sb_queue_t freeq;
|
||||
|
||||
} sb_manager;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
uint32_t
|
||||
SBGetCurnum(sb_manager_t sbm)
|
||||
{
|
||||
return sbm->cur_num;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
sb_manager_t
|
||||
SBManagerCreate(size_t chunk_size, uint32_t cnum)
|
||||
{
|
||||
sb_manager_t sbm = (sb_manager_t)calloc(1, sizeof(sb_manager));
|
||||
if (!sbm) {
|
||||
TRACE_ERROR("SBManagerCreate() failed. %s\n", strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sbm->chunk_size = chunk_size;
|
||||
sbm->cnum = cnum;
|
||||
sbm->mp = (mem_pool_t)MPCreate(chunk_size, (uint64_t)chunk_size * cnum, 0);
|
||||
if (!sbm->mp) {
|
||||
TRACE_ERROR("Failed to create mem pool for sb.\n");
|
||||
free(sbm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sbm->freeq = CreateSBQueue(cnum);
|
||||
if (!sbm->freeq) {
|
||||
TRACE_ERROR("Failed to create free buffer queue.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return sbm;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_send_buffer *
|
||||
SBInit(sb_manager_t sbm, uint32_t init_seq)
|
||||
{
|
||||
struct tcp_send_buffer *buf;
|
||||
|
||||
/* first try dequeue from free buffer queue */
|
||||
buf = SBDequeue(sbm->freeq);
|
||||
if (!buf) {
|
||||
buf = (struct tcp_send_buffer *)malloc(sizeof(struct tcp_send_buffer));
|
||||
if (!buf) {
|
||||
perror("calloc() for buf");
|
||||
return NULL;
|
||||
}
|
||||
buf->data = MPAllocateChunk(sbm->mp);
|
||||
if (!buf->data) {
|
||||
TRACE_ERROR("Failed to fetch memory chunk for data.\n");
|
||||
return NULL;
|
||||
}
|
||||
sbm->cur_num++;
|
||||
}
|
||||
|
||||
buf->head = buf->data;
|
||||
|
||||
buf->head_off = buf->tail_off = 0;
|
||||
buf->len = buf->cum_len = 0;
|
||||
buf->size = sbm->chunk_size;
|
||||
|
||||
buf->init_seq = buf->head_seq = init_seq;
|
||||
|
||||
return buf;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
#if 0
|
||||
static void
|
||||
SBFreeInternal(sb_manager_t sbm, struct tcp_send_buffer *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
if (buf->data) {
|
||||
MPFreeChunk(sbm->mp, buf->data);
|
||||
buf->data = NULL;
|
||||
}
|
||||
|
||||
sbm->cur_num--;
|
||||
free(buf);
|
||||
}
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
SBFree(sb_manager_t sbm, struct tcp_send_buffer *buf)
|
||||
{
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
SBEnqueue(sbm->freeq, buf);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
size_t
|
||||
SBPut(sb_manager_t sbm, struct tcp_send_buffer *buf, void *data, size_t len)
|
||||
{
|
||||
size_t to_put;
|
||||
|
||||
if (len <= 0)
|
||||
return 0;
|
||||
|
||||
/* if no space, return -2 */
|
||||
to_put = MIN(len, buf->size - buf->len);
|
||||
if (to_put <= 0) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
if (buf->tail_off + to_put < buf->size) {
|
||||
/* if the data fit into the buffer, copy it */
|
||||
memcpy(buf->data + buf->tail_off, data, to_put);
|
||||
buf->tail_off += to_put;
|
||||
} else {
|
||||
/* if buffer overflows, move the existing payload and merge */
|
||||
memmove(buf->data, buf->head, buf->len);
|
||||
buf->head = buf->data;
|
||||
buf->head_off = 0;
|
||||
memcpy(buf->head + buf->len, data, to_put);
|
||||
buf->tail_off = buf->len + to_put;
|
||||
}
|
||||
buf->len += to_put;
|
||||
buf->cum_len += to_put;
|
||||
|
||||
return to_put;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
size_t
|
||||
SBRemove(sb_manager_t sbm, struct tcp_send_buffer *buf, size_t len)
|
||||
{
|
||||
size_t to_remove;
|
||||
|
||||
if (len <= 0)
|
||||
return 0;
|
||||
|
||||
to_remove = MIN(len, buf->len);
|
||||
if (to_remove <= 0) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
buf->head_off += to_remove;
|
||||
buf->head = buf->data + buf->head_off;
|
||||
buf->head_seq += to_remove;
|
||||
buf->len -= to_remove;
|
||||
|
||||
/* if buffer is empty, move the head to 0 */
|
||||
if (buf->len == 0 && buf->head_off > 0) {
|
||||
buf->head = buf->data;
|
||||
buf->head_off = buf->tail_off = 0;
|
||||
}
|
||||
|
||||
return to_remove;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
|
@ -0,0 +1,609 @@
|
|||
#include "tcp_stream.h"
|
||||
#include "fhash.h"
|
||||
#include "tcp_in.h"
|
||||
#include "tcp_out.h"
|
||||
#include "tcp_ring_buffer.h"
|
||||
#include "tcp_send_buffer.h"
|
||||
#include "eventpoll.h"
|
||||
#include "ip_out.h"
|
||||
#include "timer.h"
|
||||
#include "debug.h"
|
||||
|
||||
#define TCP_MAX_SEQ 4294967295
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/*
 * Printable names of the TCP states, indexed by tcp_stream::state
 * (see TCPStateToString). The entry for the established state was
 * misspelled "TCP_ST_ESTABILSHED" and is corrected here.
 */
char *state_str[] = {"TCP_ST_CLOSED",
	"TCP_ST_LISTEN",
	"TCP_ST_SYN_SENT",
	"TCP_ST_SYN_RCVD",
	"TCP_ST_ESTABLISHED",
	"TCP_ST_FIN_WAIT_1",
	"TCP_ST_FIN_WAIT_2",
	"TCP_ST_CLOSE_WAIT",
	"TCP_ST_CLOSING",
	"TCP_ST_LAST_ACK",
	"TCP_ST_TIME_WAIT"
};
|
||||
/*---------------------------------------------------------------------------*/
|
||||
/*
 * Printable names of the stream close reasons, indexed by
 * tcp_stream::close_reason (see the trace in DestroyTCPStream).
 */
char *close_reason_str[] = {
	"NOT_CLOSED",	/* 0 */
	"CLOSE",		/* 1 */
	"CLOSED",		/* 2 */
	"CONN_FAIL",	/* 3 */
	"CONN_LOST",	/* 4 */
	"RESET",		/* 5 */
	"NO_MEM",		/* 6 */
	"DENIED",		/* 7 */
	"TIMEDOUT"		/* 8 */
};
|
||||
/*---------------------------------------------------------------------------*/
|
||||
inline char *
|
||||
TCPStateToString(const tcp_stream *stream)
|
||||
{
|
||||
return state_str[stream->state];
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
unsigned int
|
||||
HashFlow(const tcp_stream *flow)
|
||||
{
|
||||
#if 0
|
||||
unsigned long hash = 5381;
|
||||
int c;
|
||||
int index;
|
||||
|
||||
char *str = (char *)&flow->saddr;
|
||||
index = 0;
|
||||
|
||||
while ((c = *str++) && index++ < 12) {
|
||||
if (index == 8) {
|
||||
str = (char *)&flow->sport;
|
||||
}
|
||||
hash = ((hash << 5) + hash) + c;
|
||||
}
|
||||
|
||||
return hash & (NUM_BINS - 1);
|
||||
#else
|
||||
unsigned int hash, i;
|
||||
char *key = (char *)&flow->saddr;
|
||||
|
||||
for (hash = i = 0; i < 12; ++i) {
|
||||
hash += key[i];
|
||||
hash += (hash << 10);
|
||||
hash ^= (hash >> 6);
|
||||
}
|
||||
hash += (hash << 3);
|
||||
hash ^= (hash >> 11);
|
||||
hash += (hash << 15);
|
||||
|
||||
return hash & (NUM_BINS - 1);
|
||||
#endif
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
EqualFlow(const tcp_stream *flow1, const tcp_stream *flow2)
|
||||
{
|
||||
return (flow1->saddr == flow2->saddr &&
|
||||
flow1->sport == flow2->sport &&
|
||||
flow1->daddr == flow2->daddr &&
|
||||
flow1->dport == flow2->dport);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream)
|
||||
{
|
||||
if (stream->socket) {
|
||||
if (stream->socket->epoll & MTCP_EPOLLIN) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
MTCP_EVENT_QUEUE, stream->socket, MTCP_EPOLLIN);
|
||||
#if BLOCKING_SUPPORT
|
||||
} else if (!(stream->socket->opts & MTCP_NONBLOCK)) {
|
||||
if (!stream->on_rcv_br_list) {
|
||||
stream->on_rcv_br_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&mtcp->rcv_br_list, stream, rcvvar->rcv_br_link);
|
||||
mtcp->rcv_br_list_cnt++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
TRACE_EPOLL("Stream %d: Raising read without a socket!\n", stream->id);
|
||||
}
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream)
|
||||
{
|
||||
if (stream->socket) {
|
||||
if (stream->socket->epoll & MTCP_EPOLLOUT) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
MTCP_EVENT_QUEUE, stream->socket, MTCP_EPOLLOUT);
|
||||
#if BLOCKING_SUPPORT
|
||||
} else if (!(stream->socket->opts & MTCP_NONBLOCK)) {
|
||||
if (!stream->on_snd_br_list) {
|
||||
stream->on_snd_br_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&mtcp->snd_br_list, stream, sndvar->snd_br_link);
|
||||
mtcp->snd_br_list_cnt++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
TRACE_EPOLL("Stream %d: Raising write without a socket!\n", stream->id);
|
||||
}
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream)
|
||||
{
|
||||
if (stream->socket) {
|
||||
if (stream->socket->epoll & MTCP_EPOLLRDHUP) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
MTCP_EVENT_QUEUE, stream->socket, MTCP_EPOLLRDHUP);
|
||||
} else if (stream->socket->epoll & MTCP_EPOLLIN) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
MTCP_EVENT_QUEUE, stream->socket, MTCP_EPOLLIN);
|
||||
#if BLOCKING_SUPPORT
|
||||
} else if (!(stream->socket->opts & MTCP_NONBLOCK)) {
|
||||
//pthread_cond_signal(&stream->rcvvar->read_cond);
|
||||
//pthread_cond_signal(&stream->sndvar->write_cond);
|
||||
if (!stream->on_rcv_br_list) {
|
||||
stream->on_rcv_br_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&mtcp->rcv_br_list, stream, rcvvar->rcv_br_link);
|
||||
mtcp->rcv_br_list_cnt++;
|
||||
}
|
||||
if (!stream->on_snd_br_list) {
|
||||
stream->on_snd_br_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&mtcp->snd_br_list, stream, sndvar->snd_br_link);
|
||||
mtcp->snd_br_list_cnt++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
TRACE_EPOLL("Stream %d: Raising close without a socket!\n", stream->id);
|
||||
}
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
inline void
|
||||
RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream)
|
||||
{
|
||||
if (stream->socket) {
|
||||
if (stream->socket->epoll & MTCP_EPOLLERR) {
|
||||
AddEpollEvent(mtcp->ep,
|
||||
MTCP_EVENT_QUEUE, stream->socket, MTCP_EPOLLERR);
|
||||
#if BLOCKING_SUPPORT
|
||||
} else if (!(stream->socket->opts & MTCP_NONBLOCK)) {
|
||||
if (!stream->on_rcv_br_list) {
|
||||
stream->on_rcv_br_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&mtcp->rcv_br_list, stream, rcvvar->rcv_br_link);
|
||||
mtcp->rcv_br_list_cnt++;
|
||||
}
|
||||
if (!stream->on_snd_br_list) {
|
||||
stream->on_snd_br_list = TRUE;
|
||||
TAILQ_INSERT_TAIL(&mtcp->snd_br_list, stream, sndvar->snd_br_link);
|
||||
mtcp->snd_br_list_cnt++;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
TRACE_EPOLL("Stream %d: Raising error without a socket!\n", stream->id);
|
||||
}
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
tcp_stream *
|
||||
CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type,
|
||||
uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport)
|
||||
{
|
||||
tcp_stream *stream = NULL;
|
||||
int ret;
|
||||
|
||||
uint8_t *sa;
|
||||
uint8_t *da;
|
||||
|
||||
pthread_mutex_lock(&mtcp->ctx->flow_pool_lock);
|
||||
|
||||
stream = (tcp_stream *)MPAllocateChunk(mtcp->flow_pool);
|
||||
if (!stream) {
|
||||
TRACE_ERROR("Cannot allocate memory for the stream. "
|
||||
"CONFIG.max_concurrency: %d, concurrent: %u\n",
|
||||
CONFIG.max_concurrency, mtcp->flow_cnt);
|
||||
pthread_mutex_unlock(&mtcp->ctx->flow_pool_lock);
|
||||
return NULL;
|
||||
}
|
||||
memset(stream, 0, sizeof(tcp_stream));
|
||||
|
||||
stream->rcvvar = (struct tcp_recv_vars *)MPAllocateChunk(mtcp->rv_pool);
|
||||
if (!stream->rcvvar) {
|
||||
MPFreeChunk(mtcp->flow_pool, stream);
|
||||
pthread_mutex_unlock(&mtcp->ctx->flow_pool_lock);
|
||||
return NULL;
|
||||
}
|
||||
stream->sndvar = (struct tcp_send_vars *)MPAllocateChunk(mtcp->sv_pool);
|
||||
if (!stream->sndvar) {
|
||||
MPFreeChunk(mtcp->rv_pool, stream->rcvvar);
|
||||
MPFreeChunk(mtcp->flow_pool, stream);
|
||||
pthread_mutex_unlock(&mtcp->ctx->flow_pool_lock);
|
||||
return NULL;
|
||||
}
|
||||
memset(stream->rcvvar, 0, sizeof(struct tcp_recv_vars));
|
||||
memset(stream->sndvar, 0, sizeof(struct tcp_send_vars));
|
||||
|
||||
stream->id = mtcp->g_id++;
|
||||
stream->saddr = saddr;
|
||||
stream->sport = sport;
|
||||
stream->daddr = daddr;
|
||||
stream->dport = dport;
|
||||
|
||||
ret = HTInsert(mtcp->tcp_flow_table, stream);
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("Stream %d: "
|
||||
"Failed to insert the stream into hash table.\n", stream->id);
|
||||
MPFreeChunk(mtcp->flow_pool, stream);
|
||||
pthread_mutex_unlock(&mtcp->ctx->flow_pool_lock);
|
||||
return NULL;
|
||||
}
|
||||
stream->on_hash_table = TRUE;
|
||||
mtcp->flow_cnt++;
|
||||
|
||||
pthread_mutex_unlock(&mtcp->ctx->flow_pool_lock);
|
||||
|
||||
if (socket) {
|
||||
stream->socket = socket;
|
||||
socket->stream = stream;
|
||||
}
|
||||
|
||||
stream->stream_type = type;
|
||||
stream->state = TCP_ST_LISTEN;
|
||||
|
||||
stream->on_rto_idx = -1;
|
||||
|
||||
stream->sndvar->ip_id = 0;
|
||||
stream->sndvar->mss = TCP_DEFAULT_MSS;
|
||||
stream->sndvar->wscale = TCP_DEFAULT_WSCALE;
|
||||
stream->sndvar->nif_out = GetOutputInterface(stream->daddr);
|
||||
|
||||
stream->sndvar->iss = rand() % TCP_MAX_SEQ;
|
||||
//stream->sndvar->iss = 0;
|
||||
stream->rcvvar->irs = 0;
|
||||
|
||||
stream->snd_nxt = stream->sndvar->iss;
|
||||
stream->sndvar->snd_una = stream->sndvar->iss;
|
||||
stream->sndvar->snd_wnd = CONFIG.sndbuf_size;
|
||||
stream->rcv_nxt = 0;
|
||||
stream->rcvvar->rcv_wnd = TCP_INITIAL_WINDOW;
|
||||
|
||||
stream->rcvvar->snd_wl1 = stream->rcvvar->irs - 1;
|
||||
|
||||
stream->sndvar->rto = TCP_INITIAL_RTO;
|
||||
|
||||
#if BLOCKING_SUPPORT
|
||||
if (pthread_cond_init(&stream->rcvvar->read_cond, NULL)) {
|
||||
perror("pthread_cond_init of read_cond");
|
||||
return NULL;
|
||||
}
|
||||
if (pthread_cond_init(&stream->sndvar->write_cond, NULL)) {
|
||||
perror("pthread_cond_init of write_cond");
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_SPIN_LOCK
|
||||
if (pthread_spin_init(&stream->rcvvar->read_lock, PTHREAD_PROCESS_PRIVATE)) {
|
||||
#else
|
||||
if (pthread_mutex_init(&stream->rcvvar->read_lock, NULL)) {
|
||||
#endif
|
||||
perror("pthread_mutex_init of read_lock");
|
||||
#if BLOCKING_SUPPORT
|
||||
pthread_cond_destroy(&stream->rcvvar->read_cond);
|
||||
pthread_cond_destroy(&stream->sndvar->write_cond);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
#if USE_SPIN_LOCK
|
||||
if (pthread_spin_init(&stream->sndvar->write_lock, PTHREAD_PROCESS_PRIVATE)) {
|
||||
perror("pthread_spin_init of write_lock");
|
||||
pthread_spin_destroy(&stream->rcvvar->read_lock);
|
||||
#else
|
||||
if (pthread_mutex_init(&stream->sndvar->write_lock, NULL)) {
|
||||
perror("pthread_mutex_init of write_lock");
|
||||
pthread_mutex_destroy(&stream->rcvvar->read_lock);
|
||||
#endif
|
||||
#if BLOCKING_SUPPORT
|
||||
pthread_cond_destroy(&stream->rcvvar->read_cond);
|
||||
pthread_cond_destroy(&stream->sndvar->write_cond);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sa = (uint8_t *)&stream->saddr;
|
||||
da = (uint8_t *)&stream->daddr;
|
||||
TRACE_STREAM("CREATED NEW TCP STREAM %d: "
|
||||
"%u.%u.%u.%u(%d) -> %u.%u.%u.%u(%d) (ISS: %u)\n", stream->id,
|
||||
sa[0], sa[1], sa[2], sa[3], ntohs(stream->sport),
|
||||
da[0], da[1], da[2], da[3], ntohs(stream->dport),
|
||||
stream->sndvar->iss);
|
||||
|
||||
return stream;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream)
|
||||
{
|
||||
struct sockaddr_in addr;
|
||||
int bound_addr = FALSE;
|
||||
uint8_t *sa, *da;
|
||||
int ret;
|
||||
|
||||
#ifdef DUMP_STREAM
|
||||
if (stream->close_reason != TCP_ACTIVE_CLOSE &&
|
||||
stream->close_reason != TCP_PASSIVE_CLOSE) {
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"Stream %d abnormally closed.\n", stream->id);
|
||||
DumpStream(mtcp, stream);
|
||||
DumpControlList(mtcp, mtcp->n_sender[0]);
|
||||
}
|
||||
#endif
|
||||
|
||||
sa = (uint8_t *)&stream->saddr;
|
||||
da = (uint8_t *)&stream->daddr;
|
||||
TRACE_STREAM("DESTROY TCP STREAM %d: "
|
||||
"%u.%u.%u.%u(%d) -> %u.%u.%u.%u(%d) (%s)\n", stream->id,
|
||||
sa[0], sa[1], sa[2], sa[3], ntohs(stream->sport),
|
||||
da[0], da[1], da[2], da[3], ntohs(stream->dport),
|
||||
close_reason_str[stream->close_reason]);
|
||||
|
||||
if (stream->sndvar->sndbuf) {
|
||||
TRACE_FSTAT("Stream %d: send buffer "
|
||||
"cum_len: %lu, len: %u\n", stream->id,
|
||||
stream->sndvar->sndbuf->cum_len,
|
||||
stream->sndvar->sndbuf->len);
|
||||
}
|
||||
if (stream->rcvvar->rcvbuf) {
|
||||
TRACE_FSTAT("Stream %d: recv buffer "
|
||||
"cum_len: %lu, merged_len: %u, last_len: %u\n", stream->id,
|
||||
stream->rcvvar->rcvbuf->cum_len,
|
||||
stream->rcvvar->rcvbuf->merged_len,
|
||||
stream->rcvvar->rcvbuf->last_len);
|
||||
}
|
||||
|
||||
#if RTM_STAT
|
||||
/* Triple duplicated ack stats */
|
||||
if (stream->sndvar->rstat.tdp_ack_cnt) {
|
||||
TRACE_FSTAT("Stream %d: triple duplicated ack: %u, "
|
||||
"retransmission bytes: %u, average rtm bytes/ack: %u\n",
|
||||
stream->id,
|
||||
stream->sndvar->rstat.tdp_ack_cnt, stream->sndvar->rstat.tdp_ack_bytes,
|
||||
stream->sndvar->rstat.tdp_ack_bytes / stream->sndvar->rstat.tdp_ack_cnt);
|
||||
}
|
||||
|
||||
/* Retransmission timeout stats */
|
||||
if (stream->sndvar->rstat.rto_cnt > 0) {
|
||||
TRACE_FSTAT("Stream %d: timeout count: %u, bytes: %u\n", stream->id,
|
||||
stream->sndvar->rstat.rto_cnt, stream->sndvar->rstat.rto_bytes);
|
||||
}
|
||||
|
||||
/* Recovery stats */
|
||||
if (stream->sndvar->rstat.ack_upd_cnt) {
|
||||
TRACE_FSTAT("Stream %d: snd_nxt update count: %u, "
|
||||
"snd_nxt update bytes: %u, average update bytes/update: %u\n",
|
||||
stream->id,
|
||||
stream->sndvar->rstat.ack_upd_cnt, stream->sndvar->rstat.ack_upd_bytes,
|
||||
stream->sndvar->rstat.ack_upd_bytes / stream->sndvar->rstat.ack_upd_cnt);
|
||||
}
|
||||
#if TCP_OPT_SACK_ENABLED
|
||||
if (stream->sndvar->rstat.sack_cnt) {
|
||||
TRACE_FSTAT("Selective ack count: %u, bytes: %u, "
|
||||
"average bytes/ack: %u\n",
|
||||
stream->sndvar->rstat.sack_cnt, stream->sndvar->rstat.sack_bytes,
|
||||
stream->sndvar->rstat.sack_bytes / stream->sndvar->rstat.sack_cnt);
|
||||
} else {
|
||||
TRACE_FSTAT("Selective ack count: %u, bytes: %u\n",
|
||||
stream->sndvar->rstat.sack_cnt, stream->sndvar->rstat.sack_bytes);
|
||||
}
|
||||
if (stream->sndvar->rstat.tdp_sack_cnt) {
|
||||
TRACE_FSTAT("Selective tdp ack count: %u, bytes: %u, "
|
||||
"average bytes/ack: %u\n",
|
||||
stream->sndvar->rstat.tdp_sack_cnt, stream->sndvar->rstat.tdp_sack_bytes,
|
||||
stream->sndvar->rstat.tdp_sack_bytes / stream->sndvar->rstat.tdp_sack_cnt);
|
||||
} else {
|
||||
TRACE_FSTAT("Selective ack count: %u, bytes: %u\n",
|
||||
stream->sndvar->rstat.tdp_sack_cnt, stream->sndvar->rstat.tdp_sack_bytes);
|
||||
}
|
||||
#endif /* TCP_OPT_SACK_ENABLED */
|
||||
#endif /* RTM_STAT */
|
||||
|
||||
if (stream->is_bound_addr) {
|
||||
bound_addr = TRUE;
|
||||
addr.sin_addr.s_addr = stream->saddr;
|
||||
addr.sin_port = stream->sport;
|
||||
}
|
||||
|
||||
RemoveFromControlList(mtcp, stream);
|
||||
RemoveFromSendList(mtcp, stream);
|
||||
RemoveFromACKList(mtcp, stream);
|
||||
|
||||
if (stream->on_rto_idx >= 0)
|
||||
RemoveFromRTOList(mtcp, stream);
|
||||
|
||||
if (stream->on_timewait_list)
|
||||
RemoveFromTimewaitList(mtcp, stream);
|
||||
|
||||
if (CONFIG.tcp_timeout > 0)
|
||||
RemoveFromTimeoutList(mtcp, stream);
|
||||
|
||||
#if BLOCKING_SUPPORT
|
||||
if (stream->on_snd_br_list) {
|
||||
stream->on_snd_br_list = FALSE;
|
||||
TAILQ_REMOVE(&mtcp->snd_br_list, stream, sndvar->snd_br_link);
|
||||
mtcp->snd_br_list_cnt--;
|
||||
}
|
||||
if (stream->on_rcv_br_list) {
|
||||
stream->on_rcv_br_list = FALSE;
|
||||
TAILQ_REMOVE(&mtcp->rcv_br_list, stream, rcvvar->rcv_br_link);
|
||||
mtcp->rcv_br_list_cnt--;
|
||||
}
|
||||
|
||||
if (!stream->epoll) {
|
||||
pthread_cond_signal(&stream->rcvvar->read_cond);
|
||||
pthread_cond_signal(&stream->sndvar->write_cond);
|
||||
}
|
||||
|
||||
if (pthread_cond_destroy(&stream->rcvvar->read_cond)) {
|
||||
perror("pthread_cond_destroy of read_cond");
|
||||
}
|
||||
if (pthread_cond_destroy(&stream->sndvar->write_cond)) {
|
||||
perror("pthread_cond_destroy of write_cond");
|
||||
}
|
||||
#endif
|
||||
SBUF_LOCK_DESTROY(&stream->rcvvar->read_lock);
|
||||
SBUF_LOCK_DESTROY(&stream->sndvar->write_lock);
|
||||
|
||||
assert(stream->on_hash_table == TRUE);
|
||||
|
||||
/* free ring buffers */
|
||||
if (stream->sndvar->sndbuf) {
|
||||
SBFree(mtcp->rbm_snd, stream->sndvar->sndbuf);
|
||||
stream->sndvar->sndbuf = NULL;
|
||||
}
|
||||
if (stream->rcvvar->rcvbuf) {
|
||||
RBFree(mtcp->rbm_rcv, stream->rcvvar->rcvbuf);
|
||||
stream->rcvvar->rcvbuf = NULL;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&mtcp->ctx->flow_pool_lock);
|
||||
|
||||
/* remove from flow hash table */
|
||||
HTRemove(mtcp->tcp_flow_table, stream);
|
||||
stream->on_hash_table = FALSE;
|
||||
|
||||
mtcp->flow_cnt--;
|
||||
|
||||
MPFreeChunk(mtcp->rv_pool, stream->rcvvar);
|
||||
MPFreeChunk(mtcp->sv_pool, stream->sndvar);
|
||||
MPFreeChunk(mtcp->flow_pool, stream);
|
||||
pthread_mutex_unlock(&mtcp->ctx->flow_pool_lock);
|
||||
|
||||
if (bound_addr) {
|
||||
if (mtcp->ap) {
|
||||
ret = FreeAddress(mtcp->ap, &addr);
|
||||
} else {
|
||||
ret = FreeAddress(ap, &addr);
|
||||
}
|
||||
if (ret < 0) {
|
||||
TRACE_ERROR("(NEVER HAPPEN) Failed to free address.\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef NETSTAT
|
||||
#if NETSTAT_PERTHREAD
|
||||
TRACE_STREAM("Destroyed. Remaining flows: %u\n", mtcp->flow_cnt);
|
||||
#endif /* NETSTAT_PERTHREAD */
|
||||
#endif /* NETSTAT */
|
||||
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DumpStream(mtcp_manager_t mtcp, tcp_stream *stream)
|
||||
{
|
||||
uint8_t *sa, *da;
|
||||
struct tcp_send_vars *sndvar = stream->sndvar;
|
||||
struct tcp_recv_vars *rcvvar = stream->rcvvar;
|
||||
|
||||
sa = (uint8_t *)&stream->saddr;
|
||||
da = (uint8_t *)&stream->daddr;
|
||||
thread_printf(mtcp, mtcp->log_fp, "========== Stream %u: "
|
||||
"%u.%u.%u.%u(%u) -> %u.%u.%u.%u(%u) ==========\n", stream->id,
|
||||
sa[0], sa[1], sa[2], sa[3], ntohs(stream->sport),
|
||||
da[0], da[1], da[2], da[3], ntohs(stream->dport));
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"Stream id: %u, type: %u, state: %s, close_reason: %s\n",
|
||||
stream->id, stream->stream_type,
|
||||
TCPStateToString(stream), close_reason_str[stream->close_reason]);
|
||||
if (stream->socket) {
|
||||
socket_map_t socket = stream->socket;
|
||||
thread_printf(mtcp, mtcp->log_fp, "Socket id: %d, type: %d, opts: %u\n"
|
||||
"epoll: %u (IN: %u, OUT: %u, ERR: %u, RDHUP: %u, ET: %u)\n"
|
||||
"events: %u (IN: %u, OUT: %u, ERR: %u, RDHUP: %u, ET: %u)\n",
|
||||
socket->id, socket->socktype, socket->opts,
|
||||
socket->epoll, socket->epoll & MTCP_EPOLLIN,
|
||||
socket->epoll & MTCP_EPOLLOUT, socket->epoll & MTCP_EPOLLERR,
|
||||
socket->epoll & MTCP_EPOLLRDHUP, socket->epoll & MTCP_EPOLLET,
|
||||
socket->events, socket->events & MTCP_EPOLLIN,
|
||||
socket->events & MTCP_EPOLLOUT, socket->events & MTCP_EPOLLERR,
|
||||
socket->events & MTCP_EPOLLRDHUP, socket->events & MTCP_EPOLLET);
|
||||
} else {
|
||||
thread_printf(mtcp, mtcp->log_fp, "Socket: (null)\n");
|
||||
}
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"on_hash_table: %u, on_control_list: %u (wait: %u), on_send_list: %u, "
|
||||
"on_ack_list: %u, is_wack: %u, ack_cnt: %u\n"
|
||||
"on_rto_idx: %d, on_timewait_list: %u, on_timeout_list: %u, "
|
||||
"on_rcv_br_list: %u, on_snd_br_list: %u\n"
|
||||
"on_sendq: %u, on_ackq: %u, closed: %u, on_closeq: %u, "
|
||||
"on_closeq_int: %u, on_resetq: %u, on_resetq_int: %u\n"
|
||||
"have_reset: %u, is_fin_sent: %u, is_fin_ackd: %u, "
|
||||
"saw_timestamp: %u, sack_permit: %u, "
|
||||
"is_bound_addr: %u, need_wnd_adv: %u\n", stream->on_hash_table,
|
||||
sndvar->on_control_list, stream->control_list_waiting, sndvar->on_send_list,
|
||||
sndvar->on_ack_list, sndvar->is_wack, sndvar->ack_cnt,
|
||||
stream->on_rto_idx, stream->on_timewait_list, stream->on_timeout_list,
|
||||
stream->on_rcv_br_list, stream->on_snd_br_list,
|
||||
sndvar->on_sendq, sndvar->on_ackq,
|
||||
stream->closed, sndvar->on_closeq, sndvar->on_closeq_int,
|
||||
sndvar->on_resetq, sndvar->on_resetq_int,
|
||||
stream->have_reset, sndvar->is_fin_sent,
|
||||
sndvar->is_fin_ackd, stream->saw_timestamp, stream->sack_permit,
|
||||
stream->is_bound_addr, stream->need_wnd_adv);
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp, "========== Send variables ==========\n");
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"ip_id: %u, mss: %u, eff_mss: %u, wscale: %u, nif_out: %d\n",
|
||||
sndvar->ip_id, sndvar->mss, sndvar->eff_mss,
|
||||
sndvar->wscale, sndvar->nif_out);
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"snd_nxt: %u, snd_una: %u, iss: %u, fss: %u\nsnd_wnd: %u, "
|
||||
"peer_wnd: %u, cwnd: %u, ssthresh: %u\n",
|
||||
stream->snd_nxt, sndvar->snd_una, sndvar->iss, sndvar->fss,
|
||||
sndvar->snd_wnd, sndvar->peer_wnd, sndvar->cwnd, sndvar->ssthresh);
|
||||
|
||||
if (sndvar->sndbuf) {
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"Send buffer: init_seq: %u, head_seq: %u, "
|
||||
"len: %d, cum_len: %lu, size: %d\n",
|
||||
sndvar->sndbuf->init_seq, sndvar->sndbuf->head_seq,
|
||||
sndvar->sndbuf->len, sndvar->sndbuf->cum_len, sndvar->sndbuf->size);
|
||||
} else {
|
||||
thread_printf(mtcp, mtcp->log_fp, "Send buffer: (null)\n");
|
||||
}
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"nrtx: %u, max_nrtx: %u, rto: %u, ts_rto: %u, "
|
||||
"ts_lastack_sent: %u\n", sndvar->nrtx, sndvar->max_nrtx,
|
||||
sndvar->rto, sndvar->ts_rto, sndvar->ts_lastack_sent);
|
||||
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"========== Receive variables ==========\n");
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"rcv_nxt: %u, irs: %u, rcv_wnd: %u, "
|
||||
"snd_wl1: %u, snd_wl2: %u\n",
|
||||
stream->rcv_nxt, rcvvar->irs,
|
||||
rcvvar->rcv_wnd, rcvvar->snd_wl1, rcvvar->snd_wl2);
|
||||
if (rcvvar->rcvbuf) {
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"Receive buffer: init_seq: %u, head_seq: %u, "
|
||||
"merged_len: %d, cum_len: %lu, last_len: %d, size: %d\n",
|
||||
rcvvar->rcvbuf->init_seq, rcvvar->rcvbuf->head_seq,
|
||||
rcvvar->rcvbuf->merged_len, rcvvar->rcvbuf->cum_len,
|
||||
rcvvar->rcvbuf->last_len, rcvvar->rcvbuf->size);
|
||||
} else {
|
||||
thread_printf(mtcp, mtcp->log_fp, "Receive buffer: (null)\n");
|
||||
}
|
||||
thread_printf(mtcp, mtcp->log_fp, "last_ack_seq: %u, dup_acks: %u\n",
|
||||
rcvvar->last_ack_seq, rcvvar->dup_acks);
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"ts_recent: %u, ts_lastack_rcvd: %u, ts_last_ts_upd: %u, "
|
||||
"ts_tw_expire: %u\n", rcvvar->ts_recent, rcvvar->ts_lastack_rcvd,
|
||||
rcvvar->ts_last_ts_upd, rcvvar->ts_tw_expire);
|
||||
thread_printf(mtcp, mtcp->log_fp,
|
||||
"srtt: %u, mdev: %u, mdev_max: %u, rttvar: %u, rtt_seq: %u\n",
|
||||
rcvvar->srtt, rcvvar->mdev, rcvvar->mdev_max,
|
||||
rcvvar->rttvar, rcvvar->rtt_seq);
|
||||
}
|
|
@ -0,0 +1,214 @@
|
|||
/*
|
||||
* TCP stream queue - tcp_stream_queue.c/h
|
||||
*
|
||||
* EunYoung Jeong
|
||||
*
|
||||
* Part of this code borrows Click's simple queue implementation
|
||||
*
|
||||
* ============================== Click License =============================
|
||||
*
|
||||
* Copyright (c) 1999-2000 Massachusetts Institute of Technology
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, subject to the conditions
|
||||
* listed in the Click LICENSE file. These conditions include: you must
|
||||
* preserve this copyright notice, and you cannot mention the copyright
|
||||
* holders in advertising related to the Software without their permission.
|
||||
* The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
|
||||
* notice is a summary of the Click LICENSE file; the license in that file is
|
||||
* legally binding.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "tcp_stream_queue.h"
|
||||
#include "debug.h"
|
||||
|
||||
#ifndef _INDEX_TYPE_
|
||||
#define _INDEX_TYPE_
|
||||
typedef uint32_t index_type;
|
||||
typedef int32_t signed_index_type;
|
||||
#endif
|
||||
/*---------------------------------------------------------------------------*/
|
||||
struct stream_queue
|
||||
{
|
||||
index_type _capacity;
|
||||
volatile index_type _head;
|
||||
volatile index_type _tail;
|
||||
|
||||
struct tcp_stream * volatile * _q;
|
||||
};
|
||||
/*----------------------------------------------------------------------------*/
|
||||
stream_queue_int *
|
||||
CreateInternalStreamQueue(int size)
|
||||
{
|
||||
stream_queue_int *sq;
|
||||
|
||||
sq = (stream_queue_int *)calloc(1, sizeof(stream_queue_int));
|
||||
if (!sq) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sq->array = (tcp_stream **)calloc(size, sizeof(tcp_stream *));
|
||||
if (!sq->array) {
|
||||
free(sq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sq->size = size;
|
||||
sq->first = sq->last = 0;
|
||||
sq->count = 0;
|
||||
|
||||
return sq;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyInternalStreamQueue(stream_queue_int *sq)
|
||||
{
|
||||
if (!sq)
|
||||
return;
|
||||
|
||||
if (sq->array) {
|
||||
free(sq->array);
|
||||
sq->array = NULL;
|
||||
}
|
||||
|
||||
free(sq);
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
int
|
||||
StreamInternalEnqueue(stream_queue_int *sq, struct tcp_stream *stream)
|
||||
{
|
||||
if (sq->count >= sq->size) {
|
||||
/* queue is full */
|
||||
TRACE_INFO("[WARNING] Queue overflow. Set larger queue size! "
|
||||
"count: %d, size: %d\n", sq->count, sq->size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
sq->array[sq->last++] = stream;
|
||||
sq->count++;
|
||||
if (sq->last >= sq->size) {
|
||||
sq->last = 0;
|
||||
}
|
||||
assert (sq->count <= sq->size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
/*----------------------------------------------------------------------------*/
|
||||
struct tcp_stream *
|
||||
StreamInternalDequeue(stream_queue_int *sq)
|
||||
{
|
||||
struct tcp_stream *stream = NULL;
|
||||
|
||||
if (sq->count <= 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
stream = sq->array[sq->first++];
|
||||
assert(stream != NULL);
|
||||
if (sq->first >= sq->size) {
|
||||
sq->first = 0;
|
||||
}
|
||||
sq->count--;
|
||||
assert(sq->count >= 0);
|
||||
|
||||
return stream;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline index_type
|
||||
NextIndex(stream_queue_t sq, index_type i)
|
||||
{
|
||||
return (i != sq->_capacity ? i + 1: 0);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline index_type
|
||||
PrevIndex(stream_queue_t sq, index_type i)
|
||||
{
|
||||
return (i != 0 ? i - 1: sq->_capacity);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
StreamQueueIsEmpty(stream_queue_t sq)
|
||||
{
|
||||
return (sq->_head == sq->_tail);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
static inline void
|
||||
StreamMemoryBarrier(tcp_stream * volatile stream, volatile index_type index)
|
||||
{
|
||||
__asm__ volatile("" : : "m" (stream), "m" (index));
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
stream_queue_t
|
||||
CreateStreamQueue(int capacity)
|
||||
{
|
||||
stream_queue_t sq;
|
||||
|
||||
sq = (stream_queue_t)calloc(1, sizeof(struct stream_queue));
|
||||
if (!sq)
|
||||
return NULL;
|
||||
|
||||
sq->_q = (tcp_stream **)calloc(capacity + 1, sizeof(tcp_stream *));
|
||||
if (!sq->_q) {
|
||||
free(sq);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sq->_capacity = capacity;
|
||||
sq->_head = sq->_tail = 0;
|
||||
|
||||
return sq;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
void
|
||||
DestroyStreamQueue(stream_queue_t sq)
|
||||
{
|
||||
if (!sq)
|
||||
return;
|
||||
|
||||
if (sq->_q) {
|
||||
free((void *)sq->_q);
|
||||
sq->_q = NULL;
|
||||
}
|
||||
|
||||
free(sq);
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
int
|
||||
StreamEnqueue(stream_queue_t sq, tcp_stream *stream)
|
||||
{
|
||||
index_type h = sq->_head;
|
||||
index_type t = sq->_tail;
|
||||
index_type nt = NextIndex(sq, t);
|
||||
|
||||
if (nt != h) {
|
||||
sq->_q[t] = stream;
|
||||
StreamMemoryBarrier(sq->_q[t], sq->_tail);
|
||||
sq->_tail = nt;
|
||||
return 0;
|
||||
}
|
||||
|
||||
TRACE_ERROR("Exceed capacity of stream queue!\n");
|
||||
return -1;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
||||
tcp_stream *
|
||||
StreamDequeue(stream_queue_t sq)
|
||||
{
|
||||
index_type h = sq->_head;
|
||||
index_type t = sq->_tail;
|
||||
|
||||
if (h != t) {
|
||||
tcp_stream *stream = sq->_q[h];
|
||||
StreamMemoryBarrier(sq->_q[h], sq->_head);
|
||||
sq->_head = NextIndex(sq, h);
|
||||
assert(stream);
|
||||
return stream;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
/*---------------------------------------------------------------------------*/
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue