diff --git a/net/colo.c b/net/colo.c
index 124994c99f..6a6eacd2dc 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -134,6 +134,8 @@ Connection *connection_new(ConnectionKey *key)
 
     conn->ip_proto = key->ip_proto;
     conn->processing = false;
+    conn->offset = 0;
+    conn->syn_flag = 0;
     g_queue_init(&conn->primary_list);
     g_queue_init(&conn->secondary_list);
 
diff --git a/net/colo.h b/net/colo.h
index 6720a3a8b7..7c524f3a1c 100644
--- a/net/colo.h
+++ b/net/colo.h
@@ -62,6 +62,13 @@ typedef struct Connection {
     /* flag to enqueue unprocessed_connections */
     bool processing;
     uint8_t ip_proto;
+    /* offset = secondary_seq - primary_seq */
+    tcp_seq offset;
+    /*
+     * set when a bare SYN is seen from the primary, so that the
+     * offset is finalized only once per TCP connection
+     */
+    int syn_flag;
 } Connection;
 
 uint32_t connection_key_hash(const void *opaque);
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
index 9bf80d3955..89abe72d4e 100644
--- a/net/filter-rewriter.c
+++ b/net/filter-rewriter.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "trace.h"
 #include "net/colo.h"
 #include "net/filter.h"
 #include "net/net.h"
@@ -58,6 +59,93 @@ static int is_tcp_packet(Packet *pkt)
     }
 }
 
+/* Rewrite th_ack of a TCP packet from the primary guest; always returns 0 */
+static int handle_primary_tcp_pkt(NetFilterState *nf,
+                                  Connection *conn,
+                                  Packet *pkt)
+{
+    struct tcphdr *tcp_pkt;
+
+    tcp_pkt = (struct tcphdr *)pkt->transport_header;
+    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        char *sdebug, *ddebug; /* inet_ntoa() reuses a static buffer */
+        sdebug = g_strdup(inet_ntoa(pkt->ip->ip_src));
+        ddebug = g_strdup(inet_ntoa(pkt->ip->ip_dst));
+        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+        g_free(sdebug);
+        g_free(ddebug);
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_SYN) {
+        /*
+         * a bare SYN starts a new connection: arm the flag so the
+         * offset is finalized exactly once, on the next pure ACK
+         */
+        conn->syn_flag = 1;
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
+        if (conn->syn_flag) {
+            /*
+             * offset = secondary_seq - primary_seq
+             * this ACK comes from the guest on the primary node,
+             * so th_ack - 1 gives primary_seq
+             */
+            conn->offset -= (ntohl(tcp_pkt->th_ack) - 1);
+            conn->syn_flag = 0;
+        }
+        /* shift the ACK into the secondary's sequence space */
+        tcp_pkt->th_ack = htonl(ntohl(tcp_pkt->th_ack) + conn->offset);
+
+        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
+/* Rewrite th_seq of a TCP packet from the secondary guest; always returns 0 */
+static int handle_secondary_tcp_pkt(NetFilterState *nf,
+                                    Connection *conn,
+                                    Packet *pkt)
+{
+    struct tcphdr *tcp_pkt;
+
+    tcp_pkt = (struct tcphdr *)pkt->transport_header;
+
+    if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) {
+        char *sdebug, *ddebug; /* inet_ntoa() reuses a static buffer */
+        sdebug = g_strdup(inet_ntoa(pkt->ip->ip_src));
+        ddebug = g_strdup(inet_ntoa(pkt->ip->ip_dst));
+        trace_colo_filter_rewriter_pkt_info(__func__, sdebug, ddebug,
+                    ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack),
+                    tcp_pkt->th_flags);
+        trace_colo_filter_rewriter_conn_offset(conn->offset);
+        g_free(sdebug);
+        g_free(ddebug);
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == (TH_ACK | TH_SYN)) {
+        /*
+         * SYN|ACK: stash secondary_seq in offset for now;
+         * handle_primary_tcp_pkt later subtracts primary_seq
+         * so that offset = secondary_seq - primary_seq
+         */
+        conn->offset = ntohl(tcp_pkt->th_seq);
+    }
+
+    if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) == TH_ACK) {
+        /* shift the seq back into the primary's sequence space */
+        tcp_pkt->th_seq = htonl(ntohl(tcp_pkt->th_seq) - conn->offset);
+
+        net_checksum_calculate((uint8_t *)pkt->data, pkt->size);
+    }
+
+    return 0;
+}
+
 static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
                                          NetClientState *sender,
                                          unsigned flags,
@@ -97,10 +185,30 @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
 
         if (sender == nf->netdev) {
             /* NET_FILTER_DIRECTION_TX */
-            /* handle_primary_tcp_pkt */
+            if (!handle_primary_tcp_pkt(nf, conn, pkt)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * The rewritten packet was queued for sending above;
+                 * return 1 to block the original packet here
+                 */
+                return 1;
+            }
         } else {
             /* NET_FILTER_DIRECTION_RX */
-            /* handle_secondary_tcp_pkt */
+            if (!handle_secondary_tcp_pkt(nf, conn, pkt)) {
+                qemu_net_queue_send(s->incoming_queue, sender, 0,
+                (const uint8_t *)pkt->data, pkt->size, NULL);
+                packet_destroy(pkt, NULL);
+                pkt = NULL;
+                /*
+                 * The rewritten packet was queued for sending above;
+                 * return 1 to block the original packet here
+                 */
+                return 1;
+            }
         }
     }
 
diff --git a/trace-events b/trace-events
index b39536852e..1cb9d37ce4 100644
--- a/trace-events
+++ b/trace-events
@@ -150,6 +150,11 @@ colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, con
 colo_old_packet_check_found(int64_t old_time) "%" PRId64
 colo_compare_miscompare(void) ""
 
+# net/filter-rewriter.c
+colo_filter_rewriter_debug(void) ""
+colo_filter_rewriter_pkt_info(const char *func, const char *src, const char *dst, uint32_t seq, uint32_t ack, uint32_t flag) "%s: src/dst: %s/%s p: seq/ack=%u/%u flags=%x"
+colo_filter_rewriter_conn_offset(uint32_t offset) ": offset=%u"
+
 ### Guest events, keep at bottom
 
 # @vaddr: Access' virtual address.