/*
 *  xen paravirt network card backend
 *
 *  (c) Gerd Hoffmann
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 *  Contributions after 2012-01-13 are licensed under the terms of the
 *  GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qemu/log.h"
#include "qemu/qemu-print.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"

/*
 * NOTE(review): the next three directives had lost their operands (they
 * read as a bare "#include", which does not compile).  The header names
 * below are restored from what upstream QEMU is believed to use here --
 * please confirm against the project tree.
 */
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/wait.h>

#include "net/net.h"
#include "net/checksum.h"
#include "net/util.h"

#include "hw/xen/xen-backend.h"
#include "hw/xen/xen-bus-helper.h"
#include "hw/qdev-properties.h"
#include "hw/qdev-properties-system.h"

#include "hw/xen/interface/io/netif.h"
#include "hw/xen/interface/io/xs_wire.h"

#include "trace.h"

/* ------------------------------------------------------------- */

/*
 * Per-device state of the Xen network backend.
 */
struct XenNetDev {
    struct XenDevice      xendev;  /* must be first */
    XenEventChannel       *event_channel; /* bound in xen_netdev_connect() */
    int                   dev;            /* device index ("idx" property) */
    int                   tx_work;        /* bumped when more tx requests
                                           * are pending, see
                                           * net_tx_response() */
    unsigned int          tx_ring_ref;    /* frontend "tx-ring-ref" */
    unsigned int          rx_ring_ref;    /* frontend "rx-ring-ref" */
    struct netif_tx_sring *txs;           /* mapped tx shared ring page */
    struct netif_rx_sring *rxs;           /* mapped rx shared ring page */
    netif_tx_back_ring_t  tx_ring;
    netif_rx_back_ring_t  rx_ring;
    NICConf               conf;
    NICState              *nic;           /* created in xen_netdev_realize() */
};

typedef struct XenNetDev XenNetDev;

#define TYPE_XEN_NET_DEVICE "xen-net-device"
OBJECT_DECLARE_SIMPLE_TYPE(XenNetDev, XEN_NET_DEVICE)

/* ------------------------------------------------------------- */

/*
 * Place a response for tx request @txp with status @st on the tx ring,
 * push it to the frontend and kick the event channel if the ring macros
 * say a notification is required.
 *
 * If the response just produced catches up with the request consumer
 * index, do a final check for new requests and record pending work in
 * netdev->tx_work so that net_tx_packets() goes round its loop again.
 */
static void net_tx_response(struct XenNetDev *netdev, netif_tx_request_t *txp,
                            int8_t st)
{
    RING_IDX i = netdev->tx_ring.rsp_prod_pvt;
    netif_tx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netdev->tx_ring, i);
    resp->id     = txp->id;
    resp->status = st;

#if 0
    if (txp->flags & NETTXF_extra_info) {
        RING_GET_RESPONSE(&netdev->tx_ring, ++i)->status = NETIF_RSP_NULL;
    }
#endif

    netdev->tx_ring.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->tx_ring, notify);
    if (notify) {
        xen_device_notify_event_channel(XEN_DEVICE(netdev),
                                        netdev->event_channel, NULL);
    }

    if (i == netdev->tx_ring.req_cons) {
        int more_to_do;

        RING_FINAL_CHECK_FOR_REQUESTS(&netdev->tx_ring, more_to_do);
        if (more_to_do) {
            netdev->tx_work++;
        }
    }
}

/*
 * Fail the single tx request @txp with NETIF_RSP_ERROR.
 *
 * @end is only referenced by the disabled (#if 0) variant below; the
 * active code path ignores it.
 */
static void net_tx_error(struct XenNetDev *netdev, netif_tx_request_t *txp,
                         RING_IDX end)
{
#if 0
    /*
     * Hmm, why netback fails everything in the ring?
     * Should we do that even when not supporting SG and TSO?
     */
    RING_IDX cons = netdev->tx_ring.req_cons;

    do {
        make_tx_response(netif, txp, NETIF_RSP_ERROR);
        if (cons >= end) {
            break;
        }
        txp = RING_GET_REQUEST(&netdev->tx_ring, cons++);
    } while (1);
    netdev->tx_ring.req_cons = cons;
    netif_schedule_work(netif);
    netif_put(netif);
#else
    net_tx_response(netdev, txp, NETIF_RSP_ERROR);
#endif
}

/*
 * Drain the tx ring: for every queued request, validate it, map the
 * granted page read-only, hand the payload to the NIC queue and answer
 * the request.  Invalid requests are answered with an error and skipped.
 *
 * The outer loop repeats for as long as net_tx_response() flagged more
 * work in netdev->tx_work.
 *
 * Must be called with the BQL held (asserted).
 *
 * Returns true if at least one request was consumed from the ring.
 */
static bool net_tx_packets(struct XenNetDev *netdev)
{
    bool done_something = false;
    netif_tx_request_t txreq;
    RING_IDX rc, rp;
    void *page;
    void *tmpbuf = NULL;    /* lazily allocated bounce buffer, freed below */

    assert(bql_locked());

    for (;;) {
        rc = netdev->tx_ring.req_cons;
        rp = netdev->tx_ring.sring->req_prod;
        xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

        while ((rc != rp)) {
            if (RING_REQUEST_CONS_OVERFLOW(&netdev->tx_ring, rc)) {
                break;
            }
            /*
             * Take a private copy of the request; every check and use
             * below operates on this copy, not on the shared ring.
             */
            memcpy(&txreq, RING_GET_REQUEST(&netdev->tx_ring, rc),
                   sizeof(txreq));
            netdev->tx_ring.req_cons = ++rc;
            done_something = true;

#if 1
            /*
             * should not happen in theory, we don't announce the
             * feature-{sg,gso,whatelse} flags in xenstore (yet?)
             */
            if (txreq.flags & NETTXF_extra_info) {
                qemu_log_mask(LOG_UNIMP, "vif%u: FIXME: extra info flag\n",
                              netdev->dev);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_more_data) {
                qemu_log_mask(LOG_UNIMP, "vif%u: FIXME: more data flag\n",
                              netdev->dev);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
#endif

            /* 14 is presumably the Ethernet header length -- confirm */
            if (txreq.size < 14) {
                qemu_log_mask(LOG_GUEST_ERROR, "vif%u: bad packet size: %d\n",
                              netdev->dev, txreq.size);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            /* Only one page is mapped, so the payload must fit inside it */
            if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
                qemu_log_mask(LOG_GUEST_ERROR, "vif%u: error: page crossing\n",
                              netdev->dev);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }

            trace_xen_netdev_tx(netdev->dev, txreq.gref, txreq.offset,
                                txreq.size, txreq.flags,
                                (txreq.flags & NETTXF_csum_blank)
                                ? " csum_blank" : "",
                                (txreq.flags & NETTXF_data_validated)
                                ? " data_validated" : "",
                                (txreq.flags & NETTXF_more_data)
                                ? " more_data" : "",
                                (txreq.flags & NETTXF_extra_info)
                                ? " extra_info" : "");

            page = xen_device_map_grant_refs(&netdev->xendev, &txreq.gref, 1,
                                             PROT_READ, NULL);
            if (page == NULL) {
                qemu_log_mask(LOG_GUEST_ERROR,
                              "vif%u: tx gref dereference failed (%d)\n",
                              netdev->dev, txreq.gref);
                net_tx_error(netdev, &txreq, rc);
                continue;
            }
            if (txreq.flags & NETTXF_csum_blank) {
                /* have read-only mapping -> can't fill checksum in-place */
                if (!tmpbuf) {
                    tmpbuf = g_malloc(XEN_PAGE_SIZE);
                }
                memcpy(tmpbuf, page + txreq.offset, txreq.size);
                net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
                qemu_send_packet(qemu_get_queue(netdev->nic), tmpbuf,
                                 txreq.size);
            } else {
                qemu_send_packet(qemu_get_queue(netdev->nic),
                                 page + txreq.offset, txreq.size);
            }
            xen_device_unmap_grant_refs(&netdev->xendev, page, &txreq.gref, 1,
                                        NULL);
            net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
        }
        if (!netdev->tx_work) {
            break;
        }
        netdev->tx_work = 0;
    }
    g_free(tmpbuf);
    return done_something;
}

/* ------------------------------------------------------------- */

/*
 * Place a response for rx request @req on the rx ring and notify the
 * frontend if required.
 *
 * The response status carries @size, unless @st is negative, in which
 * case it carries @st instead.  @offset and @flags are copied verbatim.
 */
static void net_rx_response(struct XenNetDev *netdev,
                            netif_rx_request_t *req, int8_t st,
                            uint16_t offset, uint16_t size,
                            uint16_t flags)
{
    RING_IDX i = netdev->rx_ring.rsp_prod_pvt;
    netif_rx_response_t *resp;
    int notify;

    resp = RING_GET_RESPONSE(&netdev->rx_ring, i);
    resp->offset     = offset;
    resp->flags      = flags;
    resp->id         = req->id;
    resp->status     = (int16_t)size;
    if (st < 0) {
        resp->status = (int16_t)st;
    }

    trace_xen_netdev_rx(netdev->dev, i, resp->status, resp->flags);

    netdev->rx_ring.rsp_prod_pvt = ++i;
    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev->rx_ring, notify);
    if (notify) {
        xen_device_notify_event_channel(XEN_DEVICE(netdev),
                                        netdev->event_channel, NULL);
    }
}

/* Offset inside the granted page at which received data is stored */
#define NET_IP_ALIGN 2

/*
 * NetClientInfo.receive callback: copy one packet into a page granted by
 * the frontend and answer the corresponding rx request.
 *
 * Must be called with the BQL held (asserted).
 *
 * Returns:
 *   -1    if the backend is not in XenbusStateConnected, the packet does
 *         not fit into a page at offset NET_IP_ALIGN, or the grant
 *         reference cannot be mapped (the request is then answered with
 *         NETIF_RSP_ERROR);
 *   0     if no rx request is currently available;
 *   @size on success.
 */
static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf,
                             size_t size)
{
    struct XenNetDev *netdev = qemu_get_nic_opaque(nc);
    netif_rx_request_t rxreq;
    RING_IDX rc, rp;
    void *page;

    assert(bql_locked());

    if (xen_device_backend_get_state(&netdev->xendev) !=
        XenbusStateConnected) {
        return -1;
    }

    rc = netdev->rx_ring.req_cons;
    rp = netdev->rx_ring.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
        return 0;
    }
    if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) {
        /*
         * Terminate the message with a newline like every other log
         * message in this file, and cast the limit so that the conversion
         * specifier matches the argument type whatever XEN_PAGE_SIZE is.
         */
        qemu_log_mask(LOG_GUEST_ERROR, "vif%u: packet too big (%lu > %lu)\n",
                      netdev->dev, (unsigned long)size,
                      (unsigned long)(XEN_PAGE_SIZE - NET_IP_ALIGN));
        return -1;
    }

    /* Private copy of the request; the ring slot is consumed from here on */
    memcpy(&rxreq, RING_GET_REQUEST(&netdev->rx_ring, rc), sizeof(rxreq));
    netdev->rx_ring.req_cons = ++rc;

    page = xen_device_map_grant_refs(&netdev->xendev, &rxreq.gref, 1,
                                     PROT_WRITE, NULL);
    if (page == NULL) {
        qemu_log_mask(LOG_GUEST_ERROR,
                      "vif%u: rx gref dereference failed (%d)\n",
                      netdev->dev, rxreq.gref);
        net_rx_response(netdev, &rxreq, NETIF_RSP_ERROR, 0, 0, 0);
        return -1;
    }
    memcpy(page + NET_IP_ALIGN, buf, size);
    xen_device_unmap_grant_refs(&netdev->xendev, page, &rxreq.gref, 1, NULL);
    net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);

    return size;
}

/* ------------------------------------------------------------- */

static NetClientInfo net_xen_info = {
    .type = NET_CLIENT_DRIVER_NIC,
    .size = sizeof(NICState),
    .receive = net_rx_packet,
};

/*
 * XenDeviceClass.realize: pick a default MAC address if none is set,
 * publish it under the frontend "mac" key, create the NIC and advertise
 * feature-rx-copy=1 / feature-rx-flip=0 in the backend area.
 */
static void xen_netdev_realize(XenDevice *xendev, Error **errp)
{
    ERRP_GUARD();
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
    NetClientState *nc;

    qemu_macaddr_default_if_unset(&netdev->conf.macaddr);

    xen_device_frontend_printf(xendev, "mac", "%02x:%02x:%02x:%02x:%02x:%02x",
                               netdev->conf.macaddr.a[0],
                               netdev->conf.macaddr.a[1],
                               netdev->conf.macaddr.a[2],
                               netdev->conf.macaddr.a[3],
                               netdev->conf.macaddr.a[4],
                               netdev->conf.macaddr.a[5]);

    netdev->nic = qemu_new_nic(&net_xen_info, &netdev->conf,
                               object_get_typename(OBJECT(xendev)),
                               DEVICE(xendev)->id,
                               &xendev->qdev.mem_reentrancy_guard, netdev);

    nc = qemu_get_queue(netdev->nic);
    qemu_format_nic_info_str(nc, netdev->conf.macaddr.a);

    /* fill info */
    xen_device_backend_printf(xendev, "feature-rx-copy", "%u", 1);
    xen_device_backend_printf(xendev, "feature-rx-flip", "%u", 0);

    trace_xen_netdev_realize(netdev->dev, nc->info_str, nc->peer ?
                             nc->peer->name : "(none)");
}

/*
 * Event channel handler: process pending tx requests, then flush any
 * packets queued for this NIC.  Returns whether any tx request was
 * consumed.
 */
static bool net_event(void *_xendev)
{
    XenNetDev *netdev = XEN_NET_DEVICE(_xendev);
    bool done_something;

    done_something = net_tx_packets(netdev);
    qemu_flush_queued_packets(qemu_get_queue(netdev->nic));
    return done_something;
}

/*
 * Connect to the frontend: read the ring references and event channel
 * port from the frontend area, require request-rx-copy to be non-zero,
 * map both shared rings, bind the event channel and process any tx
 * requests that are already queued.
 *
 * Must be called with the BQL held (asserted).
 *
 * Returns true on success.  On failure returns false with @errp set;
 * resources acquired so far are NOT released here -- the caller is
 * expected to run xen_netdev_disconnect().
 */
static bool xen_netdev_connect(XenDevice *xendev, Error **errp)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);
    unsigned int port, rx_copy;

    assert(bql_locked());

    if (xen_device_frontend_scanf(xendev, "tx-ring-ref", "%u",
                                  &netdev->tx_ring_ref) != 1) {
        error_setg(errp, "failed to read tx-ring-ref");
        return false;
    }

    if (xen_device_frontend_scanf(xendev, "rx-ring-ref", "%u",
                                  &netdev->rx_ring_ref) != 1) {
        error_setg(errp, "failed to read rx-ring-ref");
        return false;
    }

    if (xen_device_frontend_scanf(xendev, "event-channel", "%u",
                                  &port) != 1) {
        error_setg(errp, "failed to read event-channel");
        return false;
    }

    /* A missing request-rx-copy key is treated like an explicit 0 */
    if (xen_device_frontend_scanf(xendev, "request-rx-copy", "%u",
                                  &rx_copy) != 1) {
        rx_copy = 0;
    }
    if (rx_copy == 0) {
        error_setg(errp, "frontend doesn't support rx-copy");
        return false;
    }

    netdev->txs = xen_device_map_grant_refs(xendev,
                                            &netdev->tx_ring_ref, 1,
                                            PROT_READ | PROT_WRITE,
                                            errp);
    if (!netdev->txs) {
        error_prepend(errp, "failed to map tx grant ref: ");
        return false;
    }

    netdev->rxs = xen_device_map_grant_refs(xendev,
                                            &netdev->rx_ring_ref, 1,
                                            PROT_READ | PROT_WRITE,
                                            errp);
    if (!netdev->rxs) {
        error_prepend(errp, "failed to map rx grant ref: ");
        return false;
    }

    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE);
    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE);

    netdev->event_channel = xen_device_bind_event_channel(xendev, port,
                                                          net_event,
                                                          netdev,
                                                          errp);
    if (!netdev->event_channel) {
        return false;
    }

    trace_xen_netdev_connect(netdev->dev, netdev->tx_ring_ref,
                             netdev->rx_ring_ref, port);

    net_tx_packets(netdev);
    return true;
}

/*
 * Undo xen_netdev_connect(): detach the ring state from the shared
 * pages, unbind the event channel and unmap both rings.  Each resource
 * is released only if it is currently held, so this can be called on a
 * device that is not (or only partially) connected.
 *
 * Must be called with the BQL held (asserted).
 */
static void xen_netdev_disconnect(XenDevice *xendev, Error **errp)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);

    trace_xen_netdev_disconnect(netdev->dev);

    assert(bql_locked());

    netdev->tx_ring.sring = NULL;
    netdev->rx_ring.sring = NULL;

    if (netdev->event_channel) {
        xen_device_unbind_event_channel(xendev, netdev->event_channel,
                                        errp);
        netdev->event_channel = NULL;
    }
    if (netdev->txs) {
        xen_device_unmap_grant_refs(xendev, netdev->txs,
                                    &netdev->tx_ring_ref, 1, errp);
        netdev->txs = NULL;
    }
    if (netdev->rxs) {
        xen_device_unmap_grant_refs(xendev, netdev->rxs,
                                    &netdev->rx_ring_ref, 1, errp);
        netdev->rxs = NULL;
    }
}

/* -------------------------------------------------------------------- */

/*
 * XenDeviceClass.frontend_changed: drive the backend state from the
 * frontend's xenbus state.
 *
 *  - Connected: (re)connect unless the backend is already connected; on
 *    connect failure, disconnect and move the backend to Closing.
 *  - Closing: move the backend to Closing.
 *  - Closed / Unknown: disconnect and move the backend to Closed.
 *  - anything else: ignored.
 */
static void xen_netdev_frontend_changed(XenDevice *xendev,
                                        enum xenbus_state frontend_state,
                                        Error **errp)
{
    ERRP_GUARD();
    enum xenbus_state backend_state = xen_device_backend_get_state(xendev);

    trace_xen_netdev_frontend_changed(xendev->name, frontend_state);

    switch (frontend_state) {
    case XenbusStateConnected:
        if (backend_state == XenbusStateConnected) {
            break;
        }

        xen_netdev_disconnect(xendev, errp);
        if (*errp) {
            break;
        }

        if (!xen_netdev_connect(xendev, errp)) {
            /* Release whatever the failed connect attempt acquired */
            xen_netdev_disconnect(xendev, NULL);
            xen_device_backend_set_state(xendev, XenbusStateClosing);
            break;
        }

        xen_device_backend_set_state(xendev, XenbusStateConnected);
        break;

    case XenbusStateClosing:
        xen_device_backend_set_state(xendev, XenbusStateClosing);
        break;

    case XenbusStateClosed:
    case XenbusStateUnknown:
        xen_netdev_disconnect(xendev, errp);
        if (*errp) {
            break;
        }

        xen_device_backend_set_state(xendev, XenbusStateClosed);
        break;

    case XenbusStateInitialised:
        /*
         * Linux netback does nothing on the frontend going (back) to
         * XenbusStateInitialised, so do the same here.
         */
    default:
        break;
    }
}

/*
 * XenDeviceClass.get_name: return the device index as a newly allocated
 * decimal string.
 *
 * If no index was configured (dev == -1), probe the frontend's
 * device/vif/<idx> nodes in xenstore and take the first index whose node
 * does not exist (ENOENT).  Probing starts at 0 in XEN_EMULATE mode and
 * at 1 otherwise, and gives up at index 100.
 *
 * Returns NULL with @errp set if xenstore cannot be read or no free
 * index is found.
 */
static char *xen_netdev_get_name(XenDevice *xendev, Error **errp)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);

    if (netdev->dev == -1) {
        XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
        char fe_path[XENSTORE_ABS_PATH_MAX + 1];
        int idx = (xen_mode == XEN_EMULATE) ? 0 : 1;
        char *value;

        /* Theoretically we could go up to INT_MAX here but that's overkill */
        while (idx < 100) {
            snprintf(fe_path, sizeof(fe_path),
                     "/local/domain/%u/device/vif/%u",
                     xendev->frontend_id, idx);
            value = qemu_xen_xs_read(xenbus->xsh, XBT_NULL, fe_path, NULL);
            if (!value) {
                if (errno == ENOENT) {
                    netdev->dev = idx;
                    goto found;
                }
                error_setg(errp, "cannot read %s: %s", fe_path,
                           strerror(errno));
                return NULL;
            }
            /* Node exists, so this index is taken; try the next one */
            free(value);
            idx++;
        }
        error_setg(errp, "cannot find device index for netdev device");
        return NULL;
    }
 found:
    return g_strdup_printf("%u", netdev->dev);
}

/*
 * XenDeviceClass.unrealize: disconnect from the frontend and delete the
 * NIC if one was created.
 */
static void xen_netdev_unrealize(XenDevice *xendev)
{
    XenNetDev *netdev = XEN_NET_DEVICE(xendev);

    trace_xen_netdev_unrealize(netdev->dev);

    /* Disconnect from the frontend in case this has not already happened */
    xen_netdev_disconnect(xendev, NULL);

    if (netdev->nic) {
        qemu_del_nic(netdev->nic);
    }
}

/* ------------------------------------------------------------- */

static Property xen_netdev_properties[] = {
    DEFINE_NIC_PROPERTIES(XenNetDev, conf),
    DEFINE_PROP_INT32("idx", XenNetDev, dev, -1),
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * Class initialiser: register the XenDevice callbacks, the backend and
 * device names, the device category and the qdev properties.
 */
static void xen_netdev_class_init(ObjectClass *class, void *data)
{
    DeviceClass *dev_class = DEVICE_CLASS(class);
    XenDeviceClass *xendev_class = XEN_DEVICE_CLASS(class);

    xendev_class->backend = "qnic";
    xendev_class->device = "vif";
    xendev_class->get_name = xen_netdev_get_name;
    xendev_class->realize = xen_netdev_realize;
    xendev_class->frontend_changed = xen_netdev_frontend_changed;
    xendev_class->unrealize = xen_netdev_unrealize;
    set_bit(DEVICE_CATEGORY_NETWORK, dev_class->categories);
    dev_class->user_creatable = true;

    device_class_set_props(dev_class, xen_netdev_properties);
}

static const TypeInfo xen_net_type_info = {
    .name = TYPE_XEN_NET_DEVICE,
    .parent = TYPE_XEN_DEVICE,
    .instance_size = sizeof(XenNetDev),
    .class_init = xen_netdev_class_init,
};

static void xen_net_register_types(void)
{
    type_register_static(&xen_net_type_info);
}

type_init(xen_net_register_types)

/* Called to
instantiate a XenNetDev when the backend is detected. */ static void xen_net_device_create(XenBackendInstance *backend, QDict *opts, Error **errp) { ERRP_GUARD(); XenBus *xenbus = xen_backend_get_bus(backend); const char *name = xen_backend_get_name(backend); XenDevice *xendev = NULL; unsigned long number; const char *macstr; XenNetDev *net; MACAddr mac; if (qemu_strtoul(name, NULL, 10, &number) || number >= INT_MAX) { error_setg(errp, "failed to parse name '%s'", name); goto fail; } trace_xen_netdev_create(number); macstr = qdict_get_try_str(opts, "mac"); if (macstr == NULL) { error_setg(errp, "no MAC address found"); goto fail; } if (net_parse_macaddr(mac.a, macstr) < 0) { error_setg(errp, "failed to parse MAC address"); goto fail; } xendev = XEN_DEVICE(qdev_new(TYPE_XEN_NET_DEVICE)); net = XEN_NET_DEVICE(xendev); net->dev = number; memcpy(&net->conf.macaddr, &mac, sizeof(mac)); if (qdev_realize_and_unref(DEVICE(xendev), BUS(xenbus), errp)) { xen_backend_set_device(backend, xendev); return; } error_prepend(errp, "realization of net device %lu failed: ", number); fail: if (xendev) { object_unparent(OBJECT(xendev)); } } static void xen_net_device_destroy(XenBackendInstance *backend, Error **errp) { ERRP_GUARD(); XenDevice *xendev = xen_backend_get_device(backend); XenNetDev *netdev = XEN_NET_DEVICE(xendev); trace_xen_netdev_destroy(netdev->dev); object_unparent(OBJECT(xendev)); } static const XenBackendInfo xen_net_backend_info = { .type = "qnic", .create = xen_net_device_create, .destroy = xen_net_device_destroy, }; static void xen_net_register_backend(void) { xen_backend_register(&xen_net_backend_info); } xen_backend_init(xen_net_register_backend);