From ae0cebd71215188951902c5ccdd8685e431c286c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 12 May 2020 21:49:17 +0200 Subject: [PATCH 01/43] hw/ide: Make IDEDMAOps handlers take a const IDEDMA pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handlers don't need to modify the IDEDMA structure. Make it const. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200512194917.15807-1-philmd@redhat.com> Acked-by: John Snow Signed-off-by: Kevin Wolf --- hw/ide/ahci.c | 18 +++++++++--------- hw/ide/core.c | 6 +++--- hw/ide/macio.c | 6 +++--- hw/ide/pci.c | 12 ++++++------ include/hw/ide/internal.h | 12 ++++++------ 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index fc82cbd5f1..009120f88b 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -44,7 +44,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot); static void ahci_reset_port(AHCIState *s, int port); static bool ahci_write_fis_d2h(AHCIDevice *ad); static void ahci_init_d2h(AHCIDevice *ad); -static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit); +static int ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit); static bool ahci_map_clb_address(AHCIDevice *ad); static bool ahci_map_fis_address(AHCIDevice *ad); static void ahci_unmap_clb_address(AHCIDevice *ad); @@ -1338,7 +1338,7 @@ out: } /* Transfer PIO data between RAM and device */ -static void ahci_pio_transfer(IDEDMA *dma) +static void ahci_pio_transfer(const IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; @@ -1397,7 +1397,7 @@ out: } } -static void ahci_start_dma(IDEDMA *dma, IDEState *s, +static void ahci_start_dma(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *dma_cb) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1406,7 +1406,7 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s, dma_cb(s, 0); } -static void ahci_restart_dma(IDEDMA *dma) 
+static void ahci_restart_dma(const IDEDMA *dma) { /* Nothing to do, ahci_start_dma already resets s->io_buffer_offset. */ } @@ -1415,7 +1415,7 @@ static void ahci_restart_dma(IDEDMA *dma) * IDE/PIO restarts are handled by the core layer, but NCQ commands * need an extra kick from the AHCI HBA. */ -static void ahci_restart(IDEDMA *dma) +static void ahci_restart(const IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); int i; @@ -1432,7 +1432,7 @@ static void ahci_restart(IDEDMA *dma) * Called in DMA and PIO R/W chains to read the PRDT. * Not shared with NCQ pathways. */ -static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit) +static int32_t ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; @@ -1453,7 +1453,7 @@ static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit) * Called via dma_buf_commit, for both DMA and PIO paths. * sglist destruction is handled within dma_buf_commit. 
*/ -static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) +static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); @@ -1461,7 +1461,7 @@ static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes) ad->cur_cmd->status = cpu_to_le32(tx_bytes); } -static int ahci_dma_rw_buf(IDEDMA *dma, bool is_write) +static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); IDEState *s = &ad->port.ifs[0]; @@ -1486,7 +1486,7 @@ static int ahci_dma_rw_buf(IDEDMA *dma, bool is_write) return 1; } -static void ahci_cmd_done(IDEDMA *dma) +static void ahci_cmd_done(const IDEDMA *dma) { AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma); diff --git a/hw/ide/core.c b/hw/ide/core.c index 689bb36409..d997a78e47 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2570,16 +2570,16 @@ static void ide_init1(IDEBus *bus, int unit) ide_sector_write_timer_cb, s); } -static int ide_nop_int(IDEDMA *dma, bool is_write) +static int ide_nop_int(const IDEDMA *dma, bool is_write) { return 0; } -static void ide_nop(IDEDMA *dma) +static void ide_nop(const IDEDMA *dma) { } -static int32_t ide_nop_int32(IDEDMA *dma, int32_t l) +static int32_t ide_nop_int32(const IDEDMA *dma, int32_t l) { return 0; } diff --git a/hw/ide/macio.c b/hw/ide/macio.c index 30af0e93e6..62a599a075 100644 --- a/hw/ide/macio.c +++ b/hw/ide/macio.c @@ -376,17 +376,17 @@ static void macio_ide_reset(DeviceState *dev) ide_bus_reset(&d->bus); } -static int ide_nop_int(IDEDMA *dma, bool is_write) +static int ide_nop_int(const IDEDMA *dma, bool is_write) { return 0; } -static int32_t ide_nop_int32(IDEDMA *dma, int32_t l) +static int32_t ide_nop_int32(const IDEDMA *dma, int32_t l) { return 0; } -static void ide_dbdma_start(IDEDMA *dma, IDEState *s, +static void ide_dbdma_start(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *cb) { MACIOIDEState *m = container_of(dma, MACIOIDEState, dma); diff --git a/hw/ide/pci.c 
b/hw/ide/pci.c index 97347f07f1..5e85c4ad17 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -103,7 +103,7 @@ const MemoryRegionOps pci_ide_data_le_ops = { .endianness = DEVICE_LITTLE_ENDIAN, }; -static void bmdma_start_dma(IDEDMA *dma, IDEState *s, +static void bmdma_start_dma(const IDEDMA *dma, IDEState *s, BlockCompletionFunc *dma_cb) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); @@ -126,7 +126,7 @@ static void bmdma_start_dma(IDEDMA *dma, IDEState *s, * IDEState.io_buffer_size will contain the number of bytes described * by the PRDs, whether or not we added them to the sglist. */ -static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit) +static int32_t bmdma_prepare_buf(const IDEDMA *dma, int32_t limit) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); IDEState *s = bmdma_active_if(bm); @@ -181,7 +181,7 @@ static int32_t bmdma_prepare_buf(IDEDMA *dma, int32_t limit) } /* return 0 if buffer completed */ -static int bmdma_rw_buf(IDEDMA *dma, bool is_write) +static int bmdma_rw_buf(const IDEDMA *dma, bool is_write) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); IDEState *s = bmdma_active_if(bm); @@ -230,7 +230,7 @@ static int bmdma_rw_buf(IDEDMA *dma, bool is_write) return 1; } -static void bmdma_set_inactive(IDEDMA *dma, bool more) +static void bmdma_set_inactive(const IDEDMA *dma, bool more) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); @@ -242,7 +242,7 @@ static void bmdma_set_inactive(IDEDMA *dma, bool more) } } -static void bmdma_restart_dma(IDEDMA *dma) +static void bmdma_restart_dma(const IDEDMA *dma) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); @@ -257,7 +257,7 @@ static void bmdma_cancel(BMDMAState *bm) } } -static void bmdma_reset(IDEDMA *dma) +static void bmdma_reset(const IDEDMA *dma) { BMDMAState *bm = DO_UPCAST(BMDMAState, dma, dma); diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h index 55da35d768..1a7869e85d 100644 --- a/include/hw/ide/internal.h +++ b/include/hw/ide/internal.h @@ -322,12 +322,12 @@ 
typedef enum { IDE_HD, IDE_CD, IDE_CFATA } IDEDriveKind; typedef void EndTransferFunc(IDEState *); -typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *); -typedef void DMAVoidFunc(IDEDMA *); -typedef int DMAIntFunc(IDEDMA *, bool); -typedef int32_t DMAInt32Func(IDEDMA *, int32_t len); -typedef void DMAu32Func(IDEDMA *, uint32_t); -typedef void DMAStopFunc(IDEDMA *, bool); +typedef void DMAStartFunc(const IDEDMA *, IDEState *, BlockCompletionFunc *); +typedef void DMAVoidFunc(const IDEDMA *); +typedef int DMAIntFunc(const IDEDMA *, bool); +typedef int32_t DMAInt32Func(const IDEDMA *, int32_t len); +typedef void DMAu32Func(const IDEDMA *, uint32_t); +typedef void DMAStopFunc(const IDEDMA *, bool); struct unreported_events { bool eject_request; From 5fb0a6b5e771b235275acc8af7da490de072917b Mon Sep 17 00:00:00 2001 From: Pavel Dovgalyuk Date: Wed, 3 Jun 2020 13:22:02 +0300 Subject: [PATCH 02/43] icount: make dma reads deterministic Windows guest sometimes makes DMA requests with overlapping target addresses. This leads to the following structure of iov for the block driver: addr size1 addr size2 addr size3 It means that three adjacent disk blocks should be read into the same memory buffer. Windows does not expect anything from these bytes (should it be data from the first block, or the last one, or some mix), but uses them somehow. It leads to non-determinism of the guest execution, because block driver does not preserve any order of reading. This situation was discussed on the mailing list at least twice: https://lists.gnu.org/archive/html/qemu-devel/2010-09/msg01996.html https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg05185.html This patch makes such disk reads deterministic in icount mode. It splits the whole request into several parts. Parts may overlap, but SGs inside one part do not overlap. Parts that are processed later overwrite the prior ones in case of overlapping. 
Examples for different SG part sequences: 1) A1 1000 A2 1000 A1 1000 A3 1000 -> One request is split into two. A1 1000 A2 1000 -- A1 1000 A3 1000 2) A1 800 A2 1000 A1 1000 -> A1 800 A2 1000 -- A1 1000 Signed-off-by: Pavel Dovgalyuk Message-Id: <159117972206.12193.12939621311413561779.stgit@pasha-ThinkPad-X280> Signed-off-by: Kevin Wolf --- dma-helpers.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/dma-helpers.c b/dma-helpers.c index e8a26e81e1..2a77b5a9cb 100644 --- a/dma-helpers.c +++ b/dma-helpers.c @@ -13,6 +13,8 @@ #include "trace-root.h" #include "qemu/thread.h" #include "qemu/main-loop.h" +#include "sysemu/cpus.h" +#include "qemu/range.h" /* #define DEBUG_IOMMU */ @@ -142,6 +144,26 @@ static void dma_blk_cb(void *opaque, int ret) cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte; cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte; mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir); + /* + * Make reads deterministic in icount mode. Windows sometimes issues + * disk read requests with overlapping SGs. It leads + * to non-determinism, because resulting buffer contents may be mixed + * from several sectors. This code splits all SGs into several + * groups. SGs in every group do not overlap. 
+ */ + if (mem && use_icount && dbs->dir == DMA_DIRECTION_FROM_DEVICE) { + int i; + for (i = 0 ; i < dbs->iov.niov ; ++i) { + if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base, + dbs->iov.iov[i].iov_len, (intptr_t)mem, + cur_len)) { + dma_memory_unmap(dbs->sg->as, mem, cur_len, + dbs->dir, cur_len); + mem = NULL; + break; + } + } + } if (!mem) break; qemu_iovec_add(&dbs->iov, mem, cur_len); From 7aa1c247b466870b0704d3ccdc3755e5e7394dca Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Wed, 3 Jun 2020 11:32:39 +0200 Subject: [PATCH 03/43] virtio-blk: Refactor the code that processes queued requests Move the code that processes queued requests from virtio_blk_dma_restart_bh() to its own, non-static, function. This will allow us to call it from the virtio_blk_data_plane_start() in a future patch. Signed-off-by: Sergio Lopez Message-Id: <20200603093240.40489-2-slp@redhat.com> Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 16 +++++++++++----- include/hw/virtio/virtio-blk.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index f5f6fc925e..978574e4da 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -819,15 +819,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_handle_output_do(s, vq); } -static void virtio_blk_dma_restart_bh(void *opaque) +void virtio_blk_process_queued_requests(VirtIOBlock *s) { - VirtIOBlock *s = opaque; VirtIOBlockReq *req = s->rq; MultiReqBuffer mrb = {}; - qemu_bh_delete(s->bh); - s->bh = NULL; - s->rq = NULL; aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); @@ -855,6 +851,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } +static void virtio_blk_dma_restart_bh(void *opaque) +{ + VirtIOBlock *s = opaque; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + virtio_blk_process_queued_requests(s); +} + static void virtio_blk_dma_restart_cb(void 
*opaque, int running, RunState state) { diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index 1e62f869b2..f584ad9b86 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -86,5 +86,6 @@ typedef struct MultiReqBuffer { } MultiReqBuffer; bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +void virtio_blk_process_queued_requests(VirtIOBlock *s); #endif From 49b44549ace7890fffdf027fd3695218ee7f1121 Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Wed, 3 Jun 2020 11:32:40 +0200 Subject: [PATCH 04/43] virtio-blk: On restart, process queued requests in the proper context On restart, we were scheduling a BH to process queued requests, which would run before starting up the data plane, leading to those requests being assigned and started on coroutines on the main context. This could cause requests to be wrongly processed in parallel from different threads (the main thread and the iothread managing the data plane), potentially leading to multiple issues. 
For example, stopping and resuming a VM multiple times while the guest is generating I/O on a virtio_blk device can trigger a crash with a stack tracing looking like this one: <------> Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): #0 0x00005567a13b99d6 in iov_memset (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:69 #1 0x00005567a13bab73 in qemu_iovec_memset (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) at block/linux-aio.c:236 #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) at util/aio-posix.c:520 #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) at util/aio-posix.c:562 #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) at util/aio-posix.c:597 #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6 Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): #0 0x00005567a13b99d6 in iov_memset (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:69 #1 0x00005567a13bab73 in qemu_iovec_memset 
(qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) at block/linux-aio.c:375 #6 0x00005567a12f4af2 in laio_co_submit (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) at block/linux-aio.c:394 #7 0x00005567a12f1803 in raw_co_prw (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) at block/file-posix.c:1892 #8 0x00005567a12f1941 in raw_co_pwritev (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) at block/file-posix.c:1925 #9 0x00005567a12fe3e1 in bdrv_driver_pwritev (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) at block/io.c:1183 #10 0x00005567a1300340 in bdrv_aligned_pwritev (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 #11 0x00005567a1300b29 in bdrv_co_pwritev_part (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:2137 #12 0x00005567a12baba1 in qcow2_co_pwritev_task (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 #15 0x00005567a13cf00c in coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 #16 0x00007ff73eb622e0 in __start_context () at 
/lib64/libc.so.6 #17 0x00007ff6626f1350 in () #18 0x0000000000000000 in () <------> This is also known to cause crashes with this message (assertion failed): aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule' RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 Signed-off-by: Sergio Lopez Message-Id: <20200603093240.40489-3-slp@redhat.com> Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 8 ++++++++ hw/block/virtio-blk.c | 18 ++++++++++++------ include/hw/virtio/virtio-blk.h | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 1b52e8159c..37499c5564 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -220,6 +220,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) goto fail_guest_notifiers; } + /* Process queued requests before the ones in vring */ + virtio_blk_process_queued_requests(vblk, false); + /* Kick right away to begin processing requests already in vring */ for (i = 0; i < nvqs; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); @@ -239,6 +242,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) return 0; fail_guest_notifiers: + /* + * If we failed to set up the guest notifiers queued requests will be + * processed on the main context. 
+ */ + virtio_blk_process_queued_requests(vblk, false); vblk->dataplane_disabled = true; s->starting = false; vblk->dataplane_started = true; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 978574e4da..8882a1d1d4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -819,7 +819,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_handle_output_do(s, vq); } -void virtio_blk_process_queued_requests(VirtIOBlock *s) +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) { VirtIOBlockReq *req = s->rq; MultiReqBuffer mrb = {}; @@ -847,7 +847,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) if (mrb.num_reqs) { virtio_blk_submit_multireq(s->blk, &mrb); } - blk_dec_in_flight(s->conf.conf.blk); + if (is_bh) { + blk_dec_in_flight(s->conf.conf.blk); + } aio_context_release(blk_get_aio_context(s->conf.conf.blk)); } @@ -858,21 +860,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) qemu_bh_delete(s->bh); s->bh = NULL; - virtio_blk_process_queued_requests(s); + virtio_blk_process_queued_requests(s, true); } static void virtio_blk_dma_restart_cb(void *opaque, int running, RunState state) { VirtIOBlock *s = opaque; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusState *bus = VIRTIO_BUS(qbus); if (!running) { return; } - if (!s->bh) { - /* FIXME The data plane is not started yet, so these requests are - * processed in the main thread. */ + /* + * If ioeventfd is enabled, don't schedule the BH here as queued + * requests will be processed while starting the data plane. 
+ */ + if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), virtio_blk_dma_restart_bh, s); blk_inc_in_flight(s->conf.conf.blk); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index f584ad9b86..b1334c3904 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -86,6 +86,6 @@ typedef struct MultiReqBuffer { } MultiReqBuffer; bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); -void virtio_blk_process_queued_requests(VirtIOBlock *s); +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh); #endif From e37adbebd19634836369e9572d9aaa0f088332fd Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Mon, 8 Jun 2020 12:33:39 -0500 Subject: [PATCH 05/43] block: Refactor subdirectory recursion during make Rather than listing block/monitor from the top-level Makefile.objs, we should instead list monitor from block/Makefile.objs. Suggested-by: Kevin Wolf Fixes: bb4e58c613 Signed-off-by: Eric Blake Message-Id: <20200608173339.3244211-1-eblake@redhat.com> Signed-off-by: Kevin Wolf --- Makefile.objs | 2 +- block/Makefile.objs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.objs b/Makefile.objs index c09d95dfe3..7ce2588b89 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -13,7 +13,7 @@ chardev-obj-y = chardev/ authz-obj-y = authz/ -block-obj-y = block/ block/monitor/ nbd/ scsi/ +block-obj-y = block/ nbd/ scsi/ block-obj-y += block.o blockjob.o job.o block-obj-y += qemu-io-cmds.o block-obj-$(CONFIG_REPLICATION) += replication.o diff --git a/block/Makefile.objs b/block/Makefile.objs index 3635b6b4c1..96028eedce 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -46,6 +46,7 @@ block-obj-y += aio_task.o block-obj-y += backup-top.o block-obj-y += filter-compress.o common-obj-y += monitor/ +block-obj-y += monitor/ block-obj-y += stream.o From f17d68477030b24a13ecb55d371f57f19199210d Mon Sep 17 00:00:00 2001 From: Eric 
Blake Date: Mon, 8 Jun 2020 14:08:21 -0500 Subject: [PATCH 06/43] qcow2: Tweak comments on qcow2_get_persistent_dirty_bitmap_size For now, we don't have persistent bitmaps in any other formats, but that might not be true in the future. Make it obvious that our incoming parameter is not necessarily a qcow2 image, and therefore is limited to just the bdrv_dirty_bitmap_* API calls (rather than probing into qcow2 internals). Suggested-by: Kevin Wolf Signed-off-by: Eric Blake Message-Id: <20200608190821.3293867-1-eblake@redhat.com> Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index 7bf12502da..1f38806ca6 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -1757,19 +1757,20 @@ bool qcow2_supports_persistent_dirty_bitmap(BlockDriverState *bs) } /* - * Compute the space required for bitmaps in @bs. + * Compute the space required to copy bitmaps from @in_bs. * * The computation is based as if copying to a new image with the - * given @cluster_size, which may differ from the cluster size in @bs. + * given @cluster_size, which may differ from the cluster size in + * @in_bs; in fact, @in_bs might be something other than qcow2. 
*/ -uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *bs, +uint64_t qcow2_get_persistent_dirty_bitmap_size(BlockDriverState *in_bs, uint32_t cluster_size) { uint64_t bitmaps_size = 0; BdrvDirtyBitmap *bm; size_t bitmap_dir_size = 0; - FOR_EACH_DIRTY_BITMAP(bs, bm) { + FOR_EACH_DIRTY_BITMAP(in_bs, bm) { if (bdrv_dirty_bitmap_get_persistence(bm)) { const char *name = bdrv_dirty_bitmap_name(bm); uint32_t granularity = bdrv_dirty_bitmap_granularity(bm); From f7e8c23f398c82eeba8ac02650f50e9c1747e033 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:12 +0200 Subject: [PATCH 07/43] hw/block/nvme: fix pci doorbell size calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The size of the BAR is 0x1000 (main registers) + 8 bytes for each queue. Currently, the size of the BAR is calculated like so: n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); Since the 'num_queues' parameter already accounts for the admin queue, this should in any case not need to be incremented by one. Also, the size should be initialized to (0x1000). n->reg_size = pow2ceil(0x1000 + 2 * n->num_queues * 4); Thus, with the default value of num_queues (64), we will set aside room for 1 admin queue and 63 I/O queues (4 bytes per doorbell, 2 doorbells per queue). Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-2-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index a21eeca2fb..c1476e8b2a 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -53,6 +53,9 @@ #include "trace.h" #include "nvme.h" +#define NVME_REG_SIZE 0x1000 +#define NVME_DB_SIZE 4 + #define NVME_GUEST_ERR(trace, fmt, ...) 
\ do { \ (trace_##trace)(__VA_ARGS__); \ @@ -1401,7 +1404,9 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, 0x80); n->num_namespaces = 1; - n->reg_size = pow2ceil(0x1004 + 2 * (n->num_queues + 1) * 4); + + /* num_queues is really number of pairs, so each has two doorbells */ + n->reg_size = pow2ceil(NVME_REG_SIZE + 2 * n->num_queues * NVME_DB_SIZE); n->ns_size = bs_size / (uint64_t)n->num_namespaces; n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); From 6f4ee2e9aacf01d8f4bfa5b54caff9aa46b2ff04 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:13 +0200 Subject: [PATCH 08/43] hw/block/nvme: rename trace events to pci_nvme MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the prefix of all nvme device related trace events to 'pci_nvme' to not clash with trace events from the nvme block driver. Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-3-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 198 +++++++++++++++++++++--------------------- hw/block/trace-events | 180 +++++++++++++++++++------------------- 2 files changed, 188 insertions(+), 190 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index c1476e8b2a..e8f5c5ab82 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -125,16 +125,16 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) { if (cq->irq_enabled) { if (msix_enabled(&(n->parent_obj))) { - trace_nvme_irq_msix(cq->vector); + trace_pci_nvme_irq_msix(cq->vector); msix_notify(&(n->parent_obj), cq->vector); } else { - trace_nvme_irq_pin(); + trace_pci_nvme_irq_pin(); assert(cq->cqid < 64); n->irq_status |= 1 << cq->cqid; nvme_irq_check(n); } } else { - trace_nvme_irq_masked(); + trace_pci_nvme_irq_masked(); } } @@ -159,7 +159,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, 
uint64_t prp1, int num_prps = (len >> n->page_bits) + 1; if (unlikely(!prp1)) { - trace_nvme_err_invalid_prp(); + trace_pci_nvme_err_invalid_prp(); return NVME_INVALID_FIELD | NVME_DNR; } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { @@ -173,7 +173,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, len -= trans_len; if (len) { if (unlikely(!prp2)) { - trace_nvme_err_invalid_prp2_missing(); + trace_pci_nvme_err_invalid_prp2_missing(); goto unmap; } if (len > n->page_size) { @@ -189,7 +189,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, if (i == n->max_prp_ents - 1 && len > n->page_size) { if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { - trace_nvme_err_invalid_prplist_ent(prp_ent); + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); goto unmap; } @@ -202,7 +202,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, } if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) { - trace_nvme_err_invalid_prplist_ent(prp_ent); + trace_pci_nvme_err_invalid_prplist_ent(prp_ent); goto unmap; } @@ -217,7 +217,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, } } else { if (unlikely(prp2 & (n->page_size - 1))) { - trace_nvme_err_invalid_prp2_align(prp2); + trace_pci_nvme_err_invalid_prp2_align(prp2); goto unmap; } if (qsg->nsg) { @@ -265,20 +265,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, QEMUIOVector iov; uint16_t status = NVME_SUCCESS; - trace_nvme_dma_read(prp1, prp2); + trace_pci_nvme_dma_read(prp1, prp2); if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) { return NVME_INVALID_FIELD | NVME_DNR; } if (qsg.nsg > 0) { if (unlikely(dma_buf_read(ptr, len, &qsg))) { - trace_nvme_err_invalid_dma(); + trace_pci_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } qemu_sglist_destroy(&qsg); } else { if 
(unlikely(qemu_iovec_from_buf(&iov, 0, ptr, len) != len)) { - trace_nvme_err_invalid_dma(); + trace_pci_nvme_err_invalid_dma(); status = NVME_INVALID_FIELD | NVME_DNR; } qemu_iovec_destroy(&iov); @@ -367,7 +367,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, uint32_t count = nlb << data_shift; if (unlikely(slba + nlb > ns->id_ns.nsze)) { - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -395,11 +395,11 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeNamespace *ns, NvmeCmd *cmd, int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0; enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ; - trace_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); + trace_pci_nvme_rw(is_write ? "write" : "read", nlb, data_size, slba); if (unlikely((slba + nlb) > ns->id_ns.nsze)) { block_acct_invalid(blk_get_stats(n->conf.blk), acct); - trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); + trace_pci_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze); return NVME_LBA_RANGE | NVME_DNR; } @@ -434,7 +434,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) uint32_t nsid = le32_to_cpu(cmd->nsid); if (unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } @@ -448,7 +448,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_CMD_READ: return nvme_rw(n, ns, cmd, req); default: - trace_nvme_err_invalid_opc(cmd->opcode); + trace_pci_nvme_err_invalid_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -473,11 +473,11 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qid = le16_to_cpu(c->qid); if (unlikely(!qid || nvme_check_sqid(n, qid))) { - trace_nvme_err_invalid_del_sq(qid); + 
trace_pci_nvme_err_invalid_del_sq(qid); return NVME_INVALID_QID | NVME_DNR; } - trace_nvme_del_sq(qid); + trace_pci_nvme_del_sq(qid); sq = n->sq[qid]; while (!QTAILQ_EMPTY(&sq->out_req_list)) { @@ -541,26 +541,26 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->sq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - trace_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); + trace_pci_nvme_create_sq(prp1, sqid, cqid, qsize, qflags); if (unlikely(!cqid || nvme_check_cqid(n, cqid))) { - trace_nvme_err_invalid_create_sq_cqid(cqid); + trace_pci_nvme_err_invalid_create_sq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } if (unlikely(!sqid || !nvme_check_sqid(n, sqid))) { - trace_nvme_err_invalid_create_sq_sqid(sqid); + trace_pci_nvme_err_invalid_create_sq_sqid(sqid); return NVME_INVALID_QID | NVME_DNR; } if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_nvme_err_invalid_create_sq_size(qsize); + trace_pci_nvme_err_invalid_create_sq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } if (unlikely(!prp1 || prp1 & (n->page_size - 1))) { - trace_nvme_err_invalid_create_sq_addr(prp1); + trace_pci_nvme_err_invalid_create_sq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } if (unlikely(!(NVME_SQ_FLAGS_PC(qflags)))) { - trace_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); + trace_pci_nvme_err_invalid_create_sq_qflags(NVME_SQ_FLAGS_PC(qflags)); return NVME_INVALID_FIELD | NVME_DNR; } sq = g_malloc0(sizeof(*sq)); @@ -586,17 +586,17 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qid = le16_to_cpu(c->qid); if (unlikely(!qid || nvme_check_cqid(n, qid))) { - trace_nvme_err_invalid_del_cq_cqid(qid); + trace_pci_nvme_err_invalid_del_cq_cqid(qid); return NVME_INVALID_CQID | NVME_DNR; } cq = n->cq[qid]; if (unlikely(!QTAILQ_EMPTY(&cq->sq_list))) { - trace_nvme_err_invalid_del_cq_notempty(qid); + trace_pci_nvme_err_invalid_del_cq_notempty(qid); return NVME_INVALID_QUEUE_DEL; } 
nvme_irq_deassert(n, cq); - trace_nvme_del_cq(qid); + trace_pci_nvme_del_cq(qid); nvme_free_cq(cq, n); return NVME_SUCCESS; } @@ -629,27 +629,27 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) uint16_t qflags = le16_to_cpu(c->cq_flags); uint64_t prp1 = le64_to_cpu(c->prp1); - trace_nvme_create_cq(prp1, cqid, vector, qsize, qflags, - NVME_CQ_FLAGS_IEN(qflags) != 0); + trace_pci_nvme_create_cq(prp1, cqid, vector, qsize, qflags, + NVME_CQ_FLAGS_IEN(qflags) != 0); if (unlikely(!cqid || !nvme_check_cqid(n, cqid))) { - trace_nvme_err_invalid_create_cq_cqid(cqid); + trace_pci_nvme_err_invalid_create_cq_cqid(cqid); return NVME_INVALID_CQID | NVME_DNR; } if (unlikely(!qsize || qsize > NVME_CAP_MQES(n->bar.cap))) { - trace_nvme_err_invalid_create_cq_size(qsize); + trace_pci_nvme_err_invalid_create_cq_size(qsize); return NVME_MAX_QSIZE_EXCEEDED | NVME_DNR; } if (unlikely(!prp1)) { - trace_nvme_err_invalid_create_cq_addr(prp1); + trace_pci_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } if (unlikely(vector > n->num_queues)) { - trace_nvme_err_invalid_create_cq_vector(vector); + trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } if (unlikely(!(NVME_CQ_FLAGS_PC(qflags)))) { - trace_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); + trace_pci_nvme_err_invalid_create_cq_qflags(NVME_CQ_FLAGS_PC(qflags)); return NVME_INVALID_FIELD | NVME_DNR; } @@ -664,7 +664,7 @@ static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeIdentify *c) uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); - trace_nvme_identify_ctrl(); + trace_pci_nvme_identify_ctrl(); return nvme_dma_read_prp(n, (uint8_t *)&n->id_ctrl, sizeof(n->id_ctrl), prp1, prp2); @@ -677,10 +677,10 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); - trace_nvme_identify_ns(nsid); + trace_pci_nvme_identify_ns(nsid); if 
(unlikely(nsid == 0 || nsid > n->num_namespaces)) { - trace_nvme_err_invalid_ns(nsid, n->num_namespaces); + trace_pci_nvme_err_invalid_ns(nsid, n->num_namespaces); return NVME_INVALID_NSID | NVME_DNR; } @@ -700,7 +700,7 @@ static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) uint16_t ret; int i, j = 0; - trace_nvme_identify_nslist(min_nsid); + trace_pci_nvme_identify_nslist(min_nsid); list = g_malloc0(data_len); for (i = 0; i < n->num_namespaces; i++) { @@ -729,14 +729,14 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) case 0x02: return nvme_identify_nslist(n, c); default: - trace_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); + trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); return NVME_INVALID_FIELD | NVME_DNR; } } static inline void nvme_set_timestamp(NvmeCtrl *n, uint64_t ts) { - trace_nvme_setfeat_timestamp(ts); + trace_pci_nvme_setfeat_timestamp(ts); n->host_timestamp = le64_to_cpu(ts); n->timestamp_set_qemu_clock_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); @@ -769,7 +769,7 @@ static inline uint64_t nvme_get_timestamp(const NvmeCtrl *n) /* If the host timestamp is non-zero, set the timestamp origin */ ts.origin = n->host_timestamp ? 0x01 : 0x00; - trace_nvme_getfeat_timestamp(ts.all); + trace_pci_nvme_getfeat_timestamp(ts.all); return cpu_to_le64(ts.all); } @@ -793,17 +793,17 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) switch (dw10) { case NVME_VOLATILE_WRITE_CACHE: result = blk_enable_write_cache(n->conf.blk); - trace_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); + trace_pci_nvme_getfeat_vwcache(result ? 
"enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); - trace_nvme_getfeat_numq(result); + trace_pci_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); break; default: - trace_nvme_err_invalid_getfeat(dw10); + trace_pci_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } @@ -839,9 +839,9 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) blk_set_enable_write_cache(n->conf.blk, dw11 & 1); break; case NVME_NUMBER_OF_QUEUES: - trace_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, - ((dw11 >> 16) & 0xFFFF) + 1, - n->num_queues - 1, n->num_queues - 1); + trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, + ((dw11 >> 16) & 0xFFFF) + 1, + n->num_queues - 1, n->num_queues - 1); req->cqe.result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); break; @@ -851,7 +851,7 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; default: - trace_nvme_err_invalid_setfeat(dw10); + trace_pci_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; } return NVME_SUCCESS; @@ -875,7 +875,7 @@ static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_ADM_CMD_GET_FEATURES: return nvme_get_feature(n, cmd, req); default: - trace_nvme_err_invalid_admin_opc(cmd->opcode); + trace_pci_nvme_err_invalid_admin_opc(cmd->opcode); return NVME_INVALID_OPCODE | NVME_DNR; } } @@ -938,77 +938,77 @@ static int nvme_start_ctrl(NvmeCtrl *n) uint32_t page_size = 1 << page_bits; if (unlikely(n->cq[0])) { - trace_nvme_err_startfail_cq(); + trace_pci_nvme_err_startfail_cq(); return -1; } if (unlikely(n->sq[0])) { - trace_nvme_err_startfail_sq(); + trace_pci_nvme_err_startfail_sq(); return -1; } if (unlikely(!n->bar.asq)) { - trace_nvme_err_startfail_nbarasq(); + trace_pci_nvme_err_startfail_nbarasq(); return -1; } if (unlikely(!n->bar.acq)) { - 
trace_nvme_err_startfail_nbaracq(); + trace_pci_nvme_err_startfail_nbaracq(); return -1; } if (unlikely(n->bar.asq & (page_size - 1))) { - trace_nvme_err_startfail_asq_misaligned(n->bar.asq); + trace_pci_nvme_err_startfail_asq_misaligned(n->bar.asq); return -1; } if (unlikely(n->bar.acq & (page_size - 1))) { - trace_nvme_err_startfail_acq_misaligned(n->bar.acq); + trace_pci_nvme_err_startfail_acq_misaligned(n->bar.acq); return -1; } if (unlikely(NVME_CC_MPS(n->bar.cc) < NVME_CAP_MPSMIN(n->bar.cap))) { - trace_nvme_err_startfail_page_too_small( + trace_pci_nvme_err_startfail_page_too_small( NVME_CC_MPS(n->bar.cc), NVME_CAP_MPSMIN(n->bar.cap)); return -1; } if (unlikely(NVME_CC_MPS(n->bar.cc) > NVME_CAP_MPSMAX(n->bar.cap))) { - trace_nvme_err_startfail_page_too_large( + trace_pci_nvme_err_startfail_page_too_large( NVME_CC_MPS(n->bar.cc), NVME_CAP_MPSMAX(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOCQES(n->bar.cc) < NVME_CTRL_CQES_MIN(n->id_ctrl.cqes))) { - trace_nvme_err_startfail_cqent_too_small( + trace_pci_nvme_err_startfail_cqent_too_small( NVME_CC_IOCQES(n->bar.cc), NVME_CTRL_CQES_MIN(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOCQES(n->bar.cc) > NVME_CTRL_CQES_MAX(n->id_ctrl.cqes))) { - trace_nvme_err_startfail_cqent_too_large( + trace_pci_nvme_err_startfail_cqent_too_large( NVME_CC_IOCQES(n->bar.cc), NVME_CTRL_CQES_MAX(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOSQES(n->bar.cc) < NVME_CTRL_SQES_MIN(n->id_ctrl.sqes))) { - trace_nvme_err_startfail_sqent_too_small( + trace_pci_nvme_err_startfail_sqent_too_small( NVME_CC_IOSQES(n->bar.cc), NVME_CTRL_SQES_MIN(n->bar.cap)); return -1; } if (unlikely(NVME_CC_IOSQES(n->bar.cc) > NVME_CTRL_SQES_MAX(n->id_ctrl.sqes))) { - trace_nvme_err_startfail_sqent_too_large( + trace_pci_nvme_err_startfail_sqent_too_large( NVME_CC_IOSQES(n->bar.cc), NVME_CTRL_SQES_MAX(n->bar.cap)); return -1; } if (unlikely(!NVME_AQA_ASQS(n->bar.aqa))) { - trace_nvme_err_startfail_asqent_sz_zero(); + 
trace_pci_nvme_err_startfail_asqent_sz_zero(); return -1; } if (unlikely(!NVME_AQA_ACQS(n->bar.aqa))) { - trace_nvme_err_startfail_acqent_sz_zero(); + trace_pci_nvme_err_startfail_acqent_sz_zero(); return -1; } @@ -1031,14 +1031,14 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, unsigned size) { if (unlikely(offset & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_misaligned32, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_misaligned32, "MMIO write not 32-bit aligned," " offset=0x%"PRIx64"", offset); /* should be ignored, fall through for now */ } if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_toosmall, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_toosmall, "MMIO write smaller than 32-bits," " offset=0x%"PRIx64", size=%u", offset, size); @@ -1048,32 +1048,30 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, switch (offset) { case 0xc: /* INTMS */ if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask set" " when MSI-X is enabled"); /* should be ignored, fall through for now */ } n->bar.intms |= data & 0xffffffff; n->bar.intmc = n->bar.intms; - trace_nvme_mmio_intm_set(data & 0xffffffff, - n->bar.intmc); + trace_pci_nvme_mmio_intm_set(data & 0xffffffff, n->bar.intmc); nvme_irq_check(n); break; case 0x10: /* INTMC */ if (unlikely(msix_enabled(&(n->parent_obj)))) { - NVME_GUEST_ERR(nvme_ub_mmiowr_intmask_with_msix, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_intmask_with_msix, "undefined access to interrupt mask clr" " when MSI-X is enabled"); /* should be ignored, fall through for now */ } n->bar.intms &= ~(data & 0xffffffff); n->bar.intmc = n->bar.intms; - trace_nvme_mmio_intm_clr(data & 0xffffffff, - n->bar.intmc); + trace_pci_nvme_mmio_intm_clr(data & 0xffffffff, n->bar.intmc); nvme_irq_check(n); break; case 0x14: /* CC */ - trace_nvme_mmio_cfg(data & 0xffffffff); + 
trace_pci_nvme_mmio_cfg(data & 0xffffffff); /* Windows first sends data, then sends enable bit */ if (!NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc) && !NVME_CC_SHN(data) && !NVME_CC_SHN(n->bar.cc)) @@ -1084,42 +1082,42 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, if (NVME_CC_EN(data) && !NVME_CC_EN(n->bar.cc)) { n->bar.cc = data; if (unlikely(nvme_start_ctrl(n))) { - trace_nvme_err_startfail(); + trace_pci_nvme_err_startfail(); n->bar.csts = NVME_CSTS_FAILED; } else { - trace_nvme_mmio_start_success(); + trace_pci_nvme_mmio_start_success(); n->bar.csts = NVME_CSTS_READY; } } else if (!NVME_CC_EN(data) && NVME_CC_EN(n->bar.cc)) { - trace_nvme_mmio_stopped(); + trace_pci_nvme_mmio_stopped(); nvme_clear_ctrl(n); n->bar.csts &= ~NVME_CSTS_READY; } if (NVME_CC_SHN(data) && !(NVME_CC_SHN(n->bar.cc))) { - trace_nvme_mmio_shutdown_set(); + trace_pci_nvme_mmio_shutdown_set(); nvme_clear_ctrl(n); n->bar.cc = data; n->bar.csts |= NVME_CSTS_SHST_COMPLETE; } else if (!NVME_CC_SHN(data) && NVME_CC_SHN(n->bar.cc)) { - trace_nvme_mmio_shutdown_cleared(); + trace_pci_nvme_mmio_shutdown_cleared(); n->bar.csts &= ~NVME_CSTS_SHST_COMPLETE; n->bar.cc = data; } break; case 0x1C: /* CSTS */ if (data & (1 << 4)) { - NVME_GUEST_ERR(nvme_ub_mmiowr_ssreset_w1c_unsupported, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ssreset_w1c_unsupported, "attempted to W1C CSTS.NSSRO" " but CAP.NSSRS is zero (not supported)"); } else if (data != 0) { - NVME_GUEST_ERR(nvme_ub_mmiowr_ro_csts, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_ro_csts, "attempted to set a read only bit" " of controller status"); } break; case 0x20: /* NSSR */ if (data == 0x4E564D65) { - trace_nvme_ub_mmiowr_ssreset_unsupported(); + trace_pci_nvme_ub_mmiowr_ssreset_unsupported(); } else { /* The spec says that writes of other values have no effect */ return; @@ -1127,55 +1125,55 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, break; case 0x24: /* AQA */ n->bar.aqa = data & 0xffffffff; - 
trace_nvme_mmio_aqattr(data & 0xffffffff); + trace_pci_nvme_mmio_aqattr(data & 0xffffffff); break; case 0x28: /* ASQ */ n->bar.asq = data; - trace_nvme_mmio_asqaddr(data); + trace_pci_nvme_mmio_asqaddr(data); break; case 0x2c: /* ASQ hi */ n->bar.asq |= data << 32; - trace_nvme_mmio_asqaddr_hi(data, n->bar.asq); + trace_pci_nvme_mmio_asqaddr_hi(data, n->bar.asq); break; case 0x30: /* ACQ */ - trace_nvme_mmio_acqaddr(data); + trace_pci_nvme_mmio_acqaddr(data); n->bar.acq = data; break; case 0x34: /* ACQ hi */ n->bar.acq |= data << 32; - trace_nvme_mmio_acqaddr_hi(data, n->bar.acq); + trace_pci_nvme_mmio_acqaddr_hi(data, n->bar.acq); break; case 0x38: /* CMBLOC */ - NVME_GUEST_ERR(nvme_ub_mmiowr_cmbloc_reserved, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbloc_reserved, "invalid write to reserved CMBLOC" " when CMBSZ is zero, ignored"); return; case 0x3C: /* CMBSZ */ - NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; case 0xE00: /* PMRCAP */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrcap_readonly, "invalid write to PMRCAP register, ignored"); return; case 0xE04: /* TODO PMRCTL */ break; case 0xE08: /* PMRSTS */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrsts_readonly, "invalid write to PMRSTS register, ignored"); return; case 0xE0C: /* PMREBS */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrebs_readonly, "invalid write to PMREBS register, ignored"); return; case 0xE10: /* PMRSWTP */ - NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_pmrswtp_readonly, "invalid write to PMRSWTP register, ignored"); return; case 0xE14: /* TODO PMRMSC */ break; default: - NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, + NVME_GUEST_ERR(pci_nvme_ub_mmiowr_invalid, "invalid MMIO write," " offset=0x%"PRIx64", data=%"PRIx64"", offset, data); 
@@ -1190,12 +1188,12 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) uint64_t val = 0; if (unlikely(addr & (sizeof(uint32_t) - 1))) { - NVME_GUEST_ERR(nvme_ub_mmiord_misaligned32, + NVME_GUEST_ERR(pci_nvme_ub_mmiord_misaligned32, "MMIO read not 32-bit aligned," " offset=0x%"PRIx64"", addr); /* should RAZ, fall through for now */ } else if (unlikely(size < sizeof(uint32_t))) { - NVME_GUEST_ERR(nvme_ub_mmiord_toosmall, + NVME_GUEST_ERR(pci_nvme_ub_mmiord_toosmall, "MMIO read smaller than 32-bits," " offset=0x%"PRIx64"", addr); /* should RAZ, fall through for now */ @@ -1213,7 +1211,7 @@ static uint64_t nvme_mmio_read(void *opaque, hwaddr addr, unsigned size) } memcpy(&val, ptr + addr, size); } else { - NVME_GUEST_ERR(nvme_ub_mmiord_invalid_ofs, + NVME_GUEST_ERR(pci_nvme_ub_mmiord_invalid_ofs, "MMIO read beyond last register," " offset=0x%"PRIx64", returning 0", addr); } @@ -1226,7 +1224,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) uint32_t qid; if (unlikely(addr & ((1 << 2) - 1))) { - NVME_GUEST_ERR(nvme_ub_db_wr_misaligned, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_misaligned, "doorbell write not 32-bit aligned," " offset=0x%"PRIx64", ignoring", addr); return; @@ -1241,7 +1239,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) qid = (addr - (0x1000 + (1 << 2))) >> 3; if (unlikely(nvme_check_cqid(n, qid))) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cq, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cq, "completion queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); @@ -1250,7 +1248,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) cq = n->cq[qid]; if (unlikely(new_head >= cq->size)) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_cqhead, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_cqhead, "completion queue doorbell write value" " beyond queue size, sqid=%"PRIu32"," " new_head=%"PRIu16", ignoring", @@ -1279,7 +1277,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, 
int val) qid = (addr - 0x1000) >> 3; if (unlikely(nvme_check_sqid(n, qid))) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sq, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sq, "submission queue doorbell write" " for nonexistent queue," " sqid=%"PRIu32", ignoring", qid); @@ -1288,7 +1286,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) sq = n->sq[qid]; if (unlikely(new_tail >= sq->size)) { - NVME_GUEST_ERR(nvme_ub_db_wr_invalid_sqtail, + NVME_GUEST_ERR(pci_nvme_ub_db_wr_invalid_sqtail, "submission queue doorbell write value" " beyond queue size, sqid=%"PRIu32"," " new_tail=%"PRIu16", ignoring", diff --git a/hw/block/trace-events b/hw/block/trace-events index aca54bda14..958fcc5508 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -29,100 +29,100 @@ hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, int t # nvme.c # nvme traces for successful events -nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" -nvme_irq_pin(void) "pulsing IRQ pin" -nvme_irq_masked(void) "IRQ is masked" -nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" -nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64"" -nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" -nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" -nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" -nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" -nvme_identify_ctrl(void) "identify controller" -nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" -nvme_identify_nslist(uint16_t 
ns) "identify namespace list, nsid=%"PRIu16"" -nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" -nvme_getfeat_numq(int result) "get feature number of queues, result=%d" -nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" -nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" -nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" -nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" -nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" -nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" -nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" -nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" -nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" -nvme_mmio_start_success(void) "setting controller enable bit succeeded" -nvme_mmio_stopped(void) "cleared controller enable bit" -nvme_mmio_shutdown_set(void) "shutdown bit set" -nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" +pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u" +pci_nvme_irq_pin(void) "pulsing IRQ pin" +pci_nvme_irq_masked(void) "IRQ is masked" +pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64"" +pci_nvme_rw(const char *verb, uint32_t blk_count, uint64_t byte_count, uint64_t lba) "%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA 
%"PRIu64"" +pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16"" +pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d" +pci_nvme_del_sq(uint16_t qid) "deleting submission queue sqid=%"PRIu16"" +pci_nvme_del_cq(uint16_t cqid) "deleted completion queue, cqid=%"PRIu16"" +pci_nvme_identify_ctrl(void) "identify controller" +pci_nvme_identify_ns(uint16_t ns) "identify namespace, nsid=%"PRIu16"" +pci_nvme_identify_nslist(uint16_t ns) "identify namespace list, nsid=%"PRIu16"" +pci_nvme_getfeat_vwcache(const char* result) "get feature volatile write cache, result=%s" +pci_nvme_getfeat_numq(int result) "get feature number of queues, result=%d" +pci_nvme_setfeat_numq(int reqcq, int reqsq, int gotcq, int gotsq) "requested cq_count=%d sq_count=%d, responding with cq_count=%d sq_count=%d" +pci_nvme_setfeat_timestamp(uint64_t ts) "set feature timestamp = 0x%"PRIx64"" +pci_nvme_getfeat_timestamp(uint64_t ts) "get feature timestamp = 0x%"PRIx64"" +pci_nvme_mmio_intm_set(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask set, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_intm_clr(uint64_t data, uint64_t new_mask) "wrote MMIO, interrupt mask clr, data=0x%"PRIx64", new_mask=0x%"PRIx64"" +pci_nvme_mmio_cfg(uint64_t data) "wrote MMIO, config controller config=0x%"PRIx64"" +pci_nvme_mmio_aqattr(uint64_t data) "wrote MMIO, admin queue attributes=0x%"PRIx64"" +pci_nvme_mmio_asqaddr(uint64_t data) "wrote MMIO, admin submission queue address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr(uint64_t data) "wrote MMIO, admin completion queue address=0x%"PRIx64"" +pci_nvme_mmio_asqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin submission queue high 
half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_acqaddr_hi(uint64_t data, uint64_t new_addr) "wrote MMIO, admin completion queue high half=0x%"PRIx64", new_address=0x%"PRIx64"" +pci_nvme_mmio_start_success(void) "setting controller enable bit succeeded" +pci_nvme_mmio_stopped(void) "cleared controller enable bit" +pci_nvme_mmio_shutdown_set(void) "shutdown bit set" +pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared" # nvme traces for error conditions -nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" -nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" -nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" -nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" -nvme_err_invalid_prp(void) "invalid PRP" -nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" -nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" -nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" -nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" -nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" -nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" -nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" -nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid qsize=%"PRIu16"" -nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" -nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" -nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" -nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed 
deleting completion queue, it is not empty, cqid=%"PRIu16"" -nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" -nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" -nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" -nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" -nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" -nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" -nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" -nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" -nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" -nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" -nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" -nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" -nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" -nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" -nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" -nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" -nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" 
-nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" -nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" -nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" -nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" -nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" -nvme_err_startfail(void) "setting controller enable bit failed" +pci_nvme_err_invalid_dma(void) "PRP/SGL is too small for transfer size" +pci_nvme_err_invalid_prplist_ent(uint64_t prplist) "PRP list entry is null or not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_align(uint64_t prp2) "PRP2 is not page aligned: 0x%"PRIx64"" +pci_nvme_err_invalid_prp2_missing(void) "PRP2 is null and more data to be transferred" +pci_nvme_err_invalid_prp(void) "invalid PRP" +pci_nvme_err_invalid_ns(uint32_t ns, uint32_t limit) "invalid namespace %u not within 1-%u" +pci_nvme_err_invalid_opc(uint8_t opc) "invalid opcode 0x%"PRIx8"" +pci_nvme_err_invalid_admin_opc(uint8_t opc) "invalid admin opcode 0x%"PRIx8"" +pci_nvme_err_invalid_lba_range(uint64_t start, uint64_t len, uint64_t limit) "Invalid LBA start=%"PRIu64" len=%"PRIu64" limit=%"PRIu64"" +pci_nvme_err_invalid_del_sq(uint16_t qid) "invalid submission queue deletion, sid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_cqid(uint16_t cqid) "failed creating submission queue, invalid cqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_sqid(uint16_t sqid) "failed creating submission queue, invalid sqid=%"PRIu16"" +pci_nvme_err_invalid_create_sq_size(uint16_t qsize) "failed creating submission queue, invalid 
qsize=%"PRIu16"" +pci_nvme_err_invalid_create_sq_addr(uint64_t addr) "failed creating submission queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_sq_qflags(uint16_t qflags) "failed creating submission queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_del_cq_cqid(uint16_t cqid) "failed deleting completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_del_cq_notempty(uint16_t cqid) "failed deleting completion queue, it is not empty, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_cqid(uint16_t cqid) "failed creating completion queue, cqid=%"PRIu16"" +pci_nvme_err_invalid_create_cq_size(uint16_t size) "failed creating completion queue, size=%"PRIu16"" +pci_nvme_err_invalid_create_cq_addr(uint64_t addr) "failed creating completion queue, addr=0x%"PRIx64"" +pci_nvme_err_invalid_create_cq_vector(uint16_t vector) "failed creating completion queue, vector=%"PRIu16"" +pci_nvme_err_invalid_create_cq_qflags(uint16_t qflags) "failed creating completion queue, qflags=%"PRIu16"" +pci_nvme_err_invalid_identify_cns(uint16_t cns) "identify, invalid cns=0x%"PRIx16"" +pci_nvme_err_invalid_getfeat(int dw10) "invalid get features, dw10=0x%"PRIx32"" +pci_nvme_err_invalid_setfeat(uint32_t dw10) "invalid set features, dw10=0x%"PRIx32"" +pci_nvme_err_startfail_cq(void) "nvme_start_ctrl failed because there are non-admin completion queues" +pci_nvme_err_startfail_sq(void) "nvme_start_ctrl failed because there are non-admin submission queues" +pci_nvme_err_startfail_nbarasq(void) "nvme_start_ctrl failed because the admin submission queue address is null" +pci_nvme_err_startfail_nbaracq(void) "nvme_start_ctrl failed because the admin completion queue address is null" +pci_nvme_err_startfail_asq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin submission queue address is misaligned: 0x%"PRIx64"" +pci_nvme_err_startfail_acq_misaligned(uint64_t addr) "nvme_start_ctrl failed because the admin completion queue address is misaligned: 0x%"PRIx64"" 
+pci_nvme_err_startfail_page_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_page_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the page size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_cqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_cqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the completion queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_sqent_too_small(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too small: log2size=%u, min=%u" +pci_nvme_err_startfail_sqent_too_large(uint8_t log2ps, uint8_t maxlog2ps) "nvme_start_ctrl failed because the submission queue entry size is too large: log2size=%u, max=%u" +pci_nvme_err_startfail_asqent_sz_zero(void) "nvme_start_ctrl failed because the admin submission queue size is zero" +pci_nvme_err_startfail_acqent_sz_zero(void) "nvme_start_ctrl failed because the admin completion queue size is zero" +pci_nvme_err_startfail(void) "setting controller enable bit failed" # Traces for undefined behavior -nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" -nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" -nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" -nvme_ub_mmiowr_ro_csts(void) "attempted to set a read only bit of controller status" -nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" -nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" 
-nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" -nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" -nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" -nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" -nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" -nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" -nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" -nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" -nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" -nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" -nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" -nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" -nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" -nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" -nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_mmiowr_misaligned32(uint64_t offset) "MMIO write not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiowr_toosmall(uint64_t offset, unsigned size) "MMIO write smaller than 32 bits, offset=0x%"PRIx64", size=%u" +pci_nvme_ub_mmiowr_intmask_with_msix(void) "undefined access to interrupt mask set when MSI-X is enabled" +pci_nvme_ub_mmiowr_ro_csts(void) "attempted 
to set a read only bit of controller status" +pci_nvme_ub_mmiowr_ssreset_w1c_unsupported(void) "attempted to W1C CSTS.NSSRO but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_ssreset_unsupported(void) "attempted NVM subsystem reset but CAP.NSSRS is zero (not supported)" +pci_nvme_ub_mmiowr_cmbloc_reserved(void) "invalid write to reserved CMBLOC when CMBSZ is zero, ignored" +pci_nvme_ub_mmiowr_cmbsz_readonly(void) "invalid write to read only CMBSZ, ignored" +pci_nvme_ub_mmiowr_pmrcap_readonly(void) "invalid write to read only PMRCAP, ignored" +pci_nvme_ub_mmiowr_pmrsts_readonly(void) "invalid write to read only PMRSTS, ignored" +pci_nvme_ub_mmiowr_pmrebs_readonly(void) "invalid write to read only PMREBS, ignored" +pci_nvme_ub_mmiowr_pmrswtp_readonly(void) "invalid write to read only PMRSWTP, ignored" +pci_nvme_ub_mmiowr_invalid(uint64_t offset, uint64_t data) "invalid MMIO write, offset=0x%"PRIx64", data=0x%"PRIx64"" +pci_nvme_ub_mmiord_misaligned32(uint64_t offset) "MMIO read not 32-bit aligned, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_toosmall(uint64_t offset) "MMIO read smaller than 32-bits, offset=0x%"PRIx64"" +pci_nvme_ub_mmiord_invalid_ofs(uint64_t offset) "MMIO read beyond last register, offset=0x%"PRIx64", returning 0" +pci_nvme_ub_db_wr_misaligned(uint64_t offset) "doorbell write not 32-bit aligned, offset=0x%"PRIx64", ignoring" +pci_nvme_ub_db_wr_invalid_cq(uint32_t qid) "completion queue doorbell write for nonexistent queue, cqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_cqhead(uint32_t qid, uint16_t new_head) "completion queue doorbell write value beyond queue size, cqid=%"PRIu32", new_head=%"PRIu16", ignoring" +pci_nvme_ub_db_wr_invalid_sq(uint32_t qid) "submission queue doorbell write for nonexistent queue, sqid=%"PRIu32", ignoring" +pci_nvme_ub_db_wr_invalid_sqtail(uint32_t qid, uint16_t new_tail) "submission queue doorbell write value beyond queue size, sqid=%"PRIu32", new_head=%"PRIu16", ignoring" # xen-block.c xen_block_realize(const 
char *type, uint32_t disk, uint32_t partition) "%s d%up%u" From 4920786ee69595137cfae1834798e73f15b402d8 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:14 +0200 Subject: [PATCH 09/43] hw/block/nvme: remove superfluous breaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These break statements were left over when commit 3036a626e9ef ("nvme: add Get/Set Feature Timestamp support") was merged. Signed-off-by: Klaus Jensen Reviewed-by: Maxim Levitsky Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-4-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index e8f5c5ab82..0d3f8f345f 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -801,7 +801,6 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) break; case NVME_TIMESTAMP: return nvme_get_feature_timestamp(n, cmd); - break; default: trace_pci_nvme_err_invalid_getfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; @@ -845,11 +844,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) req->cqe.result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); break; - case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); - break; - default: trace_pci_nvme_err_invalid_setfeat(dw10); return NVME_INVALID_FIELD | NVME_DNR; From 1065abfbf1f82cb2f4c8666fde9fae1084222cf9 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:15 +0200 Subject: [PATCH 10/43] hw/block/nvme: move device parameters to separate struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move device configuration parameters to separate struct to make it explicit what is configurable and what is set internally.
Signed-off-by: Klaus Jensen Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Message-Id: <20200609190333.59390-5-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 49 ++++++++++++++++++++++++++----------------------- hw/block/nvme.h | 11 ++++++++--- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 0d3f8f345f..bc2d9d2091 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -77,12 +77,12 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { - return sqid < n->num_queues && n->sq[sqid] != NULL ? 0 : -1; + return sqid < n->params.num_queues && n->sq[sqid] != NULL ? 0 : -1; } static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) { - return cqid < n->num_queues && n->cq[cqid] != NULL ? 0 : -1; + return cqid < n->params.num_queues && n->cq[cqid] != NULL ? 0 : -1; } static void nvme_inc_cq_tail(NvmeCQueue *cq) @@ -644,7 +644,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) trace_pci_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } - if (unlikely(vector > n->num_queues)) { + if (unlikely(vector > n->params.num_queues)) { trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } @@ -796,7 +796,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_pci_nvme_getfeat_vwcache(result ? 
"enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: - result = cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); + result = cpu_to_le32((n->params.num_queues - 2) | + ((n->params.num_queues - 2) << 16)); trace_pci_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: @@ -840,9 +841,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_NUMBER_OF_QUEUES: trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, ((dw11 >> 16) & 0xFFFF) + 1, - n->num_queues - 1, n->num_queues - 1); - req->cqe.result = - cpu_to_le32((n->num_queues - 2) | ((n->num_queues - 2) << 16)); + n->params.num_queues - 1, + n->params.num_queues - 1); + req->cqe.result = cpu_to_le32((n->params.num_queues - 2) | + ((n->params.num_queues - 2) << 16)); break; case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); @@ -913,12 +915,12 @@ static void nvme_clear_ctrl(NvmeCtrl *n) blk_drain(n->conf.blk); - for (i = 0; i < n->num_queues; i++) { + for (i = 0; i < n->params.num_queues; i++) { if (n->sq[i] != NULL) { nvme_free_sq(n->sq[i], n); } } - for (i = 0; i < n->num_queues; i++) { + for (i = 0; i < n->params.num_queues; i++) { if (n->cq[i] != NULL) { nvme_free_cq(n->cq[i], n); } @@ -1348,7 +1350,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) int64_t bs_size; uint8_t *pci_conf; - if (!n->num_queues) { + if (!n->params.num_queues) { error_setg(errp, "num_queues can't be zero"); return; } @@ -1364,12 +1366,12 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - if (!n->serial) { + if (!n->params.serial) { error_setg(errp, "serial property not set"); return; } - if (!n->cmb_size_mb && n->pmrdev) { + if (!n->params.cmb_size_mb && n->pmrdev) { if (host_memory_backend_is_mapped(n->pmrdev)) { char *path = object_get_canonical_path_component(OBJECT(n->pmrdev)); error_setg(errp, "can't use already busy memdev: %s", path); @@ -1400,25 +1402,26 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) 
n->num_namespaces = 1; /* num_queues is really number of pairs, so each has two doorbells */ - n->reg_size = pow2ceil(NVME_REG_SIZE + 2 * n->num_queues * NVME_DB_SIZE); + n->reg_size = pow2ceil(NVME_REG_SIZE + + 2 * n->params.num_queues * NVME_DB_SIZE); n->ns_size = bs_size / (uint64_t)n->num_namespaces; n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); - n->sq = g_new0(NvmeSQueue *, n->num_queues); - n->cq = g_new0(NvmeCQueue *, n->num_queues); + n->sq = g_new0(NvmeSQueue *, n->params.num_queues); + n->cq = g_new0(NvmeCQueue *, n->params.num_queues); memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); - msix_init_exclusive_bar(pci_dev, n->num_queues, 4, NULL); + msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); strpadcpy((char *)id->fr, sizeof(id->fr), "1.0", ' '); - strpadcpy((char *)id->sn, sizeof(id->sn), n->serial, ' '); + strpadcpy((char *)id->sn, sizeof(id->sn), n->params.serial, ' '); id->rab = 6; id->ieee[0] = 0x00; id->ieee[1] = 0x02; @@ -1447,7 +1450,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; - if (n->cmb_size_mb) { + if (n->params.cmb_size_mb) { NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); @@ -1458,7 +1461,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->cmb_size_mb); + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); n->cmbloc = n->bar.cmbloc; n->cmbsz = n->bar.cmbsz; @@ -1542,7 +1545,7 @@ static void 
nvme_exit(PCIDevice *pci_dev) g_free(n->cq); g_free(n->sq); - if (n->cmb_size_mb) { + if (n->params.cmb_size_mb) { g_free(n->cmbuf); } @@ -1556,9 +1559,9 @@ static Property nvme_props[] = { DEFINE_BLOCK_PROPERTIES(NvmeCtrl, conf), DEFINE_PROP_LINK("pmrdev", NvmeCtrl, pmrdev, TYPE_MEMORY_BACKEND, HostMemoryBackend *), - DEFINE_PROP_STRING("serial", NvmeCtrl, serial), - DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, cmb_size_mb, 0), - DEFINE_PROP_UINT32("num_queues", NvmeCtrl, num_queues, 64), + DEFINE_PROP_STRING("serial", NvmeCtrl, params.serial), + DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 64), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 6520a9f0be..9df244c93c 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -1,7 +1,14 @@ #ifndef HW_NVME_H #define HW_NVME_H + #include "block/nvme.h" +typedef struct NvmeParams { + char *serial; + uint32_t num_queues; + uint32_t cmb_size_mb; +} NvmeParams; + typedef struct NvmeAsyncEvent { QSIMPLEQ_ENTRY(NvmeAsyncEvent) entry; NvmeAerResult result; @@ -63,6 +70,7 @@ typedef struct NvmeCtrl { MemoryRegion ctrl_mem; NvmeBar bar; BlockConf conf; + NvmeParams params; uint32_t page_size; uint16_t page_bits; @@ -71,10 +79,8 @@ typedef struct NvmeCtrl { uint16_t sqe_size; uint32_t reg_size; uint32_t num_namespaces; - uint32_t num_queues; uint32_t max_q_ents; uint64_t ns_size; - uint32_t cmb_size_mb; uint32_t cmbsz; uint32_t cmbloc; uint8_t *cmbuf; @@ -82,7 +88,6 @@ typedef struct NvmeCtrl { uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ - char *serial; HostMemoryBackend *pmrdev; NvmeNamespace *namespaces; From 3e829fd438033cdf8646ec22bcc869555b100212 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:16 +0200 Subject: [PATCH 11/43] hw/block/nvme: use constants in identify MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Maxim Levitsky Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-6-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 8 ++++---- include/block/nvme.h | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index bc2d9d2091..2a26b8859a 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -692,7 +692,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeIdentify *c) static uint16_t nvme_identify_nslist(NvmeCtrl *n, NvmeIdentify *c) { - static const int data_len = 4 * KiB; + static const int data_len = NVME_IDENTIFY_DATA_SIZE; uint32_t min_nsid = le32_to_cpu(c->nsid); uint64_t prp1 = le64_to_cpu(c->prp1); uint64_t prp2 = le64_to_cpu(c->prp2); @@ -722,11 +722,11 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeCmd *cmd) NvmeIdentify *c = (NvmeIdentify *)cmd; switch (le32_to_cpu(c->cns)) { - case 0x00: + case NVME_ID_CNS_NS: return nvme_identify_ns(n, c); - case 0x01: + case NVME_ID_CNS_CTRL: return nvme_identify_ctrl(n, c); - case 0x02: + case NVME_ID_CNS_NS_ACTIVE_LIST: return nvme_identify_nslist(n, c); default: trace_pci_nvme_err_invalid_identify_cns(le32_to_cpu(c->cns)); diff --git a/include/block/nvme.h b/include/block/nvme.h index 5525c8e343..1720ee1d51 100644 --- a/include/block/nvme.h +++ b/include/block/nvme.h @@ -705,6 +705,14 @@ typedef struct NvmePSD { uint8_t resv[16]; } NvmePSD; +#define NVME_IDENTIFY_DATA_SIZE 4096 + +enum { + NVME_ID_CNS_NS = 0x0, + NVME_ID_CNS_CTRL = 0x1, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x2, +}; + typedef struct NvmeIdCtrl { uint16_t vid; uint16_t ssvid; From b4529c5c3af69189b65b22906a35b09a7fe17960 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:17 +0200 Subject: [PATCH 12/43] hw/block/nvme: refactor nvme_addr_read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull 
the controller memory buffer check to its own function. The check will be used on its own in later patches. Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-7-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 2a26b8859a..d6fcf078a4 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -65,14 +65,22 @@ static void nvme_process_sq(void *opaque); +static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) +{ + hwaddr low = n->ctrl_mem.addr; + hwaddr hi = n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size); + + return addr >= low && addr < hi; +} + static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) { - if (n->cmbsz && addr >= n->ctrl_mem.addr && - addr < (n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))) { + if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); - } else { - pci_dma_read(&n->parent_obj, addr, buf, size); + return; } + + pci_dma_read(&n->parent_obj, addr, buf, size); } static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) From ca247d35098d396db25233d5f554bd3098949d60 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:18 +0200 Subject: [PATCH 13/43] hw/block/nvme: fix pin-based interrupt behavior First, since the device only supports MSI-X or pin-based interrupt, if MSI-X is not enabled, it should not accept interrupt vectors different from 0 when creating completion queues. Secondly, the irq_status NvmeCtrl member is meant to be compared to the INTMS register, so it should only be 32 bits wide. And it is really only useful when used with multi-message MSI. 
Third, since we do not force a 1-to-1 correspondence between cqid and interrupt vector, the irq_status register should not have bits set according to cqid, but according to the associated interrupt vector. Fix these issues, but keep irq_status available so we can easily support multi-message MSI down the line. Fixes: 5e9aa92eb1a5 ("hw/block: Fix pin-based interrupt behaviour of NVMe") Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Signed-off-by: Klaus Jensen Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-8-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 12 ++++++++---- hw/block/nvme.h | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d6fcf078a4..ee514625ee 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -137,8 +137,8 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq) msix_notify(&(n->parent_obj), cq->vector); } else { trace_pci_nvme_irq_pin(); - assert(cq->cqid < 64); - n->irq_status |= 1 << cq->cqid; + assert(cq->vector < 32); + n->irq_status |= 1 << cq->vector; nvme_irq_check(n); } } else { @@ -152,8 +152,8 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) if (msix_enabled(&(n->parent_obj))) { return; } else { - assert(cq->cqid < 64); - n->irq_status &= ~(1 << cq->cqid); + assert(cq->vector < 32); + n->irq_status &= ~(1 << cq->vector); nvme_irq_check(n); } } @@ -652,6 +652,10 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) trace_pci_nvme_err_invalid_create_cq_addr(prp1); return NVME_INVALID_FIELD | NVME_DNR; } + if (unlikely(!msix_enabled(&n->parent_obj) && vector)) { + trace_pci_nvme_err_invalid_create_cq_vector(vector); + return NVME_INVALID_IRQ_VECTOR | NVME_DNR; + } if (unlikely(vector > n->params.num_queues)) { trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 9df244c93c..91f16c8125 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h 
@@ -84,7 +84,7 @@ typedef struct NvmeCtrl { uint32_t cmbsz; uint32_t cmbloc; uint8_t *cmbuf; - uint64_t irq_status; + uint32_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ uint64_t timestamp_set_qemu_clock_ms; /* QEMU clock time */ From dce22c864612659793342bb99bd3d7a91f31afd6 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:19 +0200 Subject: [PATCH 14/43] hw/block/nvme: add max_ioqpairs device parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The num_queues device parameter has a slightly confusing meaning because it accounts for the admin queue pair which is not really optional. Secondly, it is really a maximum value of queues allowed. Add a new max_ioqpairs parameter that only accounts for I/O queue pairs, but keep num_queues for compatibility. Signed-off-by: Klaus Jensen Reviewed-by: Maxim Levitsky Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-9-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 51 ++++++++++++++++++++++++++++++------------------- hw/block/nvme.h | 3 ++- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ee514625ee..1c1d2f8b77 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -20,7 +20,7 @@ * -device nvme,drive=,serial=,id=, \ * cmb_size_mb=, \ * [pmrdev=,] \ - * num_queues= + * max_ioqpairs= * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. @@ -36,6 +36,7 @@ #include "qemu/osdep.h" #include "qemu/units.h" +#include "qemu/error-report.h" #include "hw/block/block.h" #include "hw/pci/msix.h" #include "hw/pci/pci.h" @@ -85,12 +86,12 @@ static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) static int nvme_check_sqid(NvmeCtrl *n, uint16_t sqid) { - return sqid < n->params.num_queues && n->sq[sqid] != NULL ?
0 : -1; + return sqid < n->params.max_ioqpairs + 1 && n->sq[sqid] != NULL ? 0 : -1; } static int nvme_check_cqid(NvmeCtrl *n, uint16_t cqid) { - return cqid < n->params.num_queues && n->cq[cqid] != NULL ? 0 : -1; + return cqid < n->params.max_ioqpairs + 1 && n->cq[cqid] != NULL ? 0 : -1; } static void nvme_inc_cq_tail(NvmeCQueue *cq) @@ -656,7 +657,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } - if (unlikely(vector > n->params.num_queues)) { + if (unlikely(vector > n->params.max_ioqpairs)) { trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } @@ -808,8 +809,8 @@ static uint16_t nvme_get_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) trace_pci_nvme_getfeat_vwcache(result ? "enabled" : "disabled"); break; case NVME_NUMBER_OF_QUEUES: - result = cpu_to_le32((n->params.num_queues - 2) | - ((n->params.num_queues - 2) << 16)); + result = cpu_to_le32((n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16)); trace_pci_nvme_getfeat_numq(result); break; case NVME_TIMESTAMP: @@ -853,10 +854,10 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req) case NVME_NUMBER_OF_QUEUES: trace_pci_nvme_setfeat_numq((dw11 & 0xFFFF) + 1, ((dw11 >> 16) & 0xFFFF) + 1, - n->params.num_queues - 1, - n->params.num_queues - 1); - req->cqe.result = cpu_to_le32((n->params.num_queues - 2) | - ((n->params.num_queues - 2) << 16)); + n->params.max_ioqpairs, + n->params.max_ioqpairs); + req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) | + ((n->params.max_ioqpairs - 1) << 16)); break; case NVME_TIMESTAMP: return nvme_set_feature_timestamp(n, cmd); @@ -927,12 +928,12 @@ static void nvme_clear_ctrl(NvmeCtrl *n) blk_drain(n->conf.blk); - for (i = 0; i < n->params.num_queues; i++) { + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { if (n->sq[i] != NULL) { nvme_free_sq(n->sq[i], n); } } - for 
(i = 0; i < n->params.num_queues; i++) { + for (i = 0; i < n->params.max_ioqpairs + 1; i++) { if (n->cq[i] != NULL) { nvme_free_cq(n->cq[i], n); } @@ -1362,8 +1363,17 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) int64_t bs_size; uint8_t *pci_conf; - if (!n->params.num_queues) { - error_setg(errp, "num_queues can't be zero"); + if (n->params.num_queues) { + warn_report("num_queues is deprecated; please use max_ioqpairs " + "instead"); + + n->params.max_ioqpairs = n->params.num_queues - 1; + } + + if (n->params.max_ioqpairs < 1 || + n->params.max_ioqpairs > PCI_MSIX_FLAGS_QSIZE) { + error_setg(errp, "max_ioqpairs must be between 1 and %d", + PCI_MSIX_FLAGS_QSIZE); return; } @@ -1413,21 +1423,21 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->num_namespaces = 1; - /* num_queues is really number of pairs, so each has two doorbells */ + /* add one to max_ioqpairs to account for the admin queue pair */ n->reg_size = pow2ceil(NVME_REG_SIZE + - 2 * n->params.num_queues * NVME_DB_SIZE); + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); n->ns_size = bs_size / (uint64_t)n->num_namespaces; n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); - n->sq = g_new0(NvmeSQueue *, n->params.num_queues); - n->cq = g_new0(NvmeCQueue *, n->params.num_queues); + n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); + n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); - msix_init_exclusive_bar(pci_dev, n->params.num_queues, 4, NULL); + msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL); id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); @@ -1573,7 +1583,8 @@ static Property nvme_props[] = { HostMemoryBackend *), DEFINE_PROP_STRING("serial", NvmeCtrl, 
params.serial), DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), - DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 64), + DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), + DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 91f16c8125..26c38bd913 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -5,7 +5,8 @@ typedef struct NvmeParams { char *serial; - uint32_t num_queues; + uint32_t num_queues; /* deprecated since 5.1 */ + uint32_t max_ioqpairs; uint32_t cmb_size_mb; } NvmeParams; From e1731e816ad201273ed284fbb9cd51db131fd196 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:20 +0200 Subject: [PATCH 15/43] hw/block/nvme: remove redundant cmbloc/cmbsz members MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-10-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 7 ++----- hw/block/nvme.h | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 1c1d2f8b77..61447220a8 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -76,7 +76,7 @@ static bool nvme_addr_is_cmb(NvmeCtrl *n, hwaddr addr) static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size) { - if (n->cmbsz && nvme_addr_is_cmb(n, addr)) { + if (n->bar.cmbsz && nvme_addr_is_cmb(n, addr)) { memcpy(buf, (void *)&n->cmbuf[addr - n->ctrl_mem.addr], size); return; } @@ -170,7 +170,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, QEMUIOVector *iov, uint64_t prp1, if (unlikely(!prp1)) { trace_pci_nvme_err_invalid_prp(); return NVME_INVALID_FIELD | NVME_DNR; - } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr && + } else if (n->bar.cmbsz && prp1 >= n->ctrl_mem.addr && prp1 < 
n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) { qsg->nsg = 0; qemu_iovec_init(iov, num_prps); @@ -1485,9 +1485,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); - n->cmbloc = n->bar.cmbloc; - n->cmbsz = n->bar.cmbsz; - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 26c38bd913..cedc8022db 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -82,8 +82,6 @@ typedef struct NvmeCtrl { uint32_t num_namespaces; uint32_t max_q_ents; uint64_t ns_size; - uint32_t cmbsz; - uint32_t cmbloc; uint8_t *cmbuf; uint32_t irq_status; uint64_t host_timestamp; /* Timestamp sent by the host */ From 54000c66f050ab2aec180e0822c09523057448e7 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:21 +0200 Subject: [PATCH 16/43] hw/block/nvme: factor out property/constraint checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-11-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 48 ++++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 61447220a8..ee669ee8dc 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1354,24 +1354,19 @@ static const MemoryRegionOps nvme_cmb_ops = { }, }; -static void nvme_realize(PCIDevice *pci_dev, Error **errp) +static void nvme_check_constraints(NvmeCtrl *n, Error **errp) { - NvmeCtrl *n = NVME(pci_dev); - NvmeIdCtrl *id = &n->id_ctrl; + NvmeParams *params = &n->params; - int i; - int64_t bs_size; - uint8_t *pci_conf; - - if (n->params.num_queues) { 
+ if (params->num_queues) { warn_report("num_queues is deprecated; please use max_ioqpairs " "instead"); - n->params.max_ioqpairs = n->params.num_queues - 1; + params->max_ioqpairs = params->num_queues - 1; } - if (n->params.max_ioqpairs < 1 || - n->params.max_ioqpairs > PCI_MSIX_FLAGS_QSIZE) { + if (params->max_ioqpairs < 1 || + params->max_ioqpairs > PCI_MSIX_FLAGS_QSIZE) { error_setg(errp, "max_ioqpairs must be between 1 and %d", PCI_MSIX_FLAGS_QSIZE); return; @@ -1382,13 +1377,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg(errp, "could not get backing file size"); - return; - } - - if (!n->params.serial) { + if (!params->serial) { error_setg(errp, "serial property not set"); return; } @@ -1408,6 +1397,29 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) host_memory_backend_set_mapped(n->pmrdev, true); } +} + +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = NVME(pci_dev); + NvmeIdCtrl *id = &n->id_ctrl; + Error *local_err = NULL; + + int i; + int64_t bs_size; + uint8_t *pci_conf; + + nvme_check_constraints(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + bs_size = blk_getlength(n->conf.blk); + if (bs_size < 0) { + error_setg(errp, "could not get backing file size"); + return; + } blkconf_blocksizes(&n->conf); if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), From a17f50188bfe0a59e96745a823d309ae2de39f10 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:22 +0200 Subject: [PATCH 17/43] hw/block/nvme: factor out device state setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-12-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 22 
+++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index ee669ee8dc..b721cab9b0 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1399,6 +1399,17 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) } } +static void nvme_init_state(NvmeCtrl *n) +{ + n->num_namespaces = 1; + /* add one to max_ioqpairs to account for the admin queue pair */ + n->reg_size = pow2ceil(NVME_REG_SIZE + + 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); + n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); + n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); + n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); +} + static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); @@ -1415,6 +1426,8 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } + nvme_init_state(n); + bs_size = blk_getlength(n->conf.blk); if (bs_size < 0) { error_setg(errp, "could not get backing file size"); @@ -1433,17 +1446,8 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); pcie_endpoint_cap_init(pci_dev, 0x80); - n->num_namespaces = 1; - - /* add one to max_ioqpairs to account for the admin queue pair */ - n->reg_size = pow2ceil(NVME_REG_SIZE + - 2 * (n->params.max_ioqpairs + 1) * NVME_DB_SIZE); n->ns_size = bs_size / (uint64_t)n->num_namespaces; - n->namespaces = g_new0(NvmeNamespace, n->num_namespaces); - n->sq = g_new0(NvmeSQueue *, n->params.max_ioqpairs + 1); - n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", n->reg_size); pci_register_bar(pci_dev, 0, From 90f4511543748de94bb34c250deab865f4018128 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:23 +0200 Subject: [PATCH 18/43] hw/block/nvme: factor out block backend setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-13-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index b721cab9b0..87f1f0d0d1 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1410,6 +1410,13 @@ static void nvme_init_state(NvmeCtrl *n) n->cq = g_new0(NvmeCQueue *, n->params.max_ioqpairs + 1); } +static void nvme_init_blk(NvmeCtrl *n, Error **errp) +{ + blkconf_blocksizes(&n->conf); + blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), + false, errp); +} + static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); @@ -1434,9 +1441,9 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - blkconf_blocksizes(&n->conf); - if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), - false, errp)) { + nvme_init_blk(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); return; } From 3adee1c2d3c443e49fffe995b424932dda577a3a Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:24 +0200 Subject: [PATCH 19/43] hw/block/nvme: add namespace helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce some small helpers to make the next patches easier on the eye. 
Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-14-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 3 +-- hw/block/nvme.h | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 87f1f0d0d1..3f3db17231 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1573,8 +1573,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) id_ns->dps = 0; id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(n->ns_size >> - id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds); + cpu_to_le64(nvme_ns_nlbas(n, ns)); } } diff --git a/hw/block/nvme.h b/hw/block/nvme.h index cedc8022db..61dd9b23b8 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -61,6 +61,17 @@ typedef struct NvmeNamespace { NvmeIdNs id_ns; } NvmeNamespace; +static inline NvmeLBAF *nvme_ns_lbaf(NvmeNamespace *ns) +{ + NvmeIdNs *id_ns = &ns->id_ns; + return &id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(id_ns->flbas)]; +} + +static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns) +{ + return nvme_ns_lbaf(ns)->ds; +} + #define TYPE_NVME "nvme" #define NVME(obj) \ OBJECT_CHECK(NvmeCtrl, (obj), TYPE_NVME) @@ -97,4 +108,10 @@ typedef struct NvmeCtrl { NvmeIdCtrl id_ctrl; } NvmeCtrl; +/* calculate the number of LBAs that the namespace can accomodate */ +static inline uint64_t nvme_ns_nlbas(NvmeCtrl *n, NvmeNamespace *ns) +{ + return n->ns_size >> nvme_ns_lbads(ns); +} + #endif /* HW_NVME_H */ From d634d74229c413dfdf7383bc943b8dcf0e22e9a1 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:25 +0200 Subject: [PATCH 20/43] hw/block/nvme: factor out namespace setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: 
<20200609190333.59390-15-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 3f3db17231..c98af03f44 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1417,6 +1417,27 @@ static void nvme_init_blk(NvmeCtrl *n, Error **errp) false, errp); } +static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) +{ + int64_t bs_size; + NvmeIdNs *id_ns = &ns->id_ns; + + bs_size = blk_getlength(n->conf.blk); + if (bs_size < 0) { + error_setg_errno(errp, -bs_size, "could not get backing file size"); + return; + } + + n->ns_size = bs_size; + + id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; + id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(n, ns)); + + /* no thin provisioning */ + id_ns->ncap = id_ns->nsze; + id_ns->nuse = id_ns->ncap; +} + static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); @@ -1424,7 +1445,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) Error *local_err = NULL; int i; - int64_t bs_size; uint8_t *pci_conf; nvme_check_constraints(n, &local_err); @@ -1435,12 +1455,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) nvme_init_state(n); - bs_size = blk_getlength(n->conf.blk); - if (bs_size < 0) { - error_setg(errp, "could not get backing file size"); - return; - } - nvme_init_blk(n, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -1453,8 +1467,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); pcie_endpoint_cap_init(pci_dev, 0x80); - n->ns_size = bs_size / (uint64_t)n->num_namespaces; - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", n->reg_size); pci_register_bar(pci_dev, 0, @@ -1563,17 +1575,11 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) } for (i = 0; i < n->num_namespaces; i++) { - NvmeNamespace 
*ns = &n->namespaces[i]; - NvmeIdNs *id_ns = &ns->id_ns; - id_ns->nsfeat = 0; - id_ns->nlbaf = 0; - id_ns->flbas = 0; - id_ns->mc = 0; - id_ns->dpc = 0; - id_ns->dps = 0; - id_ns->lbaf[0].ds = BDRV_SECTOR_BITS; - id_ns->ncap = id_ns->nuse = id_ns->nsze = - cpu_to_le64(nvme_ns_nlbas(n, ns)); + nvme_init_namespace(n, &n->namespaces[i], &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } } } From c3f5526d221c63a3278aefcba1415fa966fad615 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:26 +0200 Subject: [PATCH 21/43] hw/block/nvme: factor out pci setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-16-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index c98af03f44..a4022b0291 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1438,6 +1438,22 @@ static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) id_ns->nuse = id_ns->ncap; } +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) +{ + uint8_t *pci_conf = pci_dev->config; + + pci_conf[PCI_INTERRUPT_PIN] = 1; + pci_config_set_prog_interface(pci_conf, 0x2); + pci_config_set_class(pci_conf, PCI_CLASS_STORAGE_EXPRESS); + pcie_endpoint_cap_init(pci_dev, 0x80); + + memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, "nvme", + n->reg_size); + pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); + msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL); +} + static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); @@ -1461,19 +1477,9 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) 
return; } + nvme_init_pci(n, pci_dev); + pci_conf = pci_dev->config; - pci_conf[PCI_INTERRUPT_PIN] = 1; - pci_config_set_prog_interface(pci_dev->config, 0x2); - pci_config_set_class(pci_dev->config, PCI_CLASS_STORAGE_EXPRESS); - pcie_endpoint_cap_init(pci_dev, 0x80); - - memory_region_init_io(&n->iomem, OBJECT(n), &nvme_mmio_ops, n, - "nvme", n->reg_size); - pci_register_bar(pci_dev, 0, - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, - &n->iomem); - msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL); - id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); From 51ec094d4017eb61d8d92b8eb64343b013d9f90f Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:27 +0200 Subject: [PATCH 22/43] hw/block/nvme: factor out cmb setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-17-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index a4022b0291..8aabb4c3c3 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -56,6 +56,7 @@ #define NVME_REG_SIZE 0x1000 #define NVME_DB_SIZE 4 +#define NVME_CMB_BIR 2 #define NVME_GUEST_ERR(trace, fmt, ...) 
\ do { \ @@ -1438,6 +1439,28 @@ static void nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp) id_ns->nuse = id_ns->ncap; } +static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) +{ + NVME_CMBLOC_SET_BIR(n->bar.cmbloc, NVME_CMB_BIR); + NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); + + NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); + NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); + NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ + NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); + + n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, + "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); + pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); +} + static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) { uint8_t *pci_conf = pci_dev->config; @@ -1514,25 +1537,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.intmc = n->bar.intms = 0; if (n->params.cmb_size_mb) { - - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); - NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); - - NVME_CMBSZ_SET_SQS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_CQS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_LISTS(n->bar.cmbsz, 0); - NVME_CMBSZ_SET_RDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_WDS(n->bar.cmbsz, 1); - NVME_CMBSZ_SET_SZU(n->bar.cmbsz, 2); /* MBs */ - NVME_CMBSZ_SET_SZ(n->bar.cmbsz, n->params.cmb_size_mb); - - n->cmbuf = g_malloc0(NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - memory_region_init_io(&n->ctrl_mem, OBJECT(n), &nvme_cmb_ops, n, - "nvme-cmb", NVME_CMBSZ_GETSIZE(n->bar.cmbsz)); - pci_register_bar(pci_dev, NVME_CMBLOC_BIR(n->bar.cmbloc), - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); - + nvme_init_cmb(n, pci_dev); } else if (n->pmrdev) { /* 
Controller Capabilities register */ NVME_CAP_SET_PMRS(n->bar.cap, 1); From 37712e00b1f0ef157a71c0ad211bb4bf47efc9b8 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:28 +0200 Subject: [PATCH 23/43] hw/block/nvme: factor out pmr setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Maxim Levitsky Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20200609190333.59390-18-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 95 ++++++++++++++++++++++++++----------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 8aabb4c3c3..b954e7b7b2 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -57,6 +57,7 @@ #define NVME_REG_SIZE 0x1000 #define NVME_DB_SIZE 4 #define NVME_CMB_BIR 2 +#define NVME_PMR_BIR 2 #define NVME_GUEST_ERR(trace, fmt, ...) \ do { \ @@ -1461,6 +1462,55 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev) PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem); } +static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) +{ + /* Controller Capabilities register */ + NVME_CAP_SET_PMRS(n->bar.cap, 1); + + /* PMR Capabities register */ + n->bar.pmrcap = 0; + NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR); + NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); + /* Turn on bit 1 support */ + NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); + NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); + NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); + + /* PMR Control register */ + n->bar.pmrctl = 0; + NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); + + /* PMR Status register */ + n->bar.pmrsts = 0; + NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); + NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); + + /* PMR Elasticity Buffer Size register */ + n->bar.pmrebs = 0; + 
NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); + NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); + NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); + + /* PMR Sustained Write Throughput register */ + n->bar.pmrswtp = 0; + NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); + NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); + + /* PMR Memory Space Control register */ + n->bar.pmrmsc = 0; + NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); + NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); + + pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64 | + PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); +} + static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) { uint8_t *pci_conf = pci_dev->config; @@ -1539,50 +1589,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); } else if (n->pmrdev) { - /* Controller Capabilities register */ - NVME_CAP_SET_PMRS(n->bar.cap, 1); - - /* PMR Capabities register */ - n->bar.pmrcap = 0; - NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0); - NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0); - NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2); - NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0); - /* Turn on bit 1 support */ - NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02); - NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0); - NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0); - - /* PMR Control register */ - n->bar.pmrctl = 0; - NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0); - - /* PMR Status register */ - n->bar.pmrsts = 0; - NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0); - NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0); - NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0); - NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0); - - /* PMR Elasticity Buffer Size register */ - n->bar.pmrebs = 0; - NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0); - NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0); - NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0); - - /* PMR Sustained Write Throughput register */ - n->bar.pmrswtp = 0; - NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0); - 
NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0); - - /* PMR Memory Space Control register */ - n->bar.pmrmsc = 0; - NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0); - NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0); - - pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap), - PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 | - PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); + nvme_init_pmr(n, pci_dev); } for (i = 0; i < n->num_namespaces; i++) { From 0c35ad46b6ffec663f94e0ef47b8df856e180f98 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:29 +0200 Subject: [PATCH 24/43] hw/block/nvme: do cmb/pmr init as part of pci init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Maxim Levitsky Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20200609190333.59390-19-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index b954e7b7b2..02a6a97df9 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1525,6 +1525,12 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL); + + if (n->params.cmb_size_mb) { + nvme_init_cmb(n, pci_dev); + } else if (n->pmrdev) { + nvme_init_pmr(n, pci_dev); + } } static void nvme_realize(PCIDevice *pci_dev, Error **errp) @@ -1586,12 +1592,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; - if (n->params.cmb_size_mb) { - nvme_init_cmb(n, pci_dev); - } else if (n->pmrdev) { - nvme_init_pmr(n, pci_dev); - } - for (i = 0; i < n->num_namespaces; i++) { nvme_init_namespace(n, &n->namespaces[i], &local_err); if (local_err) { From 945cb8f4c20e01241ede4ab33593d18812a3c1e4 Mon Sep 17 00:00:00 
2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:30 +0200 Subject: [PATCH 25/43] hw/block/nvme: factor out controller identify setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Jensen Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Maxim Levitsky Reviewed-by: Keith Busch Message-Id: <20200609190333.59390-20-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 02a6a97df9..e10fc774fc 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1533,32 +1533,11 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) } } -static void nvme_realize(PCIDevice *pci_dev, Error **errp) +static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev) { - NvmeCtrl *n = NVME(pci_dev); NvmeIdCtrl *id = &n->id_ctrl; - Error *local_err = NULL; + uint8_t *pci_conf = pci_dev->config; - int i; - uint8_t *pci_conf; - - nvme_check_constraints(n, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - nvme_init_state(n); - - nvme_init_blk(n, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; - } - - nvme_init_pci(n, pci_dev); - - pci_conf = pci_dev->config; id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID)); id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID)); strpadcpy((char *)id->mn, sizeof(id->mn), "QEMU NVMe Ctrl", ' '); @@ -1591,6 +1570,30 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.vs = 0x00010200; n->bar.intmc = n->bar.intms = 0; +} + +static void nvme_realize(PCIDevice *pci_dev, Error **errp) +{ + NvmeCtrl *n = NVME(pci_dev); + Error *local_err = NULL; + + int i; + + nvme_check_constraints(n, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + nvme_init_state(n); + nvme_init_blk(n, &local_err); + if 
(local_err) { + error_propagate(errp, local_err); + return; + } + + nvme_init_pci(n, pci_dev); + nvme_init_ctrl(n, pci_dev); for (i = 0; i < n->num_namespaces; i++) { nvme_init_namespace(n, &n->namespaces[i], &local_err); From fbf2e5375e33d43c9e1386eed448e1a3c0996e88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Tue, 9 Jun 2020 21:03:31 +0200 Subject: [PATCH 26/43] hw/block/nvme: Verify msix_vector_use() returned value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit msix_vector_use() returns -EINVAL on error. Assert it won't. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200609190333.59390-21-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index e10fc774fc..fe17aa5d70 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -615,6 +615,10 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeCmd *cmd) static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t irq_enabled) { + int ret; + + ret = msix_vector_use(&n->parent_obj, vector); + assert(ret == 0); cq->ctrl = n; cq->cqid = cqid; cq->size = size; @@ -625,7 +629,6 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, uint64_t dma_addr, cq->head = cq->tail = 0; QTAILQ_INIT(&cq->req_list); QTAILQ_INIT(&cq->sq_list); - msix_vector_use(&n->parent_obj, cq->vector); n->cq[cqid] = cq; cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq); } From 6a25a4b42e24df515e0e9a6b65683e500c66de73 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:32 +0200 Subject: [PATCH 27/43] hw/block/nvme: add msix_qsize parameter Decouple the requested maximum number of ioqpairs (param max_ioqpairs) from the number of MSI-X interrupt vectors by introducing a new msix_qsize parameter and initialize MSI-X with that. 
This allows emulating a device that has fewer vectors than I/O queue pairs and also allows more than 2048 queue pairs. To keep the device behaving as previously, use a msix_qsize default of 65 (default max_ioqpairs + 1). This decoupling was actually suggested by Maxim some time ago in a slightly different context, so adding a Suggested-by. Suggested-by: Maxim Levitsky Signed-off-by: Klaus Jensen Message-Id: <20200609190333.59390-22-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 17 +++++++++++++---- hw/block/nvme.h | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index fe17aa5d70..acc6dbc900 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -54,6 +54,7 @@ #include "trace.h" #include "nvme.h" +#define NVME_MAX_IOQPAIRS 0xffff #define NVME_REG_SIZE 0x1000 #define NVME_DB_SIZE 4 #define NVME_CMB_BIR 2 @@ -662,7 +663,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd) trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } - if (unlikely(vector > n->params.max_ioqpairs)) { + if (unlikely(vector >= n->params.msix_qsize)) { trace_pci_nvme_err_invalid_create_cq_vector(vector); return NVME_INVALID_IRQ_VECTOR | NVME_DNR; } @@ -1371,9 +1372,16 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp) } if (params->max_ioqpairs < 1 || - params->max_ioqpairs > PCI_MSIX_FLAGS_QSIZE) { + params->max_ioqpairs > NVME_MAX_IOQPAIRS) { error_setg(errp, "max_ioqpairs must be between 1 and %d", - PCI_MSIX_FLAGS_QSIZE); + NVME_MAX_IOQPAIRS); + return; + } + + if (params->msix_qsize < 1 || + params->msix_qsize > PCI_MSIX_FLAGS_QSIZE + 1) { + error_setg(errp, "msix_qsize must be between 1 and %d", + PCI_MSIX_FLAGS_QSIZE + 1); return; } @@ -1527,7 +1535,7 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); - 
msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL); + msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, NULL); if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); @@ -1634,6 +1642,7 @@ static Property nvme_props[] = { DEFINE_PROP_UINT32("cmb_size_mb", NvmeCtrl, params.cmb_size_mb, 0), DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0), DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64), + DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 61dd9b23b8..1d30c0bca2 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -7,6 +7,7 @@ typedef struct NvmeParams { char *serial; uint32_t num_queues; /* deprecated since 5.1 */ uint32_t max_ioqpairs; + uint16_t msix_qsize; uint32_t cmb_size_mb; } NvmeParams; From 1c0c2163aa087c34efc7f84a8bc7e6640e3f6b75 Mon Sep 17 00:00:00 2001 From: Klaus Jensen Date: Tue, 9 Jun 2020 21:03:33 +0200 Subject: [PATCH 28/43] hw/block/nvme: verify msix_init_exclusive_bar() return value Pass an Error to msix_init_exclusive_bar() and check it. 
Signed-off-by: Klaus Jensen Message-Id: <20200609190333.59390-23-its@irrelevant.dk> Signed-off-by: Kevin Wolf --- hw/block/nvme.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index acc6dbc900..2a2e43f681 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1522,7 +1522,7 @@ static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev) PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr); } -static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) +static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) { uint8_t *pci_conf = pci_dev->config; @@ -1535,7 +1535,9 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev) n->reg_size); pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64, &n->iomem); - msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, NULL); + if (msix_init_exclusive_bar(pci_dev, n->params.msix_qsize, 4, errp)) { + return; + } if (n->params.cmb_size_mb) { nvme_init_cmb(n, pci_dev); @@ -1603,7 +1605,12 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) return; } - nvme_init_pci(n, pci_dev); + nvme_init_pci(n, pci_dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + nvme_init_ctrl(n, pci_dev); for (i = 0; i < n->num_namespaces; i++) { From d54f36ea8c26af32584511533f8884ac7a50bf4a Mon Sep 17 00:00:00 2001 From: Roman Bolshakov Date: Fri, 12 Jun 2020 13:58:31 +0300 Subject: [PATCH 29/43] .gitignore: Ignore storage-daemon files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The files are generated. 
Fixes: 2af282ec51a ("qemu-storage-daemon: Add --monitor option") Cc: Kevin Wolf Signed-off-by: Roman Bolshakov Message-Id: <20200612105830.17082-1-r.bolshakov@yadro.com> Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf --- .gitignore | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0c5af83aa7..90acb4347d 100644 --- a/.gitignore +++ b/.gitignore @@ -34,18 +34,18 @@ /qapi/qapi-builtin-types.[ch] /qapi/qapi-builtin-visit.[ch] /qapi/qapi-commands-*.[ch] -/qapi/qapi-commands.[ch] -/qapi/qapi-emit-events.[ch] +**/qapi/qapi-commands.[ch] +**/qapi/qapi-emit-events.[ch] /qapi/qapi-events-*.[ch] -/qapi/qapi-events.[ch] -/qapi/qapi-init-commands.[ch] -/qapi/qapi-introspect.[ch] +**/qapi/qapi-events.[ch] +**/qapi/qapi-init-commands.[ch] +**/qapi/qapi-introspect.[ch] /qapi/qapi-types-*.[ch] -/qapi/qapi-types.[ch] +**/qapi/qapi-types.[ch] /qapi/qapi-visit-*.[ch] !/qapi/qapi-visit-core.c -/qapi/qapi-visit.[ch] -/qapi/qapi-doc.texi +**/qapi/qapi-visit.[ch] +**/qapi/qapi-doc.texi /qemu-edid /qemu-img /qemu-nbd @@ -59,6 +59,7 @@ /qemu-keymap /qemu-monitor.texi /qemu-monitor-info.texi +/qemu-storage-daemon /qemu-version.h /qemu-version.h.tmp /module_block.h From 6abee2608563599f783bf1305a8322ee7b447815 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:09 +0300 Subject: [PATCH 30/43] virtio-blk: store opt_io_size with correct size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The width of opt_io_size in virtio_blk_config is 32bit. However, it's written with virtio_stw_p; this may result in value truncation, and on big-endian systems with legacy virtio in completely bogus readings in the guest. Use the appropriate accessor to store it. 
Signed-off-by: Roman Kagan Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Kevin Wolf Message-Id: <20200528225516.1676602-2-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 8882a1d1d4..6938a75aa5 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -930,7 +930,7 @@ static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config) virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls); virtio_stl_p(vdev, &blkcfg.blk_size, blk_size); virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size); - virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size); + virtio_stl_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size); blkcfg.geometry.heads = conf->heads; /* * We must ensure that the block device capacity is a multiple of From c56ee92fcba8cc922b1b2188ac20614d20223db2 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:10 +0300 Subject: [PATCH 31/43] block: consolidate blocksize properties consistency checks Several block device properties related to blocksize configuration must be in a certain relationship with respect to each other: physical block must be no smaller than logical block; min_io_size, opt_io_size, and discard_granularity must be a multiple of a logical block. To ensure these requirements are met, add corresponding consistency checks to blkconf_blocksizes, adjusting its signature to communicate a possible error to the caller. Also remove the now redundant consistency checks from the specific devices. 
Signed-off-by: Roman Kagan Reviewed-by: Eric Blake Reviewed-by: Paul Durrant Message-Id: <20200528225516.1676602-3-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/block/block.c | 30 +++++++++++++++++++++++++++++- hw/block/fdc.c | 5 ++++- hw/block/nvme.c | 4 +++- hw/block/swim.c | 5 ++++- hw/block/virtio-blk.c | 7 +------ hw/block/xen-block.c | 6 +----- hw/ide/qdev.c | 5 ++++- hw/scsi/scsi-disk.c | 12 +++++------- hw/usb/dev-storage.c | 5 ++++- include/hw/block/block.h | 2 +- tests/qemu-iotests/172.out | 2 +- 11 files changed, 57 insertions(+), 26 deletions(-) diff --git a/hw/block/block.c b/hw/block/block.c index bf56c7612b..b22207c921 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -61,7 +61,7 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size, return true; } -void blkconf_blocksizes(BlockConf *conf) +bool blkconf_blocksizes(BlockConf *conf, Error **errp) { BlockBackend *blk = conf->blk; BlockSizes blocksizes; @@ -83,6 +83,34 @@ void blkconf_blocksizes(BlockConf *conf) conf->logical_block_size = BDRV_SECTOR_SIZE; } } + + if (conf->logical_block_size > conf->physical_block_size) { + error_setg(errp, + "logical_block_size > physical_block_size not supported"); + return false; + } + + if (!QEMU_IS_ALIGNED(conf->min_io_size, conf->logical_block_size)) { + error_setg(errp, + "min_io_size must be a multiple of logical_block_size"); + return false; + } + + if (!QEMU_IS_ALIGNED(conf->opt_io_size, conf->logical_block_size)) { + error_setg(errp, + "opt_io_size must be a multiple of logical_block_size"); + return false; + } + + if (conf->discard_granularity != -1 && + !QEMU_IS_ALIGNED(conf->discard_granularity, + conf->logical_block_size)) { + error_setg(errp, "discard_granularity must be " + "a multiple of logical_block_size"); + return false; + } + + return true; } bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, diff --git a/hw/block/fdc.c b/hw/block/fdc.c index 8528b9a3c7..be0674e4aa 100644 --- 
a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -554,7 +554,10 @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp) read_only = !blk_bs(dev->conf.blk) || blk_is_read_only(dev->conf.blk); } - blkconf_blocksizes(&dev->conf); + if (!blkconf_blocksizes(&dev->conf, errp)) { + return; + } + if (dev->conf.logical_block_size != 512 || dev->conf.physical_block_size != 512) { diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 2a2e43f681..1aee042d4c 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1425,7 +1425,9 @@ static void nvme_init_state(NvmeCtrl *n) static void nvme_init_blk(NvmeCtrl *n, Error **errp) { - blkconf_blocksizes(&n->conf); + if (!blkconf_blocksizes(&n->conf, errp)) { + return; + } blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), false, errp); } diff --git a/hw/block/swim.c b/hw/block/swim.c index 8f124782f4..74f56e8f46 100644 --- a/hw/block/swim.c +++ b/hw/block/swim.c @@ -189,7 +189,10 @@ static void swim_drive_realize(DeviceState *qdev, Error **errp) assert(ret == 0); } - blkconf_blocksizes(&dev->conf); + if (!blkconf_blocksizes(&dev->conf, errp)) { + return; + } + if (dev->conf.logical_block_size != 512 || dev->conf.physical_block_size != 512) { diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 6938a75aa5..413783693c 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -1174,12 +1174,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) return; } - blkconf_blocksizes(&conf->conf); - - if (conf->conf.logical_block_size > - conf->conf.physical_block_size) { - error_setg(errp, - "logical_block_size > physical_block_size not supported"); + if (!blkconf_blocksizes(&conf->conf, errp)) { return; } diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index 2827c90ac7..1b7bc5de08 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -239,11 +239,7 @@ static void xen_block_realize(XenDevice *xendev, Error **errp) return; } - blkconf_blocksizes(conf); - - if 
(conf->logical_block_size > conf->physical_block_size) { - error_setg( - errp, "logical_block_size > physical_block_size not supported"); + if (!blkconf_blocksizes(conf, errp)) { return; } diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c index caa88526f5..3ccb5e2529 100644 --- a/hw/ide/qdev.c +++ b/hw/ide/qdev.c @@ -187,7 +187,10 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp) return; } - blkconf_blocksizes(&dev->conf); + if (!blkconf_blocksizes(&dev->conf, errp)) { + return; + } + if (dev->conf.logical_block_size != 512) { error_setg(errp, "logical_block_size must be 512 for IDE"); return; diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 387503e11b..8ce68a9dd6 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2346,12 +2346,7 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) return; } - blkconf_blocksizes(&s->qdev.conf); - - if (s->qdev.conf.logical_block_size > - s->qdev.conf.physical_block_size) { - error_setg(errp, - "logical_block_size > physical_block_size not supported"); + if (!blkconf_blocksizes(&s->qdev.conf, errp)) { return; } @@ -2436,7 +2431,9 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) if (s->qdev.conf.blk) { ctx = blk_get_aio_context(s->qdev.conf.blk); aio_context_acquire(ctx); - blkconf_blocksizes(&s->qdev.conf); + if (!blkconf_blocksizes(&s->qdev.conf, errp)) { + goto out; + } } s->qdev.blocksize = s->qdev.conf.logical_block_size; s->qdev.type = TYPE_DISK; @@ -2444,6 +2441,7 @@ static void scsi_hd_realize(SCSIDevice *dev, Error **errp) s->product = g_strdup("QEMU HARDDISK"); } scsi_realize(&s->qdev, errp); +out: if (ctx) { aio_context_release(ctx); } diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c index a5204b6f2a..f5977eb72e 100644 --- a/hw/usb/dev-storage.c +++ b/hw/usb/dev-storage.c @@ -612,7 +612,10 @@ static void usb_msd_storage_realize(USBDevice *dev, Error **errp) return; } - blkconf_blocksizes(&s->conf); + if (!blkconf_blocksizes(&s->conf, errp)) { + 
return; + } + if (!blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, errp)) { return; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index d7246f3862..784953a237 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -87,7 +87,7 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size, bool blkconf_geometry(BlockConf *conf, int *trans, unsigned cyls_max, unsigned heads_max, unsigned secs_max, Error **errp); -void blkconf_blocksizes(BlockConf *conf); +bool blkconf_blocksizes(BlockConf *conf, Error **errp); bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, bool resizable, Error **errp); diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 7abbe82427..59cc70aebb 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -1204,7 +1204,7 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica drive-type = "144" Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical_block_size=4096 -QEMU_PROG: -device floppy,drive=none0,logical_block_size=4096: Physical and logical block size must be 512 for floppy +QEMU_PROG: -device floppy,drive=none0,logical_block_size=4096: logical_block_size > physical_block_size not supported Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physical_block_size=1024 QEMU_PROG: -device floppy,drive=none0,physical_block_size=1024: Physical and logical block size must be 512 for floppy From a345c5523607a0a4549990cce1be096b63df9668 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:11 +0300 Subject: [PATCH 32/43] qdev-properties: blocksize: use same limits in code and description Make it easier (more visible) to maintain the limits on the blocksize properties in sync with the respective description, by using macros both in the code and in the description. 
Signed-off-by: Roman Kagan Reviewed-by: Eric Blake Message-Id: <20200528225516.1676602-4-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/core/qdev-properties.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index cc924815da..249dc69bd8 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -729,6 +729,13 @@ const PropertyInfo qdev_prop_pci_devfn = { /* --- blocksize --- */ +/* lower limit is sector size */ +#define MIN_BLOCK_SIZE 512 +#define MIN_BLOCK_SIZE_STR stringify(MIN_BLOCK_SIZE) +/* upper limit is the max power of 2 that fits in uint16_t */ +#define MAX_BLOCK_SIZE 32768 +#define MAX_BLOCK_SIZE_STR stringify(MAX_BLOCK_SIZE) + static void set_blocksize(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -736,8 +743,6 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, Property *prop = opaque; uint16_t value, *ptr = qdev_get_prop_ptr(dev, prop); Error *local_err = NULL; - const int64_t min = 512; - const int64_t max = 32768; if (dev->realized) { qdev_prop_set_after_realize(dev, name, errp); @@ -750,9 +755,12 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, return; } /* value of 0 means "unset" */ - if (value && (value < min || value > max)) { - error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE, - dev->id ? : "", name, (int64_t)value, min, max); + if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) { + error_setg(errp, + "Property %s.%s doesn't take value %" PRIu16 + " (minimum: " MIN_BLOCK_SIZE_STR + ", maximum: " MAX_BLOCK_SIZE_STR ")", + dev->id ? 
: "", name, value); return; } @@ -769,7 +777,8 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, const PropertyInfo qdev_prop_blocksize = { .name = "uint16", - .description = "A power of two between 512 and 32768", + .description = "A power of two between " MIN_BLOCK_SIZE_STR + " and " MAX_BLOCK_SIZE_STR, .get = get_uint16, .set = set_blocksize, .set_default_value = set_default_value_uint, From 914e74cda9a54ac860000aa18882dc40d3c8180b Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:12 +0300 Subject: [PATCH 33/43] qdev-properties: add size32 property type Introduce size32 property type which handles size suffixes (k, m, g) just like size property, but is uint32_t rather than uint64_t. It's going to be useful for properties that are byte sizes but are inherently 32bit, like BlkConf.opt_io_size or .discard_granularity (they are switched to this new property type in a followup commit). The getter for size32 is left out for a separate patch as its benefit is less obvious, and it affects test output; for now the regular uint32 getter is used. 
Signed-off-by: Roman Kagan Message-Id: <20200528225516.1676602-5-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/core/qdev-properties.c | 40 ++++++++++++++++++++++++++++++++++++ include/hw/qdev-properties.h | 3 +++ 2 files changed, 43 insertions(+) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 249dc69bd8..40c13f6ebe 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -727,6 +727,46 @@ const PropertyInfo qdev_prop_pci_devfn = { .set_default_value = set_default_value_int, }; +/* --- 32bit unsigned int 'size' type --- */ + +static void set_size32(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + uint32_t *ptr = qdev_get_prop_ptr(dev, prop); + uint64_t value; + Error *local_err = NULL; + + if (dev->realized) { + qdev_prop_set_after_realize(dev, name, errp); + return; + } + + visit_type_size(v, name, &value, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (value > UINT32_MAX) { + error_setg(errp, + "Property %s.%s doesn't take value %" PRIu64 + " (maximum: %u)", + dev->id ? 
: "", name, value, UINT32_MAX); + return; + } + + *ptr = value; +} + +const PropertyInfo qdev_prop_size32 = { + .name = "size", + .get = get_uint32, + .set = set_size32, + .set_default_value = set_default_value_uint, +}; + /* --- blocksize --- */ /* lower limit is sector size */ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index f161604fb6..c03eadfad6 100644 --- a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -29,6 +29,7 @@ extern const PropertyInfo qdev_prop_drive; extern const PropertyInfo qdev_prop_drive_iothread; extern const PropertyInfo qdev_prop_netdev; extern const PropertyInfo qdev_prop_pci_devfn; +extern const PropertyInfo qdev_prop_size32; extern const PropertyInfo qdev_prop_blocksize; extern const PropertyInfo qdev_prop_pci_host_devaddr; extern const PropertyInfo qdev_prop_uuid; @@ -196,6 +197,8 @@ extern const PropertyInfo qdev_prop_pcie_link_width; BlockdevOnError) #define DEFINE_PROP_BIOS_CHS_TRANS(_n, _s, _f, _d) \ DEFINE_PROP_SIGNED(_n, _s, _f, _d, qdev_prop_bios_chs_trans, int) +#define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ + DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) #define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint16_t) #define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \ From 645b55d1c2bf35ce6344f1f472fa30e327cafce0 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:13 +0300 Subject: [PATCH 34/43] qdev-properties: make blocksize accept size suffixes It appears convenient to be able to specify physical_block_size and logical_block_size using common size suffixes. Teach the blocksize property setter to interpret them. Also express the upper and lower limits in the respective units. 
Signed-off-by: Roman Kagan Reviewed-by: Eric Blake Message-Id: <20200528225516.1676602-6-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/core/qdev-properties.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 40c13f6ebe..c9af6a1341 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -14,6 +14,7 @@ #include "qapi/visitor.h" #include "chardev/char.h" #include "qemu/uuid.h" +#include "qemu/units.h" void qdev_prop_set_after_realize(DeviceState *dev, const char *name, Error **errp) @@ -771,17 +772,18 @@ const PropertyInfo qdev_prop_size32 = { /* lower limit is sector size */ #define MIN_BLOCK_SIZE 512 -#define MIN_BLOCK_SIZE_STR stringify(MIN_BLOCK_SIZE) +#define MIN_BLOCK_SIZE_STR "512 B" /* upper limit is the max power of 2 that fits in uint16_t */ -#define MAX_BLOCK_SIZE 32768 -#define MAX_BLOCK_SIZE_STR stringify(MAX_BLOCK_SIZE) +#define MAX_BLOCK_SIZE (32 * KiB) +#define MAX_BLOCK_SIZE_STR "32 KiB" static void set_blocksize(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { DeviceState *dev = DEVICE(obj); Property *prop = opaque; - uint16_t value, *ptr = qdev_get_prop_ptr(dev, prop); + uint16_t *ptr = qdev_get_prop_ptr(dev, prop); + uint64_t value; Error *local_err = NULL; if (dev->realized) { @@ -789,7 +791,7 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, return; } - visit_type_uint16(v, name, &value, &local_err); + visit_type_size(v, name, &value, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -797,7 +799,7 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, /* value of 0 means "unset" */ if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) { error_setg(errp, - "Property %s.%s doesn't take value %" PRIu16 + "Property %s.%s doesn't take value %" PRIu64 " (minimum: " MIN_BLOCK_SIZE_STR ", maximum: " MAX_BLOCK_SIZE_STR ")", dev->id ? 
: "", name, value); @@ -816,7 +818,7 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, } const PropertyInfo qdev_prop_blocksize = { - .name = "uint16", + .name = "size", .description = "A power of two between " MIN_BLOCK_SIZE_STR " and " MAX_BLOCK_SIZE_STR, .get = get_uint16, From 4f44bbc5bba0858d34b1aa356397847696276546 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:14 +0300 Subject: [PATCH 35/43] block: make BlockConf size props 32bit and accept size suffixes Convert all size-related properties in BlockConf to 32bit. This will accommodate bigger block sizes (in a followup patch). This also allows making them all accept size suffixes, either via DEFINE_PROP_BLOCKSIZE or via DEFINE_PROP_SIZE32. Also, since min_io_size is exposed to the guest by scsi and virtio-blk devices as a uint16_t in units of logical blocks, introduce an additional check in blkconf_blocksizes to prevent its silent truncation. Signed-off-by: Roman Kagan Message-Id: <20200528225516.1676602-7-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/block/block.c | 10 ++++++++++ hw/core/qdev-properties.c | 4 ++-- include/hw/block/block.h | 12 ++++++------ include/hw/qdev-properties.h | 2 +- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/hw/block/block.c b/hw/block/block.c index b22207c921..1e34573da7 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -96,6 +96,16 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) return false; } + /* + * all devices which support min_io_size (scsi and virtio-blk) expose it to + * the guest as a uint16_t in units of logical blocks + */ + if (conf->min_io_size / conf->logical_block_size > UINT16_MAX) { + error_setg(errp, "min_io_size must not exceed %u logical blocks", + UINT16_MAX); + return false; + } + if (!QEMU_IS_ALIGNED(conf->opt_io_size, conf->logical_block_size)) { error_setg(errp, "opt_io_size must be a multiple of logical_block_size"); diff --git a/hw/core/qdev-properties.c
b/hw/core/qdev-properties.c index c9af6a1341..bd4abdc1d1 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -782,7 +782,7 @@ static void set_blocksize(Object *obj, Visitor *v, const char *name, { DeviceState *dev = DEVICE(obj); Property *prop = opaque; - uint16_t *ptr = qdev_get_prop_ptr(dev, prop); + uint32_t *ptr = qdev_get_prop_ptr(dev, prop); uint64_t value; Error *local_err = NULL; @@ -821,7 +821,7 @@ const PropertyInfo qdev_prop_blocksize = { .name = "size", .description = "A power of two between " MIN_BLOCK_SIZE_STR " and " MAX_BLOCK_SIZE_STR, - .get = get_uint16, + .get = get_uint32, .set = set_blocksize, .set_default_value = set_default_value_uint, }; diff --git a/include/hw/block/block.h b/include/hw/block/block.h index 784953a237..1e8b6253dd 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -18,9 +18,9 @@ typedef struct BlockConf { BlockBackend *blk; - uint16_t physical_block_size; - uint16_t logical_block_size; - uint16_t min_io_size; + uint32_t physical_block_size; + uint32_t logical_block_size; + uint32_t min_io_size; uint32_t opt_io_size; int32_t bootindex; uint32_t discard_granularity; @@ -51,9 +51,9 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) _conf.logical_block_size), \ DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ _conf.physical_block_size), \ - DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0), \ - DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0), \ - DEFINE_PROP_UINT32("discard_granularity", _state, \ + DEFINE_PROP_SIZE32("min_io_size", _state, _conf.min_io_size, 0), \ + DEFINE_PROP_SIZE32("opt_io_size", _state, _conf.opt_io_size, 0), \ + DEFINE_PROP_SIZE32("discard_granularity", _state, \ _conf.discard_granularity, -1), \ DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \ ON_OFF_AUTO_AUTO), \ diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h index c03eadfad6..5252bb6b1a 100644 --- 
a/include/hw/qdev-properties.h +++ b/include/hw/qdev-properties.h @@ -200,7 +200,7 @@ extern const PropertyInfo qdev_prop_pcie_link_width; #define DEFINE_PROP_SIZE32(_n, _s, _f, _d) \ DEFINE_PROP_UNSIGNED(_n, _s, _f, _d, qdev_prop_size32, uint32_t) #define DEFINE_PROP_BLOCKSIZE(_n, _s, _f) \ - DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint16_t) + DEFINE_PROP_UNSIGNED(_n, _s, _f, 0, qdev_prop_blocksize, uint32_t) #define DEFINE_PROP_PCI_HOST_DEVADDR(_n, _s, _f) \ DEFINE_PROP(_n, _s, _f, qdev_prop_pci_host_devaddr, PCIHostDeviceAddress) #define DEFINE_PROP_OFF_AUTO_PCIBAR(_n, _s, _f, _d) \ From 031ffd9a612618e86eb1d783b285d658e95f117d Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:15 +0300 Subject: [PATCH 36/43] qdev-properties: add getter for size32 and blocksize Add getter for size32, and use it for blocksize, too. In its human-readable branch, it reports approximate size in human-readable units next to the exact byte value, like the getter for 64bit size does. Adjust the expected test output accordingly. 
Signed-off-by: Roman Kagan Reviewed-by: Eric Blake Message-Id: <20200528225516.1676602-8-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/core/qdev-properties.c | 15 +- tests/qemu-iotests/172.out | 530 ++++++++++++++++++------------------- 2 files changed, 278 insertions(+), 267 deletions(-) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index bd4abdc1d1..63d48db70c 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -730,6 +730,17 @@ const PropertyInfo qdev_prop_pci_devfn = { /* --- 32bit unsigned int 'size' type --- */ +static void get_size32(Object *obj, Visitor *v, const char *name, void *opaque, + Error **errp) +{ + DeviceState *dev = DEVICE(obj); + Property *prop = opaque; + uint32_t *ptr = qdev_get_prop_ptr(dev, prop); + uint64_t value = *ptr; + + visit_type_size(v, name, &value, errp); +} + static void set_size32(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { @@ -763,7 +774,7 @@ static void set_size32(Object *obj, Visitor *v, const char *name, void *opaque, const PropertyInfo qdev_prop_size32 = { .name = "size", - .get = get_uint32, + .get = get_size32, .set = set_size32, .set_default_value = set_default_value_uint, }; @@ -821,7 +832,7 @@ const PropertyInfo qdev_prop_blocksize = { .name = "size", .description = "A power of two between " MIN_BLOCK_SIZE_STR " and " MAX_BLOCK_SIZE_STR, - .get = get_uint32, + .get = get_size32, .set = set_blocksize, .set_default_value = set_default_value_uint, }; diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out index 59cc70aebb..e782c5957e 100644 --- a/tests/qemu-iotests/172.out +++ b/tests/qemu-iotests/172.out @@ -24,11 +24,11 @@ Testing: dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + 
min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -54,11 +54,11 @@ Testing: -fda TEST_DIR/t.qcow2 dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -81,22 +81,22 @@ Testing: -fdb TEST_DIR/t.qcow2 dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -119,22 +119,22 @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity 
= 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -160,11 +160,11 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -187,22 +187,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -225,22 +225,22 @@ 
Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -266,11 +266,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveA=none0 dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -293,11 +293,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global isa-fdc.driveB=none0 dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = 
false drive-type = "144" @@ -320,22 +320,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -361,11 +361,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -388,11 +388,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 
4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -415,22 +415,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -456,22 +456,22 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" 
share-rw = false drive-type = "144" @@ -494,22 +494,22 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -532,11 +532,11 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -559,11 +559,11 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -global is dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + 
discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -589,22 +589,22 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -627,22 +627,22 @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) 
write-cache = "auto" share-rw = false drive-type = "144" @@ -665,22 +665,22 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -703,22 +703,22 @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "floppy1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false 
drive-type = "144" @@ -750,22 +750,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -788,22 +788,22 @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "floppy0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -832,22 +832,22 @@ 
Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -870,22 +870,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 1 (0x1) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -908,22 +908,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 
-drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 0 (0x0) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -946,22 +946,22 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco dev: floppy, id "" unit = 0 (0x0) drive = "none1" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" dev: floppy, id "" unit = 1 (0x1) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -999,11 +999,11 @@ Testing: -device floppy dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 
(0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -1026,11 +1026,11 @@ Testing: -device floppy,drive-type=120 dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "120" @@ -1053,11 +1053,11 @@ Testing: -device floppy,drive-type=144 dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -1080,11 +1080,11 @@ Testing: -device floppy,drive-type=288 dev: floppy, id "" unit = 0 (0x0) drive = "" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -1110,11 +1110,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t dev: floppy, id "" unit = 0 (0x0) drive = "none0" - 
logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "120" @@ -1137,11 +1137,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "288" @@ -1167,11 +1167,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type = "144" @@ -1194,11 +1194,11 @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica dev: floppy, id "" unit = 0 (0x0) drive = "none0" - logical_block_size = 512 (0x200) - physical_block_size = 512 (0x200) - min_io_size = 0 (0x0) - opt_io_size = 0 (0x0) - discard_granularity = 4294967295 (0xffffffff) + logical_block_size = 512 (512 B) + physical_block_size = 512 (512 B) + min_io_size = 0 (0 B) + opt_io_size = 0 (0 B) + discard_granularity = 4294967295 (4 GiB) write-cache = "auto" share-rw = false drive-type 
= "144" From 6510ba1c0ebd1503097ac831956505adf5ec29d2 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Fri, 29 May 2020 01:55:16 +0300 Subject: [PATCH 37/43] block: lift blocksize property limit to 2 MiB Logical and physical block sizes in QEMU are limited to 32 KiB. This appears unnecessarily tight, and we've seen bigger block sizes handy at times. Lift the limitation up to 2 MiB which appears to be good enough for everybody, and matches the qcow2 cluster size limit. Signed-off-by: Roman Kagan Reviewed-by: Eric Blake Message-Id: <20200528225516.1676602-9-rvkagan@yandex-team.ru> Signed-off-by: Kevin Wolf --- hw/core/qdev-properties.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c index 63d48db70c..ead35d7ffd 100644 --- a/hw/core/qdev-properties.c +++ b/hw/core/qdev-properties.c @@ -784,9 +784,12 @@ const PropertyInfo qdev_prop_size32 = { /* lower limit is sector size */ #define MIN_BLOCK_SIZE 512 #define MIN_BLOCK_SIZE_STR "512 B" -/* upper limit is the max power of 2 that fits in uint16_t */ -#define MAX_BLOCK_SIZE (32 * KiB) -#define MAX_BLOCK_SIZE_STR "32 KiB" +/* + * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and + * matches qcow2 cluster size limit + */ +#define MAX_BLOCK_SIZE (2 * MiB) +#define MAX_BLOCK_SIZE_STR "2 MiB" static void set_blocksize(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) From ff3caf5af0fd204133668f26fde39b27f86c5d76 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 17 Jun 2020 12:48:18 +0200 Subject: [PATCH 38/43] iotests.py: Add skip_for_formats() decorator Sometimes, we want to skip some test methods for certain formats. This decorator allows that. 
Signed-off-by: Max Reitz Message-Id: <20200617104822.27525-2-mreitz@redhat.com> Tested-by: Thomas Huth Signed-off-by: Kevin Wolf --- tests/qemu-iotests/118 | 7 +++---- tests/qemu-iotests/iotests.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/118 b/tests/qemu-iotests/118 index adc8a848b5..2350929fd8 100755 --- a/tests/qemu-iotests/118 +++ b/tests/qemu-iotests/118 @@ -683,11 +683,10 @@ class TestBlockJobsAfterCycle(ChangeBaseClass): except OSError: pass + # We need backing file support + @iotests.skip_for_formats(('vpc', 'parallels', 'qcow', 'vdi', 'vmdk', 'raw', + 'vhdx')) def test_snapshot_and_commit(self): - # We need backing file support - if iotests.imgfmt != 'qcow2' and iotests.imgfmt != 'qed': - return - result = self.vm.qmp('blockdev-snapshot-sync', device='drive0', snapshot_file=new_img, format=iotests.imgfmt) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index f20d90f969..5ea4c4df8b 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -1103,6 +1103,22 @@ def skip_if_unsupported(required_formats=(), read_only=False): return func_wrapper return skip_test_decorator +def skip_for_formats(formats: Sequence[str] = ()) \ + -> Callable[[Callable[[QMPTestCase, List[Any], Dict[str, Any]], None]], + Callable[[QMPTestCase, List[Any], Dict[str, Any]], None]]: + '''Skip Test Decorator + Skips the test for the given formats''' + def skip_test_decorator(func): + def func_wrapper(test_case: QMPTestCase, *args: List[Any], + **kwargs: Dict[str, Any]) -> None: + if imgfmt in formats: + msg = f'{test_case}: Skipped for format {imgfmt}' + test_case.case_skip(msg) + else: + func(test_case, *args, **kwargs) + return func_wrapper + return skip_test_decorator + def skip_if_user_is_root(func): '''Skip Test Decorator Runs the test only without root permissions''' From c7070942c78741a7040655f94ed011c09aa832cf Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 17 Jun 
2020 12:48:19 +0200 Subject: [PATCH 39/43] iotests/041: Skip test_small_target for qed qed does not support shrinking images, so the test_small_target method should be skipped to keep 041 passing. Fixes: 16cea4ee1c8e5a69a058e76f426b2e17974d8d7d Signed-off-by: Max Reitz Message-Id: <20200617104822.27525-3-mreitz@redhat.com> Tested-by: Thomas Huth Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 601c756117..b843f88a66 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -277,6 +277,8 @@ class TestSingleBlockdev(TestSingleDrive): result = self.vm.run_job('job0') self.assertEqual(result, 'Source and target image have different sizes') + # qed does not support shrinking + @iotests.skip_for_formats(('qed',)) def test_small_target(self): self.do_test_target_size(self.image_len // 2) From e6de31bcad4079d055036b75b0a4715de6140f15 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 17 Jun 2020 12:48:20 +0200 Subject: [PATCH 40/43] iotests/292: data_file is unsupported Fixes: e4d7019e1a81c61de6a925c3ac5bb6e62ea21b29 Signed-off-by: Max Reitz Message-Id: <20200617104822.27525-4-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/292 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/qemu-iotests/292 b/tests/qemu-iotests/292 index a2de27cca4..83ab19231d 100755 --- a/tests/qemu-iotests/292 +++ b/tests/qemu-iotests/292 @@ -40,6 +40,11 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 _supported_proto file _supported_os Linux +# We need qemu-img map to show the file where the data is allocated, +# but with an external data file, it will show that instead of the +# file we want to check. So just skip this test for external data +# files.
+_unsupported_imgopts data_file echo '### Create the backing image' BACKING_IMG="$TEST_IMG.base" From 73b2b7b5cab2d06cf810d7f8c7c4cc918db30e04 Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 17 Jun 2020 12:48:21 +0200 Subject: [PATCH 41/43] iotests/229: data_file is unsupported Fixes: d89ac3cf305b28c024a76805a84d75c0ee1e786f Signed-off-by: Max Reitz Message-Id: <20200617104822.27525-5-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/229 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/qemu-iotests/229 b/tests/qemu-iotests/229 index 99acb55ebb..89a5359f32 100755 --- a/tests/qemu-iotests/229 +++ b/tests/qemu-iotests/229 @@ -46,6 +46,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow2 qed _supported_proto file _supported_os Linux +# blkdebug can only inject errors on bs->file, so external data files +# do not work with this test +_unsupported_imgopts data_file DEST_IMG="$TEST_DIR/d.$IMGFMT" From 2e3becf9d7ca8d07e3587aacd5431e0b5945896e Mon Sep 17 00:00:00 2001 From: Max Reitz Date: Wed, 17 Jun 2020 12:48:22 +0200 Subject: [PATCH 42/43] iotests/{190,291}: compat=0.10 is unsupported Fixes: 5d72c68b49769c927e90b78af6d90f6a384b26ac Fixes: cf2d1203dcfc2bf964453d83a2302231ce77f2dc Signed-off-by: Max Reitz Message-Id: <20200617104822.27525-6-mreitz@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/190 | 2 ++ tests/qemu-iotests/291 | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/qemu-iotests/190 b/tests/qemu-iotests/190 index fe630918e9..c22d8d64f9 100755 --- a/tests/qemu-iotests/190 +++ b/tests/qemu-iotests/190 @@ -41,6 +41,8 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 # See 178 for more extensive tests across more formats _supported_fmt qcow2 _supported_proto file +# compat=0.10 does not support bitmaps +_unsupported_imgopts 'compat=0.10' echo "== Huge file without bitmaps ==" echo diff --git a/tests/qemu-iotests/291 b/tests/qemu-iotests/291 index 404f8521f7..28e4fb9b4d 100755 --- 
a/tests/qemu-iotests/291 +++ b/tests/qemu-iotests/291 @@ -39,6 +39,8 @@ _supported_fmt qcow2 _supported_proto file _supported_os Linux _require_command QEMU_NBD +# compat=0.10 does not support bitmaps +_unsupported_imgopts 'compat=0.10' echo echo "=== Initial image setup ===" From 3419ec713f04c323b030e0763459435335b25476 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Tue, 9 Jun 2020 15:59:44 -0500 Subject: [PATCH 43/43] iotests: Add copyright line in qcow2.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The file qcow2.py was originally contributed in 2012 by Kevin Wolf, but was not given traditional boilerplate headers at the time. The missing license was just rectified (commit 16306a7b39) using the project-default GPLv2+, but as Vladimir is not at Red Hat, he did not add a Copyright line. All earlier contributions have come from CC'd authors, where all but Stefan used a Red Hat address at the time of the contribution, and that copyright carries over to the split to qcow2_format.py (d5262c7124). CC: Kevin Wolf CC: Stefan Hajnoczi CC: Eduardo Habkost CC: Max Reitz CC: Philippe Mathieu-Daudé CC: Paolo Bonzini Signed-off-by: Eric Blake Message-Id: <20200609205944.3549240-1-eblake@redhat.com> Acked-by: Stefan Hajnoczi Acked-by: Philippe Mathieu-Daudé Signed-off-by: Kevin Wolf --- tests/qemu-iotests/qcow2.py | 2 ++ tests/qemu-iotests/qcow2_format.py | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/qemu-iotests/qcow2.py b/tests/qemu-iotests/qcow2.py index 8c187e9a72..0910e6ac07 100755 --- a/tests/qemu-iotests/qcow2.py +++ b/tests/qemu-iotests/qcow2.py @@ -2,6 +2,8 @@ # # Manipulations with qcow2 image # +# Copyright (C) 2012 Red Hat, Inc. 
+# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or diff --git a/tests/qemu-iotests/qcow2_format.py b/tests/qemu-iotests/qcow2_format.py index 0f65fd161d..cc432e7ae0 100644 --- a/tests/qemu-iotests/qcow2_format.py +++ b/tests/qemu-iotests/qcow2_format.py @@ -1,6 +1,7 @@ # Library for manipulations with qcow2 image # # Copyright (c) 2020 Virtuozzo International GmbH. +# Copyright (C) 2012 Red Hat, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by