scsi: only access SCSIDevice->requests from one thread

Stop depending on the AioContext lock and instead access
SCSIDevice->requests from only one thread at a time:
- When the VM is running only the BlockBackend's AioContext may access
  the requests list.
- When the VM is stopped only the main loop may access the requests
  list.

These constraints protect the requests list without the need for locking
in the I/O code path.
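
The diff below implements this rule with scsi_device_for_each_req_sync() and
scsi_device_for_each_req_async(). Distilled into a standalone sketch (the
helper names here are hypothetical, not part of the patch), the running-VM
case looks like this:

    /* Sketch: hop into the BlockBackend's AioContext before touching the list */
    #include "qemu/osdep.h"
    #include "block/aio.h"
    #include "sysemu/block-backend.h"
    #include "hw/scsi/scsi.h"

    static void walk_requests_bh(void *opaque)
    {
        SCSIDevice *s = opaque;
        SCSIRequest *req, *next;

        /* Now in the AioContext that executes requests: no locking needed */
        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
            /* ... inspect or cancel req here ... */
        }
    }

    static void walk_requests(SCSIDevice *s)
    {
        AioContext *ctx = blk_get_aio_context(s->conf.blk);

        if (ctx == qemu_get_current_aio_context()) {
            walk_requests_bh(s);    /* already in the right thread */
        } else {
            /* The real patch also object_ref()s the device to keep it alive */
            aio_bh_schedule_oneshot(ctx, walk_requests_bh, s);
        }
    }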

Note that multiple IOThreads are not supported yet because the code
assumes all SCSIRequests are executed from a single AioContext. Leave
that as future work.
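
As a rough illustration of that assumption (not code from the patch): every
request for a device executes where its BlockBackend's AioContext runs, so a
request handler could assert

    assert(blk_get_aio_context(s->conf.blk) == qemu_get_current_aio_context());

and never trip it, because there is at most one such AioContext per device.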

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20231204164259.1515217-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Stefan Hajnoczi 2023-12-04 11:42:56 -05:00 committed by Kevin Wolf
parent bb6e2511eb
commit eaad0fe260
2 changed files with 131 additions and 57 deletions

hw/scsi/scsi-bus.c

@@ -85,6 +85,89 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
     return d;
 }
 
+/*
+ * Invoke @fn() for each enqueued request in device @s. Must be called from the
+ * main loop thread while the guest is stopped. This is only suitable for
+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
+ */
+static void scsi_device_for_each_req_sync(SCSIDevice *s,
+                                          void (*fn)(SCSIRequest *, void *),
+                                          void *opaque)
+{
+    SCSIRequest *req;
+    SCSIRequest *next_req;
+
+    assert(!runstate_is_running());
+    assert(qemu_in_main_thread());
+
+    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+        fn(req, opaque);
+    }
+}
+
+typedef struct {
+    SCSIDevice *s;
+    void (*fn)(SCSIRequest *, void *);
+    void *fn_opaque;
+} SCSIDeviceForEachReqAsyncData;
+
+static void scsi_device_for_each_req_async_bh(void *opaque)
+{
+    g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
+    SCSIDevice *s = data->s;
+    AioContext *ctx;
+    SCSIRequest *req;
+    SCSIRequest *next;
+
+    /*
+     * If the AioContext changed before this BH was called then reschedule into
+     * the new AioContext before accessing ->requests. This can happen when
+     * scsi_device_for_each_req_async() is called and then the AioContext is
+     * changed before BHs are run.
+     */
+    ctx = blk_get_aio_context(s->conf.blk);
+    if (ctx != qemu_get_current_aio_context()) {
+        aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh,
+                                g_steal_pointer(&data));
+        return;
+    }
+
+    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+        data->fn(req, data->fn_opaque);
+    }
+
+    /* Drop the reference taken by scsi_device_for_each_req_async() */
+    object_unref(OBJECT(s));
+}
+
+/*
+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
+ * runs in the AioContext that is executing the request.
+ */
+static void scsi_device_for_each_req_async(SCSIDevice *s,
+                                           void (*fn)(SCSIRequest *, void *),
+                                           void *opaque)
+{
+    assert(qemu_in_main_thread());
+
+    SCSIDeviceForEachReqAsyncData *data =
+        g_new(SCSIDeviceForEachReqAsyncData, 1);
+
+    data->s = s;
+    data->fn = fn;
+    data->fn_opaque = opaque;
+
+    /*
+     * Hold a reference to the SCSIDevice until
+     * scsi_device_for_each_req_async_bh() finishes.
+     */
+    object_ref(OBJECT(s));
+
+    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
+                            scsi_device_for_each_req_async_bh,
+                            data);
+}
+
 static void scsi_device_realize(SCSIDevice *s, Error **errp)
 {
     SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
@@ -144,20 +227,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host,
     qbus_set_bus_hotplug_handler(BUS(bus));
 }
 
-static void scsi_dma_restart_bh(void *opaque)
+void scsi_req_retry(SCSIRequest *req)
 {
-    SCSIDevice *s = opaque;
-    SCSIRequest *req, *next;
+    req->retry = true;
+}
 
-    qemu_bh_delete(s->bh);
-    s->bh = NULL;
-
-    aio_context_acquire(blk_get_aio_context(s->conf.blk));
-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        scsi_req_ref(req);
-        if (req->retry) {
-            req->retry = false;
-            switch (req->cmd.mode) {
+/* Called in the AioContext that is executing the request */
+static void scsi_dma_restart_req(SCSIRequest *req, void *opaque)
+{
+    scsi_req_ref(req);
+    if (req->retry) {
+        req->retry = false;
+        switch (req->cmd.mode) {
         case SCSI_XFER_FROM_DEV:
         case SCSI_XFER_TO_DEV:
             scsi_req_continue(req);
@@ -166,37 +247,22 @@ static void scsi_dma_restart_bh(void *opaque)
             scsi_req_dequeue(req);
             scsi_req_enqueue(req);
             break;
-            }
         }
-        scsi_req_unref(req);
     }
-    aio_context_release(blk_get_aio_context(s->conf.blk));
-
-    /* Drop the reference that was acquired in scsi_dma_restart_cb */
-    object_unref(OBJECT(s));
-}
-
-void scsi_req_retry(SCSIRequest *req)
-{
-    /* No need to save a reference, because scsi_dma_restart_bh just
-     * looks at the request list. */
-    req->retry = true;
+    scsi_req_unref(req);
 }
 
 static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
 {
     SCSIDevice *s = opaque;
 
+    assert(qemu_in_main_thread());
+
     if (!running) {
         return;
     }
 
-    if (!s->bh) {
-        AioContext *ctx = blk_get_aio_context(s->conf.blk);
-        /* The reference is dropped in scsi_dma_restart_bh.*/
-        object_ref(OBJECT(s));
-        s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
-                                   &DEVICE(s)->mem_reentrancy_guard);
-        qemu_bh_schedule(s->bh);
-    }
+    scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL);
 }
 
 static bool scsi_bus_is_address_free(SCSIBus *bus,
@@ -1657,15 +1723,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense)
     }
 }
 
+static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque)
+{
+    scsi_req_cancel_async(req, NULL);
+}
+
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
 {
-    SCSIRequest *req;
+    scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
 
-    aio_context_acquire(blk_get_aio_context(sdev->conf.blk));
-    while (!QTAILQ_EMPTY(&sdev->requests)) {
-        req = QTAILQ_FIRST(&sdev->requests);
-        scsi_req_cancel_async(req, NULL);
-    }
     blk_drain(sdev->conf.blk);
-    aio_context_release(blk_get_aio_context(sdev->conf.blk));
 
     scsi_device_set_ua(sdev, sense);
@@ -1737,31 +1804,33 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)
 
 /* SCSI request list. For simplicity, pv points to the whole device */
+static void put_scsi_req(SCSIRequest *req, void *opaque)
+{
+    QEMUFile *f = opaque;
+
+    assert(!req->io_canceled);
+    assert(req->status == -1 && req->host_status == -1);
+    assert(req->enqueued);
+
+    qemu_put_sbyte(f, req->retry ? 1 : 2);
+    qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
+    qemu_put_be32s(f, &req->tag);
+    qemu_put_be32s(f, &req->lun);
+    if (req->bus->info->save_request) {
+        req->bus->info->save_request(f, req);
+    }
+    if (req->ops->save_request) {
+        req->ops->save_request(f, req);
+    }
+}
+
 static int put_scsi_requests(QEMUFile *f, void *pv, size_t size,
                              const VMStateField *field, JSONWriter *vmdesc)
 {
     SCSIDevice *s = pv;
-    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus);
-    SCSIRequest *req;
 
-    QTAILQ_FOREACH(req, &s->requests, next) {
-        assert(!req->io_canceled);
-        assert(req->status == -1 && req->host_status == -1);
-        assert(req->enqueued);
-
-        qemu_put_sbyte(f, req->retry ? 1 : 2);
-        qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
-        qemu_put_be32s(f, &req->tag);
-        qemu_put_be32s(f, &req->lun);
-        if (bus->info->save_request) {
-            bus->info->save_request(f, req);
-        }
-        if (req->ops->save_request) {
-            req->ops->save_request(f, req);
-        }
-    }
+    scsi_device_for_each_req_sync(s, put_scsi_req, f);
     qemu_put_sbyte(f, 0);
     return 0;
 }

include/hw/scsi/scsi.h

@@ -69,14 +69,19 @@ struct SCSIDevice
 {
     DeviceState qdev;
     VMChangeStateEntry *vmsentry;
-    QEMUBH *bh;
     uint32_t id;
     BlockConf conf;
     SCSISense unit_attention;
     bool sense_is_ua;
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
+
+    /*
+     * The requests list is only accessed from the AioContext that executes
+     * requests or from the main loop when IOThread processing is stopped.
+     */
     QTAILQ_HEAD(, SCSIRequest) requests;
     uint32_t channel;
     uint32_t lun;
     int blocksize;