diff --git a/block/nvme.c b/block/nvme.c index eb2f54dd9d..374e268915 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -33,6 +33,14 @@ #define NVME_QUEUE_SIZE 128 #define NVME_BAR_SIZE 8192 +/* + * We have to leave one slot empty as that is the full queue case where + * head == tail + 1. + */ +#define NVME_NUM_REQS (NVME_QUEUE_SIZE - 1) + +typedef struct BDRVNVMeState BDRVNVMeState; + typedef struct { int32_t head, tail; uint8_t *queue; @@ -47,24 +55,30 @@ typedef struct { int cid; void *prp_list_page; uint64_t prp_list_iova; - bool busy; + int free_req_next; /* q->reqs[] index of next free req */ } NVMeRequest; typedef struct { - CoQueue free_req_queue; QemuMutex lock; + /* Read from I/O code path, initialized under BQL */ + BDRVNVMeState *s; + int index; + /* Fields protected by BQL */ - int index; uint8_t *prp_list_pages; /* Fields protected by @lock */ + CoQueue free_req_queue; NVMeQueue sq, cq; int cq_phase; - NVMeRequest reqs[NVME_QUEUE_SIZE]; - bool busy; + int free_req_head; + NVMeRequest reqs[NVME_NUM_REQS]; int need_kick; int inflight; + + /* Thread-safe, no lock necessary */ + QEMUBH *completion_bh; } NVMeQueuePair; /* Memory mapped registers */ @@ -89,7 +103,7 @@ typedef volatile struct { QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000); -typedef struct { +struct BDRVNVMeState { AioContext *aio_context; QEMUVFIOState *vfio; NVMeRegs *regs; @@ -123,11 +137,13 @@ typedef struct { /* PCI address (required for nvme_refresh_filename()) */ char *device; -} BDRVNVMeState; +}; #define NVME_BLOCK_OPT_DEVICE "device" #define NVME_BLOCK_OPT_NAMESPACE "namespace" +static void nvme_process_completion_bh(void *opaque); + static QemuOptsList runtime_opts = { .name = "nvme", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), @@ -167,8 +183,11 @@ static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q, } } -static void nvme_free_queue_pair(BlockDriverState *bs, NVMeQueuePair *q) +static void nvme_free_queue_pair(NVMeQueuePair *q) { + if 
(q->completion_bh) { + qemu_bh_delete(q->completion_bh); + } qemu_vfree(q->prp_list_pages); qemu_vfree(q->sq.queue); qemu_vfree(q->cq.queue); @@ -198,21 +217,28 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs, uint64_t prp_list_iova; qemu_mutex_init(&q->lock); + q->s = s; q->index = idx; qemu_co_queue_init(&q->free_req_queue); - q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_QUEUE_SIZE); + q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS); + q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs), + nvme_process_completion_bh, q); r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, - s->page_size * NVME_QUEUE_SIZE, + s->page_size * NVME_NUM_REQS, false, &prp_list_iova); if (r) { goto fail; } - for (i = 0; i < NVME_QUEUE_SIZE; i++) { + q->free_req_head = -1; + for (i = 0; i < NVME_NUM_REQS; i++) { NVMeRequest *req = &q->reqs[i]; req->cid = i + 1; + req->free_req_next = q->free_req_head; + q->free_req_head = i; req->prp_list_page = q->prp_list_pages + i * s->page_size; req->prp_list_iova = prp_list_iova + i * s->page_size; } + nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -229,13 +255,15 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs, return q; fail: - nvme_free_queue_pair(bs, q); + nvme_free_queue_pair(q); return NULL; } /* With q->lock */ -static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q) +static void nvme_kick(NVMeQueuePair *q) { + BDRVNVMeState *s = q->s; + if (s->plugged || !q->need_kick) { return; } @@ -254,13 +282,11 @@ static void nvme_kick(BDRVNVMeState *s, NVMeQueuePair *q) */ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) { - int i; - NVMeRequest *req = NULL; + NVMeRequest *req; qemu_mutex_lock(&q->lock); - while (q->inflight + q->need_kick > NVME_QUEUE_SIZE - 2) { - /* We have to leave one slot empty as that is the full queue case (head - * == tail + 1). 
*/ + + while (q->free_req_head == -1) { if (qemu_in_coroutine()) { trace_nvme_free_req_queue_wait(q); qemu_co_queue_wait(&q->free_req_queue, &q->lock); @@ -269,20 +295,40 @@ static NVMeRequest *nvme_get_free_req(NVMeQueuePair *q) return NULL; } } - for (i = 0; i < NVME_QUEUE_SIZE; i++) { - if (!q->reqs[i].busy) { - q->reqs[i].busy = true; - req = &q->reqs[i]; - break; - } - } - /* We have checked inflight and need_kick while holding q->lock, so one - * free req must be available. */ - assert(req); + + req = &q->reqs[q->free_req_head]; + q->free_req_head = req->free_req_next; + req->free_req_next = -1; + qemu_mutex_unlock(&q->lock); return req; } +/* With q->lock */ +static void nvme_put_free_req_locked(NVMeQueuePair *q, NVMeRequest *req) +{ + req->free_req_next = q->free_req_head; + q->free_req_head = req - q->reqs; +} + +/* With q->lock */ +static void nvme_wake_free_req_locked(NVMeQueuePair *q) +{ + if (!qemu_co_queue_empty(&q->free_req_queue)) { + replay_bh_schedule_oneshot_event(q->s->aio_context, + nvme_free_req_queue_cb, q); + } +} + +/* Insert a request in the freelist and wake waiters */ +static void nvme_put_free_req_and_wake(NVMeQueuePair *q, NVMeRequest *req) +{ + qemu_mutex_lock(&q->lock); + nvme_put_free_req_locked(q, req); + nvme_wake_free_req_locked(q); + qemu_mutex_unlock(&q->lock); +} + static inline int nvme_translate_error(const NvmeCqe *c) { uint16_t status = (le16_to_cpu(c->status) >> 1) & 0xFF; @@ -306,26 +352,40 @@ static inline int nvme_translate_error(const NvmeCqe *c) } /* With q->lock */ -static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q) +static bool nvme_process_completion(NVMeQueuePair *q) { + BDRVNVMeState *s = q->s; bool progress = false; NVMeRequest *preq; NVMeRequest req; NvmeCqe *c; trace_nvme_process_completion(s, q->index, q->inflight); - if (q->busy || s->plugged) { - trace_nvme_process_completion_queue_busy(s, q->index); + if (s->plugged) { + trace_nvme_process_completion_queue_plugged(s, q->index); return 
false; } - q->busy = true; + + /* + * Support re-entrancy when a request cb() function invokes aio_poll(). + * Pending completions must be visible to aio_poll() so that a cb() + * function can wait for the completion of another request. + * + * The aio_poll() loop will execute our BH and we'll resume completion + * processing there. + */ + qemu_bh_schedule(q->completion_bh); + assert(q->inflight >= 0); while (q->inflight) { + int ret; int16_t cid; + c = (NvmeCqe *)&q->cq.queue[q->cq.head * NVME_CQ_ENTRY_BYTES]; if ((le16_to_cpu(c->status) & 0x1) == q->cq_phase) { break; } + ret = nvme_translate_error(c); q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE; if (!q->cq.head) { q->cq_phase = !q->cq_phase; @@ -336,33 +396,47 @@ static bool nvme_process_completion(BDRVNVMeState *s, NVMeQueuePair *q) cid); continue; } - assert(cid <= NVME_QUEUE_SIZE); trace_nvme_complete_command(s, q->index, cid); preq = &q->reqs[cid - 1]; req = *preq; assert(req.cid == cid); assert(req.cb); - preq->busy = false; + nvme_put_free_req_locked(q, preq); preq->cb = preq->opaque = NULL; - qemu_mutex_unlock(&q->lock); - req.cb(req.opaque, nvme_translate_error(c)); - qemu_mutex_lock(&q->lock); q->inflight--; + qemu_mutex_unlock(&q->lock); + req.cb(req.opaque, ret); + qemu_mutex_lock(&q->lock); progress = true; } if (progress) { /* Notify the device so it can post more completions. */ smp_mb_release(); *q->cq.doorbell = cpu_to_le32(q->cq.head); - if (!qemu_co_queue_empty(&q->free_req_queue)) { - replay_bh_schedule_oneshot_event(s->aio_context, - nvme_free_req_queue_cb, q); - } + nvme_wake_free_req_locked(q); } - q->busy = false; + + qemu_bh_cancel(q->completion_bh); + return progress; } +static void nvme_process_completion_bh(void *opaque) +{ + NVMeQueuePair *q = opaque; + + /* + * We're being invoked because a nvme_process_completion() cb() function + * called aio_poll(). The callback may be waiting for further completions + * so notify the device that it has space to fill in more completions now. 
+ */ + smp_mb_release(); + *q->cq.doorbell = cpu_to_le32(q->cq.head); + nvme_wake_free_req_locked(q); + + nvme_process_completion(q); +} + static void nvme_trace_command(const NvmeCmd *cmd) { int i; @@ -374,8 +448,7 @@ static void nvme_trace_command(const NvmeCmd *cmd) } } -static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q, - NVMeRequest *req, +static void nvme_submit_command(NVMeQueuePair *q, NVMeRequest *req, NvmeCmd *cmd, BlockCompletionFunc cb, void *opaque) { @@ -384,15 +457,15 @@ static void nvme_submit_command(BDRVNVMeState *s, NVMeQueuePair *q, req->opaque = opaque; cmd->cid = cpu_to_le32(req->cid); - trace_nvme_submit_command(s, q->index, req->cid); + trace_nvme_submit_command(q->s, q->index, req->cid); nvme_trace_command(cmd); qemu_mutex_lock(&q->lock); memcpy((uint8_t *)q->sq.queue + q->sq.tail * NVME_SQ_ENTRY_BYTES, cmd, sizeof(*cmd)); q->sq.tail = (q->sq.tail + 1) % NVME_QUEUE_SIZE; q->need_kick++; - nvme_kick(s, q); - nvme_process_completion(s, q); + nvme_kick(q); + nvme_process_completion(q); qemu_mutex_unlock(&q->lock); } @@ -407,13 +480,12 @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, NvmeCmd *cmd) { NVMeRequest *req; - BDRVNVMeState *s = bs->opaque; int ret = -EINPROGRESS; req = nvme_get_free_req(q); if (!req) { return -EBUSY; } - nvme_submit_command(s, q, req, cmd, nvme_cmd_sync_cb, &ret); + nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret); BDRV_POLL_WHILE(bs, ret == -EINPROGRESS); return ret; @@ -512,8 +584,20 @@ static bool nvme_poll_queues(BDRVNVMeState *s) for (i = 0; i < s->nr_queues; i++) { NVMeQueuePair *q = s->queues[i]; + const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; + NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; + + /* + * Do an early check for completions. q->lock isn't needed because + * nvme_process_completion() only runs in the event loop thread and + * cannot race with itself. 
+ */ + if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) { + continue; + } + qemu_mutex_lock(&q->lock); - while (nvme_process_completion(s, q)) { + while (nvme_process_completion(q)) { /* Keep polling */ progress = true; } @@ -551,7 +635,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) }; if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { error_setg(errp, "Failed to create io queue [%d]", n); - nvme_free_queue_pair(bs, q); + nvme_free_queue_pair(q); return false; } cmd = (NvmeCmd) { @@ -562,7 +646,7 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp) }; if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { error_setg(errp, "Failed to create io queue [%d]", n); - nvme_free_queue_pair(bs, q); + nvme_free_queue_pair(q); return false; } s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1); @@ -757,7 +841,7 @@ static void nvme_close(BlockDriverState *bs) BDRVNVMeState *s = bs->opaque; for (i = 0; i < s->nr_queues; ++i) { - nvme_free_queue_pair(bs, s->queues[i]); + nvme_free_queue_pair(s->queues[i]); } g_free(s->queues); aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, @@ -987,10 +1071,10 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs, r = nvme_cmd_map_qiov(bs, &cmd, req, qiov); qemu_co_mutex_unlock(&s->dma_map_lock); if (r) { - req->busy = false; + nvme_put_free_req_and_wake(ioq, req); return r; } - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); data.co = qemu_coroutine_self(); while (data.ret == -EINPROGRESS) { @@ -1090,7 +1174,7 @@ static coroutine_fn int nvme_co_flush(BlockDriverState *bs) assert(s->nr_queues > 1); req = nvme_get_free_req(ioq); assert(req); - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); data.co = qemu_coroutine_self(); if (data.ret == -EINPROGRESS) { @@ -1143,7 +1227,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs, req = 
nvme_get_free_req(ioq); assert(req); - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); data.co = qemu_coroutine_self(); while (data.ret == -EINPROGRESS) { @@ -1204,13 +1288,13 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs, qemu_co_mutex_unlock(&s->dma_map_lock); if (ret) { - req->busy = false; + nvme_put_free_req_and_wake(ioq, req); goto out; } trace_nvme_dsm(s, offset, bytes); - nvme_submit_command(s, ioq, req, &cmd, nvme_rw_cb, &data); + nvme_submit_command(ioq, req, &cmd, nvme_rw_cb, &data); data.co = qemu_coroutine_self(); while (data.ret == -EINPROGRESS) { @@ -1262,6 +1346,13 @@ static void nvme_detach_aio_context(BlockDriverState *bs) { BDRVNVMeState *s = bs->opaque; + for (int i = 0; i < s->nr_queues; i++) { + NVMeQueuePair *q = s->queues[i]; + + qemu_bh_delete(q->completion_bh); + q->completion_bh = NULL; + } + aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, false, NULL, NULL); } @@ -1274,6 +1365,13 @@ static void nvme_attach_aio_context(BlockDriverState *bs, s->aio_context = new_context; aio_set_event_notifier(new_context, &s->irq_notifier, false, nvme_handle_event, nvme_poll_cb); + + for (int i = 0; i < s->nr_queues; i++) { + NVMeQueuePair *q = s->queues[i]; + + q->completion_bh = + aio_bh_new(new_context, nvme_process_completion_bh, q); + } } static void nvme_aio_plug(BlockDriverState *bs) @@ -1292,8 +1390,8 @@ static void nvme_aio_unplug(BlockDriverState *bs) for (i = 1; i < s->nr_queues; i++) { NVMeQueuePair *q = s->queues[i]; qemu_mutex_lock(&q->lock); - nvme_kick(s, q); - nvme_process_completion(s, q); + nvme_kick(q); + nvme_process_completion(q); qemu_mutex_unlock(&q->lock); } } diff --git a/block/trace-events b/block/trace-events index 29dff8881c..dbe76a7613 100644 --- a/block/trace-events +++ b/block/trace-events @@ -158,7 +158,7 @@ nvme_kick(void *s, int queue) "s %p queue %d" nvme_dma_flush_queue_wait(void *s) "s %p" nvme_error(int 
cmd_specific, int sq_head, int sqid, int cid, int status) "cmd_specific %d sq_head %d sqid %d cid %d status 0x%x" nvme_process_completion(void *s, int index, int inflight) "s %p queue %d inflight %d" -nvme_process_completion_queue_busy(void *s, int index) "s %p queue %d" +nvme_process_completion_queue_plugged(void *s, int index) "s %p queue %d" nvme_complete_command(void *s, int index, int cid) "s %p queue %d cid %d" nvme_submit_command(void *s, int index, int cid) "s %p queue %d cid %d" nvme_submit_command_raw(int c0, int c1, int c2, int c3, int c4, int c5, int c6, int c7) "%02x %02x %02x %02x %02x %02x %02x %02x" diff --git a/configure b/configure index ba88fd1824..ae8737d5a2 100755 --- a/configure +++ b/configure @@ -307,6 +307,7 @@ audio_win_int="" libs_qga="" debug_info="yes" stack_protector="" +safe_stack="" use_containers="yes" gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb") @@ -1287,6 +1288,10 @@ for opt do ;; --disable-stack-protector) stack_protector="no" ;; + --enable-safe-stack) safe_stack="yes" + ;; + --disable-safe-stack) safe_stack="no" + ;; --disable-curses) curses="no" ;; --enable-curses) curses="yes" @@ -1829,6 +1834,8 @@ disabled with --disable-FEATURE, default is enabled if available: debug-tcg TCG debugging (default is disabled) debug-info debugging information sparse sparse checker + safe-stack SafeStack Stack Smash Protection. Depends on + clang/llvm >= 3.7 and requires coroutine backend ucontext. gnutls GNUTLS cryptography support nettle nettle cryptography support @@ -5573,6 +5580,67 @@ if test "$debug_stack_usage" = "yes"; then fi fi +################################################## +# SafeStack + + +if test "$safe_stack" = "yes"; then +cat > $TMPC << EOF +int main(int argc, char *argv[]) +{ +#if ! __has_feature(safe_stack) +#error SafeStack Disabled +#endif + return 0; +} +EOF + flag="-fsanitize=safe-stack" + # Check that safe-stack is supported and enabled. 
+ if compile_prog "-Werror $flag" "$flag"; then + # Flag needed both at compilation and at linking + QEMU_CFLAGS="$QEMU_CFLAGS $flag" + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" + else + error_exit "SafeStack not supported by your compiler" + fi + if test "$coroutine" != "ucontext"; then + error_exit "SafeStack is only supported by the coroutine backend ucontext" + fi +else +cat > $TMPC << EOF +int main(int argc, char *argv[]) +{ +#if defined(__has_feature) +#if __has_feature(safe_stack) +#error SafeStack Enabled +#endif +#endif + return 0; +} +EOF +if test "$safe_stack" = "no"; then + # Make sure that safe-stack is disabled + if ! compile_prog "-Werror" ""; then + # SafeStack was already enabled, try to explicitly remove the feature + flag="-fno-sanitize=safe-stack" + if ! compile_prog "-Werror $flag" "$flag"; then + error_exit "Configure cannot disable SafeStack" + fi + QEMU_CFLAGS="$QEMU_CFLAGS $flag" + QEMU_LDFLAGS="$QEMU_LDFLAGS $flag" + fi +else # "$safe_stack" = "" + # Set safe_stack to yes or no based on pre-existing flags + if compile_prog "-Werror" ""; then + safe_stack="no" + else + safe_stack="yes" + if test "$coroutine" != "ucontext"; then + error_exit "SafeStack is only supported by the coroutine backend ucontext" + fi + fi +fi +fi ########################################## # check if we have open_by_handle_at @@ -6765,6 +6833,7 @@ echo "sparse enabled $sparse" echo "strip binaries $strip_opt" echo "profiler $profiler" echo "static build $static" +echo "safe stack $safe_stack" if test "$darwin" = "yes" ; then echo "Cocoa support $cocoa" fi @@ -8370,6 +8439,10 @@ if test "$ccache_cpp2" = "yes"; then echo "export CCACHE_CPP2=y" >> $config_host_mak fi +if test "$safe_stack" = "yes"; then + echo "CONFIG_SAFESTACK=y" >> $config_host_mak +fi + # If we're using a separate build tree, set it up now. 
# DIRS are directories which we simply mkdir in the build tree; # LINKS are things to symlink back into the source tree diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index bd6b0468e1..1da148552f 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -28,6 +28,11 @@ #include "qemu/queue.h" #include "qemu/coroutine.h" +#ifdef CONFIG_SAFESTACK +/* Pointer to the unsafe stack, defined by the compiler */ +extern __thread void *__safestack_unsafe_stack_ptr; +#endif + #define COROUTINE_STACK_SIZE (1 << 20) typedef enum { diff --git a/scripts/minikconf.py b/scripts/minikconf.py index 90b99517c1..bcd91015d3 100755 --- a/scripts/minikconf.py +++ b/scripts/minikconf.py @@ -402,7 +402,7 @@ class KconfigParser: if incl_abs_fname in self.data.previously_included: return try: - fp = open(incl_abs_fname, 'r') + fp = open(incl_abs_fname, 'rt', encoding='utf-8') except IOError as e: raise KconfigParserError(self, '%s: %s' % (e.strerror, include)) @@ -696,7 +696,7 @@ if __name__ == '__main__': parser.do_assignment(name, value == 'y') external_vars.add(name[7:]) else: - fp = open(arg, 'r') + fp = open(arg, 'rt', encoding='utf-8') parser.parse_file(fp) fp.close() @@ -705,7 +705,7 @@ if __name__ == '__main__': if key not in external_vars and config[key]: print ('CONFIG_%s=y' % key) - deps = open(argv[2], 'w') + deps = open(argv[2], 'wt', encoding='utf-8') for fname in data.previously_included: print ('%s: %s' % (argv[1], fname), file=deps) deps.close() diff --git a/tests/check-block.sh b/tests/check-block.sh index ad320c21ba..8e29c868e5 100755 --- a/tests/check-block.sh +++ b/tests/check-block.sh @@ -21,7 +21,17 @@ if grep -q "CONFIG_GPROF=y" config-host.mak 2>/dev/null ; then exit 0 fi -if grep -q "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ; then +# Disable tests with any sanitizer except for SafeStack +CFLAGS=$( grep "CFLAGS.*-fsanitize" config-host.mak 2>/dev/null ) +SANITIZE_FLAGS="" +#Remove all occurrences of
-fsanitize=safe-stack +for i in ${CFLAGS}; do + if [ "${i}" != "-fsanitize=safe-stack" ]; then + SANITIZE_FLAGS="${SANITIZE_FLAGS} ${i}" + fi +done +if echo ${SANITIZE_FLAGS} | grep -q "\-fsanitize" 2>/dev/null; then + # Have a sanitize flag that is not allowed, stop echo "Sanitizers are enabled ==> Not running the qemu-iotests." exit 0 fi diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index f6fc49a0e5..aade82afb8 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -30,6 +30,10 @@ #include "qemu-common.h" #include "qemu/coroutine_int.h" +#ifdef CONFIG_SAFESTACK +#error "SafeStack is not compatible with code run in alternate signal stacks" +#endif + typedef struct { Coroutine base; void *stack; diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 613f4c118e..f0b66320e1 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -45,6 +45,11 @@ typedef struct { Coroutine base; void *stack; size_t stack_size; +#ifdef CONFIG_SAFESTACK + /* Need an unsafe stack for each coroutine */ + void *unsafe_stack; + size_t unsafe_stack_size; +#endif sigjmp_buf env; void *tsan_co_fiber; @@ -179,6 +184,10 @@ Coroutine *qemu_coroutine_new(void) co = g_malloc0(sizeof(*co)); co->stack_size = COROUTINE_STACK_SIZE; co->stack = qemu_alloc_stack(&co->stack_size); +#ifdef CONFIG_SAFESTACK + co->unsafe_stack_size = COROUTINE_STACK_SIZE; + co->unsafe_stack = qemu_alloc_stack(&co->unsafe_stack_size); +#endif co->base.entry_arg = &old_env; /* stash away our jmp_buf */ uc.uc_link = &old_uc; @@ -203,6 +212,22 @@ Coroutine *qemu_coroutine_new(void) COROUTINE_YIELD, &fake_stack_save, co->stack, co->stack_size, co->tsan_co_fiber); + +#ifdef CONFIG_SAFESTACK + /* + * Before we swap the context, set the new unsafe stack + * The unsafe stack grows just like the normal stack, so start from + * the last usable location of the memory area. 
+ * NOTE: we don't have to re-set the usp afterwards because we are + * coming back to this context through a siglongjmp. + * The compiler already wrapped the corresponding sigsetjmp call with + * code that saves the usp on the (safe) stack before the call, and + * restores it right after (which is where we return with siglongjmp). + */ + void *usp = co->unsafe_stack + co->unsafe_stack_size; + __safestack_unsafe_stack_ptr = usp; +#endif + swapcontext(&old_uc, &uc); } @@ -235,6 +260,9 @@ void qemu_coroutine_delete(Coroutine *co_) #endif qemu_free_stack(co->stack, co->stack_size); +#ifdef CONFIG_SAFESTACK + qemu_free_stack(co->unsafe_stack, co->unsafe_stack_size); +#endif g_free(co); }