Hyper-V Dynamic Memory protocol driver.

This driver is like virtio-balloon on steroids for Windows guests:
 it allows both changing the guest memory allocation via ballooning and
 inserting pieces of extra RAM into it on demand from a provided memory
 backend via Windows-native Hyper-V Dynamic Memory protocol.
 
 * Preparatory patches to support empty memory devices and ones with
 large alignment requirements.
 
 * Revert of recently added "hw/virtio/virtio-pmem: Replace impossible
 check by assertion" commit 5960f254db since this series makes this
 situation possible again.
 
 * Protocol definitions.
 
 * Hyper-V DM protocol driver (hv-balloon) base (ballooning only).
 
 * Hyper-V DM protocol driver (hv-balloon) hot-add support.
 
 * qapi query-memory-devices support for the driver.
 
 * qapi HV_BALLOON_STATUS_REPORT event.
 
 * The relevant PC machine plumbing.
 
 * New MAINTAINERS entry for the above.
 -----BEGIN PGP SIGNATURE-----
 
 iQGzBAABCAAdFiEE4ndqq6COJv9aG0oJUrHW6VHQzgcFAmVI81IACgkQUrHW6VHQ
 zgdzTgv+I5eV2R01YLOBBJhBjzxZ4/BUqkuUHNxHpfjuCqEIzPb7FIfoZ4ZyXZFT
 YJdSE4lPeTZLrmmi/Nt6G0rUKDvdCeIgkS2VLHFSsTV8IzcT71BTRGzV0zAjUF5v
 yDH6uzo6e9gmaziIalRjibUxSDjCQmoCifms2rS2DwazADudUp+naGfm+3uyA0gM
 raOfBfRkNZsDqhXg2ayuqPIES75xQONoON9xYPKDAthS48POEbqtWBKuFopr3kXY
 y0eph+NAw+RajCyLYKM3poIgaSu3l4WegInuKQffzqKR8dxrbwPdCmtgo6NSHx0W
 uDfl7FUBnGzrR18VU4ZfTSrF5SVscGwF9EL7uocJen15inJjl1q3G53uZgyGzHLC
 cw8fKMjucmE8njQR2qiMyX0b+T4+9nKO1rykBgTG/+c9prRUVoxYpFCF117Ei0U8
 QzLGACW1oK+LV41bekWAye7w9pShUtFaxffhPbJeZDDGh7q0x61R3Z3yKkA07p46
 /YWWFWUD
 =RAb0
 -----END PGP SIGNATURE-----

Merge tag 'pull-hv-balloon-20231106' of https://github.com/maciejsszmigiero/qemu into staging

Hyper-V Dynamic Memory protocol driver.

This driver is like virtio-balloon on steroids for Windows guests:
it allows both changing the guest memory allocation via ballooning and
inserting pieces of extra RAM into it on demand from a provided memory
backend via Windows-native Hyper-V Dynamic Memory protocol.

* Preparatory patches to support empty memory devices and ones with
large alignment requirements.

* Revert of recently added "hw/virtio/virtio-pmem: Replace impossible
check by assertion" commit 5960f254db since this series makes this
situation possible again.

* Protocol definitions.

* Hyper-V DM protocol driver (hv-balloon) base (ballooning only).

* Hyper-V DM protocol driver (hv-balloon) hot-add support.

* qapi query-memory-devices support for the driver.

* qapi HV_BALLOON_STATUS_REPORT event.

* The relevant PC machine plumbing.

* New MAINTAINERS entry for the above.

# -----BEGIN PGP SIGNATURE-----
#
# iQGzBAABCAAdFiEE4ndqq6COJv9aG0oJUrHW6VHQzgcFAmVI81IACgkQUrHW6VHQ
# zgdzTgv+I5eV2R01YLOBBJhBjzxZ4/BUqkuUHNxHpfjuCqEIzPb7FIfoZ4ZyXZFT
# YJdSE4lPeTZLrmmi/Nt6G0rUKDvdCeIgkS2VLHFSsTV8IzcT71BTRGzV0zAjUF5v
# yDH6uzo6e9gmaziIalRjibUxSDjCQmoCifms2rS2DwazADudUp+naGfm+3uyA0gM
# raOfBfRkNZsDqhXg2ayuqPIES75xQONoON9xYPKDAthS48POEbqtWBKuFopr3kXY
# y0eph+NAw+RajCyLYKM3poIgaSu3l4WegInuKQffzqKR8dxrbwPdCmtgo6NSHx0W
# uDfl7FUBnGzrR18VU4ZfTSrF5SVscGwF9EL7uocJen15inJjl1q3G53uZgyGzHLC
# cw8fKMjucmE8njQR2qiMyX0b+T4+9nKO1rykBgTG/+c9prRUVoxYpFCF117Ei0U8
# QzLGACW1oK+LV41bekWAye7w9pShUtFaxffhPbJeZDDGh7q0x61R3Z3yKkA07p46
# /YWWFWUD
# =RAb0
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 06 Nov 2023 22:08:18 HKT
# gpg:                using RSA key E2776AABA08E26FF5A1B4A0952B1D6E951D0CE07
# gpg: Good signature from "Maciej S. Szmigiero <mail@maciej.szmigiero.name>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 727A 0D4D DB9E D9F6 039B  ECEF 847F 5E37 90CE 0977
#      Subkey fingerprint: E277 6AAB A08E 26FF 5A1B  4A09 52B1 D6E9 51D0 CE07

* tag 'pull-hv-balloon-20231106' of https://github.com/maciejsszmigiero/qemu:
  MAINTAINERS: Add an entry for Hyper-V Dynamic Memory Protocol
  hw/i386/pc: Support hv-balloon
  qapi: Add HV_BALLOON_STATUS_REPORT event and its QMP query command
  qapi: Add query-memory-devices support to hv-balloon
  Add Hyper-V Dynamic Memory Protocol driver (hv-balloon) hot-add support
  Add Hyper-V Dynamic Memory Protocol driver (hv-balloon) base
  Add Hyper-V Dynamic Memory Protocol definitions
  memory-device: Drop size alignment check
  Revert "hw/virtio/virtio-pmem: Replace impossible check by assertion"
  memory-device: Support empty memory devices

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
master
Stefan Hajnoczi 2023-11-07 09:41:42 +08:00
commit 17735e9371
26 changed files with 3180 additions and 14 deletions

View File

@ -46,3 +46,6 @@ config FUZZ
config VFIO_USER_SERVER_ALLOWED
bool
imply VFIO_USER_SERVER
config HV_BALLOON_POSSIBLE
bool

View File

@ -2656,6 +2656,14 @@ F: hw/usb/canokey.c
F: hw/usb/canokey.h
F: docs/system/devices/canokey.rst
Hyper-V Dynamic Memory Protocol
M: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
S: Supported
F: hw/hyperv/hv-balloon*.c
F: hw/hyperv/hv-balloon*.h
F: include/hw/hyperv/dynmem-proto.h
F: include/hw/hyperv/hv-balloon.h
Subsystems
----------
Overall Audio backends

View File

@ -253,6 +253,7 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
MemoryDeviceInfo *value;
PCDIMMDeviceInfo *di;
SgxEPCDeviceInfo *se;
HvBalloonDeviceInfo *hi;
for (info = info_list; info; info = info->next) {
value = info->value;
@ -310,6 +311,20 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
monitor_printf(mon, " node: %" PRId64 "\n", se->node);
monitor_printf(mon, " memdev: %s\n", se->memdev);
break;
case MEMORY_DEVICE_INFO_KIND_HV_BALLOON:
hi = value->u.hv_balloon.data;
monitor_printf(mon, "Memory device [%s]: \"%s\"\n",
MemoryDeviceInfoKind_str(value->type),
hi->id ? hi->id : "");
if (hi->has_memaddr) {
monitor_printf(mon, " memaddr: 0x%" PRIx64 "\n",
hi->memaddr);
}
monitor_printf(mon, " max-size: %" PRIu64 "\n", hi->max_size);
if (hi->memdev) {
monitor_printf(mon, " memdev: %s\n", hi->memdev);
}
break;
default:
g_assert_not_reached();
}

View File

@ -16,3 +16,13 @@ config SYNDBG
bool
default y
depends on VMBUS
config HV_BALLOON_SUPPORTED
bool
config HV_BALLOON
bool
default y
depends on VMBUS
depends on HV_BALLOON_POSSIBLE
depends on HV_BALLOON_SUPPORTED

View File

@ -0,0 +1,33 @@
/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#ifndef HW_HYPERV_HV_BALLOON_INTERNAL_H
#define HW_HYPERV_HV_BALLOON_INTERNAL_H

#include "qemu/osdep.h"

/* The Dynamic Memory protocol always works in 4 KiB pages. */
#define HV_BALLOON_PFN_SHIFT 12
#define HV_BALLOON_PAGE_SIZE (1 << HV_BALLOON_PFN_SHIFT)

/* true when in1 + in2 would wrap around UINT64_MAX */
#define SUM_OVERFLOW_U64(in1, in2) ((in1) > UINT64_MAX - (in2))

/* in1 + in2, clamped to UINT64_MAX instead of wrapping */
#define SUM_SATURATE_U64(in1, in2)                                      \
    ({                                                                  \
        uint64_t _in1 = (in1), _in2 = (in2);                            \
                                                                        \
        SUM_OVERFLOW_U64(_in1, _in2) ? UINT64_MAX : _in1 + _in2;        \
    })

#endif

View File

@ -0,0 +1,201 @@
/*
* QEMU Hyper-V Dynamic Memory Protocol driver
*
* Copyright (C) 2020-2023 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "hv-balloon-internal.h"
#include "hv-balloon-our_range_memslots.h"
#include "trace.h"
/* OurRange */
/*
 * Initialize @our_range to cover [@start, @start + @count) pages, with
 * nothing hot-added yet and empty page-removal trees.
 */
static void our_range_init(OurRange *our_range, uint64_t start, uint64_t count)
{
    /* The range must not wrap around the 64-bit page-number space. */
    assert(count <= UINT64_MAX - start);

    our_range->range = (PageRange) { .start = start, .count = count };

    /* mark the whole range as unused but for potential use */
    our_range->added = 0;
    our_range->unusable_tail = 0;

    hvb_page_range_tree_init(&our_range->removed_guest);
    hvb_page_range_tree_init(&our_range->removed_both);
}
/* Free the page-removal trees owned by @our_range. */
static void our_range_destroy(OurRange *our_range)
{
    hvb_page_range_tree_destroy(&our_range->removed_guest);
    hvb_page_range_tree_destroy(&our_range->removed_both);
}
/*
 * Forget all records of pages removed from the guest, leaving both
 * removal trees valid but empty.
 *
 * Reuses our_range_destroy() for the teardown half so the destroy
 * sequence exists in exactly one place in this file.
 */
void hvb_our_range_clear_removed_trees(OurRange *our_range)
{
    our_range_destroy(our_range);

    hvb_page_range_tree_init(&our_range->removed_guest);
    hvb_page_range_tree_init(&our_range->removed_both);
}
/*
 * Account for @additional_size pages having been hot-added to the guest.
 *
 * The running total must neither overflow nor - together with the
 * unusable tail - exceed the size of the whole range.
 */
void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size)
{
    assert(additional_size <= UINT64_MAX - our_range->added);
    our_range->added += additional_size;
    assert(our_range->added <= UINT64_MAX - our_range->unusable_tail);
    assert(our_range->added + our_range->unusable_tail <=
           our_range->range.count);
}
/* OurRangeMemslots */
/*
 * Create (but do not map) the alias MemoryRegions that will serve as the
 * individual memslots covering @our_range.
 *
 * Each slot aliases a consecutive chunk of @backing_mr of nominal size
 * memslots->size_each; the last slot may be smaller so the slots together
 * exactly cover our_range->mr.
 */
static void our_range_memslots_init_slots(OurRangeMemslots *our_range,
                                          MemoryRegion *backing_mr,
                                          Object *memslot_owner)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    unsigned int idx;
    uint64_t memslot_offset;

    assert(memslots->count > 0);
    memslots->slots = g_new0(MemoryRegion, memslots->count);

    /* Initialize our memslots, but don't map them yet. */
    assert(memslots->size_each > 0);
    for (idx = 0, memslot_offset = 0; idx < memslots->count;
         idx++, memslot_offset += memslots->size_each) {
        uint64_t memslot_size;
        g_autofree char *name = NULL;

        /* The size of the last memslot might be smaller. */
        if (idx == memslots->count - 1) {
            uint64_t region_size;

            assert(our_range->mr);
            region_size = memory_region_size(our_range->mr);
            memslot_size = region_size - memslot_offset;
        } else {
            memslot_size = memslots->size_each;
        }

        name = g_strdup_printf("memslot-%u", idx);
        memory_region_init_alias(&memslots->slots[idx], memslot_owner, name,
                                 backing_mr, memslot_offset, memslot_size);
        /*
         * We want to be able to atomically and efficiently activate/deactivate
         * individual memslots without affecting adjacent memslots in memory
         * notifiers.
         */
        memory_region_set_unmergeable(&memslots->slots[idx], true);
    }

    /* nothing mapped yet - mapping happens in ensure_mapped_additional() */
    memslots->mapped_count = 0;
}
/*
 * Allocate an OurRangeMemslots covering @parent_mr at guest address @addr.
 *
 * The page-granular OurRange bookkeeping is derived from @addr and the size
 * of @parent_mr, and @memslot_count alias memslots of nominal size
 * @memslot_size into @backing_mr are created (initially unmapped).
 *
 * The caller owns the result; free with hvb_our_range_memslots_free().
 */
OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
                                             MemoryRegion *parent_mr,
                                             MemoryRegion *backing_mr,
                                             Object *memslot_owner,
                                             unsigned int memslot_count,
                                             uint64_t memslot_size)
{
    OurRangeMemslots *our_range;

    our_range = g_malloc(sizeof(*our_range));
    /* convert byte address/size into protocol page numbers */
    our_range_init(&our_range->range,
                   addr / HV_BALLOON_PAGE_SIZE,
                   memory_region_size(parent_mr) / HV_BALLOON_PAGE_SIZE);
    our_range->slots.size_each = memslot_size;
    our_range->slots.count = memslot_count;
    our_range->mr = parent_mr;
    our_range_memslots_init_slots(our_range, backing_mr, memslot_owner);
    return our_range;
}
/*
 * Unmap every currently-mapped memslot (batched in one memory transaction)
 * and destroy all slot MemoryRegions, releasing the slots array.
 */
static void our_range_memslots_free_memslots(OurRangeMemslots *our_range)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    unsigned int idx;
    uint64_t offset;

    /* batch the unmaps so listeners see a single atomic update */
    memory_region_transaction_begin();
    for (idx = 0, offset = 0; idx < memslots->mapped_count;
         idx++, offset += memslots->size_each) {
        trace_hv_balloon_unmap_slot(idx, memslots->count, offset);
        assert(memory_region_is_mapped(&memslots->slots[idx]));
        memory_region_del_subregion(our_range->mr, &memslots->slots[idx]);
    }
    memory_region_transaction_commit();

    /* all slots were initialized (mapped or not) and need unparenting */
    for (idx = 0; idx < memslots->count; idx++) {
        object_unparent(OBJECT(&memslots->slots[idx]));
    }

    g_clear_pointer(&our_range->slots.slots, g_free);
}
/*
 * Free @our_range: discard the backing RAM contents, tear down all the
 * memslots and the page bookkeeping, then release the object itself.
 */
void hvb_our_range_memslots_free(OurRangeMemslots *our_range)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    MemoryRegion *hostmem_mr;
    RAMBlock *rb;

    assert(our_range->slots.count > 0);
    assert(our_range->slots.slots);

    /* every slot aliases the same backing region; slot 0 always exists */
    hostmem_mr = memslots->slots[0].alias;
    rb = hostmem_mr->ram_block;
    /* NOTE(review): the discard result is ignored here - best effort only */
    ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb));

    our_range_memslots_free_memslots(our_range);
    our_range_destroy(&our_range->range);
    g_free(our_range);
}
/*
 * Map enough additional memslots so that the first
 * (range.added + @additional_map_size) pages of the range are covered by
 * mapped slots.  Already-mapped slots are left untouched; any newly
 * required ones are added in a single memory transaction.
 */
void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
                                                     uint64_t additional_map_size)
{
    OurRangeMemslotsSlots *memslots = &our_range->slots;
    uint64_t total_map_size;
    unsigned int idx;
    uint64_t offset;

    /* total bytes that have to end up covered by mapped slots */
    total_map_size = (our_range->range.added + additional_map_size) *
        HV_BALLOON_PAGE_SIZE;

    /* resume where the previous mapping pass stopped */
    idx = memslots->mapped_count;
    assert(memslots->size_each > 0);
    offset = idx * memslots->size_each;

    /*
     * Activate all memslots covered by the newly added region in a single
     * transaction.
     */
    memory_region_transaction_begin();
    for ( ; idx < memslots->count;
         idx++, offset += memslots->size_each) {
        /*
         * If this memslot starts beyond or at the end of the range to map so
         * does every next one.
         */
        if (offset >= total_map_size) {
            break;
        }

        /*
         * Instead of enabling/disabling memslot, we add/remove them. This
         * should make address space updates faster, because we don't have to
         * loop over many disabled subregions.
         */
        trace_hv_balloon_map_slot(idx, memslots->count, offset);
        assert(!memory_region_is_mapped(&memslots->slots[idx]));
        memory_region_add_subregion(our_range->mr, offset,
                                    &memslots->slots[idx]);
        memslots->mapped_count++;
    }
    memory_region_transaction_commit();
}

View File

@ -0,0 +1,110 @@
/*
* QEMU Hyper-V Dynamic Memory Protocol driver
*
* Copyright (C) 2020-2023 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#ifndef HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
#define HW_HYPERV_HV_BALLOON_OUR_RANGE_MEMSLOTS_H
#include "qemu/osdep.h"
#include "exec/memory.h"
#include "qom/object.h"
#include "hv-balloon-page_range_tree.h"
/* OurRange */
#define OUR_RANGE(ptr) ((OurRange *)(ptr))
/* "our range" means the memory range owned by this driver (for hot-adding) */
typedef struct OurRange {
PageRange range;
/* How many pages were hot-added to the guest */
uint64_t added;
/* Pages at the end not currently usable */
uint64_t unusable_tail;
/* Memory removed from the guest */
PageRangeTree removed_guest, removed_both;
} OurRange;
static inline uint64_t our_range_get_remaining_start(OurRange *our_range)
{
return our_range->range.start + our_range->added;
}
static inline uint64_t our_range_get_remaining_size(OurRange *our_range)
{
return our_range->range.count - our_range->added - our_range->unusable_tail;
}
void hvb_our_range_mark_added(OurRange *our_range, uint64_t additional_size);
static inline void our_range_mark_remaining_unusable(OurRange *our_range)
{
our_range->unusable_tail = our_range->range.count - our_range->added;
}
static inline PageRangeTree our_range_get_removed_tree(OurRange *our_range,
bool both)
{
if (both) {
return our_range->removed_both;
} else {
return our_range->removed_guest;
}
}
static inline bool our_range_is_removed_tree_empty(OurRange *our_range,
bool both)
{
if (both) {
return page_range_tree_is_empty(our_range->removed_both);
} else {
return page_range_tree_is_empty(our_range->removed_guest);
}
}
void hvb_our_range_clear_removed_trees(OurRange *our_range);
/* OurRangeMemslots */
typedef struct OurRangeMemslotsSlots {
/* Nominal size of each memslot (the last one might be smaller) */
uint64_t size_each;
/* Slots array and its element count */
MemoryRegion *slots;
unsigned int count;
/* How many slots are currently mapped */
unsigned int mapped_count;
} OurRangeMemslotsSlots;
typedef struct OurRangeMemslots {
OurRange range;
/* Memslots covering our range */
OurRangeMemslotsSlots slots;
MemoryRegion *mr;
} OurRangeMemslots;
OurRangeMemslots *hvb_our_range_memslots_new(uint64_t addr,
MemoryRegion *parent_mr,
MemoryRegion *backing_mr,
Object *memslot_owner,
unsigned int memslot_count,
uint64_t memslot_size);
void hvb_our_range_memslots_free(OurRangeMemslots *our_range);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(OurRangeMemslots, hvb_our_range_memslots_free)
void hvb_our_range_memslots_ensure_mapped_additional(OurRangeMemslots *our_range,
uint64_t additional_map_size);
#endif

View File

@ -0,0 +1,228 @@
/*
* QEMU Hyper-V Dynamic Memory Protocol driver
*
* Copyright (C) 2020-2023 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "hv-balloon-internal.h"
#include "hv-balloon-page_range_tree.h"
/*
* temporarily avoid warnings about enhanced GTree API usage requiring a
* too recent Glib version until GLIB_VERSION_MAX_ALLOWED finally reaches
* the Glib version with this API
*/
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
/* PageRangeTree */
/*
 * GTree key comparator: keys are pointers to uint64_t range-start values.
 * Returns a negative, zero or positive value per the GCompareDataFunc
 * contract.
 */
static gint page_range_tree_key_compare(gconstpointer leftp,
                                        gconstpointer rightp,
                                        gpointer user_data)
{
    const uint64_t *left = leftp, *right = rightp;

    /* branch-free three-way comparison, yields -1 / 0 / 1 */
    return (*left > *right) - (*left < *right);
}
/*
 * Allocate a new PageRange [@start, @start + @count) and insert it into
 * @tree keyed by @start.  Both key and value are heap-allocated and become
 * owned by the tree (freed through its destroy notifiers).
 */
static GTreeNode *page_range_tree_insert_new(PageRangeTree tree,
                                             uint64_t start, uint64_t count)
{
    uint64_t *key = g_malloc(sizeof(*key));
    PageRange *range = g_malloc(sizeof(*range));

    assert(count > 0);

    *key = range->start = start;
    range->count = count;

    return g_tree_insert_node(tree.t, key, range);
}
/*
 * Insert the page range [@start, @start + @count) into @tree, merging it
 * with any overlapping or directly adjacent ranges already present.
 *
 * If @dupcount is not NULL, the number of inserted pages that were already
 * present in the tree is added to *dupcount.
 */
void hvb_page_range_tree_insert(PageRangeTree tree,
                                uint64_t start, uint64_t count,
                                uint64_t *dupcount)
{
    GTreeNode *node;
    bool joinable;          /* only read below when node != NULL */
    uint64_t intersection;  /* only read below when node != NULL */
    PageRange *range;

    assert(!SUM_OVERFLOW_U64(start, count));
    if (count == 0) {
        return;
    }

    /* find the last node whose key (range start) is <= @start, if any */
    node = g_tree_upper_bound(tree.t, &start);
    if (node) {
        node = g_tree_node_previous(node);
    } else {
        node = g_tree_node_last(tree.t);
    }

    if (node) {
        range = g_tree_node_value(node);
        assert(range);
        intersection = page_range_intersection_size(range, start, count);
        joinable = page_range_joinable_right(range, start, count);
    }

    if (!node ||
        (!intersection && !joinable)) {
        /*
         * !node case: the tree is empty or the very first node in the tree
         * already has a higher key (the start of its range).
         * the other case: there is a gap in the tree between the new range
         * and the previous one.
         * anyway, let's just insert the new range into the tree.
         */
        node = page_range_tree_insert_new(tree, start, count);
        assert(node);
        range = g_tree_node_value(node);
        assert(range);
    } else {
        /*
         * the previous range in the tree either partially covers the new
         * range or ends just at its beginning - extend it
         */
        if (dupcount) {
            *dupcount += intersection;
        }

        /* re-base @count so it is measured from the existing range's start */
        count += start - range->start;
        range->count = MAX(range->count, count);
    }

    /* check next nodes for possible merging */
    for (node = g_tree_node_next(node); node; ) {
        PageRange *rangecur;

        rangecur = g_tree_node_value(node);
        assert(rangecur);

        intersection = page_range_intersection_size(rangecur,
                                                    range->start, range->count);
        joinable = page_range_joinable_left(rangecur,
                                            range->start, range->count);
        if (!intersection && !joinable) {
            /* the current node is disjoint */
            break;
        }

        if (dupcount) {
            *dupcount += intersection;
        }

        /* extend the merged range to cover the current node's range */
        count = rangecur->count + (rangecur->start - range->start);
        range->count = MAX(range->count, count);

        /* the current node was merged in, remove it */
        start = rangecur->start;
        node = g_tree_node_next(node);
        /* no hinted removal in GTree... */
        g_tree_remove(tree.t, &start);
    }
}
/*
 * Pop up to @maxcount pages off the end of the highest-addressed range in
 * @tree, returning them in @out.
 *
 * Returns: true if pages were popped, false if the tree was empty.
 */
bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out,
                             uint64_t maxcount)
{
    GTreeNode *node;
    PageRange *range;

    node = g_tree_node_last(tree.t);
    if (!node) {
        return false;
    }

    range = g_tree_node_value(node);
    assert(range);

    out->start = range->start;

    /* can't modify range->start as it is the node key */
    if (range->count > maxcount) {
        /* take just the tail of the range, shrinking it in place */
        out->start += range->count - maxcount;
        out->count = maxcount;
        range->count -= maxcount;
    } else {
        /* the whole range is consumed - drop its node */
        out->count = range->count;
        /* no hinted removal in GTree... */
        g_tree_remove(tree.t, &out->start);
    }

    return true;
}
/*
 * Check whether any page of [@start, @start + @count) is present in @tree.
 */
bool hvb_page_range_tree_intree_any(PageRangeTree tree,
                                    uint64_t start, uint64_t count)
{
    GTreeNode *node;

    if (count == 0) {
        return false;
    }

    /* find the first node that can possibly intersect our range */
    node = g_tree_upper_bound(tree.t, &start);
    if (node) {
        /*
         * a NULL node below means that the very first node in the tree
         * already has a higher key (the start of its range).
         */
        node = g_tree_node_previous(node);
    } else {
        /* a NULL node below means that the tree is empty */
        node = g_tree_node_last(tree.t);
    }

    /* here a non-NULL @node has its range start at or before @start */
    if (!node) {
        /* every range starts after @start - scan from the first node */
        node = g_tree_node_first(tree.t);
    }

    for ( ; node; node = g_tree_node_next(node)) {
        PageRange *range = g_tree_node_value(node);

        assert(range);
        /*
         * if this node starts beyond or at the end of our range so does
         * every next one
         */
        if (range->start >= start + count) {
            break;
        }

        if (page_range_intersection_size(range, start, count) > 0) {
            return true;
        }
    }

    return false;
}
/* Initialize @tree as an empty GTree that owns both its keys and values. */
void hvb_page_range_tree_init(PageRangeTree *tree)
{
    tree->t = g_tree_new_full(page_range_tree_key_compare, NULL,
                              g_free, g_free);
}
/* Destroy @tree; safe to call again on an already-destroyed tree. */
void hvb_page_range_tree_destroy(PageRangeTree *tree)
{
    /* g_tree_destroy() is not NULL-safe */
    if (tree->t) {
        g_tree_destroy(tree->t);
        tree->t = NULL;
    }
}

View File

@ -0,0 +1,118 @@
/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#ifndef HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H
#define HW_HYPERV_HV_BALLOON_PAGE_RANGE_TREE_H

#include "qemu/osdep.h"

/* PageRange */

/* A contiguous run of @count pages beginning at page number @start. */
typedef struct PageRange {
    uint64_t start;
    uint64_t count;
} PageRange;

/* return just the part of range before (start) */
static inline void page_range_part_before(const PageRange *range,
                                          uint64_t start, PageRange *out)
{
    uint64_t endr = range->start + range->count;
    uint64_t end = MIN(endr, start);

    out->start = range->start;
    if (end > out->start) {
        out->count = end - out->start;
    } else {
        /* @range lies entirely at or after @start */
        out->count = 0;
    }
}

/* return just the part of range after (start, count) */
static inline void page_range_part_after(const PageRange *range,
                                         uint64_t start, uint64_t count,
                                         PageRange *out)
{
    uint64_t end = range->start + range->count;
    uint64_t ends = start + count;

    out->start = MAX(range->start, ends);
    if (end > out->start) {
        out->count = end - out->start;
    } else {
        /* @range ends before the end of [start, start + count) */
        out->count = 0;
    }
}

/* store the intersection of @range and [start, start + count) into @out */
static inline void page_range_intersect(const PageRange *range,
                                        uint64_t start, uint64_t count,
                                        PageRange *out)
{
    uint64_t end1 = range->start + range->count;
    uint64_t end2 = start + count;
    uint64_t end = MIN(end1, end2);

    out->start = MAX(range->start, start);
    out->count = out->start < end ? end - out->start : 0;
}

/* number of pages shared by @range and [start, start + count) */
static inline uint64_t page_range_intersection_size(const PageRange *range,
                                                    uint64_t start, uint64_t count)
{
    PageRange trange;

    page_range_intersect(range, start, count, &trange);
    return trange.count;
}

/* does [start, start + count) end exactly where @range begins? */
static inline bool page_range_joinable_left(const PageRange *range,
                                            uint64_t start, uint64_t count)
{
    return start + count == range->start;
}

/* does @range end exactly where [start, start + count) begins? */
static inline bool page_range_joinable_right(const PageRange *range,
                                             uint64_t start, uint64_t count)
{
    return range->start + range->count == start;
}

/* can @range and [start, start + count) be merged into a single range? */
static inline bool page_range_joinable(const PageRange *range,
                                       uint64_t start, uint64_t count)
{
    return page_range_joinable_left(range, start, count) ||
        page_range_joinable_right(range, start, count);
}

/* PageRangeTree */

/* type safety */
/* Thin wrapper around a GTree keyed by range start, valued by PageRange. */
typedef struct PageRangeTree {
    GTree *t;
} PageRangeTree;

static inline bool page_range_tree_is_empty(PageRangeTree tree)
{
    guint nnodes = g_tree_nnodes(tree.t);

    return nnodes == 0;
}

void hvb_page_range_tree_init(PageRangeTree *tree);
void hvb_page_range_tree_destroy(PageRangeTree *tree);

/* does any page of [start, start + count) appear in @tree? */
bool hvb_page_range_tree_intree_any(PageRangeTree tree,
                                    uint64_t start, uint64_t count);

/* pop up to @maxcount pages off the highest range in @tree into @out */
bool hvb_page_range_tree_pop(PageRangeTree tree, PageRange *out,
                             uint64_t maxcount);

/* insert [start, start + count), merging; duplicates added to *dupcount */
void hvb_page_range_tree_insert(PageRangeTree tree,
                                uint64_t start, uint64_t count,
                                uint64_t *dupcount);

#endif

View File

@ -0,0 +1,19 @@
/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-types-machine.h"

/*
 * Stub for builds without CONFIG_HV_BALLOON: the QMP command stays
 * defined but always fails with an explanatory error.
 */
HvBalloonInfo *qmp_query_hv_balloon_status_report(Error **errp)
{
    error_setg(errp, "hv-balloon device not enabled in this build");
    return NULL;
}

1769
hw/hyperv/hv-balloon.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -2,3 +2,4 @@ specific_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'))
specific_ss.add(when: 'CONFIG_HYPERV_TESTDEV', if_true: files('hyperv_testdev.c'))
specific_ss.add(when: 'CONFIG_VMBUS', if_true: files('vmbus.c'))
specific_ss.add(when: 'CONFIG_SYNDBG', if_true: files('syndbg.c'))
specific_ss.add(when: 'CONFIG_HV_BALLOON', if_true: files('hv-balloon.c', 'hv-balloon-page_range_tree.c', 'hv-balloon-our_range_memslots.c'), if_false: files('hv-balloon-stub.c'))

View File

@ -16,3 +16,21 @@ vmbus_gpadl_torndown(uint32_t gpadl_id) "gpadl #%d"
vmbus_open_channel(uint32_t chan_id, uint32_t gpadl_id, uint32_t target_vp) "channel #%d gpadl #%d target vp %d"
vmbus_channel_open(uint32_t chan_id, uint32_t status) "channel #%d status %d"
vmbus_close_channel(uint32_t chan_id) "channel #%d"
# hv-balloon
hv_balloon_state_change(const char *tostr) "-> %s"
hv_balloon_incoming_version(uint16_t major, uint16_t minor) "incoming proto version %u.%u"
hv_balloon_incoming_caps(uint32_t caps) "incoming caps 0x%x"
hv_balloon_outgoing_unballoon(uint32_t trans_id, uint64_t count, uint64_t start, uint64_t rempages) "posting unballoon %"PRIu32" for %"PRIu64" @ 0x%"PRIx64", remaining %"PRIu64
hv_balloon_incoming_unballoon(uint32_t trans_id) "incoming unballoon response %"PRIu32
hv_balloon_outgoing_hot_add(uint32_t trans_id, uint64_t count, uint64_t start) "posting hot add %"PRIu32" for %"PRIu64" @ 0x%"PRIx64
hv_balloon_incoming_hot_add(uint32_t trans_id, uint32_t result, uint32_t count) "incoming hot add response %"PRIu32", result %"PRIu32", count %"PRIu32
hv_balloon_outgoing_balloon(uint32_t trans_id, uint64_t count, uint64_t rempages) "posting balloon %"PRIu32" for %"PRIu64", remaining %"PRIu64
hv_balloon_incoming_balloon(uint32_t trans_id, uint32_t range_count, uint32_t more_pages) "incoming balloon response %"PRIu32", ranges %"PRIu32", more %"PRIu32
hv_balloon_our_range_add(uint64_t count, uint64_t start) "adding our range %"PRIu64" @ 0x%"PRIx64
hv_balloon_remove_response(uint64_t count, uint64_t start, unsigned int both) "processing remove response range %"PRIu64" @ 0x%"PRIx64", both %u"
hv_balloon_remove_response_hole(uint64_t counthole, uint64_t starthole, uint64_t countrange, uint64_t startrange, uint64_t starthpr, unsigned int both) "response range hole %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64", before our start 0x%"PRIx64", both %u"
hv_balloon_remove_response_common(uint64_t countcommon, uint64_t startcommon, uint64_t countrange, uint64_t startrange, uint64_t counthpr, uint64_t starthpr, uint64_t removed, unsigned int both) "response common range %"PRIu64" @ 0x%"PRIx64" from range %"PRIu64" @ 0x%"PRIx64" with our %"PRIu64" @ 0x%"PRIx64", removed %"PRIu64", both %u"
hv_balloon_remove_response_remainder(uint64_t count, uint64_t start, unsigned int both) "remove response remaining range %"PRIu64" @ 0x%"PRIx64", both %u"
hv_balloon_map_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "mapping memslot %u / %u @ 0x%"PRIx64
hv_balloon_unmap_slot(unsigned int idx, unsigned int total_slots, uint64_t offset) "unmapping memslot %u / %u @ 0x%"PRIx64

View File

@ -45,6 +45,7 @@ config PC
select ACPI_VMGENID
select VIRTIO_PMEM_SUPPORTED
select VIRTIO_MEM_SUPPORTED
select HV_BALLOON_SUPPORTED
config PC_PCI
bool

View File

@ -27,6 +27,7 @@
#include "hw/i386/pc.h"
#include "hw/char/serial.h"
#include "hw/char/parallel.h"
#include "hw/hyperv/hv-balloon.h"
#include "hw/i386/fw_cfg.h"
#include "hw/i386/vmport.h"
#include "sysemu/cpus.h"
@ -57,6 +58,7 @@
#include "hw/i386/kvm/xen_evtchn.h"
#include "hw/i386/kvm/xen_gnttab.h"
#include "hw/i386/kvm/xen_xenstore.h"
#include "hw/mem/memory-device.h"
#include "e820_memory_layout.h"
#include "trace.h"
#include CONFIG_DEVICES
@ -1422,6 +1424,21 @@ static void pc_memory_unplug(HotplugHandler *hotplug_dev,
error_propagate(errp, local_err);
}
/* Pre-plug hook for the hv-balloon memory device (cold-plug only). */
static void pc_hv_balloon_pre_plug(HotplugHandler *hotplug_dev,
                                   DeviceState *dev, Error **errp)
{
    /* The vmbus handler has no hotplug handler; we should never end up here. */
    g_assert(!dev->hotplugged);
    memory_device_pre_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev), NULL,
                           errp);
}

/* Plug hook: register the hv-balloon device's memory-device region. */
static void pc_hv_balloon_plug(HotplugHandler *hotplug_dev,
                               DeviceState *dev, Error **errp)
{
    memory_device_plug(MEMORY_DEVICE(dev), MACHINE(hotplug_dev));
}
static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
DeviceState *dev, Error **errp)
{
@ -1452,6 +1469,8 @@ static void pc_machine_device_pre_plug_cb(HotplugHandler *hotplug_dev,
return;
}
pcms->iommu = dev;
} else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
pc_hv_balloon_pre_plug(hotplug_dev, dev, errp);
}
}
@ -1464,6 +1483,8 @@ static void pc_machine_device_plug_cb(HotplugHandler *hotplug_dev,
x86_cpu_plug(hotplug_dev, dev, errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI)) {
virtio_md_pci_plug(VIRTIO_MD_PCI(dev), MACHINE(hotplug_dev), errp);
} else if (object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON)) {
pc_hv_balloon_plug(hotplug_dev, dev, errp);
}
}
@ -1505,6 +1526,7 @@ static HotplugHandler *pc_get_hotplug_handler(MachineState *machine,
object_dynamic_cast(OBJECT(dev), TYPE_CPU) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_MD_PCI) ||
object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
object_dynamic_cast(OBJECT(dev), TYPE_HV_BALLOON) ||
object_dynamic_cast(OBJECT(dev), TYPE_X86_IOMMU_DEVICE)) {
return HOTPLUG_HANDLER(machine);
}

View File

@ -20,6 +20,22 @@
#include "exec/address-spaces.h"
#include "trace.h"
/*
 * Does this memory device currently expose no memory region?
 *
 * A device whose get_memory_region() fails is treated as non-empty; the
 * failure will be reported by a later caller that obtains the MR again.
 */
static bool memory_device_is_empty(const MemoryDeviceState *md)
{
    const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
    Error *local_err = NULL;
    MemoryRegion *mr;

    /* dropping const here is fine as we don't touch the memory region */
    mr = mdc->get_memory_region((MemoryDeviceState *)md, &local_err);
    if (local_err) {
        /* Not empty, we'll report errors later when obtaining the MR again. */
        error_free(local_err);
        return false;
    }
    return !mr;
}
static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
{
const MemoryDeviceState *md_a = MEMORY_DEVICE(a);
@ -220,12 +236,6 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
return 0;
}
if (!QEMU_IS_ALIGNED(size, align)) {
error_setg(errp, "backend memory size must be multiple of 0x%"
PRIx64, align);
return 0;
}
if (hint) {
if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) {
error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
@ -249,6 +259,10 @@ static uint64_t memory_device_get_free_addr(MachineState *ms,
uint64_t next_addr;
Range tmp;
if (memory_device_is_empty(md)) {
continue;
}
range_init_nofail(&tmp, mdc->get_addr(md),
memory_device_get_region_size(md, &error_abort));
@ -292,6 +306,7 @@ MemoryDeviceInfoList *qmp_memory_device_list(void)
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data);
MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
/* Let's query information even for empty memory devices. */
mdc->fill_device_info(md, info);
QAPI_LIST_APPEND(tail, info);
@ -311,7 +326,7 @@ static int memory_device_plugged_size(Object *obj, void *opaque)
const MemoryDeviceState *md = MEMORY_DEVICE(obj);
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
if (dev->realized) {
if (dev->realized && !memory_device_is_empty(md)) {
*size += mdc->get_plugged_size(md, &error_abort);
}
}
@ -337,6 +352,11 @@ void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
uint64_t addr, align = 0;
MemoryRegion *mr;
/* We support empty memory devices even without device memory. */
if (memory_device_is_empty(md)) {
return;
}
if (!ms->device_memory) {
error_setg(errp, "the configuration is not prepared for memory devices"
" (e.g., for memory hotplug), consider specifying the"
@ -380,10 +400,17 @@ out:
void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
{
const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
const unsigned int memslots = memory_device_get_memslots(md);
const uint64_t addr = mdc->get_addr(md);
unsigned int memslots;
uint64_t addr;
MemoryRegion *mr;
if (memory_device_is_empty(md)) {
return;
}
memslots = memory_device_get_memslots(md);
addr = mdc->get_addr(md);
/*
* We expect that a previous call to memory_device_pre_plug() succeeded, so
* it can't fail at this point.
@ -408,6 +435,10 @@ void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
const unsigned int memslots = memory_device_get_memslots(md);
MemoryRegion *mr;
if (memory_device_is_empty(md)) {
return;
}
/*
* We expect that a previous call to memory_device_pre_plug() succeeded, so
* it can't fail at this point.

View File

@ -147,7 +147,10 @@ static void virtio_pmem_fill_device_info(const VirtIOPMEM *pmem,
/*
 * Return the memory region backing this virtio-pmem device.
 *
 * Returns NULL and sets @errp when no memdev backend has been assigned;
 * callers must be prepared for a NULL result.  The former
 * assert(pmem->memdev) is dropped: it made the error path below
 * unreachable in debug builds, and empty memory devices are possible
 * again with this series.
 */
static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem,
                                                   Error **errp)
{
    if (!pmem->memdev) {
        error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP);
        return NULL;
    }

    return &pmem->memdev->mr;
}

View File

@ -0,0 +1,423 @@
#ifndef HW_HYPERV_DYNMEM_PROTO_H
#define HW_HYPERV_DYNMEM_PROTO_H

/*
 * Hyper-V Dynamic Memory Protocol definitions
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * Based on drivers/hv/hv_balloon.c from Linux kernel:
 * Copyright (c) 2012, Microsoft Corporation.
 *
 * Author: K. Y. Srinivasan <kys@microsoft.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 * See the COPYING file in the top-level directory.
 */

/*
 * Protocol versions. The low word is the minor version, the high word the major
 * version.
 *
 * History:
 * Initial version 1.0
 * Changed to 0.1 on 2009/03/25
 * Changed to 0.2 on 2009/05/14
 * Changed to 0.3 on 2009/12/03
 * Changed to 1.0 on 2011/04/05
 * Changed to 2.0 on 2019/12/10
 */

#define DYNMEM_MAKE_VERSION(Major, Minor) ((uint32_t)(((Major) << 16) | (Minor)))
#define DYNMEM_MAJOR_VERSION(Version) ((uint32_t)(Version) >> 16)
/*
 * NOTE(review): the comment above says the minor version is the low *word*
 * (16 bits), yet only the low byte is masked here.  This mirrors the Linux
 * kernel's hv_balloon.c definition and every defined minor version fits in
 * 8 bits, so it is kept as-is — confirm before changing.
 */
#define DYNMEM_MINOR_VERSION(Version) ((uint32_t)(Version) & 0xff)

enum {
    DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
    DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
    DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0),

    /* Aliases mapping each protocol version to the Windows release using it */
    DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
    DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
    DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3,

    DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10
};
/*
 * Message Types
 *
 * Values carried in dm_header.type, grouped by the protocol version
 * that introduced them.
 */

enum dm_message_type {
    /*
     * Version 0.3
     */
    DM_ERROR = 0,
    DM_VERSION_REQUEST = 1,
    DM_VERSION_RESPONSE = 2,
    DM_CAPABILITIES_REPORT = 3,
    DM_CAPABILITIES_RESPONSE = 4,
    DM_STATUS_REPORT = 5,
    DM_BALLOON_REQUEST = 6,
    DM_BALLOON_RESPONSE = 7,
    DM_UNBALLOON_REQUEST = 8,
    DM_UNBALLOON_RESPONSE = 9,
    DM_MEM_HOT_ADD_REQUEST = 10,
    DM_MEM_HOT_ADD_RESPONSE = 11,
    DM_VERSION_03_MAX = 11,
    /*
     * Version 1.0.
     */
    DM_INFO_MESSAGE = 12,
    DM_VERSION_1_MAX = 12,

    /*
     * Version 2.0
     */
    DM_MEM_HOT_REMOVE_REQUEST = 13,
    DM_MEM_HOT_REMOVE_RESPONSE = 14
};
/*
 * Structures defining the dynamic memory management
 * protocol.
 */

/* Protocol version as exchanged in DM_VERSION_REQUEST/DM_VERSION_RESPONSE. */
union dm_version {
    struct {
        uint16_t minor_version;
        uint16_t major_version;
    };
    uint32_t version;
} QEMU_PACKED;

/* Capability bits exchanged via DM_CAPABILITIES_REPORT/RESPONSE. */
union dm_caps {
    struct {
        uint64_t balloon:1;
        uint64_t hot_add:1;
        /*
         * To support guests that may have alignment
         * limitations on hot-add, the guest can specify
         * its alignment requirements; a value of n
         * represents an alignment of 2^n in mega bytes.
         */
        uint64_t hot_add_alignment:4;
        uint64_t hot_remove:1;
        uint64_t reservedz:57;
    } cap_bits;
    uint64_t caps;
} QEMU_PACKED;

/* A contiguous guest page range packed into a single 64-bit value. */
union dm_mem_page_range {
    struct {
        /*
         * The PFN number of the first page in the range.
         * 40 bits is the architectural limit of a PFN
         * number for AMD64.
         */
        uint64_t start_page:40;
        /*
         * The number of pages in the range.
         */
        uint64_t page_cnt:24;
    } finfo;
    uint64_t page_range;
} QEMU_PACKED;

/*
 * The header for all dynamic memory messages:
 *
 * type: Type of the message (an enum dm_message_type value).
 * size: Size of the message in bytes; including the header.
 * trans_id: The guest is responsible for manufacturing this ID.
 */
struct dm_header {
    uint16_t type;
    uint16_t size;
    uint32_t trans_id;
} QEMU_PACKED;

/*
 * A generic message format for dynamic memory.
 * Specific message formats are defined later in the file.
 */
struct dm_message {
    struct dm_header hdr;
    uint8_t data[]; /* enclosed message */
} QEMU_PACKED;
/*
 * Specific message types supporting the dynamic memory protocol.
 */

/*
 * Version negotiation message. Sent from the guest to the host.
 * The guest is free to try different versions until the host
 * accepts the version.
 *
 * dm_version: The protocol version requested.
 * is_last_attempt: If TRUE, this is the last version guest will request.
 * reservedz: Reserved field, set to zero.
 */
struct dm_version_request {
    struct dm_header hdr;
    union dm_version version;
    uint32_t is_last_attempt:1;
    uint32_t reservedz:31;
} QEMU_PACKED;

/*
 * Version response message; Host to Guest and indicates
 * if the host has accepted the version sent by the guest.
 *
 * is_accepted: If TRUE, host has accepted the version and the guest
 * should proceed to the next stage of the protocol. FALSE indicates that
 * guest should re-try with a different version.
 *
 * reservedz: Reserved field, set to zero.
 */
struct dm_version_response {
    struct dm_header hdr;
    uint64_t is_accepted:1;
    uint64_t reservedz:63;
} QEMU_PACKED;

/*
 * Message reporting capabilities. This is sent from the guest to the
 * host.
 */
struct dm_capabilities {
    struct dm_header hdr;
    union dm_caps caps;
    uint64_t min_page_cnt;
    uint64_t max_page_number;
} QEMU_PACKED;

/*
 * Response to the capabilities message. This is sent from the host to the
 * guest. This message notifies if the host has accepted the guest's
 * capabilities. If the host has not accepted, the guest must shutdown
 * the service.
 *
 * is_accepted: Indicates if the host has accepted guest's capabilities.
 * reservedz: Must be 0.
 */
struct dm_capabilities_resp_msg {
    struct dm_header hdr;
    uint64_t is_accepted:1;
    uint64_t hot_remove:1;
    uint64_t suppress_pressure_reports:1;
    uint64_t reservedz:61;
} QEMU_PACKED;

/*
 * This message is used to report memory pressure from the guest.
 * This message is not part of any transaction and there is no
 * response to this message.
 *
 * num_avail: Available memory in pages.
 * num_committed: Committed memory in pages.
 * page_file_size: The accumulated size of all page files
 * in the system in pages.
 * zero_free: The number of zero and free pages.
 * page_file_writes: The writes to the page file in pages.
 * io_diff: An indicator of file cache efficiency or page file activity,
 * calculated as File Cache Page Fault Count - Page Read Count.
 * This value is in pages.
 *
 * Some of these metrics are Windows specific and fortunately
 * the algorithm on the host side that computes the guest memory
 * pressure only uses num_committed value.
 */
struct dm_status {
    struct dm_header hdr;
    uint64_t num_avail;
    uint64_t num_committed;
    uint64_t page_file_size;
    uint64_t zero_free;
    uint32_t page_file_writes;
    uint32_t io_diff;
} QEMU_PACKED;
/*
 * Message to ask the guest to allocate memory - balloon up message.
 * This message is sent from the host to the guest. The guest may not be
 * able to allocate as much memory as requested.
 *
 * num_pages: number of pages to allocate.
 */
struct dm_balloon {
    struct dm_header hdr;
    uint32_t num_pages;
    uint32_t reservedz;
} QEMU_PACKED;

/*
 * Balloon response message; this message is sent from the guest
 * to the host in response to the balloon message.
 *
 * reservedz: Reserved; must be set to zero.
 * more_pages: If FALSE, this is the last message of the transaction.
 * if TRUE there will be at least one more message from the guest.
 *
 * range_count: The number of ranges in the range array.
 *
 * range_array: An array of page ranges returned to the host.
 *
 */
struct dm_balloon_response {
    struct dm_header hdr;
    uint32_t reservedz;
    uint32_t more_pages:1;
    uint32_t range_count:31;
    union dm_mem_page_range range_array[];
} QEMU_PACKED;

/*
 * Un-balloon message; this message is sent from the host
 * to the guest to give guest more memory.
 *
 * more_pages: If FALSE, this is the last message of the transaction.
 * if TRUE there will be at least one more message from the guest.
 *
 * reservedz: Reserved; must be set to zero.
 *
 * range_count: The number of ranges in the range array.
 *
 * range_array: An array of page ranges returned to the host.
 *
 */
struct dm_unballoon_request {
    struct dm_header hdr;
    uint32_t more_pages:1;
    uint32_t reservedz:31;
    uint32_t range_count;
    union dm_mem_page_range range_array[];
} QEMU_PACKED;

/*
 * Un-balloon response message; this message is sent from the guest
 * to the host in response to an unballoon request.
 *
 */
struct dm_unballoon_response {
    struct dm_header hdr;
} QEMU_PACKED;

/*
 * Hot add request message. Message sent from the host to the guest.
 *
 * mem_range: Memory range to hot add.
 *
 */
struct dm_hot_add {
    struct dm_header hdr;
    union dm_mem_page_range range;
} QEMU_PACKED;
/*
 * Hot add response message.
 * This message is sent by the guest to report the status of a hot add request.
 * If page_count is less than the requested page count, then the host should
 * assume all further hot add requests will fail, since this indicates that
 * the guest has hit an upper physical memory barrier.
 *
 * Hot adds may also fail due to low resources; in this case, the guest must
 * not complete this message until the hot add can succeed, and the host must
 * not send a new hot add request until the response is sent.
 * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
 * times it fails the request.
 *
 *
 * page_count: number of pages that were successfully hot added.
 *
 * result: result of the operation 1: success, 0: failure.
 *
 */
struct dm_hot_add_response {
    struct dm_header hdr;
    uint32_t page_count;
    uint32_t result;
} QEMU_PACKED;

/*
 * Hot remove request message (protocol 2.0); sent from the host to the
 * guest, naming a virtual node, a page count and QoS flags (see the
 * DM_REMOVE_QOS_* definitions below).
 */
struct dm_hot_remove {
    struct dm_header hdr;
    uint32_t virtual_node;
    uint32_t page_count;
    uint32_t qos_flags;
    uint32_t reservedZ;
} QEMU_PACKED;

/*
 * Hot remove response message; guest to host, carrying the page ranges
 * released in range_array[] (range_count entries) plus a result code.
 */
struct dm_hot_remove_response {
    struct dm_header hdr;
    uint32_t result;
    uint32_t range_count;
    uint64_t more_pages:1;
    uint64_t reservedz:63;
    union dm_mem_page_range range_array[];
} QEMU_PACKED;

/* Flag values for dm_hot_remove.qos_flags */
#define DM_REMOVE_QOS_LARGE (1 << 0)
#define DM_REMOVE_QOS_LOCAL (1 << 1)
#define DM_REMOVE_QOS_MASK (0x3)

/*
 * Types of information sent from host to the guest.
 */
enum dm_info_type {
    INFO_TYPE_MAX_PAGE_CNT = 0,
    MAX_INFO_TYPE
};

/*
 * Header for the information message.
 */
struct dm_info_header {
    enum dm_info_type type;
    uint32_t data_size;
    uint8_t data[];
} QEMU_PACKED;

/*
 * This message is sent from the host to the guest to pass
 * some relevant information (win8 addition).
 *
 * reserved: not used.
 * info_size: size of the information blob.
 * info: information blob.
 *
 * NOTE(review): unlike the other on-wire structures above, this one is
 * not QEMU_PACKED; its members happen to be naturally aligned so the
 * layout is unchanged, but confirm this omission is intentional.
 */
struct dm_info_msg {
    struct dm_header hdr;
    uint32_t reserved;
    uint32_t info_size;
    uint8_t info[];
};

#endif

View File

@ -0,0 +1,18 @@
/*
 * QEMU Hyper-V Dynamic Memory Protocol driver
 *
 * Copyright (C) 2020-2023 Oracle and/or its affiliates.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#ifndef HW_HV_BALLOON_H
#define HW_HV_BALLOON_H

#include "qom/object.h"

/* QOM type name of the hv-balloon device */
#define TYPE_HV_BALLOON "hv-balloon"
/* QOM boilerplate: declares the HvBalloon type and the HV_BALLOON() cast */
OBJECT_DECLARE_SIMPLE_TYPE(HvBalloon, HV_BALLOON)

#endif

View File

@ -38,6 +38,10 @@ typedef struct MemoryDeviceState MemoryDeviceState;
* address in guest physical memory can either be specified explicitly
* or get assigned automatically.
*
* Some memory device might not own a memory region in certain device
* configurations. Such devices can logically get (un)plugged, however,
* empty memory devices are mostly ignored by the memory device code.
*
* Conceptually, memory devices only span one memory region. If multiple
* successive memory regions are used, a covering memory region has to
* be provided. Scattered memory regions are not supported for single
@ -91,7 +95,8 @@ struct MemoryDeviceClass {
uint64_t (*get_plugged_size)(const MemoryDeviceState *md, Error **errp);
/*
* Return the memory region of the memory device.
* Return the memory region of the memory device. If the device is
* completely empty, returns NULL without an error.
*
* Called when (un)plugging the memory device, to (un)map the
* memory region in guest physical memory, but also to detect the

View File

@ -1323,6 +1323,30 @@ if not get_option('glusterfs').auto() or have_block
endif
endif
hv_balloon = false
if get_option('hv_balloon').allowed() and have_system
if cc.links('''
#include <string.h>
#include <gmodule.h>
int main(void) {
GTree *tree;
tree = g_tree_new((GCompareFunc)strcmp);
(void)g_tree_node_first(tree);
g_tree_destroy(tree);
return 0;
}
''', dependencies: glib)
hv_balloon = true
else
if get_option('hv_balloon').enabled()
error('could not enable hv-balloon, update your glib')
else
warning('could not find glib support for hv-balloon, disabling')
endif
endif
endif
libssh = not_found
if not get_option('libssh').auto() or have_block
libssh = dependency('libssh', version: '>=0.8.7',
@ -2855,7 +2879,8 @@ host_kconfig = \
(targetos == 'linux' ? ['CONFIG_LINUX=y'] : []) + \
(have_pvrdma ? ['CONFIG_PVRDMA=y'] : []) + \
(multiprocess_allowed ? ['CONFIG_MULTIPROCESS_ALLOWED=y'] : []) + \
(vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : [])
(vfio_user_server_allowed ? ['CONFIG_VFIO_USER_SERVER_ALLOWED=y'] : []) + \
(hv_balloon ? ['CONFIG_HV_BALLOON_POSSIBLE=y'] : [])
ignored = [ 'TARGET_XML_FILES', 'TARGET_ABI_DIR', 'TARGET_ARCH' ]
@ -4321,6 +4346,7 @@ if targetos == 'windows'
endif
summary_info += {'seccomp support': seccomp}
summary_info += {'GlusterFS support': glusterfs}
summary_info += {'hv-balloon support': hv_balloon}
summary_info += {'TPM support': have_tpm}
summary_info += {'libssh support': libssh}
summary_info += {'lzo support': lzo}

View File

@ -150,6 +150,8 @@ option('gio', type : 'feature', value : 'auto',
description: 'use libgio for D-Bus support')
option('glusterfs', type : 'feature', value : 'auto',
description: 'Glusterfs block device driver')
option('hv_balloon', type : 'feature', value : 'auto',
description: 'hv-balloon driver (requires Glib 2.68+ GTree API)')
option('libdw', type : 'feature', value : 'auto',
description: 'debuginfo support')
option('libiscsi', type : 'feature', value : 'auto',

View File

@ -315,6 +315,7 @@ static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
[QAPI_EVENT_QUORUM_FAILURE] = { 1000 * SCALE_MS },
[QAPI_EVENT_VSERPORT_CHANGE] = { 1000 * SCALE_MS },
[QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE] = { 1000 * SCALE_MS },
[QAPI_EVENT_HV_BALLOON_STATUS_REPORT] = { 1000 * SCALE_MS },
};
/*

View File

@ -1137,6 +1137,68 @@
{ 'event': 'BALLOON_CHANGE',
'data': { 'actual': 'int' } }
##
# @HvBalloonInfo:
#
# hv-balloon guest-provided memory status information.
#
# @committed: the amount of memory in use inside the guest plus the
# amount of the memory unusable inside the guest (ballooned out,
# offline, etc.)
#
# @available: the amount of the memory inside the guest available for
# new allocations ("free")
#
# Since: 8.2
##
{ 'struct': 'HvBalloonInfo',
'data': { 'committed': 'size', 'available': 'size' } }
##
# @query-hv-balloon-status-report:
#
# Returns the hv-balloon driver data contained in the last received "STATUS"
# message from the guest.
#
# Returns:
# - @HvBalloonInfo on success
# - If no hv-balloon device is present, guest memory status reporting
# is not enabled or no guest memory status report received yet,
# GenericError
#
# Since: 8.2
#
# Example:
#
# -> { "execute": "query-hv-balloon-status-report" }
# <- { "return": {
# "committed": 816640000,
# "available": 3333054464
# }
# }
##
{ 'command': 'query-hv-balloon-status-report', 'returns': 'HvBalloonInfo' }
##
# @HV_BALLOON_STATUS_REPORT:
#
# Emitted when the hv-balloon driver receives a "STATUS" message from
# the guest.
#
# Note: this event is rate-limited.
#
# Since: 8.2
#
# Example:
#
# <- { "event": "HV_BALLOON_STATUS_REPORT",
# "data": { "committed": 816640000, "available": 3333054464 },
# "timestamp": { "seconds": 1600295492, "microseconds": 661044 } }
#
##
{ 'event': 'HV_BALLOON_STATUS_REPORT',
'data': 'HvBalloonInfo' }
##
# @MemoryInfo:
#
@ -1289,6 +1351,29 @@
}
}
##
# @HvBalloonDeviceInfo:
#
# hv-balloon provided memory state information
#
# @id: device's ID
#
# @memaddr: physical address in memory, where device is mapped
#
# @max-size: the maximum size of memory that the device can provide
#
# @memdev: memory backend linked with device
#
# Since: 8.2
##
{ 'struct': 'HvBalloonDeviceInfo',
'data': { '*id': 'str',
'*memaddr': 'size',
'max-size': 'size',
'*memdev': 'str'
}
}
##
# @MemoryDeviceInfoKind:
#
@ -1300,10 +1385,13 @@
#
# @sgx-epc: since 6.2.
#
# @hv-balloon: since 8.2.
#
# Since: 2.1
##
{ 'enum': 'MemoryDeviceInfoKind',
'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc' ] }
'data': [ 'dimm', 'nvdimm', 'virtio-pmem', 'virtio-mem', 'sgx-epc',
'hv-balloon' ] }
##
# @PCDIMMDeviceInfoWrapper:
@ -1337,6 +1425,14 @@
{ 'struct': 'SgxEPCDeviceInfoWrapper',
'data': { 'data': 'SgxEPCDeviceInfo' } }
##
# @HvBalloonDeviceInfoWrapper:
#
# Since: 8.2
##
{ 'struct': 'HvBalloonDeviceInfoWrapper',
'data': { 'data': 'HvBalloonDeviceInfo' } }
##
# @MemoryDeviceInfo:
#
@ -1351,7 +1447,8 @@
'nvdimm': 'PCDIMMDeviceInfoWrapper',
'virtio-pmem': 'VirtioPMEMDeviceInfoWrapper',
'virtio-mem': 'VirtioMEMDeviceInfoWrapper',
'sgx-epc': 'SgxEPCDeviceInfoWrapper'
'sgx-epc': 'SgxEPCDeviceInfoWrapper',
'hv-balloon': 'HvBalloonDeviceInfoWrapper'
}
}

View File

@ -123,6 +123,7 @@ meson_options_help() {
printf "%s\n" ' gtk-clipboard clipboard support for the gtk UI (EXPERIMENTAL, MAY HANG)'
printf "%s\n" ' guest-agent Build QEMU Guest Agent'
printf "%s\n" ' guest-agent-msi Build MSI package for the QEMU Guest Agent'
printf "%s\n" ' hv-balloon hv-balloon driver (requires Glib 2.68+ GTree API)'
printf "%s\n" ' hvf HVF acceleration support'
printf "%s\n" ' iconv Font glyph conversion support'
printf "%s\n" ' jack JACK sound support'
@ -333,6 +334,8 @@ _meson_option_parse() {
--disable-guest-agent-msi) printf "%s" -Dguest_agent_msi=disabled ;;
--enable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=true ;;
--disable-hexagon-idef-parser) printf "%s" -Dhexagon_idef_parser=false ;;
--enable-hv-balloon) printf "%s" -Dhv_balloon=enabled ;;
--disable-hv-balloon) printf "%s" -Dhv_balloon=disabled ;;
--enable-hvf) printf "%s" -Dhvf=enabled ;;
--disable-hvf) printf "%s" -Dhvf=disabled ;;
--iasl=*) quote_sh "-Diasl=$2" ;;

View File

@ -45,6 +45,7 @@ static int query_error_class(const char *cmd)
{ "query-acpi-ospm-status", ERROR_CLASS_GENERIC_ERROR },
{ "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
{ "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
{ "query-hv-balloon-status-report", ERROR_CLASS_GENERIC_ERROR },
{ "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
/* Only valid with a USB bus added */
{ "x-query-usb", ERROR_CLASS_GENERIC_ERROR },