numa: Extend CLI to provide memory side cache information

Add -numa hmat-cache option to provide Memory Side Cache Information.
These memory attributes help to build Memory Side Cache Information
Structure(s) in ACPI Heterogeneous Memory Attribute Table (HMAT).
Before using hmat-cache option, enable HMAT with -machine hmat=on.

Acked-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Liu Jingqi <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Message-Id: <20191213011929.2520-4-tao3.xu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
master
Liu Jingqi 2019-12-13 09:19:24 +08:00 committed by Michael S. Tsirkin
parent 9b12dfa03a
commit c412a48d4d
4 changed files with 179 additions and 4 deletions

View File

@ -379,6 +379,73 @@ void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
g_array_append_val(hmat_lb->list, lb_data); g_array_append_val(hmat_lb->list, lb_data);
} }
/*
 * Validate one "-numa hmat-cache" entry and record it in the NUMA state.
 *
 * @ms:   machine whose numa_state receives the cache description
 * @node: parsed option values (node-id, level, size, ...)
 * @errp: set when the entry is rejected; nothing is stored in that case
 *
 * Checks, in order: node-id is below the number of configured nodes, the
 * node's lb_info_provided flags equal BIT(0) | BIT(1), the level lies in
 * [1, HMAT_LB_LEVELS - 1], the (node, level) slot is still empty, and the
 * size is strictly smaller than an already recorded level - 1 entry and
 * strictly larger than an already recorded level + 1 entry.
 *
 * On success a heap copy of @node is stored in
 * ms->numa_state->hmat_cache[node-id][level].
 */
void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
                           Error **errp)
{
    const uint32_t node_id = node->node_id;
    const uint8_t level = node->level;
    const int node_count = ms->numa_state->num_nodes;
    NumaHmatCacheOptions **levels;
    NumaHmatCacheOptions *entry;

    if (node_id >= node_count) {
        error_setg(errp, "Invalid node-id=%" PRIu32 ", it should be less "
                   "than %d", node_id, node_count);
        return;
    }

    /* Both flag bits must be set before cache attributes are accepted. */
    if (ms->numa_state->nodes[node_id].lb_info_provided !=
        (BIT(0) | BIT(1))) {
        error_setg(errp, "The latency and bandwidth information of "
                   "node-id=%" PRIu32 " should be provided before memory side "
                   "cache attributes", node_id);
        return;
    }

    if (level < 1 || level >= HMAT_LB_LEVELS) {
        error_setg(errp, "Invalid level=%" PRIu8 ", it should be larger than 0 "
                   "and less than or equal to %d", level,
                   HMAT_LB_LEVELS - 1);
        return;
    }

    assert(node->associativity < HMAT_CACHE_ASSOCIATIVITY__MAX);
    assert(node->policy < HMAT_CACHE_WRITE_POLICY__MAX);

    /* node_id is validated above, so the per-node row can be taken now. */
    levels = ms->numa_state->hmat_cache[node_id];

    if (levels[level]) {
        error_setg(errp, "Duplicate configuration of the side cache for "
                   "node-id=%" PRIu32 " and level=%" PRIu8,
                   node_id, level);
        return;
    }

    /* Compare against the neighbouring lower level, if one was recorded. */
    if (level > 1) {
        NumaHmatCacheOptions *prev_level = levels[level - 1];

        if (prev_level && node->size >= prev_level->size) {
            error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
                       " should be less than the size(%" PRIu64 ") of "
                       "level=%u", node->size, level,
                       prev_level->size,
                       level - 1);
            return;
        }
    }

    /* Compare against the neighbouring higher level, if one was recorded. */
    if (level < HMAT_LB_LEVELS - 1) {
        NumaHmatCacheOptions *next_level = levels[level + 1];

        if (next_level && node->size <= next_level->size) {
            error_setg(errp, "Invalid size=%" PRIu64 ", the size of level=%" PRIu8
                       " should be larger than the size(%" PRIu64 ") of "
                       "level=%u", node->size, level,
                       next_level->size,
                       level + 1);
            return;
        }
    }

    /* Keep a private copy of the validated options. */
    entry = g_malloc0(sizeof(*entry));
    memcpy(entry, node, sizeof(*entry));
    levels[level] = entry;
}
void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp) void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
{ {
Error *err = NULL; Error *err = NULL;
@ -430,6 +497,19 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp)
goto end; goto end;
} }
break; break;
case NUMA_OPTIONS_TYPE_HMAT_CACHE:
if (!ms->numa_state->hmat_enabled) {
error_setg(errp, "ACPI Heterogeneous Memory Attribute Table "
"(HMAT) is disabled, enable it with -machine hmat=on "
"before using any of hmat specific options");
return;
}
parse_numa_hmat_cache(ms, &object->u.hmat_cache, &err);
if (err) {
goto end;
}
break;
default: default:
abort(); abort();
} }

View File

@ -91,6 +91,9 @@ struct NumaState {
/* NUMA nodes HMAT Locality Latency and Bandwidth Information */ /* NUMA nodes HMAT Locality Latency and Bandwidth Information */
HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES]; HMAT_LB_Info *hmat_lb[HMAT_LB_LEVELS][HMAT_LB_TYPES];
/* Memory Side Cache Information Structure */
NumaHmatCacheOptions *hmat_cache[MAX_NODES][HMAT_LB_LEVELS];
}; };
typedef struct NumaState NumaState; typedef struct NumaState NumaState;
@ -98,6 +101,8 @@ void set_numa_options(MachineState *ms, NumaOptions *object, Error **errp);
void parse_numa_opts(MachineState *ms); void parse_numa_opts(MachineState *ms);
void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node, void parse_numa_hmat_lb(NumaState *numa_state, NumaHmatLBOptions *node,
Error **errp); Error **errp);
void parse_numa_hmat_cache(MachineState *ms, NumaHmatCacheOptions *node,
Error **errp);
void numa_complete_configuration(MachineState *ms); void numa_complete_configuration(MachineState *ms);
void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms); void query_numa_node_mem(NumaNodeMem node_mem[], MachineState *ms);
extern QemuOptsList qemu_numa_opts; extern QemuOptsList qemu_numa_opts;

View File

@ -428,10 +428,12 @@
# #
# @hmat-lb: memory latency and bandwidth information (Since: 5.0) # @hmat-lb: memory latency and bandwidth information (Since: 5.0)
# #
# @hmat-cache: memory side cache information (Since: 5.0)
#
# Since: 2.1 # Since: 2.1
## ##
{ 'enum': 'NumaOptionsType', { 'enum': 'NumaOptionsType',
'data': [ 'node', 'dist', 'cpu', 'hmat-lb' ] } 'data': [ 'node', 'dist', 'cpu', 'hmat-lb', 'hmat-cache' ] }
## ##
# @NumaOptions: # @NumaOptions:
@ -447,7 +449,8 @@
'node': 'NumaNodeOptions', 'node': 'NumaNodeOptions',
'dist': 'NumaDistOptions', 'dist': 'NumaDistOptions',
'cpu': 'NumaCpuOptions', 'cpu': 'NumaCpuOptions',
'hmat-lb': 'NumaHmatLBOptions' }} 'hmat-lb': 'NumaHmatLBOptions',
'hmat-cache': 'NumaHmatCacheOptions' }}
## ##
# @NumaNodeOptions: # @NumaNodeOptions:
@ -646,6 +649,80 @@
'*latency': 'uint64', '*latency': 'uint64',
'*bandwidth': 'size' }} '*bandwidth': 'size' }}
##
# @HmatCacheAssociativity:
#
# Cache associativity in the Memory Side Cache Information Structure
# of HMAT
#
# For more information of @HmatCacheAssociativity, see chapter
# 5.2.27.5: Table 5-147 of ACPI 6.3 spec.
#
# @none: None (no memory side cache in this proximity domain,
# or cache associativity unknown)
#
# @direct: Direct Mapped
#
# @complex: Complex Cache Indexing (implementation specific)
#
# Since: 5.0
##
{ 'enum': 'HmatCacheAssociativity',
'data': [ 'none', 'direct', 'complex' ] }
##
# @HmatCacheWritePolicy:
#
# Cache write policy in the Memory Side Cache Information Structure
# of HMAT
#
# For more information of @HmatCacheWritePolicy, see chapter
# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
#
# @none: None (no memory side cache in this proximity domain,
# or cache write policy unknown)
#
# @write-back: Write Back (WB)
#
# @write-through: Write Through (WT)
#
# Since: 5.0
##
{ 'enum': 'HmatCacheWritePolicy',
'data': [ 'none', 'write-back', 'write-through' ] }
##
# @NumaHmatCacheOptions:
#
# Set the memory side cache information for a given memory domain.
#
# For more information of @NumaHmatCacheOptions, see chapter
# 5.2.27.5: Table 5-147: Field "Cache Attributes" of ACPI 6.3 spec.
#
# @node-id: the memory proximity domain to which the memory belongs.
#
# @size: the size of memory side cache in bytes.
#
# @level: the cache level described in this structure.
#
# @associativity: the cache associativity,
# none/direct(direct-mapped)/complex(complex cache indexing).
#
# @policy: the write policy, none/write-back/write-through.
#
# @line: the cache line size in bytes.
#
# Since: 5.0
##
{ 'struct': 'NumaHmatCacheOptions',
'data': {
'node-id': 'uint32',
'size': 'size',
'level': 'uint8',
'associativity': 'HmatCacheAssociativity',
'policy': 'HmatCacheWritePolicy',
'line': 'uint16' }}
## ##
# @HostMemPolicy: # @HostMemPolicy:
# #

View File

@ -176,7 +176,8 @@ DEF("numa", HAS_ARG, QEMU_OPTION_numa,
"-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n" "-numa node[,memdev=id][,cpus=firstcpu[-lastcpu]][,nodeid=node][,initiator=node]\n"
"-numa dist,src=source,dst=destination,val=distance\n" "-numa dist,src=source,dst=destination,val=distance\n"
"-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n" "-numa cpu,node-id=node[,socket-id=x][,core-id=y][,thread-id=z]\n"
"-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n", "-numa hmat-lb,initiator=node,target=node,hierarchy=memory|first-level|second-level|third-level,data-type=access-latency|read-latency|write-latency[,latency=lat][,bandwidth=bw]\n"
"-numa hmat-cache,node-id=node,size=size,level=level[,associativity=none|direct|complex][,policy=none|write-back|write-through][,line=size]\n",
QEMU_ARCH_ALL) QEMU_ARCH_ALL)
STEXI STEXI
@item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}] @item -numa node[,mem=@var{size}][,cpus=@var{firstcpu}[-@var{lastcpu}]][,nodeid=@var{node}][,initiator=@var{initiator}]
@ -184,6 +185,7 @@ STEXI
@itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance} @itemx -numa dist,src=@var{source},dst=@var{destination},val=@var{distance}
@itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}] @itemx -numa cpu,node-id=@var{node}[,socket-id=@var{x}][,core-id=@var{y}][,thread-id=@var{z}]
@itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{type}[,latency=@var{lat}][,bandwidth=@var{bw}] @itemx -numa hmat-lb,initiator=@var{node},target=@var{node},hierarchy=@var{hierarchy},data-type=@var{type}[,latency=@var{lat}][,bandwidth=@var{bw}]
@itemx -numa hmat-cache,node-id=@var{node},size=@var{size},level=@var{level}[,associativity=@var{str}][,policy=@var{str}][,line=@var{size}]
@findex -numa @findex -numa
Define a NUMA node and assign RAM and VCPUs to it. Define a NUMA node and assign RAM and VCPUs to it.
Set the NUMA distance from a source node to a destination node. Set the NUMA distance from a source node to a destination node.
@ -287,11 +289,20 @@ NUM byte per second (or MB/s, GB/s or TB/s depending on used suffix).
Note that if latency or bandwidth value is 0, means the corresponding latency or Note that if latency or bandwidth value is 0, means the corresponding latency or
bandwidth information is not provided. bandwidth information is not provided.
In the @samp{hmat-cache} option, @var{node-id} is the NUMA node to which the
memory belongs. @var{size} is the size of the memory side cache in bytes.
@var{level} is the cache level described in this structure; note that cache
level 0 must not be used with the @samp{hmat-cache} option. @var{associativity}
is the cache associativity; the possible values are
'none/direct(direct-mapped)/complex(complex cache indexing)'.
@var{policy} is the write policy. @var{line} is the cache line size in bytes.
For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and For example, the following options describe 2 NUMA nodes. Node 0 has 2 cpus and
a ram, node 1 has only a ram. The processors in node 0 access memory in node a ram, node 1 has only a ram. The processors in node 0 access memory in node
0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s; 0 with access-latency 5 nanoseconds, access-bandwidth is 200 MB/s;
The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10 The processors in NUMA node 0 access memory in NUMA node 1 with access-latency 10
nanoseconds, access-bandwidth is 100 MB/s. nanoseconds, access-bandwidth is 100 MB/s.
As for memory side cache information, NUMA nodes 0 and 1 each have one level of
memory side cache: the size is 10KB, the policy is write-back, and the cache line size is 8 bytes:
@example @example
-machine hmat=on \ -machine hmat=on \
-m 2G \ -m 2G \
@ -305,7 +316,9 @@ nanoseconds, access-bandwidth is 100 MB/s.
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \ -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,latency=5 \
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \ -numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,bandwidth=200M \
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \ -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,latency=10 \
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M -numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,bandwidth=100M \
-numa hmat-cache,node-id=0,size=10K,level=1,associativity=direct,policy=write-back,line=8 \
-numa hmat-cache,node-id=1,size=10K,level=1,associativity=direct,policy=write-back,line=8
@end example @end example
ETEXI ETEXI