On systems where numTasks is not evenly divisible by tasksPerNode, we were
seeing some nodes reading multiple files while others read none after
reordering.

Commonly all nodes have the same number of tasks, but there is nothing
requiring that to be the case.  Imagine having 64 tasks running against 4
nodes which can run 20 tasks each: you get three groups of 20 and one group
of 4.  On this system, nodes running in the group of 4 were previously
getting a tasksPerNode of 4, which meant they reordered tasks differently
than the nodes which got a tasksPerNode of 20.
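
As a concrete sketch of that example (illustrative code only, not part of
this patch):

    #include <stdio.h>

    int main(void)
    {
        int numTasks = 64, slotsPerNode = 20;
        int numNodes = ((numTasks - 1) / slotsPerNode) + 1;   /* 4 */
        for (int node = 0; node < numNodes; node++) {
            /* block mapping: nodes 0-2 hold 20 tasks, node 3 holds 4 */
            int localTasks = (node < numNodes - 1)
                    ? slotsPerNode
                    : numTasks - slotsPerNode * (numNodes - 1);
            /* previously each node reordered using its own localTasks;
             * the fix makes every node use the node-0 count (20) */
            printf("node %d: local task count = %d\n", node, localTasks);
        }
        return 0;
    }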

The key to fixing this is ensuring that every node reorders tasks the same
way, which means ensuring they all use the same input values.  On systems
where the number of tasks per node is inconsistent the reordering will, of
course, also be inconsistent (some tasks may end up on the same node, or not
as far separated as desired, etc.), but at least this way you always end up
with a 1:1 reordering.
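
To see why identical inputs matter, note that the reorder is at heart a
modular shift; a minimal sketch (simplified from the real logic, which also
factors in taskPerNodeOffset and block mapping):

    /* With the same shift on every rank, rank -> (rank + shift) % numTasks
     * is a permutation, so each file is read by exactly one task.  If the
     * shift differs from node to node, the map can collide: some files get
     * read twice and others not at all. */
    int ReorderedRank(int rank, int shift, int numTasks)
    {
        return (rank + shift) % numTasks;
    }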

- Renamed nodes/nodeCount to numNodes
- Renamed tasksPerNode to numTasksOnNode0
- Ensured that numTasksOnNode0 will always have the same value regardless of
  which node you're on
- Removed the inconsistently used globals numTasksWorld and tasksPerNode and
  replaced them with per-test params equivalents
- Added utility functions for retrieving these values:
  - numNodes -> GetNumNodes
  - numTasks -> GetNumTasks
  - numTasksOnNode0 -> GetNumTasksOnNode0
- Improved MPI_VERSION < 3 logic for GetNumNodes so it works when numTasks is
  not evenly divisible by numTasksOnNode0 (see the sketch after this list)
- Left 'nodes' and 'tasksPerNode' in output alone to not break compatibility
- Allowed command-line params to override numTasks, numNodes, and
  numTasksOnNode0 but default to using the MPI-calculated values
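
The corrected pre-MPI-3 node count comes from the ceiling division visible
in the patch below; a small worked example:

    /* 64 tasks with 20 tasks on node 0:
     * ((64 - 1) / 20) + 1 = 3 + 1 = 4 nodes,
     * where the old numTasks / tasksPerNode gave 3 on the full
     * nodes and 16 on the partial one. */
    numNodes = ((numTasks - 1) / numTasksOnNode0) + 1;
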
Branch: master
Author: Josh Schwartz  2019-08-30 16:45:03 -06:00
Parent: 0d9f46e980
Commit: 0e952f0f8c

9 changed files with 164 additions and 76 deletions

NEWS

@@ -120,7 +120,7 @@ Version 2.10.1
- Corrected IOR_GetFileSize() function to point to HDF5 and NCMPI versions of
IOR_GetFileSize() calls
- Changed the netcdf dataset from 1D array to 4D array, where the 4 dimensions
-are: [segmentCount][numTasksWorld][numTransfers][transferSize]
+are: [segmentCount][numTasks][numTransfers][transferSize]
This patch from Wei-keng Liao allows for file sizes > 4GB (provided no
single dimension is > 4GB).
- Finalized random-capability release

src/aiori-NCMPI.c

@@ -216,7 +216,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer,
param->blockSize / param->transferSize;
/* reshape 1D array to 3D array:
-[segmentCount*numTasksWorld][numTransfers][transferSize]
+[segmentCount*numTasks][numTransfers][transferSize]
Requirement: none of these dimensions should be > 4G,
*/
NCMPI_CHECK(ncmpi_def_dim
@@ -267,7 +267,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer,
bufSize[1] = 1;
bufSize[2] = param->transferSize;
-offset[0] = segmentNum * numTasksWorld + rank;
+offset[0] = segmentNum * param->numTasks + rank;
offset[1] = transferNum;
offset[2] = 0;

src/ior-output.c

@@ -339,10 +339,10 @@ void ShowTestStart(IOR_param_t *test)
PrintKeyVal("options", test->options);
PrintKeyValInt("dryRun", test->dryRun);
PrintKeyValInt("nodes", test->nodes);
PrintKeyValInt("nodes", test->numNodes);
PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask);
PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode);
PrintKeyValInt("tasksPerNode", tasksPerNode);
PrintKeyValInt("tasksPerNode", test->numTasksOnNode0);
PrintKeyValInt("repetitions", test->repetitions);
PrintKeyValInt("multiFile", test->multiFile);
PrintKeyValInt("interTestDelay", test->interTestDelay);
@@ -430,8 +430,9 @@ void ShowSetup(IOR_param_t *params)
PrintKeyValInt("task offset", params->taskPerNodeOffset);
PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed);
}
PrintKeyValInt("nodes", params->numNodes);
PrintKeyValInt("tasks", params->numTasks);
PrintKeyValInt("clients per node", params->tasksPerNode);
PrintKeyValInt("clients per node", params->numTasksOnNode0);
if (params->memoryPerTask != 0){
PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO));
}
@@ -571,7 +572,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
}
fprintf(out_resultfile, "%5d ", params->id);
fprintf(out_resultfile, "%6d ", params->numTasks);
fprintf(out_resultfile, "%3d ", params->tasksPerNode);
fprintf(out_resultfile, "%3d ", params->numTasksOnNode0);
fprintf(out_resultfile, "%4d ", params->repetitions);
fprintf(out_resultfile, "%3d ", params->filePerProc);
fprintf(out_resultfile, "%5d ", params->reorderTasks);
@@ -595,7 +596,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
PrintKeyValInt("blockSize", params->blockSize);
PrintKeyValInt("transferSize", params->transferSize);
PrintKeyValInt("numTasks", params->numTasks);
PrintKeyValInt("tasksPerNode", params->tasksPerNode);
PrintKeyValInt("tasksPerNode", params->numTasksOnNode0);
PrintKeyValInt("repetitions", params->repetitions);
PrintKeyValInt("filePerProc", params->filePerProc);
PrintKeyValInt("reorderTasks", params->reorderTasks);

src/ior.c

@@ -65,7 +65,6 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out
out_resultfile = world_out;
mpi_comm_world = world_com;
MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), "cannot get number of tasks");
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
/* setup tests, and validate parameters */
@@ -113,8 +112,6 @@ int ior_main(int argc, char **argv)
MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI");
mpi_comm_world = MPI_COMM_WORLD;
-MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld),
-"cannot get number of tasks");
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
/* set error-handling */
@@ -188,8 +185,14 @@ void init_IOR_Param_t(IOR_param_t * p)
p->writeFile = p->readFile = FALSE;
p->checkWrite = p->checkRead = FALSE;
-p->nodes = 1;
-p->tasksPerNode = 1;
+/*
+ * These can be overridden from the command-line but otherwise will be
+ * set from MPI.
+ */
+p->numTasks = -1;
+p->numNodes = -1;
+p->numTasksOnNode0 = -1;
p->repetitions = 1;
p->repCounter = -1;
p->open = WRITE;
@@ -919,12 +922,17 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test)
*/
static void InitTests(IOR_test_t *tests, MPI_Comm com)
{
-int size;
+int mpiNumNodes = 0;
+int mpiNumTasks = 0;
+int mpiNumTasksOnNode0 = 0;
-MPI_CHECK(MPI_Comm_size(com, & size), "MPI_Comm_size() error");
-/* count the tasks per node */
-tasksPerNode = CountTasksPerNode(com);
+/*
+ * These default values are the same for every test and expensive to
+ * retrieve so just do it once.
+ */
+mpiNumNodes = GetNumNodes(com);
+mpiNumTasks = GetNumTasks(com);
+mpiNumTasksOnNode0 = GetNumTasksOnNode0(com);
/*
* Since there is no guarantee that anyone other than
@@ -937,12 +945,28 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com)
while (tests != NULL) {
IOR_param_t *params = & tests->params;
params->testComm = com;
-params->nodes = params->numTasks / tasksPerNode;
-params->tasksPerNode = tasksPerNode;
-params->tasksBlockMapping = QueryNodeMapping(com,false);
-if (params->numTasks == 0) {
-params->numTasks = size;
+/* use MPI values if not overridden on command-line */
+if (params->numNodes == -1) {
+params->numNodes = mpiNumNodes;
}
+if (params->numTasks == -1) {
+params->numTasks = mpiNumTasks;
+} else if (params->numTasks > mpiNumTasks) {
+if (rank == 0) {
+fprintf(out_logfile,
+"WARNING: More tasks requested (%d) than available (%d),",
+params->numTasks, mpiNumTasks);
+fprintf(out_logfile, " running with %d tasks.\n",
+mpiNumTasks);
+}
+params->numTasks = mpiNumTasks;
+}
+if (params->numTasksOnNode0 == -1) {
+params->numTasksOnNode0 = mpiNumTasksOnNode0;
+}
+params->tasksBlockMapping = QueryNodeMapping(com,false);
params->expectedAggFileSize =
params->blockSize * params->segmentCount * params->numTasks;
@@ -1090,7 +1114,7 @@ static void *HogMemory(IOR_param_t *params)
if (verbose >= VERBOSE_3)
fprintf(out_logfile, "This node hogging %ld bytes of memory\n",
params->memoryPerNode);
-size = params->memoryPerNode / params->tasksPerNode;
+size = params->memoryPerNode / params->numTasksOnNode0;
} else {
return NULL;
}
@@ -1190,16 +1214,6 @@ static void TestIoSys(IOR_test_t *test)
IOR_io_buffers ioBuffers;
/* set up communicator for test */
-if (params->numTasks > numTasksWorld) {
-if (rank == 0) {
-fprintf(out_logfile,
-"WARNING: More tasks requested (%d) than available (%d),",
-params->numTasks, numTasksWorld);
-fprintf(out_logfile, " running on %d tasks.\n",
-numTasksWorld);
-}
-params->numTasks = numTasksWorld;
-}
MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group),
"MPI_Comm_group() error");
range[0] = 0; /* first rank */
@@ -1226,7 +1240,6 @@ static void TestIoSys(IOR_test_t *test)
"Using reorderTasks '-C' (useful to avoid read cache in client)\n");
fflush(out_logfile);
}
-params->tasksPerNode = CountTasksPerNode(testComm);
backend = params->backend;
/* show test setup */
if (rank == 0 && verbose >= VERBOSE_0)
@@ -1363,7 +1376,7 @@ static void TestIoSys(IOR_test_t *test)
/* move two nodes away from writing node */
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
if (params->tasksBlockMapping) {
-shift = params->tasksPerNode; /* switch to by-slot (contiguous block) mapping */
+shift = params->numTasksOnNode0; /* switch to by-slot (contiguous block) mapping */
}
rankOffset = (2 * shift) % params->numTasks;
}
@@ -1388,7 +1401,7 @@ static void TestIoSys(IOR_test_t *test)
if(params->stoneWallingStatusFile){
params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile);
if(params->stoneWallingWearOutIterations == -1 && rank == 0){
fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!");
fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n");
params->stoneWallingWearOutIterations = 0;
}
}
@@ -1403,7 +1416,7 @@ static void TestIoSys(IOR_test_t *test)
/* move one node away from writing node */
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
if (params->tasksBlockMapping) {
-shift=params->tasksPerNode; /* switch to a by-slot (contiguous block) mapping */
+shift=params->numTasksOnNode0; /* switch to a by-slot (contiguous block) mapping */
}
rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks;
}
@@ -1414,7 +1427,7 @@ static void TestIoSys(IOR_test_t *test)
int nodeoffset;
unsigned int iseed0;
nodeoffset = params->taskPerNodeOffset;
-nodeoffset = (nodeoffset < params->nodes) ? nodeoffset : params->nodes - 1;
+nodeoffset = (nodeoffset < params->numNodes) ? nodeoffset : params->numNodes - 1;
if (params->reorderTasksRandomSeed < 0)
iseed0 = -1 * params->reorderTasksRandomSeed + rep;
else
@@ -1424,7 +1437,7 @@ static void TestIoSys(IOR_test_t *test)
rankOffset = rand() % params->numTasks;
}
while (rankOffset <
-(nodeoffset * params->tasksPerNode)) {
+(nodeoffset * params->numTasksOnNode0)) {
rankOffset = rand() % params->numTasks;
}
/* Get more detailed stats if requested by verbose level */
@@ -1454,7 +1467,7 @@ static void TestIoSys(IOR_test_t *test)
"barrier error");
if (rank == 0 && verbose >= VERBOSE_1) {
fprintf(out_logfile,
"Commencing read performance test: %s",
"Commencing read performance test: %s\n",
CurrentTimeString());
}
timer[2] = GetTimeStamp();

src/ior.h

@@ -98,8 +98,8 @@ typedef struct
// intermediate options
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
int numTasks; /* number of tasks for test */
-int nodes; /* number of nodes for test */
-int tasksPerNode; /* number of tasks per node */
+int numNodes; /* number of nodes for test */
+int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */
int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */
int repetitions; /* number of repetitions of test */
int repCounter; /* rep counter */

src/mdtest.c

@@ -1867,7 +1867,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
mdtest_init_args();
int i, j;
-int nodeCount;
+int numNodes;
+int numTasksOnNode0 = 0;
MPI_Group worldgroup, testgroup;
struct {
int first;
@@ -1943,8 +1944,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
pid = getpid();
uid = getuid();
-tasksPerNode = CountTasksPerNode(testComm);
-nodeCount = size / tasksPerNode;
+numNodes = GetNumNodes(testComm);
+numTasksOnNode0 = GetNumTasksOnNode0(testComm);
char cmd_buffer[4096];
strncpy(cmd_buffer, argv[0], 4096);
@@ -1953,7 +1954,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
}
VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, nodeCount);
VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, numNodes);
VERBOSE(0,-1,"Command line used: %s", cmd_buffer);
/* adjust special variables */
@@ -2120,10 +2121,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
/* set the shift to mimic IOR and shift by procs per node */
if (nstride > 0) {
-if ( nodeCount > 1 && tasksBlockMapping ) {
+if ( numNodes > 1 && tasksBlockMapping ) {
/* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks
however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */
-nstride *= tasksPerNode;
+nstride *= numTasksOnNode0;
}
VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride);
}

src/parse_options.c

@@ -151,8 +151,12 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
params->maxTimeDuration = atoi(value);
} else if (strcasecmp(option, "outlierthreshold") == 0) {
params->outlierThreshold = atoi(value);
} else if (strcasecmp(option, "nodes") == 0) {
params->nodes = atoi(value);
} else if (strcasecmp(option, "numnodes") == 0) {
params->numNodes = atoi(value);
} else if (strcasecmp(option, "numtasks") == 0) {
params->numTasks = atoi(value);
} else if (strcasecmp(option, "numtasksonnode0") == 0) {
params->numTasksOnNode0 = atoi(value);
} else if (strcasecmp(option, "repetitions") == 0) {
params->repetitions = atoi(value);
} else if (strcasecmp(option, "intertestdelay") == 0) {
@@ -286,8 +290,6 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
params->beegfs_chunkSize = string_to_bytes(value);
if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16))
ERR("beegfsChunkSize must be a power of two and >64k");
} else if (strcasecmp(option, "numtasks") == 0) {
params->numTasks = atoi(value);
} else if (strcasecmp(option, "summaryalways") == 0) {
params->summary_every_test = atoi(value);
} else {
@@ -498,7 +500,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
{'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile},
{'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr},
{'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & params->noFill},
{'N', NULL, "numTasks -- number of tasks that should participate in the test", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks},
{'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks},
{'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName},
{'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper},
{'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & params->preallocate},

src/utilities.c

@@ -53,11 +53,9 @@
extern int errno;
extern int numTasks;
/* globals used by other files, also defined "extern" in ior.h */
int numTasksWorld = 0;
/* globals used by other files, also defined "extern" in utilities.h */
int rank = 0;
int rankOffset = 0;
int tasksPerNode = 0; /* tasks per node */
int verbose = VERBOSE_0; /* verbose output */
MPI_Comm testComm;
MPI_Comm mpi_comm_world;
@@ -265,35 +263,108 @@ int QueryNodeMapping(MPI_Comm comm, int print_nodemap) {
return ret;
}
+/*
+ * There is a more direct way to determine the node count in modern MPI
+ * versions so we use that if possible.
+ *
+ * For older versions we use a method which should still provide accurate
+ * results even if the total number of tasks is not evenly divisible by the
+ * tasks on node rank 0.
+ */
+int GetNumNodes(MPI_Comm comm) {
#if MPI_VERSION >= 3
-int CountTasksPerNode(MPI_Comm comm) {
-/* modern MPI provides a simple way to get the local process count */
-MPI_Comm shared_comm;
-int count;
+MPI_Comm shared_comm;
+int shared_rank = 0;
+int local_result = 0;
+int numNodes = 0;
+MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
+"MPI_Comm_split_type() error");
+MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error");
+local_result = shared_rank == 0? 1 : 0;
+MPI_CHECK(MPI_Allreduce(&local_result, &numNodes, 1, MPI_INT, MPI_SUM, comm),
+"MPI_Allreduce() error");
+MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error");
-MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
-MPI_Comm_size (shared_comm, &count);
-MPI_Comm_free (&shared_comm);
+return numNodes;
+#else
+int numTasks = 0;
+int numTasksOnNode0 = 0;
-return count;
+numTasks = GetNumTasks(comm);
+numTasksOnNode0 = GetNumTasksOnNode0(comm);
+return ((numTasks - 1) / numTasksOnNode0) + 1;
+#endif
+}
+int GetNumTasks(MPI_Comm comm) {
+int numTasks = 0;
+MPI_CHECK(MPI_Comm_size(comm, &numTasks), "cannot get number of tasks");
+return numTasks;
+}
+/*
+ * It's very important that this method provide the same result to every
+ * process as it's used for redistributing which jobs read from which files.
+ * It was renamed accordingly.
+ *
+ * If different nodes get different results from this method then jobs get
+ * redistributed unevenly and you no longer have a 1:1 relationship with some
+ * nodes reading multiple files while others read none.
+ *
+ * In the common case the number of tasks on each node (MPI_Comm_size on an
+ * MPI_COMM_TYPE_SHARED communicator) will be the same. However, there is
+ * nothing which guarantees this. It's valid to have, for example, 64 jobs
+ * across 4 systems which can run 20 jobs each. In that scenario you end up
+ * with 3 MPI_COMM_TYPE_SHARED groups of 20, and one group of 4.
+ *
+ * In the (MPI_VERSION < 3) implementation of this method consistency is
+ * ensured by asking specifically about the number of tasks on the node with
+ * rank 0. In the original implementation for (MPI_VERSION >= 3) this was
+ * broken by using the LOCAL process count which differed depending on which
+ * node you were on.
+ *
+ * This was corrected below by first splitting the comm into groups by node
+ * (MPI_COMM_TYPE_SHARED) and then having only the node with world rank 0 and
+ * shared rank 0 return the MPI_Comm_size of its shared subgroup. This yields
+ * the original consistent behavior no matter which node asks.
+ *
+ * In the common case where every node has the same number of tasks this
+ * method will return the same value it always has.
+ */
+int GetNumTasksOnNode0(MPI_Comm comm) {
+#if MPI_VERSION >= 3
+MPI_Comm shared_comm;
+int shared_rank = 0;
+int tasks_on_node_rank0 = 0;
+int local_result = 0;
+MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
+"MPI_Comm_split_type() error");
+MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error");
+if (rank == 0 && shared_rank == 0) {
+MPI_CHECK(MPI_Comm_size(shared_comm, &local_result), "MPI_Comm_size() error");
+}
+MPI_CHECK(MPI_Allreduce(&local_result, &tasks_on_node_rank0, 1, MPI_INT, MPI_SUM, comm),
+"MPI_Allreduce() error");
+MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error");
+return tasks_on_node_rank0;
#else
/*
* Count the number of tasks that share a host.
*
-* This function employees the gethostname() call, rather than using
+* This version employs the gethostname() call, rather than using
* MPI_Get_processor_name(). We are interested in knowing the number
* of tasks that share a file system client (I/O node, compute node,
* whatever that may be). However on machines like BlueGene/Q,
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
* not the node where the I/O is function shipped to. gethostname()
* is assumed to identify the shared filesystem client in more situations.
*
-* NOTE: This also assumes that the task count on all nodes is equal
-* to the task count on the host running MPI task 0.
*/
-int CountTasksPerNode(MPI_Comm comm) {
int size;
MPI_Comm_size(comm, & size);
/* for debugging and testing */
@@ -336,8 +407,8 @@ int CountTasksPerNode(MPI_Comm comm) {
MPI_Bcast(&count, 1, MPI_INT, 0, comm);
return(count);
-}
#endif
+}
/*

src/utilities.h

@@ -18,10 +18,8 @@
#include <mpi.h>
#include "ior.h"
-extern int numTasksWorld;
extern int rank;
extern int rankOffset;
-extern int tasksPerNode;
extern int verbose;
extern MPI_Comm testComm;
extern MPI_Comm mpi_comm_world;
@@ -55,8 +53,10 @@ void SeedRandGen(MPI_Comm);
void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *);
char *HumanReadable(IOR_offset_t value, int base);
-int CountTasksPerNode(MPI_Comm comm);
int QueryNodeMapping(MPI_Comm comm, int print_nodemap);
+int GetNumNodes(MPI_Comm);
+int GetNumTasks(MPI_Comm);
+int GetNumTasksOnNode0(MPI_Comm);
void DelaySecs(int delay);
void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
size_t NodeMemoryStringToBytes(char *size_str);