commit
604598ab2f
2
NEWS
2
NEWS
|
@ -120,7 +120,7 @@ Version 2.10.1
|
||||||
- Corrected IOR_GetFileSize() function to point to HDF5 and NCMPI versions of
|
- Corrected IOR_GetFileSize() function to point to HDF5 and NCMPI versions of
|
||||||
IOR_GetFileSize() calls
|
IOR_GetFileSize() calls
|
||||||
- Changed the netcdf dataset from 1D array to 4D array, where the 4 dimensions
|
- Changed the netcdf dataset from 1D array to 4D array, where the 4 dimensions
|
||||||
are: [segmentCount][numTasksWorld][numTransfers][transferSize]
|
are: [segmentCount][numTasks][numTransfers][transferSize]
|
||||||
This patch from Wei-keng Liao allows for file sizes > 4GB (provided no
|
This patch from Wei-keng Liao allows for file sizes > 4GB (provided no
|
||||||
single dimension is > 4GB).
|
single dimension is > 4GB).
|
||||||
- Finalized random-capability release
|
- Finalized random-capability release
|
||||||
|
|
|
@ -216,7 +216,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer,
|
||||||
param->blockSize / param->transferSize;
|
param->blockSize / param->transferSize;
|
||||||
|
|
||||||
/* reshape 1D array to 3D array:
|
/* reshape 1D array to 3D array:
|
||||||
[segmentCount*numTasksWorld][numTransfers][transferSize]
|
[segmentCount*numTasks][numTransfers][transferSize]
|
||||||
Requirement: none of these dimensions should be > 4G,
|
Requirement: none of these dimensions should be > 4G,
|
||||||
*/
|
*/
|
||||||
NCMPI_CHECK(ncmpi_def_dim
|
NCMPI_CHECK(ncmpi_def_dim
|
||||||
|
@ -267,7 +267,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer,
|
||||||
bufSize[1] = 1;
|
bufSize[1] = 1;
|
||||||
bufSize[2] = param->transferSize;
|
bufSize[2] = param->transferSize;
|
||||||
|
|
||||||
offset[0] = segmentNum * numTasksWorld + rank;
|
offset[0] = segmentNum * param->numTasks + rank;
|
||||||
offset[1] = transferNum;
|
offset[1] = transferNum;
|
||||||
offset[2] = 0;
|
offset[2] = 0;
|
||||||
|
|
||||||
|
|
|
@ -340,10 +340,10 @@ void ShowTestStart(IOR_param_t *test)
|
||||||
|
|
||||||
PrintKeyVal("options", test->options);
|
PrintKeyVal("options", test->options);
|
||||||
PrintKeyValInt("dryRun", test->dryRun);
|
PrintKeyValInt("dryRun", test->dryRun);
|
||||||
PrintKeyValInt("nodes", test->nodes);
|
PrintKeyValInt("nodes", test->numNodes);
|
||||||
PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask);
|
PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask);
|
||||||
PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode);
|
PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode);
|
||||||
PrintKeyValInt("tasksPerNode", tasksPerNode);
|
PrintKeyValInt("tasksPerNode", test->numTasksOnNode0);
|
||||||
PrintKeyValInt("repetitions", test->repetitions);
|
PrintKeyValInt("repetitions", test->repetitions);
|
||||||
PrintKeyValInt("multiFile", test->multiFile);
|
PrintKeyValInt("multiFile", test->multiFile);
|
||||||
PrintKeyValInt("interTestDelay", test->interTestDelay);
|
PrintKeyValInt("interTestDelay", test->interTestDelay);
|
||||||
|
@ -431,8 +431,9 @@ void ShowSetup(IOR_param_t *params)
|
||||||
PrintKeyValInt("task offset", params->taskPerNodeOffset);
|
PrintKeyValInt("task offset", params->taskPerNodeOffset);
|
||||||
PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed);
|
PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed);
|
||||||
}
|
}
|
||||||
|
PrintKeyValInt("nodes", params->numNodes);
|
||||||
PrintKeyValInt("tasks", params->numTasks);
|
PrintKeyValInt("tasks", params->numTasks);
|
||||||
PrintKeyValInt("clients per node", params->tasksPerNode);
|
PrintKeyValInt("clients per node", params->numTasksOnNode0);
|
||||||
if (params->memoryPerTask != 0){
|
if (params->memoryPerTask != 0){
|
||||||
PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO));
|
PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO));
|
||||||
}
|
}
|
||||||
|
@ -572,7 +573,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
|
||||||
}
|
}
|
||||||
fprintf(out_resultfile, "%5d ", params->id);
|
fprintf(out_resultfile, "%5d ", params->id);
|
||||||
fprintf(out_resultfile, "%6d ", params->numTasks);
|
fprintf(out_resultfile, "%6d ", params->numTasks);
|
||||||
fprintf(out_resultfile, "%3d ", params->tasksPerNode);
|
fprintf(out_resultfile, "%3d ", params->numTasksOnNode0);
|
||||||
fprintf(out_resultfile, "%4d ", params->repetitions);
|
fprintf(out_resultfile, "%4d ", params->repetitions);
|
||||||
fprintf(out_resultfile, "%3d ", params->filePerProc);
|
fprintf(out_resultfile, "%3d ", params->filePerProc);
|
||||||
fprintf(out_resultfile, "%5d ", params->reorderTasks);
|
fprintf(out_resultfile, "%5d ", params->reorderTasks);
|
||||||
|
@ -596,7 +597,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
|
||||||
PrintKeyValInt("blockSize", params->blockSize);
|
PrintKeyValInt("blockSize", params->blockSize);
|
||||||
PrintKeyValInt("transferSize", params->transferSize);
|
PrintKeyValInt("transferSize", params->transferSize);
|
||||||
PrintKeyValInt("numTasks", params->numTasks);
|
PrintKeyValInt("numTasks", params->numTasks);
|
||||||
PrintKeyValInt("tasksPerNode", params->tasksPerNode);
|
PrintKeyValInt("tasksPerNode", params->numTasksOnNode0);
|
||||||
PrintKeyValInt("repetitions", params->repetitions);
|
PrintKeyValInt("repetitions", params->repetitions);
|
||||||
PrintKeyValInt("filePerProc", params->filePerProc);
|
PrintKeyValInt("filePerProc", params->filePerProc);
|
||||||
PrintKeyValInt("reorderTasks", params->reorderTasks);
|
PrintKeyValInt("reorderTasks", params->reorderTasks);
|
||||||
|
|
79
src/ior.c
79
src/ior.c
|
@ -65,7 +65,6 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out
|
||||||
out_resultfile = world_out;
|
out_resultfile = world_out;
|
||||||
mpi_comm_world = world_com;
|
mpi_comm_world = world_com;
|
||||||
|
|
||||||
MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), "cannot get number of tasks");
|
|
||||||
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
|
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
|
||||||
|
|
||||||
/* setup tests, and validate parameters */
|
/* setup tests, and validate parameters */
|
||||||
|
@ -113,8 +112,6 @@ int ior_main(int argc, char **argv)
|
||||||
MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI");
|
MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI");
|
||||||
|
|
||||||
mpi_comm_world = MPI_COMM_WORLD;
|
mpi_comm_world = MPI_COMM_WORLD;
|
||||||
MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld),
|
|
||||||
"cannot get number of tasks");
|
|
||||||
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
|
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank");
|
||||||
|
|
||||||
/* set error-handling */
|
/* set error-handling */
|
||||||
|
@ -189,8 +186,14 @@ void init_IOR_Param_t(IOR_param_t * p)
|
||||||
p->writeFile = p->readFile = FALSE;
|
p->writeFile = p->readFile = FALSE;
|
||||||
p->checkWrite = p->checkRead = FALSE;
|
p->checkWrite = p->checkRead = FALSE;
|
||||||
|
|
||||||
p->nodes = 1;
|
/*
|
||||||
p->tasksPerNode = 1;
|
* These can be overridden from the command-line but otherwise will be
|
||||||
|
* set from MPI.
|
||||||
|
*/
|
||||||
|
p->numTasks = -1;
|
||||||
|
p->numNodes = -1;
|
||||||
|
p->numTasksOnNode0 = -1;
|
||||||
|
|
||||||
p->repetitions = 1;
|
p->repetitions = 1;
|
||||||
p->repCounter = -1;
|
p->repCounter = -1;
|
||||||
p->open = WRITE;
|
p->open = WRITE;
|
||||||
|
@ -920,12 +923,17 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test)
|
||||||
*/
|
*/
|
||||||
static void InitTests(IOR_test_t *tests, MPI_Comm com)
|
static void InitTests(IOR_test_t *tests, MPI_Comm com)
|
||||||
{
|
{
|
||||||
int size;
|
int mpiNumNodes = 0;
|
||||||
|
int mpiNumTasks = 0;
|
||||||
|
int mpiNumTasksOnNode0 = 0;
|
||||||
|
|
||||||
MPI_CHECK(MPI_Comm_size(com, & size), "MPI_Comm_size() error");
|
/*
|
||||||
|
* These default values are the same for every test and expensive to
|
||||||
/* count the tasks per node */
|
* retrieve so just do it once.
|
||||||
tasksPerNode = CountTasksPerNode(com);
|
*/
|
||||||
|
mpiNumNodes = GetNumNodes(com);
|
||||||
|
mpiNumTasks = GetNumTasks(com);
|
||||||
|
mpiNumTasksOnNode0 = GetNumTasksOnNode0(com);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Since there is no guarantee that anyone other than
|
* Since there is no guarantee that anyone other than
|
||||||
|
@ -938,12 +946,28 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com)
|
||||||
while (tests != NULL) {
|
while (tests != NULL) {
|
||||||
IOR_param_t *params = & tests->params;
|
IOR_param_t *params = & tests->params;
|
||||||
params->testComm = com;
|
params->testComm = com;
|
||||||
params->nodes = params->numTasks / tasksPerNode;
|
|
||||||
params->tasksPerNode = tasksPerNode;
|
/* use MPI values if not overridden on command-line */
|
||||||
params->tasksBlockMapping = QueryNodeMapping(com,false);
|
if (params->numNodes == -1) {
|
||||||
if (params->numTasks == 0) {
|
params->numNodes = mpiNumNodes;
|
||||||
params->numTasks = size;
|
|
||||||
}
|
}
|
||||||
|
if (params->numTasks == -1) {
|
||||||
|
params->numTasks = mpiNumTasks;
|
||||||
|
} else if (params->numTasks > mpiNumTasks) {
|
||||||
|
if (rank == 0) {
|
||||||
|
fprintf(out_logfile,
|
||||||
|
"WARNING: More tasks requested (%d) than available (%d),",
|
||||||
|
params->numTasks, mpiNumTasks);
|
||||||
|
fprintf(out_logfile, " running with %d tasks.\n",
|
||||||
|
mpiNumTasks);
|
||||||
|
}
|
||||||
|
params->numTasks = mpiNumTasks;
|
||||||
|
}
|
||||||
|
if (params->numTasksOnNode0 == -1) {
|
||||||
|
params->numTasksOnNode0 = mpiNumTasksOnNode0;
|
||||||
|
}
|
||||||
|
|
||||||
|
params->tasksBlockMapping = QueryNodeMapping(com,false);
|
||||||
params->expectedAggFileSize =
|
params->expectedAggFileSize =
|
||||||
params->blockSize * params->segmentCount * params->numTasks;
|
params->blockSize * params->segmentCount * params->numTasks;
|
||||||
|
|
||||||
|
@ -1091,7 +1115,7 @@ static void *HogMemory(IOR_param_t *params)
|
||||||
if (verbose >= VERBOSE_3)
|
if (verbose >= VERBOSE_3)
|
||||||
fprintf(out_logfile, "This node hogging %ld bytes of memory\n",
|
fprintf(out_logfile, "This node hogging %ld bytes of memory\n",
|
||||||
params->memoryPerNode);
|
params->memoryPerNode);
|
||||||
size = params->memoryPerNode / params->tasksPerNode;
|
size = params->memoryPerNode / params->numTasksOnNode0;
|
||||||
} else {
|
} else {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -1191,16 +1215,6 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
IOR_io_buffers ioBuffers;
|
IOR_io_buffers ioBuffers;
|
||||||
|
|
||||||
/* set up communicator for test */
|
/* set up communicator for test */
|
||||||
if (params->numTasks > numTasksWorld) {
|
|
||||||
if (rank == 0) {
|
|
||||||
fprintf(out_logfile,
|
|
||||||
"WARNING: More tasks requested (%d) than available (%d),",
|
|
||||||
params->numTasks, numTasksWorld);
|
|
||||||
fprintf(out_logfile, " running on %d tasks.\n",
|
|
||||||
numTasksWorld);
|
|
||||||
}
|
|
||||||
params->numTasks = numTasksWorld;
|
|
||||||
}
|
|
||||||
MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group),
|
MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group),
|
||||||
"MPI_Comm_group() error");
|
"MPI_Comm_group() error");
|
||||||
range[0] = 0; /* first rank */
|
range[0] = 0; /* first rank */
|
||||||
|
@ -1227,7 +1241,6 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
"Using reorderTasks '-C' (useful to avoid read cache in client)\n");
|
"Using reorderTasks '-C' (useful to avoid read cache in client)\n");
|
||||||
fflush(out_logfile);
|
fflush(out_logfile);
|
||||||
}
|
}
|
||||||
params->tasksPerNode = CountTasksPerNode(testComm);
|
|
||||||
backend = params->backend;
|
backend = params->backend;
|
||||||
/* show test setup */
|
/* show test setup */
|
||||||
if (rank == 0 && verbose >= VERBOSE_0)
|
if (rank == 0 && verbose >= VERBOSE_0)
|
||||||
|
@ -1364,7 +1377,7 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
/* move two nodes away from writing node */
|
/* move two nodes away from writing node */
|
||||||
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
|
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
|
||||||
if (params->tasksBlockMapping) {
|
if (params->tasksBlockMapping) {
|
||||||
shift = params->tasksPerNode; /* switch to by-slot (contiguous block) mapping */
|
shift = params->numTasksOnNode0; /* switch to by-slot (contiguous block) mapping */
|
||||||
}
|
}
|
||||||
rankOffset = (2 * shift) % params->numTasks;
|
rankOffset = (2 * shift) % params->numTasks;
|
||||||
}
|
}
|
||||||
|
@ -1389,7 +1402,7 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
if(params->stoneWallingStatusFile){
|
if(params->stoneWallingStatusFile){
|
||||||
params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile);
|
params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile);
|
||||||
if(params->stoneWallingWearOutIterations == -1 && rank == 0){
|
if(params->stoneWallingWearOutIterations == -1 && rank == 0){
|
||||||
fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!");
|
fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n");
|
||||||
params->stoneWallingWearOutIterations = 0;
|
params->stoneWallingWearOutIterations = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1404,7 +1417,7 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
/* move one node away from writing node */
|
/* move one node away from writing node */
|
||||||
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
|
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
|
||||||
if (params->tasksBlockMapping) {
|
if (params->tasksBlockMapping) {
|
||||||
shift=params->tasksPerNode; /* switch to a by-slot (contiguous block) mapping */
|
shift=params->numTasksOnNode0; /* switch to a by-slot (contiguous block) mapping */
|
||||||
}
|
}
|
||||||
rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks;
|
rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks;
|
||||||
}
|
}
|
||||||
|
@ -1415,7 +1428,7 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
int nodeoffset;
|
int nodeoffset;
|
||||||
unsigned int iseed0;
|
unsigned int iseed0;
|
||||||
nodeoffset = params->taskPerNodeOffset;
|
nodeoffset = params->taskPerNodeOffset;
|
||||||
nodeoffset = (nodeoffset < params->nodes) ? nodeoffset : params->nodes - 1;
|
nodeoffset = (nodeoffset < params->numNodes) ? nodeoffset : params->numNodes - 1;
|
||||||
if (params->reorderTasksRandomSeed < 0)
|
if (params->reorderTasksRandomSeed < 0)
|
||||||
iseed0 = -1 * params->reorderTasksRandomSeed + rep;
|
iseed0 = -1 * params->reorderTasksRandomSeed + rep;
|
||||||
else
|
else
|
||||||
|
@ -1425,7 +1438,7 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
rankOffset = rand() % params->numTasks;
|
rankOffset = rand() % params->numTasks;
|
||||||
}
|
}
|
||||||
while (rankOffset <
|
while (rankOffset <
|
||||||
(nodeoffset * params->tasksPerNode)) {
|
(nodeoffset * params->numTasksOnNode0)) {
|
||||||
rankOffset = rand() % params->numTasks;
|
rankOffset = rand() % params->numTasks;
|
||||||
}
|
}
|
||||||
/* Get more detailed stats if requested by verbose level */
|
/* Get more detailed stats if requested by verbose level */
|
||||||
|
@ -1455,7 +1468,7 @@ static void TestIoSys(IOR_test_t *test)
|
||||||
"barrier error");
|
"barrier error");
|
||||||
if (rank == 0 && verbose >= VERBOSE_1) {
|
if (rank == 0 && verbose >= VERBOSE_1) {
|
||||||
fprintf(out_logfile,
|
fprintf(out_logfile,
|
||||||
"Commencing read performance test: %s",
|
"Commencing read performance test: %s\n",
|
||||||
CurrentTimeString());
|
CurrentTimeString());
|
||||||
}
|
}
|
||||||
timer[2] = GetTimeStamp();
|
timer[2] = GetTimeStamp();
|
||||||
|
|
|
@ -98,8 +98,8 @@ typedef struct
|
||||||
// intermediate options
|
// intermediate options
|
||||||
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
|
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
|
||||||
int numTasks; /* number of tasks for test */
|
int numTasks; /* number of tasks for test */
|
||||||
int nodes; /* number of nodes for test */
|
int numNodes; /* number of nodes for test */
|
||||||
int tasksPerNode; /* number of tasks per node */
|
int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */
|
||||||
int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */
|
int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */
|
||||||
int repetitions; /* number of repetitions of test */
|
int repetitions; /* number of repetitions of test */
|
||||||
int repCounter; /* rep counter */
|
int repCounter; /* rep counter */
|
||||||
|
|
13
src/mdtest.c
13
src/mdtest.c
|
@ -1870,7 +1870,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
|
|
||||||
mdtest_init_args();
|
mdtest_init_args();
|
||||||
int i, j;
|
int i, j;
|
||||||
int nodeCount;
|
int numNodes;
|
||||||
|
int numTasksOnNode0 = 0;
|
||||||
MPI_Group worldgroup, testgroup;
|
MPI_Group worldgroup, testgroup;
|
||||||
struct {
|
struct {
|
||||||
int first;
|
int first;
|
||||||
|
@ -1950,8 +1951,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
pid = getpid();
|
pid = getpid();
|
||||||
uid = getuid();
|
uid = getuid();
|
||||||
|
|
||||||
tasksPerNode = CountTasksPerNode(testComm);
|
numNodes = GetNumNodes(testComm);
|
||||||
nodeCount = size / tasksPerNode;
|
numTasksOnNode0 = GetNumTasksOnNode0(testComm);
|
||||||
|
|
||||||
char cmd_buffer[4096];
|
char cmd_buffer[4096];
|
||||||
strncpy(cmd_buffer, argv[0], 4096);
|
strncpy(cmd_buffer, argv[0], 4096);
|
||||||
|
@ -1960,7 +1961,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
}
|
}
|
||||||
|
|
||||||
VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
|
VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp());
|
||||||
VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, nodeCount);
|
VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, numNodes);
|
||||||
VERBOSE(0,-1,"Command line used: %s", cmd_buffer);
|
VERBOSE(0,-1,"Command line used: %s", cmd_buffer);
|
||||||
|
|
||||||
/* adjust special variables */
|
/* adjust special variables */
|
||||||
|
@ -2128,10 +2129,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
|
|
||||||
/* set the shift to mimic IOR and shift by procs per node */
|
/* set the shift to mimic IOR and shift by procs per node */
|
||||||
if (nstride > 0) {
|
if (nstride > 0) {
|
||||||
if ( nodeCount > 1 && tasksBlockMapping ) {
|
if ( numNodes > 1 && tasksBlockMapping ) {
|
||||||
/* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks
|
/* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks
|
||||||
however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */
|
however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */
|
||||||
nstride *= tasksPerNode;
|
nstride *= numTasksOnNode0;
|
||||||
}
|
}
|
||||||
VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride);
|
VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride);
|
||||||
}
|
}
|
||||||
|
|
|
@ -151,8 +151,12 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
|
||||||
params->maxTimeDuration = atoi(value);
|
params->maxTimeDuration = atoi(value);
|
||||||
} else if (strcasecmp(option, "outlierthreshold") == 0) {
|
} else if (strcasecmp(option, "outlierthreshold") == 0) {
|
||||||
params->outlierThreshold = atoi(value);
|
params->outlierThreshold = atoi(value);
|
||||||
} else if (strcasecmp(option, "nodes") == 0) {
|
} else if (strcasecmp(option, "numnodes") == 0) {
|
||||||
params->nodes = atoi(value);
|
params->numNodes = atoi(value);
|
||||||
|
} else if (strcasecmp(option, "numtasks") == 0) {
|
||||||
|
params->numTasks = atoi(value);
|
||||||
|
} else if (strcasecmp(option, "numtasksonnode0") == 0) {
|
||||||
|
params->numTasksOnNode0 = atoi(value);
|
||||||
} else if (strcasecmp(option, "repetitions") == 0) {
|
} else if (strcasecmp(option, "repetitions") == 0) {
|
||||||
params->repetitions = atoi(value);
|
params->repetitions = atoi(value);
|
||||||
} else if (strcasecmp(option, "intertestdelay") == 0) {
|
} else if (strcasecmp(option, "intertestdelay") == 0) {
|
||||||
|
@ -286,8 +290,6 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
|
||||||
params->beegfs_chunkSize = string_to_bytes(value);
|
params->beegfs_chunkSize = string_to_bytes(value);
|
||||||
if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16))
|
if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16))
|
||||||
ERR("beegfsChunkSize must be a power of two and >64k");
|
ERR("beegfsChunkSize must be a power of two and >64k");
|
||||||
} else if (strcasecmp(option, "numtasks") == 0) {
|
|
||||||
params->numTasks = atoi(value);
|
|
||||||
} else if (strcasecmp(option, "summaryalways") == 0) {
|
} else if (strcasecmp(option, "summaryalways") == 0) {
|
||||||
params->summary_every_test = atoi(value);
|
params->summary_every_test = atoi(value);
|
||||||
} else {
|
} else {
|
||||||
|
@ -498,7 +500,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
|
||||||
{'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile},
|
{'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile},
|
||||||
{'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr},
|
{'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr},
|
||||||
{'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & params->noFill},
|
{'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & params->noFill},
|
||||||
{'N', NULL, "numTasks -- number of tasks that should participate in the test", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks},
|
{'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks},
|
||||||
{'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName},
|
{'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName},
|
||||||
{'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper},
|
{'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper},
|
||||||
{'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & params->preallocate},
|
{'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & params->preallocate},
|
||||||
|
|
109
src/utilities.c
109
src/utilities.c
|
@ -53,11 +53,9 @@
|
||||||
extern int errno;
|
extern int errno;
|
||||||
extern int numTasks;
|
extern int numTasks;
|
||||||
|
|
||||||
/* globals used by other files, also defined "extern" in ior.h */
|
/* globals used by other files, also defined "extern" in utilities.h */
|
||||||
int numTasksWorld = 0;
|
|
||||||
int rank = 0;
|
int rank = 0;
|
||||||
int rankOffset = 0;
|
int rankOffset = 0;
|
||||||
int tasksPerNode = 0; /* tasks per node */
|
|
||||||
int verbose = VERBOSE_0; /* verbose output */
|
int verbose = VERBOSE_0; /* verbose output */
|
||||||
MPI_Comm testComm;
|
MPI_Comm testComm;
|
||||||
MPI_Comm mpi_comm_world;
|
MPI_Comm mpi_comm_world;
|
||||||
|
@ -265,35 +263,108 @@ int QueryNodeMapping(MPI_Comm comm, int print_nodemap) {
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There is a more direct way to determine the node count in modern MPI
|
||||||
|
* versions so we use that if possible.
|
||||||
|
*
|
||||||
|
* For older versions we use a method which should still provide accurate
|
||||||
|
* results even if the total number of tasks is not evenly divisible by the
|
||||||
|
* tasks on node rank 0.
|
||||||
|
*/
|
||||||
|
int GetNumNodes(MPI_Comm comm) {
|
||||||
#if MPI_VERSION >= 3
|
#if MPI_VERSION >= 3
|
||||||
int CountTasksPerNode(MPI_Comm comm) {
|
MPI_Comm shared_comm;
|
||||||
/* modern MPI provides a simple way to get the local process count */
|
int shared_rank = 0;
|
||||||
MPI_Comm shared_comm;
|
int local_result = 0;
|
||||||
int count;
|
int numNodes = 0;
|
||||||
|
|
||||||
|
MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
|
||||||
|
"MPI_Comm_split_type() error");
|
||||||
|
MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error");
|
||||||
|
local_result = shared_rank == 0? 1 : 0;
|
||||||
|
MPI_CHECK(MPI_Allreduce(&local_result, &numNodes, 1, MPI_INT, MPI_SUM, comm),
|
||||||
|
"MPI_Allreduce() error");
|
||||||
|
MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error");
|
||||||
|
|
||||||
MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
|
return numNodes;
|
||||||
MPI_Comm_size (shared_comm, &count);
|
#else
|
||||||
MPI_Comm_free (&shared_comm);
|
int numTasks = 0;
|
||||||
|
int numTasksOnNode0 = 0;
|
||||||
|
|
||||||
return count;
|
numTasks = GetNumTasks(comm);
|
||||||
|
numTasksOnNode0 = GetNumTasksOnNode0(comm);
|
||||||
|
|
||||||
|
return ((numTasks - 1) / numTasksOnNode0) + 1;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int GetNumTasks(MPI_Comm comm) {
|
||||||
|
int numTasks = 0;
|
||||||
|
|
||||||
|
MPI_CHECK(MPI_Comm_size(comm, &numTasks), "cannot get number of tasks");
|
||||||
|
|
||||||
|
return numTasks;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It's very important that this method provide the same result to every
|
||||||
|
* process as it's used for redistributing which jobs read from which files.
|
||||||
|
* It was renamed accordingly.
|
||||||
|
*
|
||||||
|
* If different nodes get different results from this method then jobs get
|
||||||
|
* redistributed unevenly and you no longer have a 1:1 relationship with some
|
||||||
|
* nodes reading multiple files while others read none.
|
||||||
|
*
|
||||||
|
* In the common case the number of tasks on each node (MPI_Comm_size on an
|
||||||
|
* MPI_COMM_TYPE_SHARED communicator) will be the same. However, there is
|
||||||
|
* nothing which guarantees this. It's valid to have, for example, 64 jobs
|
||||||
|
* across 4 systems which can run 20 jobs each. In that scenario you end up
|
||||||
|
* with 3 MPI_COMM_TYPE_SHARED groups of 20, and one group of 4.
|
||||||
|
*
|
||||||
|
* In the (MPI_VERSION < 3) implementation of this method consistency is
|
||||||
|
* ensured by asking specifically about the number of tasks on the node with
|
||||||
|
* rank 0. In the original implementation for (MPI_VERSION >= 3) this was
|
||||||
|
* broken by using the LOCAL process count which differed depending on which
|
||||||
|
* node you were on.
|
||||||
|
*
|
||||||
|
* This was corrected below by first splitting the comm into groups by node
|
||||||
|
* (MPI_COMM_TYPE_SHARED) and then having only the node with world rank 0 and
|
||||||
|
* shared rank 0 return the MPI_Comm_size of its shared subgroup. This yields
|
||||||
|
* the original consistent behavior no matter which node asks.
|
||||||
|
*
|
||||||
|
* In the common case where every node has the same number of tasks this
|
||||||
|
* method will return the same value it always has.
|
||||||
|
*/
|
||||||
|
int GetNumTasksOnNode0(MPI_Comm comm) {
|
||||||
|
#if MPI_VERSION >= 3
|
||||||
|
MPI_Comm shared_comm;
|
||||||
|
int shared_rank = 0;
|
||||||
|
int tasks_on_node_rank0 = 0;
|
||||||
|
int local_result = 0;
|
||||||
|
|
||||||
|
MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm),
|
||||||
|
"MPI_Comm_split_type() error");
|
||||||
|
MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error");
|
||||||
|
if (rank == 0 && shared_rank == 0) {
|
||||||
|
MPI_CHECK(MPI_Comm_size(shared_comm, &local_result), "MPI_Comm_size() error");
|
||||||
|
}
|
||||||
|
MPI_CHECK(MPI_Allreduce(&local_result, &tasks_on_node_rank0, 1, MPI_INT, MPI_SUM, comm),
|
||||||
|
"MPI_Allreduce() error");
|
||||||
|
MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error");
|
||||||
|
|
||||||
|
return tasks_on_node_rank0;
|
||||||
#else
|
#else
|
||||||
/*
|
/*
|
||||||
* Count the number of tasks that share a host.
|
* This version employs the gethostname() call, rather than using
|
||||||
*
|
|
||||||
* This function employees the gethostname() call, rather than using
|
|
||||||
* MPI_Get_processor_name(). We are interested in knowing the number
|
* MPI_Get_processor_name(). We are interested in knowing the number
|
||||||
* of tasks that share a file system client (I/O node, compute node,
|
* of tasks that share a file system client (I/O node, compute node,
|
||||||
* whatever that may be). However on machines like BlueGene/Q,
|
* whatever that may be). However on machines like BlueGene/Q,
|
||||||
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
|
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
|
||||||
* not the node where the I/O is function shipped to. gethostname()
|
* not the node where the I/O is function shipped to. gethostname()
|
||||||
* is assumed to identify the shared filesystem client in more situations.
|
* is assumed to identify the shared filesystem client in more situations.
|
||||||
*
|
|
||||||
* NOTE: This also assumes that the task count on all nodes is equal
|
|
||||||
* to the task count on the host running MPI task 0.
|
|
||||||
*/
|
*/
|
||||||
int CountTasksPerNode(MPI_Comm comm) {
|
|
||||||
int size;
|
int size;
|
||||||
MPI_Comm_size(comm, & size);
|
MPI_Comm_size(comm, & size);
|
||||||
/* for debugging and testing */
|
/* for debugging and testing */
|
||||||
|
@ -336,8 +407,8 @@ int CountTasksPerNode(MPI_Comm comm) {
|
||||||
MPI_Bcast(&count, 1, MPI_INT, 0, comm);
|
MPI_Bcast(&count, 1, MPI_INT, 0, comm);
|
||||||
|
|
||||||
return(count);
|
return(count);
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -18,10 +18,8 @@
|
||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#include "ior.h"
|
#include "ior.h"
|
||||||
|
|
||||||
extern int numTasksWorld;
|
|
||||||
extern int rank;
|
extern int rank;
|
||||||
extern int rankOffset;
|
extern int rankOffset;
|
||||||
extern int tasksPerNode;
|
|
||||||
extern int verbose;
|
extern int verbose;
|
||||||
extern MPI_Comm testComm;
|
extern MPI_Comm testComm;
|
||||||
extern MPI_Comm mpi_comm_world;
|
extern MPI_Comm mpi_comm_world;
|
||||||
|
@ -55,8 +53,10 @@ void SeedRandGen(MPI_Comm);
|
||||||
void SetHints (MPI_Info *, char *);
|
void SetHints (MPI_Info *, char *);
|
||||||
void ShowHints (MPI_Info *);
|
void ShowHints (MPI_Info *);
|
||||||
char *HumanReadable(IOR_offset_t value, int base);
|
char *HumanReadable(IOR_offset_t value, int base);
|
||||||
int CountTasksPerNode(MPI_Comm comm);
|
|
||||||
int QueryNodeMapping(MPI_Comm comm, int print_nodemap);
|
int QueryNodeMapping(MPI_Comm comm, int print_nodemap);
|
||||||
|
int GetNumNodes(MPI_Comm);
|
||||||
|
int GetNumTasks(MPI_Comm);
|
||||||
|
int GetNumTasksOnNode0(MPI_Comm);
|
||||||
void DelaySecs(int delay);
|
void DelaySecs(int delay);
|
||||||
void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
|
void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
|
||||||
size_t NodeMemoryStringToBytes(char *size_str);
|
size_t NodeMemoryStringToBytes(char *size_str);
|
||||||
|
|
Loading…
Reference in New Issue