Merge pull request #162 from johnbent/master

Fixed shift.  Also cleaned up output messages
master
Julian Kunkel 2019-08-01 15:21:26 +01:00 committed by GitHub
commit b686b6c26a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 283 additions and 538 deletions

View File

@ -939,6 +939,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com)
params->testComm = com; params->testComm = com;
params->nodes = params->numTasks / tasksPerNode; params->nodes = params->numTasks / tasksPerNode;
params->tasksPerNode = tasksPerNode; params->tasksPerNode = tasksPerNode;
params->tasksBlockMapping = QueryNodeMapping(com,false);
if (params->numTasks == 0) { if (params->numTasks == 0) {
params->numTasks = size; params->numTasks = size;
} }
@ -1222,7 +1223,7 @@ static void TestIoSys(IOR_test_t *test)
} }
if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) { if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) {
fprintf(out_logfile, fprintf(out_logfile,
"Using reorderTasks '-C' (expecting block, not cyclic, task assignment)\n"); "Using reorderTasks '-C' (useful to avoid read cache in client)\n");
fflush(out_logfile); fflush(out_logfile);
} }
params->tasksPerNode = CountTasksPerNode(testComm); params->tasksPerNode = CountTasksPerNode(testComm);
@ -1360,7 +1361,11 @@ static void TestIoSys(IOR_test_t *test)
} }
if (params->reorderTasks) { if (params->reorderTasks) {
/* move two nodes away from writing node */ /* move two nodes away from writing node */
rankOffset = (2 * params->tasksPerNode) % params->numTasks; int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
if (params->tasksBlockMapping) {
shift = params->tasksPerNode; /* switch to by-slot (contiguous block) mapping */
}
rankOffset = (2 * shift) % params->numTasks;
} }
// update the check buffer // update the check buffer
@ -1395,9 +1400,12 @@ static void TestIoSys(IOR_test_t *test)
/* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */ /* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */
/* Constant process offset reading */ /* Constant process offset reading */
if (params->reorderTasks) { if (params->reorderTasks) {
/* move taskPerNodeOffset nodes[1==default] away from writing node */ /* move one node away from writing node */
rankOffset = (params->taskPerNodeOffset * int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
params->tasksPerNode) % params->numTasks; if (params->tasksBlockMapping) {
shift=params->tasksPerNode; /* switch to a by-slot (contiguous block) mapping */
}
rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks;
} }
/* random process offset reading */ /* random process offset reading */
if (params->reorderTasksRandom) { if (params->reorderTasksRandom) {

View File

@ -100,6 +100,7 @@ typedef struct
int numTasks; /* number of tasks for test */ int numTasks; /* number of tasks for test */
int nodes; /* number of nodes for test */ int nodes; /* number of nodes for test */
int tasksPerNode; /* number of tasks per node */ int tasksPerNode; /* number of tasks per node */
int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */
int repetitions; /* number of repetitions of test */ int repetitions; /* number of repetitions of test */
int repCounter; /* rep counter */ int repCounter; /* rep counter */
int multiFile; /* multiple files */ int multiFile; /* multiple files */

File diff suppressed because it is too large Load Diff

View File

@ -471,7 +471,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
{'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & params->referenceNumber}, {'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & params->referenceNumber},
{'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->blockSize}, {'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->blockSize},
{'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & params->collective}, {'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & params->collective},
{'C', NULL, "reorderTasks -- changes task ordering to n+1 ordering for readback", OPTION_FLAG, 'd', & params->reorderTasks}, {'C', NULL, "reorderTasks -- changes task ordering for readback (useful to avoid client cache)", OPTION_FLAG, 'd', & params->reorderTasks},
{'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & params->interTestDelay}, {'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & params->interTestDelay},
{'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & params->deadlineForStonewalling}, {'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & params->deadlineForStonewalling},
{.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT},

View File

@ -20,6 +20,7 @@
# define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */ # define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */
#endif /* __linux__ */ #endif /* __linux__ */
#include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <errno.h> #include <errno.h>
@ -75,6 +76,17 @@ void* safeMalloc(uint64_t size){
return d; return d;
} }
/*
 * Report a fatal error and abort the MPI job.
 *
 * Formats the printf-style message, writes a single line to out_logfile
 * (timestamp, rank, location, message, and the strerror() text for the errno
 * value that was current when this function was entered), flushes the log
 * and calls MPI_Abort(testComm, 1).
 *
 * rank     - rank number printed in the message
 * location - string naming where the failure happened (printed with %s)
 * format   - printf-style format string, followed by its arguments
 *
 * NOTE(review): location is printed with %s, so it must be a string; the
 * non-Linux ERROR_LOCATION appears to expand to __LINE__ (an int) - confirm.
 */
void FailMessage(int rank, const char *location, char *format, ...) {
    /* Capture errno first: the formatting/logging calls below may overwrite it. */
    const int saved_errno = errno;
    char msg[4096];
    va_list args;

    va_start(args, format);
    vsnprintf(msg, sizeof msg, format, args);
    va_end(args);

    fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n",
            PrintTimestamp(), rank, location, msg, strerror(saved_errno));
    fflush(out_logfile);
    MPI_Abort(testComm, 1);
}
size_t NodeMemoryStringToBytes(char *size_str) size_t NodeMemoryStringToBytes(char *size_str)
{ {
@ -213,12 +225,51 @@ void DumpBuffer(void *buffer,
return; return;
} /* DumpBuffer() */ } /* DumpBuffer() */
/*
 * Determine whether rank 1 shares a host with rank 0.
 *
 * Rank 0 broadcasts its hostname; every rank compares it with its own
 * hostname and the 0/1 results are gathered on rank 0 into a per-rank map
 * (index = rank, value = 1 if that rank is on the same host as root).
 * If print_nodemap is non-zero, rank 0 writes that map to out_logfile.
 *
 * Returns, identically on every rank, 1 if rank 1 is on the same host as
 * rank 0 and 0 otherwise. A communicator with fewer than two ranks yields 0.
 *
 * NOTE(review): root-only work is selected through the file-level `rank`
 * variable, which is assumed to equal the caller's rank in `comm` - confirm.
 */
int QueryNodeMapping(MPI_Comm comm, int print_nodemap) {
    char localhost[MAX_PATHLEN], roothost[MAX_PATHLEN];
    int num_ranks;
    int ret = 0;

    MPI_Comm_size(comm, &num_ranks);

    int *node_map = malloc((size_t)num_ranks * sizeof *node_map);
    if (!node_map) {
        FAIL("malloc");
    }
    if (gethostname(localhost, MAX_PATHLEN) != 0) {
        FAIL("gethostname()");
    }
    /* gethostname() is not required to terminate a truncated name */
    localhost[MAX_PATHLEN - 1] = '\0';

    if (rank == 0) {
        /* localhost is terminated above, so strncpy terminates and zero-pads roothost */
        strncpy(roothost, localhost, MAX_PATHLEN);
    }
    /* have rank 0 broadcast out its hostname */
    MPI_Bcast(roothost, MAX_PATHLEN, MPI_CHAR, 0, comm);

    /* then every rank figures out whether it is on the same host as root and root gathers that */
    int same_as_root = strcmp(roothost, localhost) == 0;
    MPI_Gather(&same_as_root, 1, MPI_INT, node_map, 1, MPI_INT, 0, comm);

    if (rank == 0) {
        if (print_nodemap) {
            fprintf(out_logfile, "Nodemap: ");
            for (int i = 0; i < num_ranks; i++) {
                fprintf(out_logfile, "%d", node_map[i]);
            }
            fprintf(out_logfile, "\n");
        }
        /* Only root's node_map was filled by the gather, and index 1 exists
           only when there are at least two ranks. */
        if (num_ranks > 1) {
            ret = node_map[1] == 1;
        }
    }

    /* distribute root's answer so every rank returns the same value */
    MPI_Bcast(&ret, 1, MPI_INT, 0, comm);
    free(node_map);
    return ret;
}
#if MPI_VERSION >= 3 #if MPI_VERSION >= 3
int CountTasksPerNode(MPI_Comm comm) { int CountTasksPerNode(MPI_Comm comm) {
/* modern MPI provides a simple way to get the local process count */ /* modern MPI provides a simple way to get the local process count */
MPI_Comm shared_comm; MPI_Comm shared_comm;
int count; int count;
MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm); MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
MPI_Comm_size (shared_comm, &count); MPI_Comm_size (shared_comm, &count);
MPI_Comm_free (&shared_comm); MPI_Comm_free (&shared_comm);

View File

@ -36,23 +36,14 @@ extern enum OutputFormat_t outputFormat; /* format of the output */
#ifdef __linux__ #ifdef __linux__
#define FAIL(msg) do { \ #define ERROR_LOCATION __func__
fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \
PrintTimestamp(), rank, __func__, \
msg, strerror(errno)); \
fflush(out_logfile); \
MPI_Abort(testComm, 1); \
} while(0)
#else #else
#define FAIL(msg) do { \ #define ERROR_LOCATION __LINE__
fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \
PrintTimestamp(), rank, __LINE__, \
msg, strerror(errno)); \
fflush(out_logfile); \
MPI_Abort(testComm, 1); \
} while(0)
#endif #endif
#define FAIL(...) FailMessage(rank, ERROR_LOCATION, __VA_ARGS__)
void FailMessage(int rank, const char *location, char *format, ...);
void* safeMalloc(uint64_t size); void* safeMalloc(uint64_t size);
void set_o_direct_flag(int *fd); void set_o_direct_flag(int *fd);
@ -65,6 +56,7 @@ void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *); void ShowHints (MPI_Info *);
char *HumanReadable(IOR_offset_t value, int base); char *HumanReadable(IOR_offset_t value, int base);
int CountTasksPerNode(MPI_Comm comm); int CountTasksPerNode(MPI_Comm comm);
int QueryNodeMapping(MPI_Comm comm, int print_nodemap);
void DelaySecs(int delay); void DelaySecs(int delay);
void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
size_t NodeMemoryStringToBytes(char *size_str); size_t NodeMemoryStringToBytes(char *size_str);