Merge pull request #162 from johnbent/master

Fixed shift.  Also cleaned up output messages
master
Julian Kunkel 2019-08-01 15:21:26 +01:00 committed by GitHub
commit b686b6c26a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 283 additions and 538 deletions

View File

@ -939,6 +939,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com)
params->testComm = com;
params->nodes = params->numTasks / tasksPerNode;
params->tasksPerNode = tasksPerNode;
params->tasksBlockMapping = QueryNodeMapping(com,false);
if (params->numTasks == 0) {
params->numTasks = size;
}
@ -1222,7 +1223,7 @@ static void TestIoSys(IOR_test_t *test)
}
if (rank == 0 && params->reorderTasks == TRUE && verbose >= VERBOSE_1) {
fprintf(out_logfile,
"Using reorderTasks '-C' (expecting block, not cyclic, task assignment)\n");
"Using reorderTasks '-C' (useful to avoid read cache in client)\n");
fflush(out_logfile);
}
params->tasksPerNode = CountTasksPerNode(testComm);
@ -1360,7 +1361,11 @@ static void TestIoSys(IOR_test_t *test)
}
if (params->reorderTasks) {
/* move two nodes away from writing node */
rankOffset = (2 * params->tasksPerNode) % params->numTasks;
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
if (params->tasksBlockMapping) {
shift = params->tasksPerNode; /* switch to by-slot (contiguous block) mapping */
}
rankOffset = (2 * shift) % params->numTasks;
}
// update the check buffer
@ -1395,9 +1400,12 @@ static void TestIoSys(IOR_test_t *test)
/* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */
/* Constant process offset reading */
if (params->reorderTasks) {
/* move taskPerNodeOffset nodes[1==default] away from writing node */
rankOffset = (params->taskPerNodeOffset *
params->tasksPerNode) % params->numTasks;
/* move one node away from writing node */
int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */
if (params->tasksBlockMapping) {
shift=params->tasksPerNode; /* switch to a by-slot (contiguous block) mapping */
}
rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks;
}
/* random process offset reading */
if (params->reorderTasksRandom) {

View File

@ -100,6 +100,7 @@ typedef struct
int numTasks; /* number of tasks for test */
int nodes; /* number of nodes for test */
int tasksPerNode; /* number of tasks per node */
int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */
int repetitions; /* number of repetitions of test */
int repCounter; /* rep counter */
int multiFile; /* multiple files */

File diff suppressed because it is too large Load Diff

View File

@ -471,7 +471,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
{'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & params->referenceNumber},
{'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & params->blockSize},
{'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & params->collective},
{'C', NULL, "reorderTasks -- changes task ordering to n+1 ordering for readback", OPTION_FLAG, 'd', & params->reorderTasks},
{'C', NULL, "reorderTasks -- changes task ordering for readback (useful to avoid client cache)", OPTION_FLAG, 'd', & params->reorderTasks},
{'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & params->interTestDelay},
{'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & params->deadlineForStonewalling},
{.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT},

View File

@ -20,6 +20,7 @@
# define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */
#endif /* __linux__ */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
@ -75,6 +76,17 @@ void* safeMalloc(uint64_t size){
return d;
}
/*
 * Report a fatal error on out_logfile and abort the MPI job.
 *
 * rank     - rank number printed in the message
 * location - text identifying where the failure was detected
 * format   - printf-style format string, followed by its arguments
 *
 * The line written contains PrintTimestamp(), the rank, the location, the
 * formatted message and strerror() of the errno value that was current when
 * this function was entered.  The log is flushed and MPI_Abort(testComm, 1)
 * is then called.
 */
void FailMessage(int rank, const char *location, char *format, ...) {
    /* capture errno first: the formatting/timestamp calls below may overwrite it */
    const int saved_errno = errno;
    char msg[4096];
    va_list args;

    va_start(args, format);
    vsnprintf(msg, sizeof msg, format, args);
    va_end(args);

    fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n",
            PrintTimestamp(), rank, location, msg, strerror(saved_errno));
    fflush(out_logfile);
    MPI_Abort(testComm, 1);
}
size_t NodeMemoryStringToBytes(char *size_str)
{
@ -213,12 +225,51 @@ void DumpBuffer(void *buffer,
return;
} /* DumpBuffer() */
/*
 * Determine how the ranks of comm are placed relative to the host of rank 0.
 *
 * Every rank compares its own hostname with the hostname broadcast by rank 0
 * of comm.  The resulting 0/1 flags are gathered on rank 0, which optionally
 * prints them to out_logfile as a "Nodemap: " line (one digit per rank,
 * 1 = same host as rank 0).
 *
 * comm          - communicator whose ranks are examined
 * print_nodemap - non-zero to have rank 0 print the gathered map
 *
 * Returns, identically on every rank, 1 if rank 1 runs on the same host as
 * rank 0 and 0 otherwise.  A communicator with a single rank has no rank 1;
 * 1 is returned in that case.
 */
int QueryNodeMapping(MPI_Comm comm, int print_nodemap) {
    char localhost[MAX_PATHLEN], roothost[MAX_PATHLEN];
    int num_ranks;
    int comm_rank;

    MPI_Comm_size(comm, &num_ranks);
    /* the collectives below use root 0 of comm, so decide "am I root" within comm too */
    MPI_Comm_rank(comm, &comm_rank);

    int *node_map = malloc(sizeof *node_map * num_ranks);
    if ( ! node_map ) {
        FAIL("malloc");
    }
    if (gethostname(localhost, MAX_PATHLEN) != 0) {
        FAIL("gethostname()");
    }
    /* gethostname() need not NUL-terminate a truncated name */
    localhost[MAX_PATHLEN - 1] = '\0';

    if (comm_rank == 0) {
        /* localhost is terminated above, so strncpy terminates and zero-pads roothost */
        strncpy(roothost, localhost, MAX_PATHLEN);
    }

    /* have rank 0 broadcast out its hostname */
    MPI_Bcast(roothost, MAX_PATHLEN, MPI_CHAR, 0, comm);

    /* then every rank figures out whether it is on the same host as root and root gathers that */
    int same_as_root = strcmp(roothost, localhost) == 0;
    MPI_Gather(&same_as_root, 1, MPI_INT, node_map, 1, MPI_INT, 0, comm);

    if (print_nodemap && comm_rank == 0) {
        fprintf(out_logfile, "Nodemap: ");
        for (int i = 0; i < num_ranks; i++) {
            fprintf(out_logfile, "%d", node_map[i]);
        }
        fprintf(out_logfile, "\n");
    }

    /*
     * node_map is only filled in on the root, and only has an entry for
     * rank 1 when there are at least two ranks; everyone else receives the
     * answer through the broadcast.
     */
    int ret = 1;
    if (comm_rank == 0 && num_ranks > 1) {
        ret = (node_map[1] == 1);
    }
    MPI_Bcast(&ret, 1, MPI_INT, 0, comm);

    free(node_map);
    return ret;
}
#if MPI_VERSION >= 3
int CountTasksPerNode(MPI_Comm comm) {
/* modern MPI provides a simple way to get the local process count */
MPI_Comm shared_comm;
int count;
MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
MPI_Comm_size (shared_comm, &count);
MPI_Comm_free (&shared_comm);

View File

@ -36,23 +36,14 @@ extern enum OutputFormat_t outputFormat; /* format of the output */
/*
 * ERROR_LOCATION is the text FAIL() hands to FailMessage() as its `location`
 * argument, so it must always expand to a string: the enclosing function
 * name on Linux, "line <N>" elsewhere (a bare __LINE__ is an integer and
 * cannot be passed as const char *).
 */
#define ERROR_STRINGIFY_(x) #x
#define ERROR_STRINGIFY(x) ERROR_STRINGIFY_(x)

#ifdef __linux__
#define ERROR_LOCATION __func__
#else
#define ERROR_LOCATION "line " ERROR_STRINGIFY(__LINE__)
#endif

/*
 * Report a fatal error: forwards the `rank` visible at the call site, the
 * location text and a printf-style message to FailMessage().
 */
#define FAIL(...) FailMessage(rank, ERROR_LOCATION, __VA_ARGS__)

/* Takes a rank, a location string and a printf-style format with its arguments. */
void FailMessage(int rank, const char *location, char *format, ...);
void* safeMalloc(uint64_t size);
void set_o_direct_flag(int *fd);
@ -65,6 +56,7 @@ void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *);
char *HumanReadable(IOR_offset_t value, int base);
int CountTasksPerNode(MPI_Comm comm);
int QueryNodeMapping(MPI_Comm comm, int print_nodemap);
void DelaySecs(int delay);
void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
size_t NodeMemoryStringToBytes(char *size_str);