From 524d053be1dd789cd4ba241011f9039def364d96 Mon Sep 17 00:00:00 2001 From: John Bent Date: Fri, 26 Jul 2019 08:55:24 -0600 Subject: [PATCH] Making shift work in mdtest as well as it works in IOR and on a per-node basis. Also added printing the nodemap so we can check the allocation. --- src/mdtest.c | 16 +++++++++++----- src/utilities.c | 38 ++++++++++++++++++++++++++++++++++++++ src/utilities.h | 1 + 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 2d15c9f..cbb0de8 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2268,11 +2268,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * fflush( out_logfile ); } - /* set the shift to mimic IOR and shift by procs per node */ - if (nstride > 0) { - nstride *= tasksPerNode; - } - /* setup total number of items and number of items per dir */ if (depth <= 0) { num_dirs_in_tree = 1; @@ -2381,6 +2376,17 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } if (rank == 0) display_freespace(testdirpath); + int packedByNode = QueryNodeMapping(testComm); + + /* set the shift to mimic IOR and shift by procs per node */ + if (nstride > 0) { + if ( packedByNode ) { + nstride *= tasksPerNode; + } + if (rank == 0) { + fprintf(out_logfile, "Shifting ranks by %d for each phase.\n", nstride); + } + } if (verbose >= 3 && rank == 0) { fprintf(out_logfile, "V-3: main (after display_freespace): testdirpath is \"%s\"\n", testdirpath ); diff --git a/src/utilities.c b/src/utilities.c index c0c7c61..493eb89 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -213,12 +213,50 @@ void DumpBuffer(void *buffer, return; } /* DumpBuffer() */ +/* a function that prints an int array where each index corresponds to a rank + and the value is whether that rank is on the same host as root. 
+   Collective over comm. Returns, on every rank, 1 if rank 1 shares the root's host (or comm has a single rank) and 0 otherwise.
+*/
+int QueryNodeMapping(MPI_Comm comm) {
+    char localhost[MAX_PATHLEN], roothost[MAX_PATHLEN];
+    int num_ranks, comm_rank, ret = 1;
+    MPI_Comm_size(comm, &num_ranks);
+    MPI_Comm_rank(comm, &comm_rank); /* the collectives below are rooted at rank 0 of comm */
+    int *node_map = malloc(sizeof *node_map * num_ranks);
+    if ( ! node_map ) {
+        FAIL("malloc");
+    }
+    if (gethostname(localhost, MAX_PATHLEN) != 0) {
+        FAIL("gethostname()");
+    }
+    localhost[MAX_PATHLEN - 1] = '\0'; /* a truncated hostname may come back unterminated */
+    snprintf(roothost, sizeof roothost, "%s", localhost);
+
+    /* rank 0 of comm broadcasts its hostname; the local copy on other ranks is overwritten */
+    MPI_Bcast(roothost, MAX_PATHLEN, MPI_CHAR, 0, comm);
+    /* every rank reports whether it shares the root's host; only the root receives the map */
+    int same_as_root = strcmp(roothost,localhost) == 0;
+    MPI_Gather( &same_as_root, 1, MPI_INT, node_map, 1, MPI_INT, 0, comm);
+    if (comm_rank == 0) {
+        fprintf( out_logfile, "Nodemap: " );
+        for ( int i = 0; i < num_ranks; i++ ) {
+            fprintf( out_logfile, "%d", node_map[i] );
+        }
+        fprintf( out_logfile, "\n" );
+        ret = (num_ranks > 1) ? (node_map[1] == 1) : 1; /* node_map is filled on the root only */
+    }
+    MPI_Bcast(&ret, 1, MPI_INT, 0, comm); /* all ranks must agree on the answer */
+    free(node_map);
+    return ret;
+}
+
 #if MPI_VERSION >= 3
 int CountTasksPerNode(MPI_Comm comm) {
     /* modern MPI provides a simple way to get the local process count */
     MPI_Comm shared_comm;
     int count;
 
+
     MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
     MPI_Comm_size (shared_comm, &count);
     MPI_Comm_free (&shared_comm);
diff --git a/src/utilities.h b/src/utilities.h
index b646174..d811261 100755
--- a/src/utilities.h
+++ b/src/utilities.h
@@ -65,6 +65,7 @@ void SetHints (MPI_Info *, char *);
 void ShowHints (MPI_Info *);
 char *HumanReadable(IOR_offset_t value, int base);
 int CountTasksPerNode(MPI_Comm comm);
+int QueryNodeMapping(MPI_Comm comm);
 void DelaySecs(int delay);
 void updateParsedOptions(IOR_param_t * options, options_all_t * global_options);
 size_t NodeMemoryStringToBytes(char *size_str);