diff --git a/src/ior.c b/src/ior.c index b7db09c..c724664 100644 --- a/src/ior.c +++ b/src/ior.c @@ -449,46 +449,63 @@ static int CountErrors(IOR_param_t * test, int access, int errors) } /* - * Compares hostnames to determine the number of tasks per node + * Count the number of tasks that share a host. + * + * This function employees the gethostname() call, rather than using + * MPI_Get_processor_name(). We are interested in knowing the number + * of tasks that share a file system client (I/O node, compute node, + * whatever that may be). However on machines like BlueGene/Q, + * MPI_Get_processor_name() uniquely identifies a cpu in a compute node, + * not the node where the I/O is function shipped to. gethostname() + * is assumed to identify the shared filesystem client in more situations. + * + * NOTE: This also assumes that the task count on all nodes is equal + * to the task count on the host running MPI task 0. */ static int CountTasksPerNode(int numTasks, MPI_Comm comm) { - char localhost[MAX_STR], hostname[MAX_STR], taskOnNode[MAX_STR]; - int count = 1, resultsLen = MAX_STR, i; + char localhost[MAX_STR]; + char hostname0[MAX_STR]; static int firstPass = TRUE; - MPI_Status status; - - MPI_CHECK(MPI_Get_processor_name(localhost, &resultsLen), - "cannot get processor name"); + unsigned count; + unsigned flag; + int rc; if (verbose >= VERBOSE_2 && firstPass) { - sprintf(taskOnNode, "task %d on %s", rank, localhost); - OutputToRoot(numTasks, comm, taskOnNode); + char tmp[MAX_STR]; + sprintf(tmp, "task %d on %s", rank, localhost); + OutputToRoot(numTasks, comm, tmp); firstPass = FALSE; } - if (numTasks > 1) { - if (rank == 0) { - /* MPI_receive all hostnames, and compare to local hostname */ - for (i = 0; i < numTasks - 1; i++) { - MPI_CHECK(MPI_Recv - (hostname, MAX_STR, MPI_CHAR, - MPI_ANY_SOURCE, MPI_ANY_TAG, comm, - &status), - "cannot receive hostnames"); - if (strcmp(hostname, localhost) == 0) - count++; - } - } else { - /* MPI_send hostname to root node */ - MPI_CHECK(MPI_Send(localhost, MAX_STR, MPI_CHAR, 0, 0, - comm), "cannot send hostname"); - } - MPI_CHECK(MPI_Bcast(&count, 1, MPI_INT, 0, comm), - "cannot broadcast tasks-per-node value"); + rc = gethostname(localhost, MAX_STR); + if (rc == -1) { + /* This node won't match task 0's hostname...expect in the + case where ALL gethostname() calls fail, in which + case ALL nodes will appear to be on the same node. + We'll handle that later. */ + localhost[0] = '\0'; + if (rank == 0) + perror("gethostname() failed"); } - return (count); + /* send task 0's hostname to all tasks */ + if (rank == 0) + strcpy(hostname0, localhost); + MPI_CHECK(MPI_Bcast(hostname0, MAX_STR, MPI_CHAR, 0, comm), + "broadcast of task 0's hostname failed"); + if (strcmp(hostname0, localhost) == 0) + flag = 1; + else + flag = 0; + + /* count the tasks share the same host as task 0 */ + MPI_Allreduce(&flag, &count, 1, MPI_UNSIGNED, MPI_SUM, comm); + + if (hostname0[0] == '\0') + count = 1; + + return (int)count; } /*