diff --git a/src/ior.c b/src/ior.c
index 5c708cf..d7dd096 100755
--- a/src/ior.c
+++ b/src/ior.c
@@ -549,77 +549,6 @@ static int CountErrors(IOR_param_t * test, int access, int errors)
         return (allErrors);
 }
 
-/*
- * Count the number of tasks that share a host.
- *
- * This function employees the gethostname() call, rather than using
- * MPI_Get_processor_name(). We are interested in knowing the number
- * of tasks that share a file system client (I/O node, compute node,
- * whatever that may be). However on machines like BlueGene/Q,
- * MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
- * not the node where the I/O is function shipped to. gethostname()
- * is assumed to identify the shared filesystem client in more situations.
- *
- * NOTE: This also assumes that the task count on all nodes is equal
- * to the task count on the host running MPI task 0.
- */
-int CountTasksPerNode(int numTasks, MPI_Comm comm)
-{
-        /* for debugging and testing */
-        if (getenv("IOR_FAKE_TASK_PER_NODES")){
-                int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES"));
-                int rank;
-                MPI_Comm_rank(comm, & rank);
-                if(rank == 0){
-                        printf("Fake tasks per node: using %d\n", tasksPerNode);
-                }
-                return tasksPerNode;
-        }
-
-        char localhost[MAX_STR];
-        char hostname0[MAX_STR];
-        static int firstPass = TRUE;
-        unsigned count;
-        unsigned flag;
-        int rc;
-
-        rc = gethostname(localhost, MAX_STR);
-        if (rc == -1) {
-                /* This node won't match task 0's hostname...except in the
-                   case where ALL gethostname() calls fail, in which
-                   case ALL nodes will appear to be on the same node.
-                   We'll handle that later. */
-                localhost[0] = '\0';
-                if (rank == 0)
-                        perror("gethostname() failed");
-        }
-
-        if (verbose >= VERBOSE_2 && firstPass) {
-                char tmp[MAX_STR];
-                sprintf(tmp, "task %d on %s", rank, localhost);
-                OutputToRoot(numTasks, comm, tmp);
-                firstPass = FALSE;
-        }
-
-        /* send task 0's hostname to all tasks */
-        if (rank == 0)
-                strcpy(hostname0, localhost);
-        MPI_CHECK(MPI_Bcast(hostname0, MAX_STR, MPI_CHAR, 0, comm),
-                  "broadcast of task 0's hostname failed");
-        if (strcmp(hostname0, localhost) == 0)
-                flag = 1;
-        else
-                flag = 0;
-
-        /* count the tasks share the same host as task 0 */
-        MPI_Allreduce(&flag, &count, 1, MPI_UNSIGNED, MPI_SUM, comm);
-
-        if (hostname0[0] == '\0')
-                count = 1;
-
-        return (int)count;
-}
-
 /*
  * Allocate a page-aligned (required by O_DIRECT) buffer.
  */
@@ -1349,7 +1278,7 @@ static IOR_test_t *SetupTests(int argc, char **argv)
         IOR_test_t *tests, *testsHead;
 
         /* count the tasks per node */
-        tasksPerNode = CountTasksPerNode(numTasksWorld, mpi_comm_world);
+        tasksPerNode = CountTasksPerNode(mpi_comm_world);
 
         testsHead = tests = ParseCommandLine(argc, argv);
         /*
@@ -2042,7 +1971,7 @@ static void TestIoSys(IOR_test_t *test)
                        "Using reorderTasks '-C' (expecting block, not cyclic, task assignment)\n");
                 fflush(out_logfile);
         }
-        params->tasksPerNode = CountTasksPerNode(params->numTasks, testComm);
+        params->tasksPerNode = CountTasksPerNode(testComm);
 
         /* setup timers */
         for (i = 0; i < 12; i++) {
diff --git a/src/ior.h b/src/ior.h
index 76ea323..7ee0396 100755
--- a/src/ior.h
+++ b/src/ior.h
@@ -135,7 +135,7 @@ typedef struct
     int deadlineForStonewalling; /* max time in seconds to run any test phase */
     int stoneWallingWearOut;  /* wear out the stonewalling, once the timout is over, each process has to write the same amount */
     uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
-    char stoneWallingStatusFile[MAXPATHLEN];
+    char stoneWallingStatusFile[MAXPATHLEN];
 
     int maxTimeDuration;            /* max time in minutes to run each test */
     int outlierThreshold;           /* warn on outlier N seconds from mean */
@@ -240,7 +240,6 @@ IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
 void AllocResults(IOR_test_t *test);
 void GetPlatformName(char *);
 void init_IOR_Param_t(IOR_param_t *p);
-int CountTasksPerNode(int numTasks, MPI_Comm comm);
 
 /*
  * This function runs IOR given by command line, useful for testing
diff --git a/src/mdtest.c b/src/mdtest.c
index e0627a5..81b1df3 100644
--- a/src/mdtest.c
+++ b/src/mdtest.c
@@ -86,7 +86,6 @@
 
 #define LLU "%lu"
 
-//int rank;
 static int size;
 static uint64_t *rand_array;
 static char testdir[MAX_LEN];
@@ -176,55 +175,6 @@ enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
 
-#if MPI_VERSION >= 3
-int count_tasks_per_node(void) {
-    /* modern MPI provides a simple way to get the local process count */
-    MPI_Comm shared_comm;
-    int rc, count;
-
-    MPI_Comm_split_type (testComm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
-
-    MPI_Comm_size (shared_comm, &count);
-
-    MPI_Comm_free (&shared_comm);
-
-    return count;
-}
-#else
-int count_tasks_per_node(void) {
-    char localhost[MAX_LEN],
-         hostname[MAX_LEN];
-    int count = 1,
-        i;
-    MPI_Status status;
-
-    if (( rank == 0 ) && ( verbose >= 1 )) {
-        fprintf( out_logfile, "V-1: Entering count_tasks_per_node...\n" );
-        fflush( out_logfile );
-    }
-
-    if (gethostname(localhost, MAX_LEN) != 0) {
-        FAIL("gethostname()");
-    }
-    if (rank == 0) {
-        /* MPI_receive all hostnames, and compare to local hostname */
-        for (i = 0; i < size-1; i++) {
-            MPI_Recv(hostname, MAX_LEN, MPI_CHAR, MPI_ANY_SOURCE,
-                     MPI_ANY_TAG, testComm, &status);
-            if (strcmp(hostname, localhost) == 0) {
-                count++;
-            }
-        }
-    } else {
-        /* MPI_send hostname to root node */
-        MPI_Send(localhost, MAX_LEN, MPI_CHAR, 0, 0, testComm);
-    }
-    MPI_Bcast(&count, 1, MPI_INT, 0, testComm);
-
-    return(count);
-}
-#endif
-
 void delay_secs(int delay) {
@@ -1023,7 +973,6 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran
         fflush( out_logfile );
     }
 
-    double start_timer = GetTimeStamp();
     /* remove directories */
     if (collective_creates) {
         if (rank == 0) {
@@ -1112,7 +1061,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
     t[0] = MPI_Wtime();
 
     /* create phase */
-    if (create_only && ! CHECK_STONE_WALL(progress)) {
+    if (create_only) {
         if (unique_dir_per_task) {
             unique_dir_access(MK_UNI_DIR, temp_path);
             if (!time_unique_dir_overhead) {
@@ -1138,6 +1087,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
             /* create files */
             create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
             if(stone_wall_timer_seconds){
+                /* TODO */
                 if (verbose >= 1 ) {
                     fprintf( out_logfile, "V-1: rank %d stonewall hit with %lld items\n", rank, progress->items_done );
                     fflush( out_logfile );
@@ -1176,7 +1126,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
     t[1] = MPI_Wtime();
 
     /* stat phase */
-    if (stat_only && ! CHECK_STONE_WALL(progress)) {
+    if (stat_only) {
         if (unique_dir_per_task) {
             unique_dir_access(STAT_SUB_DIR, temp_path);
             if (!time_unique_dir_overhead) {
@@ -1205,7 +1155,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
     t[2] = MPI_Wtime();
 
     /* read phase */
-    if (read_only && ! CHECK_STONE_WALL(progress)) {
+    if (read_only) {
         if (unique_dir_per_task) {
             unique_dir_access(READ_SUB_DIR, temp_path);
             if (!time_unique_dir_overhead) {
@@ -1233,7 +1183,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
     }
     t[3] = MPI_Wtime();
 
-    if (remove_only && ! CHECK_STONE_WALL(progress)) {
+    if (remove_only) {
         if (unique_dir_per_task) {
             unique_dir_access(RM_SUB_DIR, temp_path);
             if (!time_unique_dir_overhead) {
@@ -1261,7 +1211,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
         MPI_Barrier(testComm);
     }
     t[4] = MPI_Wtime();
-    if (remove_only && ! CHECK_STONE_WALL(progress)) {
+    if (remove_only) {
         if (unique_dir_per_task) {
             unique_dir_access(RM_UNI_DIR, temp_path);
         } else {
@@ -2009,9 +1959,6 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t
     }
 
     MPI_Barrier(testComm);
-    if(CHECK_STONE_WALL(progress)){
-      return;
-    }
     if (remove_only) {
         startCreate = MPI_Wtime();
         if (unique_dir_per_task) {
@@ -2164,7 +2111,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
     pid = getpid();
     uid = getuid();
 
-    nodeCount = size / count_tasks_per_node();
+    nodeCount = size / CountTasksPerNode(testComm);
 
     if (rank == 0) {
         fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp());
@@ -2384,9 +2331,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
          * range 0 .. 1. Multiply that by n and you get a number in
         * the range 0 .. n.
         */
-
-        uint64_t k =
-            ( uint64_t ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
+        uint64_t k = ( uint64_t ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
 
         /*
          * Now move the nth element to the kth (randomly chosen)
diff --git a/src/mdtest.h b/src/mdtest.h
index 897329f..6267282 100644
--- a/src/mdtest.h
+++ b/src/mdtest.h
@@ -21,14 +21,15 @@ typedef enum {
 
 typedef struct
 {
-    double rate[MDTEST_LAST_NUM];
-    double time[MDTEST_LAST_NUM];
-    uint64_t items[MDTEST_LAST_NUM];
+    double rate[MDTEST_LAST_NUM];                  /* calculated throughput */
+    double time[MDTEST_LAST_NUM];                  /* elapsed wall-clock time of the phase */
+    uint64_t items[MDTEST_LAST_NUM];               /* number of operations performed */
 
-    uint64_t stonewall_last_item[MDTEST_LAST_NUM];
-    double stonewall_time[MDTEST_LAST_NUM];
-    uint64_t stonewall_item_min[MDTEST_LAST_NUM];
-    uint64_t stonewall_item_sum[MDTEST_LAST_NUM];
+    /* Statistics when hitting the stonewall */
+    double stonewall_time[MDTEST_LAST_NUM];        /* runtime until completion, or until the stonewall was hit */
+    uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* max number of items any process has accessed */
+    uint64_t stonewall_item_min[MDTEST_LAST_NUM];  /* min number of items any process has accessed */
+    uint64_t stonewall_item_sum[MDTEST_LAST_NUM];  /* total number of items accessed until the stonewall */
 } mdtest_results_t;
 
 mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
diff --git a/src/utilities.c b/src/utilities.c
index f71d790..05d6f46 100755
--- a/src/utilities.c
+++ b/src/utilities.c
@@ -130,77 +130,77 @@ void DumpBuffer(void *buffer,
         return;
 }                               /* DumpBuffer() */
 
-/*
- * Sends all strings to root nodes and displays.
- */
-void OutputToRoot(int numTasks, MPI_Comm comm, char *stringToDisplay)
-{
-        int i;
-        int swapNeeded = TRUE;
-        int pairsToSwap;
-        char **stringArray;
-        char tmpString[MAX_STR];
-        MPI_Status status;
+#if MPI_VERSION >= 3
+int CountTasksPerNode(MPI_Comm comm) {
+        /* modern MPI provides a simple way to get the local process count */
+        MPI_Comm shared_comm;
+        int count;
 
-        /* malloc string array */
-        stringArray = (char **)malloc(sizeof(char *) * numTasks);
-        if (stringArray == NULL)
-                ERR("out of memory");
-        for (i = 0; i < numTasks; i++) {
-                stringArray[i] = (char *)malloc(sizeof(char) * MAX_STR);
-                if (stringArray[i] == NULL)
-                        ERR("out of memory");
-        }
+        MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
+        MPI_Comm_size(shared_comm, &count);
+        MPI_Comm_free(&shared_comm);
 
-        strcpy(stringArray[rank], stringToDisplay);
-
-        if (rank == 0) {
-                /* MPI_receive all strings */
-                for (i = 1; i < numTasks; i++) {
-                        MPI_CHECK(MPI_Recv(stringArray[i], MAX_STR, MPI_CHAR,
-                                           MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
-                                           &status), "MPI_Recv() error");
-                }
-        } else {
-                /* MPI_send string to root node */
-                MPI_CHECK(MPI_Send
-                          (stringArray[rank], MAX_STR, MPI_CHAR, 0, 0, comm),
-                          "MPI_Send() error");
-        }
-        MPI_CHECK(MPI_Barrier(comm), "barrier error");
-
-        /* sort strings using bubblesort */
-        if (rank == 0) {
-                pairsToSwap = numTasks - 1;
-                while (swapNeeded) {
-                        swapNeeded = FALSE;
-                        for (i = 0; i < pairsToSwap; i++) {
-                                if (strcmp(stringArray[i], stringArray[i + 1]) >
-                                    0) {
-                                        strcpy(tmpString, stringArray[i]);
-                                        strcpy(stringArray[i],
-                                               stringArray[i + 1]);
-                                        strcpy(stringArray[i + 1], tmpString);
-                                        swapNeeded = TRUE;
-                                }
-                        }
-                        pairsToSwap--;
-                }
-        }
-
-        /* display strings */
-        if (rank == 0) {
-                for (i = 0; i < numTasks; i++) {
-                        fprintf(out_logfile, "%s\n", stringArray[i]);
-                }
-        }
-
-        /* free strings */
-        for (i = 0; i < numTasks; i++) {
-                free(stringArray[i]);
-        }
-        free(stringArray);
+        return count;
 }
+#else
+/*
+ * Count the number of tasks that share a host.
+ *
+ * This function employs the gethostname() call, rather than using
+ * MPI_Get_processor_name(). We are interested in knowing the number
+ * of tasks that share a file system client (I/O node, compute node,
+ * whatever that may be). However on machines like BlueGene/Q,
+ * MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
+ * not the node where the I/O is function shipped to. gethostname()
+ * is assumed to identify the shared filesystem client in more situations.
+ *
+ * NOTE: This also assumes that the task count on all nodes is equal
+ * to the task count on the host running MPI task 0.
+ */
+int CountTasksPerNode(MPI_Comm comm) {
+        /* for debugging and testing */
+        if (getenv("IOR_FAKE_TASK_PER_NODES")){
+                int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES"));
+                int rank;
+                MPI_Comm_rank(comm, &rank);
+                if(rank == 0){
+                        printf("Fake tasks per node: using %d\n", tasksPerNode);
+                }
+                return tasksPerNode;
+        }
+
+        char localhost[MAX_LEN],
+             hostname[MAX_LEN];
+        int count = 1,
+            i;
+        int size;
+        MPI_Status status;
+
+        MPI_Comm_size(comm, &size);
+
+        if (( rank == 0 ) && ( verbose >= 1 )) {
+                fprintf( out_logfile, "V-1: Entering CountTasksPerNode...\n" );
+                fflush( out_logfile );
+        }
+
+        if (gethostname(localhost, MAX_LEN) != 0) {
+                FAIL("gethostname()");
+        }
+        if (rank == 0) {
+                /* receive all hostnames, and compare to local hostname */
+                for (i = 0; i < size-1; i++) {
+                        MPI_Recv(hostname, MAX_LEN, MPI_CHAR, MPI_ANY_SOURCE,
+                                 MPI_ANY_TAG, comm, &status);
+                        if (strcmp(hostname, localhost) == 0) {
+                                count++;
+                        }
+                }
+        } else {
+                /* send hostname to root node */
+                MPI_Send(localhost, MAX_LEN, MPI_CHAR, 0, 0, comm);
+        }
+        MPI_Bcast(&count, 1, MPI_INT, 0, comm);
+
+        return(count);
+}
+#endif
+
 
 /*
  * Extract key/value pair from hint string.
diff --git a/src/utilities.h b/src/utilities.h
index 3865757..9759752 100755
--- a/src/utilities.h
+++ b/src/utilities.h
@@ -48,13 +48,13 @@ extern FILE * out_logfile;
 
 void set_o_direct_flag(int *fd);
 char *CurrentTimeString(void);
-void OutputToRoot(int, MPI_Comm, char *);
 int Regex(char *, char *);
 void ShowFileSystemSize(char *);
 void DumpBuffer(void *, size_t);
 void SeedRandGen(MPI_Comm);
 void SetHints (MPI_Info *, char *);
 void ShowHints (MPI_Info *);
+int CountTasksPerNode(MPI_Comm comm);
 
 /* Returns -1, if cannot be read */
 int64_t ReadStoneWallingIterations(char * const filename);
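
Editor's note, not part of the patch: the consolidated CountTasksPerNode() above
relies on MPI_Comm_split_type(MPI_COMM_TYPE_SHARED, ...), which groups exactly
the ranks that share a node, so the size of the resulting communicator is the
per-node task count. A minimal, self-contained sketch of that technique follows;
the file name (tpn.c) and variable names are illustrative, not taken from IOR:

/* tpn.c: build with `mpicc tpn.c -o tpn`, run with e.g. `mpiexec -n 8 ./tpn` */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
        int rank, tasksPerNode;
        MPI_Comm shared_comm;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* one sub-communicator per shared-memory domain, i.e. per node */
        MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                            MPI_INFO_NULL, &shared_comm);
        MPI_Comm_size(shared_comm, &tasksPerNode);
        MPI_Comm_free(&shared_comm);

        if (rank == 0)
                printf("tasks per node: %d\n", tasksPerNode);

        MPI_Finalize();
        return 0;
}

Unlike the gethostname() fallback, the split needs no hostname exchange, and each
rank learns the task count of its own node rather than inheriting the NOTE'd
assumption that every node matches task 0's host.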
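A second editor's sketch, for the shuffle arithmetic that the mdtest comment in
the @@ -2384 hunk explains: rand()/RAND_MAX is a double in 0..1, so scaling by n
and truncating yields an index k in 0..n inclusive. The surrounding loop is not
shown in this diff, so the function below is a hypothetical reconstruction of a
Fisher-Yates pass using that step, not mdtest's actual code:

#include <stdint.h>
#include <stdlib.h>

static void shuffle(uint64_t *a, uint64_t len)
{
        uint64_t n;

        for (n = len - 1; n > 0; n--) {
                /* k is (up to small floating-point bias) uniform in 0 .. n */
                uint64_t k = (uint64_t)(((double)rand() /
                                         (double)RAND_MAX) * (double)n);
                /* move the nth element to the kth (randomly chosen) slot;
                 * k == n, i.e. swapping with itself, is allowed and correct */
                uint64_t tmp = a[n];
                a[n] = a[k];
                a[k] = tmp;
        }
}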