Some simplification / unification between IOR and MDTest.

MDTest needs more refactoring to be really maintainable.
master
Julian M. Kunkel 2018-07-07 22:19:42 +01:00
parent 7bc868d5cf
commit 6f8d2e8845
6 changed files with 88 additions and 214 deletions


@ -549,77 +549,6 @@ static int CountErrors(IOR_param_t * test, int access, int errors)
return (allErrors);
}
/*
* Count the number of tasks that share a host.
*
* This function employs the gethostname() call, rather than using
* MPI_Get_processor_name(). We are interested in knowing the number
* of tasks that share a file system client (I/O node, compute node,
* whatever that may be). However on machines like BlueGene/Q,
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
* not the node where the I/O is function shipped to. gethostname()
* is assumed to identify the shared filesystem client in more situations.
*
* NOTE: This also assumes that the task count on all nodes is equal
* to the task count on the host running MPI task 0.
*/
int CountTasksPerNode(int numTasks, MPI_Comm comm)
{
/* for debugging and testing */
if (getenv("IOR_FAKE_TASK_PER_NODES")){
int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES"));
int rank;
MPI_Comm_rank(comm, & rank);
if(rank == 0){
printf("Fake tasks per node: using %d\n", tasksPerNode);
}
return tasksPerNode;
}
char localhost[MAX_STR];
char hostname0[MAX_STR];
static int firstPass = TRUE;
unsigned count;
unsigned flag;
int rc;
rc = gethostname(localhost, MAX_STR);
if (rc == -1) {
/* This node won't match task 0's hostname...except in the
case where ALL gethostname() calls fail, in which
case ALL nodes will appear to be on the same node.
We'll handle that later. */
localhost[0] = '\0';
if (rank == 0)
perror("gethostname() failed");
}
if (verbose >= VERBOSE_2 && firstPass) {
char tmp[MAX_STR];
sprintf(tmp, "task %d on %s", rank, localhost);
OutputToRoot(numTasks, comm, tmp);
firstPass = FALSE;
}
/* send task 0's hostname to all tasks */
if (rank == 0)
strcpy(hostname0, localhost);
MPI_CHECK(MPI_Bcast(hostname0, MAX_STR, MPI_CHAR, 0, comm),
"broadcast of task 0's hostname failed");
if (strcmp(hostname0, localhost) == 0)
flag = 1;
else
flag = 0;
/* count the tasks that share the same host as task 0 */
MPI_Allreduce(&flag, &count, 1, MPI_UNSIGNED, MPI_SUM, comm);
if (hostname0[0] == '\0')
count = 1;
return (int)count;
}
/*
* Allocate a page-aligned (required by O_DIRECT) buffer.
*/
@ -1349,7 +1278,7 @@ static IOR_test_t *SetupTests(int argc, char **argv)
IOR_test_t *tests, *testsHead;
/* count the tasks per node */
tasksPerNode = CountTasksPerNode(numTasksWorld, mpi_comm_world);
tasksPerNode = CountTasksPerNode(mpi_comm_world);
testsHead = tests = ParseCommandLine(argc, argv);
/*
@ -2042,7 +1971,7 @@ static void TestIoSys(IOR_test_t *test)
"Using reorderTasks '-C' (expecting block, not cyclic, task assignment)\n");
fflush(out_logfile);
}
params->tasksPerNode = CountTasksPerNode(params->numTasks, testComm);
params->tasksPerNode = CountTasksPerNode(testComm);
/* setup timers */
for (i = 0; i < 12; i++) {


@ -135,7 +135,7 @@ typedef struct
int deadlineForStonewalling; /* max time in seconds to run any test phase */
int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
char stoneWallingStatusFile[MAXPATHLEN];
char stoneWallingStatusFile[MAXPATHLEN];
int maxTimeDuration; /* max time in minutes to run each test */
int outlierThreshold; /* warn on outlier N seconds from mean */
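
The stoneWallingWearOut comment above describes the intended behaviour: once the stonewall timeout fires, every process still has to complete the same number of operations. A minimal sketch of that idea, assuming an MPI_Allreduce over the per-rank item counts (the helper name is hypothetical, not IOR's actual control flow):

#include <mpi.h>
#include <stdint.h>

/* Illustration only: each rank notes how many items it had finished when the
 * deadline expired; the fastest rank's count becomes the common target, and
 * every rank keeps issuing operations until it reaches that target. */
static uint64_t stonewall_wear_out_target(uint64_t items_done_at_deadline,
                                          MPI_Comm comm)
{
        uint64_t target = 0;
        MPI_Allreduce(&items_done_at_deadline, &target, 1,
                      MPI_UINT64_T, MPI_MAX, comm);
        return target;   /* caller continues until it has done 'target' items */
}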
@ -240,7 +240,6 @@ IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
void AllocResults(IOR_test_t *test);
void GetPlatformName(char *);
void init_IOR_Param_t(IOR_param_t *p);
int CountTasksPerNode(int numTasks, MPI_Comm comm);
/*
* This function runs IOR given by command line, useful for testing


@ -86,7 +86,6 @@
#define LLU "%lu"
//int rank;
static int size;
static uint64_t *rand_array;
static char testdir[MAX_LEN];
@ -176,55 +175,6 @@ enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
#if MPI_VERSION >= 3
int count_tasks_per_node(void) {
/* modern MPI provides a simple way to get the local process count */
MPI_Comm shared_comm;
int rc, count;
MPI_Comm_split_type (testComm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
MPI_Comm_size (shared_comm, &count);
MPI_Comm_free (&shared_comm);
return count;
}
#else
int count_tasks_per_node(void) {
char localhost[MAX_LEN],
hostname[MAX_LEN];
int count = 1,
i;
MPI_Status status;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( out_logfile, "V-1: Entering count_tasks_per_node...\n" );
fflush( out_logfile );
}
if (gethostname(localhost, MAX_LEN) != 0) {
FAIL("gethostname()");
}
if (rank == 0) {
/* MPI_receive all hostnames, and compare to local hostname */
for (i = 0; i < size-1; i++) {
MPI_Recv(hostname, MAX_LEN, MPI_CHAR, MPI_ANY_SOURCE,
MPI_ANY_TAG, testComm, &status);
if (strcmp(hostname, localhost) == 0) {
count++;
}
}
} else {
/* MPI_send hostname to root node */
MPI_Send(localhost, MAX_LEN, MPI_CHAR, 0, 0, testComm);
}
MPI_Bcast(&count, 1, MPI_INT, 0, testComm);
return(count);
}
#endif
void delay_secs(int delay) {
@ -1023,7 +973,6 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran
fflush( out_logfile );
}
double start_timer = GetTimeStamp();
/* remove directories */
if (collective_creates) {
if (rank == 0) {
@ -1112,7 +1061,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
t[0] = MPI_Wtime();
/* create phase */
if (create_only && ! CHECK_STONE_WALL(progress)) {
if (create_only ) {
if (unique_dir_per_task) {
unique_dir_access(MK_UNI_DIR, temp_path);
if (!time_unique_dir_overhead) {
@ -1138,6 +1087,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
/* create files */
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
if(stone_wall_timer_seconds){
/* TODO */
if (verbose >= 1 ) {
fprintf( out_logfile, "V-1: rank %d stonewall hit with %lld items\n", rank, progress->items_done );
fflush( out_logfile );
@ -1176,7 +1126,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
t[1] = MPI_Wtime();
/* stat phase */
if (stat_only && ! CHECK_STONE_WALL(progress)) {
if (stat_only ) {
if (unique_dir_per_task) {
unique_dir_access(STAT_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
@ -1205,7 +1155,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
t[2] = MPI_Wtime();
/* read phase */
if (read_only && ! CHECK_STONE_WALL(progress)) {
if (read_only ) {
if (unique_dir_per_task) {
unique_dir_access(READ_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
@ -1233,7 +1183,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
}
t[3] = MPI_Wtime();
if (remove_only && ! CHECK_STONE_WALL(progress)) {
if (remove_only) {
if (unique_dir_per_task) {
unique_dir_access(RM_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
@ -1261,7 +1211,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
MPI_Barrier(testComm);
}
t[4] = MPI_Wtime();
if (remove_only && ! CHECK_STONE_WALL(progress)) {
if (remove_only) {
if (unique_dir_per_task) {
unique_dir_access(RM_UNI_DIR, temp_path);
} else {
@ -2009,9 +1959,6 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t
}
MPI_Barrier(testComm);
if(CHECK_STONE_WALL(progress)){
return;
}
if (remove_only) {
startCreate = MPI_Wtime();
if (unique_dir_per_task) {
@ -2164,7 +2111,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
pid = getpid();
uid = getuid();
nodeCount = size / count_tasks_per_node();
nodeCount = size / CountTasksPerNode(testComm);
if (rank == 0) {
fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp());
@ -2384,9 +2331,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
* range 0 .. 1. Multiply that by n and you get a number in
* the range 0 .. n.
*/
uint64_t k =
( uint64_t ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
uint64_t k = ( uint64_t ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
/*
* Now move the nth element to the kth (randomly chosen)
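
The arithmetic above (scale a 0..1 value from rand()/RAND_MAX by n to pick a target slot, then move the n-th element there) is the core of the random item ordering. A self-contained sketch of the same Fisher-Yates-style idea, with a hypothetical helper name and a division by RAND_MAX + 1 so the truncated index stays strictly below the upper bound:

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical helper, not mdtest's exact code: walk the array from the back
 * and swap each element with a randomly chosen earlier (or same) position. */
static void shuffle_items(uint64_t *items, uint64_t n)
{
        if (n < 2)
                return;
        for (uint64_t i = n - 1; i > 0; i--) {
                /* rand()/(RAND_MAX+1) is in [0,1); scaled by i+1 and truncated
                   it yields an index in 0..i */
                uint64_t k = (uint64_t)(((double)rand() /
                                        ((double)RAND_MAX + 1.0)) * (double)(i + 1));
                uint64_t tmp = items[i];
                items[i] = items[k];
                items[k] = tmp;
        }
}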


@ -21,14 +21,15 @@ typedef enum {
typedef struct
{
double rate[MDTEST_LAST_NUM];
double time[MDTEST_LAST_NUM];
uint64_t items[MDTEST_LAST_NUM];
double rate[MDTEST_LAST_NUM]; /* Calculated throughput */
double time[MDTEST_LAST_NUM]; /* Elapsed time (seconds) */
uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done */
uint64_t stonewall_last_item[MDTEST_LAST_NUM];
double stonewall_time[MDTEST_LAST_NUM];
uint64_t stonewall_item_min[MDTEST_LAST_NUM];
uint64_t stonewall_item_sum[MDTEST_LAST_NUM];
/* Statistics when hitting the stonewall */
double stonewall_time[MDTEST_LAST_NUM]; /* runtime until completion / hit of the stonewall */
uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* Max number of items a process has accessed */
uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items a process has accessed */
uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed until stonewall */
} mdtest_results_t;
mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
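
The comments added to mdtest_results_t spell out what each stonewall field holds. A hedged sketch of how per-rank item counters could be reduced into those fields (everything except the struct members is an assumption, including the header name):

#include <stdint.h>
#include <mpi.h>
#include "mdtest.h"   /* assumed name of the header shown above */

/* Hypothetical aggregation of one phase's per-rank item counter into the
 * summary fields documented above (function and argument names invented). */
static void aggregate_stonewall_stats(mdtest_results_t *res, int phase,
                                      uint64_t items_done_local, MPI_Comm comm)
{
        uint64_t max_items = 0, min_items = 0, sum_items = 0;

        MPI_Allreduce(&items_done_local, &max_items, 1, MPI_UINT64_T, MPI_MAX, comm);
        MPI_Allreduce(&items_done_local, &min_items, 1, MPI_UINT64_T, MPI_MIN, comm);
        MPI_Allreduce(&items_done_local, &sum_items, 1, MPI_UINT64_T, MPI_SUM, comm);

        res->stonewall_last_item[phase] = max_items;   /* max per-process count */
        res->stonewall_item_min[phase]  = min_items;   /* min per-process count */
        res->stonewall_item_sum[phase]  = sum_items;   /* total across all ranks */
}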


@ -130,77 +130,77 @@ void DumpBuffer(void *buffer,
return;
} /* DumpBuffer() */
/*
 * Sends all strings to root nodes and displays.
 */
void OutputToRoot(int numTasks, MPI_Comm comm, char *stringToDisplay)
{
        int i;
        int swapNeeded = TRUE;
        int pairsToSwap;
        char **stringArray;
        char tmpString[MAX_STR];
        MPI_Status status;

        /* malloc string array */
        stringArray = (char **)malloc(sizeof(char *) * numTasks);
        if (stringArray == NULL)
                ERR("out of memory");
        for (i = 0; i < numTasks; i++) {
                stringArray[i] = (char *)malloc(sizeof(char) * MAX_STR);
                if (stringArray[i] == NULL)
                        ERR("out of memory");
        }

        strcpy(stringArray[rank], stringToDisplay);

        if (rank == 0) {
                /* MPI_receive all strings */
                for (i = 1; i < numTasks; i++) {
                        MPI_CHECK(MPI_Recv(stringArray[i], MAX_STR, MPI_CHAR,
                                           MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
                                           &status), "MPI_Recv() error");
                }
        } else {
                /* MPI_send string to root node */
                MPI_CHECK(MPI_Send(stringArray[rank], MAX_STR, MPI_CHAR, 0, 0, comm),
                          "MPI_Send() error");
        }
        MPI_CHECK(MPI_Barrier(comm), "barrier error");

        /* sort strings using bubblesort */
        if (rank == 0) {
                pairsToSwap = numTasks - 1;
                while (swapNeeded) {
                        swapNeeded = FALSE;
                        for (i = 0; i < pairsToSwap; i++) {
                                if (strcmp(stringArray[i], stringArray[i + 1]) > 0) {
                                        strcpy(tmpString, stringArray[i]);
                                        strcpy(stringArray[i], stringArray[i + 1]);
                                        strcpy(stringArray[i + 1], tmpString);
                                        swapNeeded = TRUE;
                                }
                        }
                        pairsToSwap--;
                }
        }

        /* display strings */
        if (rank == 0) {
                for (i = 0; i < numTasks; i++) {
                        fprintf(out_logfile, "%s\n", stringArray[i]);
                }
        }

        /* free strings */
        for (i = 0; i < numTasks; i++) {
                free(stringArray[i]);
        }
        free(stringArray);
}

#if MPI_VERSION >= 3
int CountTasksPerNode(MPI_Comm comm) {
        /* modern MPI provides a simple way to get the local process count */
        MPI_Comm shared_comm;
        int rc, count;

        MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
        MPI_Comm_size (shared_comm, &count);
        MPI_Comm_free (&shared_comm);

        return count;
}
#else
/*
* Count the number of tasks that share a host.
*
* This function employs the gethostname() call, rather than using
* MPI_Get_processor_name(). We are interested in knowing the number
* of tasks that share a file system client (I/O node, compute node,
* whatever that may be). However on machines like BlueGene/Q,
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
* not the node where the I/O is function shipped to. gethostname()
* is assumed to identify the shared filesystem client in more situations.
*
* NOTE: This also assumes that the task count on all nodes is equal
* to the task count on the host running MPI task 0.
*/
int CountTasksPerNode(MPI_Comm comm) {
        /* for debugging and testing */
        if (getenv("IOR_FAKE_TASK_PER_NODES")){
                int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES"));
                int rank;
                MPI_Comm_rank(comm, & rank);
                if(rank == 0){
                        printf("Fake tasks per node: using %d\n", tasksPerNode);
                }
                return tasksPerNode;
        }

        char localhost[MAX_LEN],
             hostname[MAX_LEN];
        int count = 1,
            i;
        int size;
        MPI_Status status;

        if (( rank == 0 ) && ( verbose >= 1 )) {
                fprintf( out_logfile, "V-1: Entering CountTasksPerNode...\n" );
                fflush( out_logfile );
        }

        /* use the communicator that was passed in (not a global), so the
           helper also works for sub-communicators */
        MPI_Comm_size(comm, &size);

        if (gethostname(localhost, MAX_LEN) != 0) {
                FAIL("gethostname()");
        }
        if (rank == 0) {
                /* MPI_receive all hostnames, and compare to local hostname */
                for (i = 0; i < size-1; i++) {
                        MPI_Recv(hostname, MAX_LEN, MPI_CHAR, MPI_ANY_SOURCE,
                                 MPI_ANY_TAG, comm, &status);
                        if (strcmp(hostname, localhost) == 0) {
                                count++;
                        }
                }
        } else {
                /* MPI_send hostname to root node */
                MPI_Send(localhost, MAX_LEN, MPI_CHAR, 0, 0, comm);
        }
        MPI_Bcast(&count, 1, MPI_INT, 0, comm);

        return(count);
}
#endif
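
For orientation, a small hypothetical test program (not part of this commit) that links against this file and mirrors how IOR and mdtest derive the node count from the unified helper:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

/* prototype as declared in the utilities header of this commit */
int CountTasksPerNode(MPI_Comm comm);

int main(int argc, char **argv)
{
        int rank, size, tasksPerNode;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_size(MPI_COMM_WORLD, &size);

        /* setenv("IOR_FAKE_TASK_PER_NODES", "8", 1); would exercise the debug override */
        tasksPerNode = CountTasksPerNode(MPI_COMM_WORLD);
        if (rank == 0) {
                printf("%d tasks, %d per node, %d node(s)\n",
                       size, tasksPerNode, size / tasksPerNode);
        }

        MPI_Finalize();
        return 0;
}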
/*
* Extract key/value pair from hint string.


@ -48,13 +48,13 @@ extern FILE * out_logfile;
void set_o_direct_flag(int *fd);
char *CurrentTimeString(void);
void OutputToRoot(int, MPI_Comm, char *);
int Regex(char *, char *);
void ShowFileSystemSize(char *);
void DumpBuffer(void *, size_t);
void SeedRandGen(MPI_Comm);
void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *);
int CountTasksPerNode(MPI_Comm comm);
/* Returns -1 if the file cannot be read */
int64_t ReadStoneWallingIterations(char * const filename);
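
A short usage sketch for the declaration above; only the prototype and the -1 convention come from the header, the caller is invented:

#include <stdint.h>
#include <stdio.h>

int64_t ReadStoneWallingIterations(char * const filename);  /* as declared above */

/* Hypothetical caller: fall back gracefully when no status file was written. */
static void report_stonewall_iterations(char *statusFile)
{
        int64_t iterations = ReadStoneWallingIterations(statusFile);
        if (iterations == -1) {
                fprintf(stderr, "no stonewalling status in %s\n", statusFile);
        } else {
                printf("stonewalling wear-out iterations: %lld\n",
                       (long long) iterations);
        }
}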