From 6c0fadc2a9f82623bbd6016194802e35245daea6 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 1 Aug 2019 17:20:01 +0100 Subject: [PATCH 1/3] Include performance when stonewall is hit to output. --- src/ior-output.c | 23 +++++++++++++++++++---- src/ior.c | 6 +----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index 4bdb37d..7049a97 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -18,8 +18,8 @@ static void PrintNextToken(); void PrintTableHeader(){ if (outputFormat == OUTPUT_DEFAULT){ fprintf(out_resultfile, "\n"); - fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); - fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n"); + fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); + fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n"); } } @@ -534,10 +534,14 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) reps = params->repetitions; double * times = malloc(sizeof(double)* reps); + long long stonewall_avg_data_accessed = 0; + double stonewall_time = 0; for(int i=0; i < reps; i++){ IOR_point_t *point = (access == WRITE) ? &results[i].write : &results[i].read; times[i] = point->time; + stonewall_time += point->stonewall_time; + stonewall_avg_data_accessed += point->stonewall_avg_data_accessed; } bw = bw_values(reps, results, times, access); @@ -558,6 +562,13 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) fprintf(out_resultfile, "%10.2f ", ops->mean); fprintf(out_resultfile, "%10.2f ", ops->sd); fprintf(out_resultfile, "%10.5f ", mean_of_array_of_doubles(times, reps)); + if(test->params.stoneWallingWearOut){ + fprintf(out_resultfile, "%10.2f ", stonewall_time / reps); + fprintf(out_resultfile, "%13.2f ", stonewall_avg_data_accessed / stonewall_time / MEBIBYTE); + }else{ + fprintf(out_resultfile, "%10s ", "NA"); + fprintf(out_resultfile, "%13s ", "NA"); + } fprintf(out_resultfile, "%5d ", params->id); fprintf(out_resultfile, "%6d ", params->numTasks); fprintf(out_resultfile, "%3d ", params->tasksPerNode); @@ -603,6 +614,10 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) PrintKeyValDouble("OPsMean", ops->mean); PrintKeyValDouble("OPsSD", ops->sd); PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps)); + if(test->params.stoneWallingWearOut){ + PrintKeyValDouble("StoneWallTime", stonewall_time / reps); + PrintKeyValDouble("StoneWallbwMeanMIB", stonewall_avg_data_accessed / stonewall_time / MEBIBYTE); + } PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE); PrintEndSection(); }else if (outputFormat == OUTPUT_CSV){ @@ -635,10 +650,10 @@ void PrintLongSummaryHeader() } fprintf(out_resultfile, "\n"); - fprintf(out_resultfile, "%-9s %10s %10s %10s %10s %10s %10s %10s %10s %10s", + fprintf(out_resultfile, "%-9s %10s %10s %10s %10s %10s %10s %10s %10s %10s %10s %13s", "Operation", "Max(MiB)", "Min(MiB)", "Mean(MiB)", "StdDev", "Max(OPs)", "Min(OPs)", "Mean(OPs)", "StdDev", - "Mean(s)"); + "Mean(s)", "Stonewall(s)", "Stonewall(MiB)"); fprintf(out_resultfile, " Test# #Tasks tPN reps fPP reord reordoff reordrand seed" " segcnt "); fprintf(out_resultfile, "%8s %8s %9s %5s", " blksiz", "xsize","aggs(MiB)", "API"); diff --git a/src/ior.c b/src/ior.c index 15b1d07..4cd5571 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1400,7 +1400,7 @@ static void TestIoSys(IOR_test_t *test) /* Get rankOffset [file offset] for this process to read, based on -C,-Z,-Q,-X options */ /* Constant process offset reading */ if (params->reorderTasks) { - /* move one node away from writing node */ + /* move one node away from writing node */ int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */ if (params->tasksBlockMapping) { shift=params->tasksPerNode; /* switch to a by-slot (contiguous block) mapping */ @@ -1944,10 +1944,6 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time); point->stonewall_min_data_accessed *= test->numTasks; } - if(pairs_accessed_min == pairCnt){ - point->stonewall_min_data_accessed = 0; - point->stonewall_avg_data_accessed = 0; - } if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! for(; pairCnt < point->pairs_accessed; pairCnt++ ) { From 061b5a860f6ac010ec6cabab54d83ef2a1cad0d7 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 1 Aug 2019 17:54:11 +0100 Subject: [PATCH 2/3] Backmerged: New option: print rate AND time; improves debugging. --- src/mdtest.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index f65ea7f..d2cd068 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -131,6 +131,7 @@ static uint64_t items_per_dir; static uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */ static int directory_loops; static int print_time; +static int print_rate_and_time; static int random_seed; static int shared_file; static int files_only; @@ -1197,7 +1198,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[7]); } -void summarize_results(int iterations) { +void summarize_results(int iterations, int print_time) { char access[MAX_PATHLEN]; int i, j, k; int start, stop, tableSize = MDTEST_LAST_NUM; @@ -1794,6 +1795,7 @@ void mdtest_init_args(){ items_per_dir = 0; random_seed = 0; print_time = 0; + print_rate_and_time = 0; shared_file = 0; files_only = 0; dirs_only = 0; @@ -1864,6 +1866,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & items}, {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & nstride}, {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & pre_delay}, + {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & print_rate_and_time}, {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize}, {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & random_seed}, {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride}, @@ -2154,7 +2157,12 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * // keep track of the current status for stonewalling mdtest_iteration(i, j, testgroup, & summary_table[j]); } - summarize_results(iterations); + if (print_rate_and_time){ + summarize_results(iterations, 0); + summarize_results(iterations, 1); + }else{ + summarize_results(iterations, print_time); + } if (i == 1 && stride > 1) { i = 0; } From df8355a9bc755c9c83384fa14d7c128ce6b347b3 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 1 Aug 2019 17:57:45 +0100 Subject: [PATCH 3/3] Added output of mdtest stonewall timer. --- src/mdtest.c | 232 +++++++++++++++++++++++++++------------------------ 1 file changed, 123 insertions(+), 109 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index d2cd068..1ff204d 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -968,10 +968,10 @@ int updateStoneWallIterations(int iteration, rank_progress_t * progress, double MPI_Reduce(& progress->items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); long long sum_accessed = 0; MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); + summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; + summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; if(items != (sum_accessed / size)){ - summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; - summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); hit = 1; } @@ -1193,6 +1193,9 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[4]); + if(summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ + VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); + } VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[5]); VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[6]); VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[7]); @@ -1211,129 +1214,141 @@ void summarize_results(int iterations, int print_time) { MPI_Barrier(testComm); for(int i=0; i < iterations; i++){ - if(print_time){ - MPI_Gather(& summary_table[i].time[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); - }else{ - MPI_Gather(& summary_table[i].rate[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); - } + MPI_Gather(& summary_table[i].time[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); + MPI_Gather(& summary_table[i].rate[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); } - if (rank == 0) { + if (rank != 0) { + return; + } - VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); - VERBOSE(0,-1," Operation Max Min Mean Std Dev"); - VERBOSE(0,-1," --------- --- --- ---- -------"); + VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); + VERBOSE(0,-1," Operation Max Min Mean Std Dev"); + VERBOSE(0,-1," --------- --- --- ---- -------"); - /* if files only access, skip entries 0-3 (the dir tests) */ - if (files_only && !dirs_only) { - start = 4; - } else { - start = 0; - } + /* if files only access, skip entries 0-3 (the dir tests) */ + if (files_only && !dirs_only) { + start = 4; + } else { + start = 0; + } - /* if directories only access, skip entries 4-7 (the file tests) */ - if (dirs_only && !files_only) { - stop = 4; - } else { - stop = 8; - } + /* if directories only access, skip entries 4-7 (the file tests) */ + if (dirs_only && !files_only) { + stop = 4; + } else { + stop = 8; + } - /* special case: if no directory or file tests, skip all */ - if (!dirs_only && !files_only) { - start = stop = 0; - } + /* special case: if no directory or file tests, skip all */ + if (!dirs_only && !files_only) { + start = stop = 0; + } - for (i = start; i < stop; i++) { - min = max = all[i]; - for (k=0; k < size; k++) { - for (j = 0; j < iterations; j++) { - curr = all[(k*tableSize*iterations) - + (j*tableSize) + i]; - if (min > curr) { - min = curr; - } - if (max < curr) { - max = curr; - } - sum += curr; - } - } - mean = sum / (iterations * size); - for (k=0; k curr) { + min = curr; + } + if (max < curr) { + max = curr; + } + sum += curr; } - - if (min > curr) { - min = curr; - } - if (max < curr) { - max = curr; - } - sum += curr; } - mean = sum / (iterations); - for (j = 0; j < iterations; j++) { - if(print_time){ - curr = summary_table[j].time[i]; - }else{ - curr = summary_table[j].rate[i]; + mean = sum / (iterations * size); + for (k=0; k curr) { + min = curr; + } + if (max < curr) { + max = curr; + } + sum += curr; } + mean = sum / (iterations); + for (j = 0; j < iterations; j++) { + if(print_time){ + curr = summary_table[j].time[i]; + }else{ + curr = summary_table[j].rate[i]; + } + + var += pow((mean - curr), 2); + } + var = var / (iterations); + sd = sqrt(var); + switch (i) { + case 8: strcpy(access, "Tree creation :"); break; + case 9: strcpy(access, "Tree removal :"); break; + default: strcpy(access, "ERR"); break; + } + fprintf(out_logfile, " %s ", access); + fprintf(out_logfile, "%14.3f ", max); + fprintf(out_logfile, "%14.3f ", min); + fprintf(out_logfile, "%14.3f ", mean); + fprintf(out_logfile, "%14.3f\n", sd); + fflush(out_logfile); + sum = var = 0; } } @@ -1474,7 +1489,7 @@ void show_file_system_size(char *file_system) { /* show results */ VERBOSE(0,-1,"Path: %s", real_path); - VERBOSE(0,-1,"FS: %.1f %s Used FS: %2.1f%% Inodes: %.1f %s Used Inodes: %2.1f%%\n", + VERBOSE(0,-1,"FS: %.1f %s Used FS: %2.1f%% Inodes: %.1f %s Used Inodes: %2.1f%%\n", total_file_system_size_hr, file_system_unit_str, used_file_system_percentage, (double)total_inodes / (double)inode_unit_val, inode_unit_str, used_inode_percentage); @@ -1881,7 +1896,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & sync_file}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, - {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, LAST_OPTION }; options_all_t * global_options = airoi_create_all_module_options(options);