From aafdf897ff422b3afc3ddfe31b8162ae3e01687e Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 19 Dec 2018 20:29:47 +0000 Subject: [PATCH 1/2] Bugfix for stonewall computation. --- src/mdtest.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index e83121c..3998017 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -320,7 +320,7 @@ static void create_file (const char *path, uint64_t itemNum) { //create files sprintf(curr_item, "%s/file.%s"LLU"", path, mk_name, itemNum); - if (rank == 0 && verbose >= 3) { + if ((rank == 0 && verbose >= 3) || verbose >= 5) { fprintf(out_logfile, "V-3: create_remove_items_helper (non-dirs create): curr_item is \"%s\"\n", curr_item); fflush(out_logfile); } @@ -401,6 +401,10 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa create_remove_dirs (path, create, itemNum + i); } if(CHECK_STONE_WALL(progress)){ + if(progress->items_done != 0){ + printf("Error, this is an invalid configuration with stonewall!\n"); + exit(1); + } progress->items_done = i + 1; return; } @@ -1056,13 +1060,13 @@ int updateStoneWallIterations(int iteration, rank_progress_t * progress, double long long sum_accessed = 0; MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); - if(items != (sum_accessed / size) && rank == 0){ + if(items != (sum_accessed / size)){ summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; - fprintf( out_logfile, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); - fflush( out_logfile ); - } - if( done != max_iter ){ + if (rank == 0){ + fprintf( out_logfile, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); + fflush( out_logfile ); + } hit = 1; } 
progress->items_start = done; @@ -1119,10 +1123,13 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (hit){ progress->stone_wall_timer_seconds = 0; - printf("stonewall rank %d: %lld of %lld \n", rank, (long long) progress->items_start, (long long) progress->items_per_dir); + if (verbose > 1){ + printf("stonewall rank %d: %lld of %lld \n", rank, (long long) progress->items_start, (long long) progress->items_per_dir); + } create_remove_items(0, 0, 1, 0, temp_path, 0, progress); // now reset the values progress->stone_wall_timer_seconds = stone_wall_timer_seconds; + items = progress->items_done; } if (stoneWallingStatusFile){ StoreStoneWallingIterations(stoneWallingStatusFile, progress->items_done); From cb40c99e1baca256b2a445c48fd8bbec8da27586 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 19 Dec 2018 21:37:37 +0000 Subject: [PATCH 2/2] Reset stonewall timer to make it work again with running all phases in one execution; i.e. not using multiple runs specifying: -C, -r --- src/mdtest.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 3998017..37032f1 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -300,7 +300,6 @@ static void remove_file (const char *path, uint64_t itemNum) { fprintf(out_logfile, "V-3: create_remove_items_helper (non-dirs remove): curr_item is \"%s\"\n", curr_item); fflush(out_logfile); } - if (!(shared_file && rank != 0)) { backend->delete (curr_item, &param); } @@ -401,11 +400,9 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa create_remove_dirs (path, create, itemNum + i); } if(CHECK_STONE_WALL(progress)){ - if(progress->items_done != 0){ - printf("Error, this is an invalid configuration with stonewall!\n"); - exit(1); + if(progress->items_done == 0){ + progress->items_done = i + 1; } - progress->items_done = i + 1; return; } } @@ -1124,7 +1121,7 @@ void file_test(const int iteration, const int ntasks, 
const char *path, rank_pro if (hit){ progress->stone_wall_timer_seconds = 0; if (verbose > 1){ - printf("stonewall rank %d: %lld of %lld \n", rank, (long long) progress->items_start, (long long) progress->items_per_dir); + printf("stonewall rank %d: %lld of %lld \n", rank, (long long) progress->items_start, (long long) progress->items_per_dir); } create_remove_items(0, 0, 1, 0, temp_path, 0, progress); // now reset the values @@ -1134,6 +1131,8 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (stoneWallingStatusFile){ StoreStoneWallingIterations(stoneWallingStatusFile, progress->items_done); } + // reset stone wall timer to allow proper cleanup + progress->stone_wall_timer_seconds = 0; } } }else{ @@ -1221,6 +1220,8 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro t[3] = MPI_Wtime(); if (remove_only) { + progress->items_start = 0; + for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (unique_dir_per_task) { @@ -2032,6 +2033,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t MPI_Barrier(testComm); if (remove_only) { + progress->items_start = 0; startCreate = MPI_Wtime(); for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(j, dir_iter);