diff --git a/src/ior.c b/src/ior.c index 6a2a153..4889109 100755 --- a/src/ior.c +++ b/src/ior.c @@ -100,7 +100,11 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out TestIoSys(tptr); if(rank == 0 && tptr->params.stoneWallingWearOut){ - fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed); + if (tptr->params.stoneWallingStatusFile[0] != '\0'){ /* array member: its address is never NULL, so test for a non-empty name */ + StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed); + }else{ + fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed); + } } tptr->results->errors = totalErrorCount; } @@ -197,7 +201,9 @@ int ior_main(int argc, char **argv) } TestIoSys(tptr); - if(rank == 0 && tptr->params.stoneWallingWearOut){ + if (rank == 0 && tptr->params.stoneWallingWearOut && tptr->params.stoneWallingStatusFile[0] != '\0'){ /* same guard as ior_run: rank 0 only, wear-out enabled, status file named */ + StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed); + }else if (rank == 0 && tptr->params.stoneWallingWearOut){ /* no status file: keep the previous log output */ fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed); } } @@ -796,6 +802,7 @@ static void DisplayUsage(char **argv) " -D N deadlineForStonewalling -- seconds before stopping write or read phase", " -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", " -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", + " -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", " -e fsync -- perform fsync/msync upon POSIX/MMAP write close", " -E useExistingTestFile -- do not remove test file before write access", " -f S scriptFile -- test script name", @@ -2707,6 +2714,13 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi && ((GetTimeStamp() - startForStonewall) > 
test->deadlineForStonewalling)); + if(access == READ && test->stoneWallingStatusFile[0] != '\0'){ /* array member: its address is never NULL, so test for a non-empty name */ + test->stoneWallingWearOutIterations = ReadStoneWallingIterations(test->stoneWallingStatusFile); + if(test->stoneWallingWearOutIterations == (uint64_t) -1){ /* the field is unsigned, so the reader's -1 arrives as the all-ones value */ + ERR("Could not read back the stonewalling status from the file!"); + } + } + /* loop over offsets to access */ while ((offsetArray[pairCnt] != -1) && !hitStonewall ) { dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); diff --git a/src/ior.h b/src/ior.h index ec386d7..76ea323 100755 --- a/src/ior.h +++ b/src/ior.h @@ -135,6 +135,8 @@ typedef struct int deadlineForStonewalling; /* max time in seconds to run any test phase */ int stoneWallingWearOut; /* wear out the stonewalling, once the timout is over, each process has to write the same amount */ uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */ + char stoneWallingStatusFile[MAXPATHLEN]; /* file that keeps the number of stonewalling iterations of the write phase for a later read phase */ + int maxTimeDuration; /* max time in minutes to run each test */ int outlierThreshold; /* warn on outlier N seconds from mean */ int verbose; /* verbosity */ diff --git a/src/mdtest.c b/src/mdtest.c index 838a586..a7b5ebc 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -174,39 +174,7 @@ typedef struct{ /* for making/removing unique directory && stating/deleting subdirectory */ enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR}; -#ifdef __linux__ -#define FAIL(msg) do { \ - fprintf(out_logfile, "%s: Process %d(%s): FAILED in %s, %s: %s\n", \ - print_timestamp(), rank, hostname, __func__, \ - msg, strerror(errno)); \ - fflush(out_logfile); \ - MPI_Abort(testComm, 1); \ - } while(0) -#else -#define FAIL(msg) do { \ - fprintf(out_logfile, "%s: Process %d(%s): FAILED at %d, %s: %s\n", \ - print_timestamp(), rank, hostname, __LINE__, \ - msg, strerror(errno)); \ - fflush(out_logfile); \ - MPI_Abort(testComm, 1); \ - } while(0) -#endif -static char 
*print_timestamp() { - static char datestring[80]; - time_t cur_timestamp; - - - if (( rank == 0 ) && ( verbose >= 1 )) { - fprintf( out_logfile, "V-1: Entering print_timestamp...\n" ); - } - - fflush(out_logfile); - cur_timestamp = time(NULL); - strftime(datestring, 80, "%m/%d/%Y %T", localtime(&cur_timestamp)); - - return datestring; -} #if MPI_VERSION >= 3 int count_tasks_per_node(void) { @@ -2199,7 +2167,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * nodeCount = size / count_tasks_per_node(); if (rank == 0) { - fprintf(out_logfile, "-- started at %s --\n\n", print_timestamp()); + fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp()); fprintf(out_logfile, "mdtest-%s was launched with %d total task(s) on %d node(s)\n", RELEASE_VERS, size, nodeCount); fflush(out_logfile); @@ -2570,7 +2538,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if(CHECK_STONE_WALL(& progress)){ fprintf(out_logfile, "\n-- hit stonewall\n"); } - fprintf(out_logfile, "\n-- finished at %s --\n", print_timestamp()); + fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp()); fflush(out_logfile); } diff --git a/src/parse_options.c b/src/parse_options.c index 506df33..0ebb691 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -186,6 +186,8 @@ void DecodeDirective(char *line, IOR_param_t *params) params->stoneWallingWearOut = atoi(value); } else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) { params->stoneWallingWearOutIterations = atoll(value); + } else if (strcasecmp(option, "stoneWallingStatusFile") == 0) { + snprintf(params->stoneWallingStatusFile, sizeof(params->stoneWallingStatusFile), "%s", value); /* bounded copy: the option value is user input and the target is a fixed-size array */ } else if (strcasecmp(option, "maxtimeduration") == 0) { params->maxTimeDuration = atoi(value); } else if (strcasecmp(option, "outlierthreshold") == 0) { diff --git a/src/utilities.c b/src/utilities.c index ed6d830..f71d790 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -539,3 +539,50 @@ void init_clock(){ /* check 
for skew between tasks' start times */ wall_clock_deviation = TimeDeviation(); } + +char * PrintTimestamp() { + static char datestring[80]; + time_t cur_timestamp; + + if (( rank == 0 ) && ( verbose >= 1 )) { + fprintf( out_logfile, "V-1: Entering PrintTimestamp...\n" ); + } + + fflush(out_logfile); + cur_timestamp = time(NULL); + strftime(datestring, 80, "%m/%d/%Y %T", localtime(&cur_timestamp)); + + return datestring; +} + +/* Read the iteration count from filename on rank 0 and broadcast it to all ranks. */ +/* Returns -1 on every rank if the file cannot be opened or parsed. */ +int64_t ReadStoneWallingIterations(char * const filename){ + /* -1 is the failure marker; rank 0 replaces it only after a successful parse */ + long long data = -1; + if(rank == 0){ + FILE * in = fopen(filename, "r"); + if (in != NULL){ + if (fscanf(in, "%lld", & data) != 1){ + data = -1; + } + fclose(in); + } + } + /* Every rank joins the broadcast, also after a failure on rank 0, */ + /* so no rank is left waiting in MPI_Bcast. */ + MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); + return data; +} + +void StoreStoneWallingIterations(char * const filename, int64_t count){ + if(rank != 0){ + return; + } + FILE * out = fopen(filename, "w"); + if (out == NULL){ + FAIL("Cannot write to the stonewalling file!"); + } + fprintf(out, "%lld", (long long) count); + fclose(out); +} diff --git a/src/utilities.h b/src/utilities.h index cc50e1a..3865757 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -27,6 +27,24 @@ extern MPI_Comm testComm; extern MPI_Comm mpi_comm_world; extern FILE * out_logfile; +#ifdef __linux__ +#define FAIL(msg) do { \ + fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \ + PrintTimestamp(), rank, __func__, \ + msg, strerror(errno)); \ + fflush(out_logfile); \ + MPI_Abort(testComm, 1); \ + } while(0) +#else +#define FAIL(msg) do { \ + fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \ + PrintTimestamp(), rank, __LINE__, \ + msg, strerror(errno)); \ + fflush(out_logfile); \ + MPI_Abort(testComm, 1); \ + } while(0) +#endif + void set_o_direct_flag(int *fd); char *CurrentTimeString(void); @@ -38,8 +56,13 @@ void SeedRandGen(MPI_Comm); void SetHints (MPI_Info *, char *); void 
ShowHints (MPI_Info *); +/* Reads the stonewalling iteration count stored in filename; returns -1 if the file cannot be opened or parsed */ +int64_t ReadStoneWallingIterations(char * const filename); +void StoreStoneWallingIterations(char * const filename, int64_t count); + void init_clock(void); double GetTimeStamp(void); +char * PrintTimestamp(); // TODO remove this function extern double wall_clock_deviation; extern double wall_clock_delta; diff --git a/testing/complex-tests.sh b/testing/complex-tests.sh index 665727b..a759cf2 100755 --- a/testing/complex-tests.sh +++ b/testing/complex-tests.sh @@ -8,6 +8,12 @@ ROOT=${0%/*} source $ROOT/test-lib.sh +# stonewalling tests: one write run that records its iteration count in stonewall.log, then two read runs that use the file +IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15 +IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still! +IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30 + + #shared tests IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k