From 7a7655e95916c9211e95769b2cebe27f08c7f98e Mon Sep 17 00:00:00 2001 From: Jean-Yves VET Date: Wed, 19 Sep 2018 23:39:25 +0200 Subject: [PATCH] Fix erroneous write bandwidth with stonewalling Context: write and read results from the same iteration use the same length value in bytes. When stonewalling is used, the size varies depending on the performance of the access. This leads to wrong max bandwidths being reported for writes, as shown in the following example: write 10052 ... read 9910 ... write 10022 ... read 9880 ... write 10052 ... read 9894 ... Max Write: 9371.43 MiB/sec (9826.66 MB/sec) Max Read: 9910.48 MiB/sec (10391.89 MB/sec) This patch makes IOR use separate variables for read and write tests. --- src/ior-output.c | 57 ++++++++++++++++++++++----------------- src/ior.c | 70 ++++++++++++++++++++++++++---------------------- src/ior.h | 16 ++++++----- 3 files changed, 80 insertions(+), 63 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index 64bc730..a6b8d9c 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -385,10 +385,12 @@ void ShowTestStart(IOR_param_t *test) void ShowTestEnd(IOR_test_t *tptr){ if(rank == 0 && tptr->params.stoneWallingWearOut){ + + size_t pairs_accessed = tptr->results->write.pairs_accessed; if (tptr->params.stoneWallingStatusFile){ - StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed); + StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, pairs_accessed); }else{ - fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed); + fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %ld\n", pairs_accessed); } } PrintEndSection(); @@ -455,8 +457,8 @@ void ShowSetup(IOR_param_t *params) } static struct results *bw_ops_values(const int reps, IOR_results_t *measured, - const int offset, IOR_offset_t transfer_size, - const double *vals) + IOR_offset_t transfer_size, + const double *vals, const int access) { 
struct results *r; int i; @@ -468,7 +470,10 @@ static struct results *bw_ops_values(const int reps, IOR_results_t *measured, r->val = (double *)&r[1]; for (i = 0; i < reps; i++, measured++) { - r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) + IOR_point_t *point = (access == WRITE) ? &measured->write : + &measured->read; + + r->val[i] = ((double) (point->aggFileSizeForBW)) / transfer_size / vals[i]; if (i == 0) { @@ -492,24 +497,22 @@ static struct results *bw_ops_values(const int reps, IOR_results_t *measured, } static struct results *bw_values(const int reps, IOR_results_t *measured, - const int offset, const double *vals) + const double *vals, const int access) { - return bw_ops_values(reps, measured, offset, 1, vals); + return bw_ops_values(reps, measured, 1, vals, access); } static struct results *ops_values(const int reps, IOR_results_t *measured, - const int offset, IOR_offset_t transfer_size, - const double *vals) + IOR_offset_t transfer_size, + const double *vals, const int access) { - return bw_ops_values(reps, measured, offset, transfer_size, vals); + return bw_ops_values(reps, measured, transfer_size, vals, access); } /* * Summarize results - * - * operation is typically "write" or "read" */ -static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation) +static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) { IOR_param_t *params = &test->params; IOR_results_t *results = test->results; @@ -524,14 +527,20 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha double * times = malloc(sizeof(double)* reps); for(int i=0; i < reps; i++){ - times[i] = *(double*)((char*) & results[i] + times_offset); + IOR_point_t *point = (access == WRITE) ? 
&results[i].write : + &results[i].read; + times[i] = point->time; } - bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times); - ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times); + bw = bw_values(reps, results, times, access); + ops = ops_values(reps, results, params->transferSize, times, access); + + IOR_point_t *point = (access == WRITE) ? &results[0].write : + &results[0].read; + if(outputFormat == OUTPUT_DEFAULT){ - fprintf(out_resultfile, "%-9s ", operation); + fprintf(out_resultfile, "%-9s ", access == WRITE ? "write" : "read"); fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE); fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE); fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE); @@ -553,13 +562,13 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha fprintf(out_resultfile, "%6lld ", params->segmentCount); fprintf(out_resultfile, "%8lld ", params->blockSize); fprintf(out_resultfile, "%8lld ", params->transferSize); - fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE); + fprintf(out_resultfile, "%9.1f ", (float)point->aggFileSizeForBW / MEBIBYTE); fprintf(out_resultfile, "%3s ", params->api); fprintf(out_resultfile, "%6d", params->referenceNumber); fprintf(out_resultfile, "\n"); }else if (outputFormat == OUTPUT_JSON){ PrintStartSection(); - PrintKeyVal("operation", operation); + PrintKeyVal("operation", access == WRITE ? 
"write" : "read"); PrintKeyVal("API", params->api); PrintKeyValInt("TestID", params->id); PrintKeyValInt("ReferenceNumber", params->referenceNumber); @@ -586,7 +595,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha PrintKeyValDouble("OPsMean", ops->mean); PrintKeyValDouble("OPsSD", ops->sd); PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps)); - PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE); + PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE); PrintEndSection(); }else if (outputFormat == OUTPUT_CSV){ @@ -604,9 +613,9 @@ void PrintLongSummaryOneTest(IOR_test_t *test) IOR_param_t *params = &test->params; if (params->writeFile) - PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write"); + PrintLongSummaryOneOperation(test, WRITE); if (params->readFile) - PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read"); + PrintLongSummaryOneOperation(test, READ); } void PrintLongSummaryHeader() @@ -672,9 +681,9 @@ void PrintShortSummary(IOR_test_t * test) reps = params->repetitions; for (i = 0; i < reps; i++) { - bw = (double)results[i].aggFileSizeForBW / results[i].writeTime; + bw = (double)results[i].write.aggFileSizeForBW / results[i].write.time; max_write_bw = MAX(bw, max_write_bw); - bw = (double)results[i].aggFileSizeForBW / results[i].readTime; + bw = (double)results[i].read.aggFileSizeForBW / results[i].read.time; max_read_bw = MAX(bw, max_read_bw); } diff --git a/src/ior.c b/src/ior.c index edfb806..e8c477f 100755 --- a/src/ior.c +++ b/src/ior.c @@ -281,30 +281,33 @@ CheckForOutliers(IOR_param_t *test, const double *timer, const int access) * Check if actual file size equals expected size; if not use actual for * calculating performance rate. 
*/ -static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep) +static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, + const int access) { IOR_param_t *params = &test->params; IOR_results_t *results = test->results; + IOR_point_t *point = (access == WRITE) ? &results[rep].write : + &results[rep].read; - MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer, + MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { if (verbose >= VERBOSE_0 && rank == 0) { if ((params->expectedAggFileSize - != results[rep].aggFileSizeFromXfer) - || (results[rep].aggFileSizeFromStat - != results[rep].aggFileSizeFromXfer)) { + != point->aggFileSizeFromXfer) + || (point->aggFileSizeFromStat + != point->aggFileSizeFromXfer)) { fprintf(out_logfile, "WARNING: Expected aggregate file size = %lld.\n", (long long) params->expectedAggFileSize); fprintf(out_logfile, "WARNING: Stat() of aggregate file size = %lld.\n", - (long long) results[rep].aggFileSizeFromStat); + (long long) point->aggFileSizeFromStat); fprintf(out_logfile, "WARNING: Using actual aggregate bytes moved = %lld.\n", - (long long) results[rep].aggFileSizeFromXfer); + (long long) point->aggFileSizeFromXfer); if(params->deadlineForStonewalling){ fprintf(out_logfile, "WARNING: maybe caused by deadlineForStonewalling\n"); @@ -312,7 +315,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep) } } } - results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer; + + point->aggFileSizeForBW = point->aggFileSizeFromXfer; } /* @@ -871,15 +875,15 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce totalTime = reduced[5] - reduced[0]; - double *time = (access == WRITE) ? 
&test->results[rep].writeTime : - &test->results[rep].readTime; + IOR_point_t *point = (access == WRITE) ? &test->results[rep].write : + &test->results[rep].read; - *time = totalTime; + point->time = totalTime; if (verbose < VERBOSE_0) return; - bw = (double)test->results[rep].aggFileSizeForBW / totalTime; + bw = (double)point->aggFileSizeForBW / totalTime; PrintReducedResult(test, access, bw, diff, totalTime, rep); } @@ -1312,7 +1316,7 @@ static void TestIoSys(IOR_test_t *test) CurrentTimeString()); } timer[2] = GetTimeStamp(); - dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers); + dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers); if (params->verbose >= VERBOSE_4) { fprintf(out_logfile, "* data moved = %llu\n", dataMoved); fflush(out_logfile); @@ -1328,12 +1332,12 @@ static void TestIoSys(IOR_test_t *test) MPI_CHECK(MPI_Barrier(testComm), "barrier error"); /* get the size of the file just written */ - results[rep].aggFileSizeFromStat = + results[rep].write.aggFileSizeFromStat = backend->get_file_size(params, testComm, testFileName); /* check if stat() of file doesn't equal expected file size, use actual amount of byte moved */ - CheckFileSize(test, dataMoved, rep); + CheckFileSize(test, dataMoved, rep, WRITE); if (verbose >= VERBOSE_3) WriteTimes(params, timer, rep, WRITE); @@ -1344,7 +1348,7 @@ static void TestIoSys(IOR_test_t *test) /* check if in this round we run write with stonewalling */ if(params->deadlineForStonewalling > 0){ - params->stoneWallingWearOutIterations = results[rep].pairs_accessed; + params->stoneWallingWearOutIterations = results[rep].write.pairs_accessed; } } @@ -1372,7 +1376,7 @@ static void TestIoSys(IOR_test_t *test) GetTestFileName(testFileName, params); params->open = WRITECHECK; fd = backend->open(testFileName, params); - dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers); + dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers); backend->close(fd, 
params); rankOffset = 0; } @@ -1451,7 +1455,7 @@ static void TestIoSys(IOR_test_t *test) CurrentTimeString()); } timer[2] = GetTimeStamp(); - dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers); + dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers); timer[3] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), @@ -1461,13 +1465,13 @@ static void TestIoSys(IOR_test_t *test) timer[5] = GetTimeStamp(); /* get the size of the file just read */ - results[rep].aggFileSizeFromStat = + results[rep].read.aggFileSizeFromStat = backend->get_file_size(params, testComm, testFileName); /* check if stat() of file doesn't equal expected file size, use actual amount of byte moved */ - CheckFileSize(test, dataMoved, rep); + CheckFileSize(test, dataMoved, rep, READ); if (verbose >= VERBOSE_3) WriteTimes(params, timer, rep, READ); @@ -1882,6 +1886,8 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, IOR_offset_t dataMoved = 0; /* for data rate calculation */ double startForStonewall; int hitStonewall; + IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ? 
+ &results->write : &results->read; /* initialize values */ pretendRank = (rank + rankOffset) % test->numTasks; @@ -1910,35 +1916,35 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, } long long data_moved_ll = (long long) dataMoved; long long pairs_accessed_min = 0; - MPI_CHECK(MPI_Allreduce(& pairCnt, &results->pairs_accessed, + MPI_CHECK(MPI_Allreduce(& pairCnt, &point->pairs_accessed, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved"); double stonewall_runtime = GetTimeStamp() - startForStonewall; - results->stonewall_time = stonewall_runtime; + point->stonewall_time = stonewall_runtime; MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); - MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_min_data_accessed, + MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); - MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_avg_data_accessed, + MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved"); if(rank == 0){ fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n", - pairs_accessed_min, results->pairs_accessed, - results->stonewall_min_data_accessed /1024.0 / 1024 / 1024, results->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , results->stonewall_time); - results->stonewall_min_data_accessed *= test->numTasks; + pairs_accessed_min, point->pairs_accessed, + point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time); + point->stonewall_min_data_accessed *= test->numTasks; } if(pairs_accessed_min == pairCnt){ - results->stonewall_min_data_accessed = 0; - 
results->stonewall_avg_data_accessed = 0; + point->stonewall_min_data_accessed = 0; + point->stonewall_avg_data_accessed = 0; } - if(pairCnt != results->pairs_accessed){ + if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! - for(; pairCnt < results->pairs_accessed; pairCnt++ ) { + for(; pairCnt < point->pairs_accessed; pairCnt++ ) { dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); } } }else{ - results->pairs_accessed = pairCnt; + point->pairs_accessed = pairCnt; } diff --git a/src/ior.h b/src/ior.h index 67198c3..e76a64d 100755 --- a/src/ior.h +++ b/src/ior.h @@ -204,12 +204,9 @@ typedef struct int intraTestBarriers; /* barriers between open/op and op/close */ } IOR_param_t; -/* each pointer is to an array, each of length equal to the number of - repetitions in the test */ +/* each pointer for a single test */ typedef struct { - double writeTime; - double readTime; - int errors; + double time; size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling double stonewall_time; @@ -219,16 +216,21 @@ typedef struct { IOR_offset_t aggFileSizeFromStat; IOR_offset_t aggFileSizeFromXfer; IOR_offset_t aggFileSizeForBW; +} IOR_point_t; + +typedef struct { + int errors; + IOR_point_t write; + IOR_point_t read; } IOR_results_t; /* define the queuing structure for the test parameters */ typedef struct IOR_test_t { IOR_param_t params; - IOR_results_t *results; /* This is an array of reps times IOR_results_t */ + IOR_results_t *results; struct IOR_test_t *next; } IOR_test_t; - IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num); void AllocResults(IOR_test_t *test);