Fix erroneous write bandwidth with stonewalling

Context: write and read results from the same iteration
use the same length value in Bytes. When stonewalling is
used the size varies depending on the performance of
the access. This leads to wrong max bandwidths reported
for writes as shown in the following example:

    write     10052      ...
    read      9910       ...
    write     10022      ...
    read      9880       ...
    write     10052      ...
    read      9894       ...
    Max Write: 9371.43 MiB/sec (9826.66 MB/sec)
    Max Read:  9910.48 MiB/sec (10391.89 MB/sec)

This patch makes IOR separate variables used for read
and write tests.
master
Jean-Yves VET 2018-09-19 23:39:25 +02:00
parent 8c727fa99c
commit 7a7655e959
3 changed files with 80 additions and 63 deletions

View File

@ -385,10 +385,12 @@ void ShowTestStart(IOR_param_t *test)
void ShowTestEnd(IOR_test_t *tptr){
if(rank == 0 && tptr->params.stoneWallingWearOut){
size_t pairs_accessed = tptr->results->write.pairs_accessed;
if (tptr->params.stoneWallingStatusFile){
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, pairs_accessed);
}else{
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %ld\n", pairs_accessed);
}
}
PrintEndSection();
@ -455,8 +457,8 @@ void ShowSetup(IOR_param_t *params)
}
static struct results *bw_ops_values(const int reps, IOR_results_t *measured,
const int offset, IOR_offset_t transfer_size,
const double *vals)
IOR_offset_t transfer_size,
const double *vals, const int access)
{
struct results *r;
int i;
@ -468,7 +470,10 @@ static struct results *bw_ops_values(const int reps, IOR_results_t *measured,
r->val = (double *)&r[1];
for (i = 0; i < reps; i++, measured++) {
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset))
IOR_point_t *point = (access == WRITE) ? &measured->write :
&measured->read;
r->val[i] = ((double) (point->aggFileSizeForBW))
/ transfer_size / vals[i];
if (i == 0) {
@ -492,24 +497,22 @@ static struct results *bw_ops_values(const int reps, IOR_results_t *measured,
}
static struct results *bw_values(const int reps, IOR_results_t *measured,
const int offset, const double *vals)
const double *vals, const int access)
{
return bw_ops_values(reps, measured, offset, 1, vals);
return bw_ops_values(reps, measured, 1, vals, access);
}
static struct results *ops_values(const int reps, IOR_results_t *measured,
const int offset, IOR_offset_t transfer_size,
const double *vals)
IOR_offset_t transfer_size,
const double *vals, const int access)
{
return bw_ops_values(reps, measured, offset, transfer_size, vals);
return bw_ops_values(reps, measured, transfer_size, vals, access);
}
/*
* Summarize results
*
* operation is typically "write" or "read"
*/
static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation)
static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
{
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
@ -524,14 +527,20 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
double * times = malloc(sizeof(double)* reps);
for(int i=0; i < reps; i++){
times[i] = *(double*)((char*) & results[i] + times_offset);
IOR_point_t *point = (access == WRITE) ? &results[i].write :
&results[i].read;
times[i] = point->time;
}
bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times);
ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times);
bw = bw_values(reps, results, times, access);
ops = ops_values(reps, results, params->transferSize, times, access);
IOR_point_t *point = (access == WRITE) ? &results[0].write :
&results[0].read;
if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-9s ", operation);
fprintf(out_resultfile, "%-9s ", access == WRITE ? "write" : "read");
fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE);
@ -553,13 +562,13 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
fprintf(out_resultfile, "%6lld ", params->segmentCount);
fprintf(out_resultfile, "%8lld ", params->blockSize);
fprintf(out_resultfile, "%8lld ", params->transferSize);
fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%9.1f ", (float)point->aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%3s ", params->api);
fprintf(out_resultfile, "%6d", params->referenceNumber);
fprintf(out_resultfile, "\n");
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("operation", operation);
PrintKeyVal("operation", access == WRITE ? "write" : "read");
PrintKeyVal("API", params->api);
PrintKeyValInt("TestID", params->id);
PrintKeyValInt("ReferenceNumber", params->referenceNumber);
@ -586,7 +595,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
PrintKeyValDouble("OPsMean", ops->mean);
PrintKeyValDouble("OPsSD", ops->sd);
PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps));
PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE);
PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE);
PrintEndSection();
}else if (outputFormat == OUTPUT_CSV){
@ -604,9 +613,9 @@ void PrintLongSummaryOneTest(IOR_test_t *test)
IOR_param_t *params = &test->params;
if (params->writeFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write");
PrintLongSummaryOneOperation(test, WRITE);
if (params->readFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read");
PrintLongSummaryOneOperation(test, READ);
}
void PrintLongSummaryHeader()
@ -672,9 +681,9 @@ void PrintShortSummary(IOR_test_t * test)
reps = params->repetitions;
for (i = 0; i < reps; i++) {
bw = (double)results[i].aggFileSizeForBW / results[i].writeTime;
bw = (double)results[i].write.aggFileSizeForBW / results[i].write.time;
max_write_bw = MAX(bw, max_write_bw);
bw = (double)results[i].aggFileSizeForBW / results[i].readTime;
bw = (double)results[i].read.aggFileSizeForBW / results[i].read.time;
max_read_bw = MAX(bw, max_read_bw);
}

View File

@ -281,30 +281,33 @@ CheckForOutliers(IOR_param_t *test, const double *timer, const int access)
* Check if actual file size equals expected size; if not use actual for
* calculating performance rate.
*/
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep,
const int access)
{
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
IOR_point_t *point = (access == WRITE) ? &results[rep].write :
&results[rep].read;
MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer,
MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer,
1, MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) {
if (verbose >= VERBOSE_0 && rank == 0) {
if ((params->expectedAggFileSize
!= results[rep].aggFileSizeFromXfer)
|| (results[rep].aggFileSizeFromStat
!= results[rep].aggFileSizeFromXfer)) {
!= point->aggFileSizeFromXfer)
|| (point->aggFileSizeFromStat
!= point->aggFileSizeFromXfer)) {
fprintf(out_logfile,
"WARNING: Expected aggregate file size = %lld.\n",
(long long) params->expectedAggFileSize);
fprintf(out_logfile,
"WARNING: Stat() of aggregate file size = %lld.\n",
(long long) results[rep].aggFileSizeFromStat);
(long long) point->aggFileSizeFromStat);
fprintf(out_logfile,
"WARNING: Using actual aggregate bytes moved = %lld.\n",
(long long) results[rep].aggFileSizeFromXfer);
(long long) point->aggFileSizeFromXfer);
if(params->deadlineForStonewalling){
fprintf(out_logfile,
"WARNING: maybe caused by deadlineForStonewalling\n");
@ -312,7 +315,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
}
}
}
results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer;
point->aggFileSizeForBW = point->aggFileSizeFromXfer;
}
/*
@ -871,15 +875,15 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
totalTime = reduced[5] - reduced[0];
double *time = (access == WRITE) ? &test->results[rep].writeTime :
&test->results[rep].readTime;
IOR_point_t *point = (access == WRITE) ? &test->results[rep].write :
&test->results[rep].read;
*time = totalTime;
point->time = totalTime;
if (verbose < VERBOSE_0)
return;
bw = (double)test->results[rep].aggFileSizeForBW / totalTime;
bw = (double)point->aggFileSizeForBW / totalTime;
PrintReducedResult(test, access, bw, diff, totalTime, rep);
}
@ -1312,7 +1316,7 @@ static void TestIoSys(IOR_test_t *test)
CurrentTimeString());
}
timer[2] = GetTimeStamp();
dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers);
dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers);
if (params->verbose >= VERBOSE_4) {
fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
fflush(out_logfile);
@ -1328,12 +1332,12 @@ static void TestIoSys(IOR_test_t *test)
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
/* get the size of the file just written */
results[rep].aggFileSizeFromStat =
results[rep].write.aggFileSizeFromStat =
backend->get_file_size(params, testComm, testFileName);
/* check if stat() of file doesn't equal expected file size,
use actual amount of byte moved */
CheckFileSize(test, dataMoved, rep);
CheckFileSize(test, dataMoved, rep, WRITE);
if (verbose >= VERBOSE_3)
WriteTimes(params, timer, rep, WRITE);
@ -1344,7 +1348,7 @@ static void TestIoSys(IOR_test_t *test)
/* check if in this round we run write with stonewalling */
if(params->deadlineForStonewalling > 0){
params->stoneWallingWearOutIterations = results[rep].pairs_accessed;
params->stoneWallingWearOutIterations = results[rep].write.pairs_accessed;
}
}
@ -1372,7 +1376,7 @@ static void TestIoSys(IOR_test_t *test)
GetTestFileName(testFileName, params);
params->open = WRITECHECK;
fd = backend->open(testFileName, params);
dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers);
dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers);
backend->close(fd, params);
rankOffset = 0;
}
@ -1451,7 +1455,7 @@ static void TestIoSys(IOR_test_t *test)
CurrentTimeString());
}
timer[2] = GetTimeStamp();
dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers);
dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers);
timer[3] = GetTimeStamp();
if (params->intraTestBarriers)
MPI_CHECK(MPI_Barrier(testComm),
@ -1461,13 +1465,13 @@ static void TestIoSys(IOR_test_t *test)
timer[5] = GetTimeStamp();
/* get the size of the file just read */
results[rep].aggFileSizeFromStat =
results[rep].read.aggFileSizeFromStat =
backend->get_file_size(params, testComm,
testFileName);
/* check if stat() of file doesn't equal expected file size,
use actual amount of byte moved */
CheckFileSize(test, dataMoved, rep);
CheckFileSize(test, dataMoved, rep, READ);
if (verbose >= VERBOSE_3)
WriteTimes(params, timer, rep, READ);
@ -1882,6 +1886,8 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
IOR_offset_t dataMoved = 0; /* for data rate calculation */
double startForStonewall;
int hitStonewall;
IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ?
&results->write : &results->read;
/* initialize values */
pretendRank = (rank + rankOffset) % test->numTasks;
@ -1910,35 +1916,35 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
}
long long data_moved_ll = (long long) dataMoved;
long long pairs_accessed_min = 0;
MPI_CHECK(MPI_Allreduce(& pairCnt, &results->pairs_accessed,
MPI_CHECK(MPI_Allreduce(& pairCnt, &point->pairs_accessed,
1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved");
double stonewall_runtime = GetTimeStamp() - startForStonewall;
results->stonewall_time = stonewall_runtime;
point->stonewall_time = stonewall_runtime;
MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min,
1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_min_data_accessed,
MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed,
1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_avg_data_accessed,
MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed,
1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved");
if(rank == 0){
fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n",
pairs_accessed_min, results->pairs_accessed,
results->stonewall_min_data_accessed /1024.0 / 1024 / 1024, results->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , results->stonewall_time);
results->stonewall_min_data_accessed *= test->numTasks;
pairs_accessed_min, point->pairs_accessed,
point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time);
point->stonewall_min_data_accessed *= test->numTasks;
}
if(pairs_accessed_min == pairCnt){
results->stonewall_min_data_accessed = 0;
results->stonewall_avg_data_accessed = 0;
point->stonewall_min_data_accessed = 0;
point->stonewall_avg_data_accessed = 0;
}
if(pairCnt != results->pairs_accessed){
if(pairCnt != point->pairs_accessed){
// some work needs still to be done !
for(; pairCnt < results->pairs_accessed; pairCnt++ ) {
for(; pairCnt < point->pairs_accessed; pairCnt++ ) {
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
}
}
}else{
results->pairs_accessed = pairCnt;
point->pairs_accessed = pairCnt;
}

View File

@ -204,12 +204,9 @@ typedef struct
int intraTestBarriers; /* barriers between open/op and op/close */
} IOR_param_t;
/* each pointer is to an array, each of length equal to the number of
repetitions in the test */
/* each pointer for a single test */
typedef struct {
double writeTime;
double readTime;
int errors;
double time;
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
double stonewall_time;
@ -219,16 +216,21 @@ typedef struct {
IOR_offset_t aggFileSizeFromStat;
IOR_offset_t aggFileSizeFromXfer;
IOR_offset_t aggFileSizeForBW;
} IOR_point_t;
typedef struct {
int errors;
IOR_point_t write;
IOR_point_t read;
} IOR_results_t;
/* define the queuing structure for the test parameters */
typedef struct IOR_test_t {
IOR_param_t params;
IOR_results_t *results; /* This is an array of reps times IOR_results_t */
IOR_results_t *results;
struct IOR_test_t *next;
} IOR_test_t;
IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
void AllocResults(IOR_test_t *test);