Refactored the results structure into an array of structs (AoS), which allows results to be kept per repetition.

master
Julian M. Kunkel 2018-07-15 19:38:17 +01:00
parent c7a598a435
commit f55761d5d2
3 changed files with 88 additions and 101 deletions

View File

@ -10,8 +10,8 @@
extern char **environ; extern char **environ;
static struct results *bw_values(int reps, IOR_offset_t *agg_file_size, double *vals); static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals);
static struct results *ops_values(int reps, IOR_offset_t *agg_file_size, IOR_offset_t transfer_size, double *vals); static struct results *ops_values(int reps, IOR_results_t * measured, int offset, IOR_offset_t transfer_size, double *vals);
static double mean_of_array_of_doubles(double *values, int len); static double mean_of_array_of_doubles(double *values, int len);
static void PPDouble(int leftjustify, double number, char *append); static void PPDouble(int leftjustify, double number, char *append);
static void PrintNextToken(); static void PrintNextToken();
@ -468,21 +468,26 @@ void ShowSetup(IOR_param_t *params)
* *
* operation is typically "write" or "read" * operation is typically "write" or "read"
*/ */
void PrintLongSummaryOneOperation(IOR_test_t *test, double *times, char *operation) static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation)
{ {
IOR_param_t *params = &test->params; IOR_param_t *params = &test->params;
IOR_results_t *results = test->results; IOR_results_t *results = test->results;
struct results *bw; struct results *bw;
struct results *ops; struct results *ops;
int reps; int reps;
if (rank != 0 || verbose < VERBOSE_0) if (rank != 0 || verbose < VERBOSE_0)
return; return;
reps = params->repetitions; reps = params->repetitions;
bw = bw_values(reps, results->aggFileSizeForBW, times); double * times = malloc(sizeof(double)* reps);
ops = ops_values(reps, results->aggFileSizeForBW, for(int i=0; i < reps; i++){
params->transferSize, times); times[i] = *(double*)((char*) & results[i] + times_offset);
}
bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times);
ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times);
if(outputFormat == OUTPUT_DEFAULT){ if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-9s ", operation); fprintf(out_resultfile, "%-9s ", operation);
@ -507,7 +512,7 @@ void PrintLongSummaryOneOperation(IOR_test_t *test, double *times, char *operati
fprintf(out_resultfile, "%6lld ", params->segmentCount); fprintf(out_resultfile, "%6lld ", params->segmentCount);
fprintf(out_resultfile, "%8lld ", params->blockSize); fprintf(out_resultfile, "%8lld ", params->blockSize);
fprintf(out_resultfile, "%8lld ", params->transferSize); fprintf(out_resultfile, "%8lld ", params->transferSize);
fprintf(out_resultfile, "%9.1f ", (float)results->aggFileSizeForBW[0] / MEBIBYTE); fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%3s ", params->api); fprintf(out_resultfile, "%3s ", params->api);
fprintf(out_resultfile, "%6d", params->referenceNumber); fprintf(out_resultfile, "%6d", params->referenceNumber);
fprintf(out_resultfile, "\n"); fprintf(out_resultfile, "\n");
@ -540,7 +545,7 @@ void PrintLongSummaryOneOperation(IOR_test_t *test, double *times, char *operati
PrintKeyValDouble("OPsMean", ops->mean); PrintKeyValDouble("OPsMean", ops->mean);
PrintKeyValDouble("OPsSD", ops->sd); PrintKeyValDouble("OPsSD", ops->sd);
PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps)); PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps));
PrintKeyValDouble("xsizeMiB", (double) results->aggFileSizeForBW[0] / MEBIBYTE); PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE);
PrintEndSection(); PrintEndSection();
}else if (outputFormat == OUTPUT_CSV){ }else if (outputFormat == OUTPUT_CSV){
@ -550,17 +555,17 @@ void PrintLongSummaryOneOperation(IOR_test_t *test, double *times, char *operati
free(bw); free(bw);
free(ops); free(ops);
free(times);
} }
void PrintLongSummaryOneTest(IOR_test_t *test) void PrintLongSummaryOneTest(IOR_test_t *test)
{ {
IOR_param_t *params = &test->params; IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
if (params->writeFile) if (params->writeFile)
PrintLongSummaryOneOperation(test, results->writeTime, "write"); PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write");
if (params->readFile) if (params->readFile)
PrintLongSummaryOneOperation(test, results->readTime, "read"); PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read");
} }
void PrintLongSummaryHeader() void PrintLongSummaryHeader()
@ -625,12 +630,12 @@ void PrintShortSummary(IOR_test_t * test)
reps = params->repetitions; reps = params->repetitions;
max_write = results->writeTime[0]; max_write = results[0].writeTime;
max_read = results->readTime[0]; max_read = results[0].readTime;
for (i = 0; i < reps; i++) { for (i = 0; i < reps; i++) {
bw = (double)results->aggFileSizeForBW[i]/results->writeTime[i]; bw = (double)results[i].aggFileSizeForBW / results[i].writeTime;
max_write = MAX(bw, max_write); max_write = MAX(bw, max_write);
bw = (double)results->aggFileSizeForBW[i]/results->readTime[i]; bw = (double)results[i].aggFileSizeForBW / results[i].readTime;
max_read = MAX(bw, max_read); max_read = MAX(bw, max_read);
} }
@ -739,19 +744,19 @@ static void PPDouble(int leftjustify, double number, char *append)
static struct results *bw_values(int reps, IOR_offset_t *agg_file_size, double *vals) static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals)
{ {
struct results *r; struct results *r;
int i; int i;
r = (struct results *)malloc(sizeof(struct results) r = (struct results *) malloc(sizeof(struct results) + (reps * sizeof(double)));
+ (reps * sizeof(double)));
if (r == NULL) if (r == NULL)
ERR("malloc failed"); ERR("malloc failed");
r->val = (double *)&r[1]; r->val = (double *)&r[1];
for (i = 0; i < reps; i++) { for (i = 0; i < reps; i++, measured++) {
r->val[i] = (double)agg_file_size[i] / vals[i];
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) / vals[i];
if (i == 0) { if (i == 0) {
r->min = r->val[i]; r->min = r->val[i];
r->max = r->val[i]; r->max = r->val[i];
@ -772,7 +777,7 @@ static struct results *bw_values(int reps, IOR_offset_t *agg_file_size, double *
return r; return r;
} }
static struct results *ops_values(int reps, IOR_offset_t *agg_file_size, static struct results *ops_values(int reps, IOR_results_t * measured, int offset,
IOR_offset_t transfer_size, IOR_offset_t transfer_size,
double *vals) double *vals)
{ {
@ -785,8 +790,9 @@ static struct results *ops_values(int reps, IOR_offset_t *agg_file_size,
ERR("malloc failed"); ERR("malloc failed");
r->val = (double *)&r[1]; r->val = (double *)&r[1];
for (i = 0; i < reps; i++) { for (i = 0; i < reps; i++, measured++) {
r->val[i] = (double)agg_file_size[i] / transfer_size / vals[i]; r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset))
/ transfer_size / vals[i];
if (i == 0) { if (i == 0) {
r->min = r->val[i]; r->min = r->val[i];
r->max = r->val[i]; r->max = r->val[i];

117
src/ior.c
View File

@ -294,25 +294,25 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
IOR_param_t *params = &test->params; IOR_param_t *params = &test->params;
IOR_results_t *results = test->results; IOR_results_t *results = test->results;
MPI_CHECK(MPI_Allreduce(&dataMoved, &results->aggFileSizeFromXfer[rep], MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer,
1, MPI_LONG_LONG_INT, MPI_SUM, testComm), 1, MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved"); "cannot total data moved");
if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) {
if (verbose >= VERBOSE_0 && rank == 0) { if (verbose >= VERBOSE_0 && rank == 0) {
if ((params->expectedAggFileSize if ((params->expectedAggFileSize
!= results->aggFileSizeFromXfer[rep]) != results[rep].aggFileSizeFromXfer)
|| (results->aggFileSizeFromStat[rep] || (results[rep].aggFileSizeFromStat
!= results->aggFileSizeFromXfer[rep])) { != results[rep].aggFileSizeFromXfer)) {
fprintf(out_logfile, fprintf(out_logfile,
"WARNING: Expected aggregate file size = %lld.\n", "WARNING: Expected aggregate file size = %lld.\n",
(long long) params->expectedAggFileSize); (long long) params->expectedAggFileSize);
fprintf(out_logfile, fprintf(out_logfile,
"WARNING: Stat() of aggregate file size = %lld.\n", "WARNING: Stat() of aggregate file size = %lld.\n",
(long long) results->aggFileSizeFromStat[rep]); (long long) results[rep].aggFileSizeFromStat);
fprintf(out_logfile, fprintf(out_logfile,
"WARNING: Using actual aggregate bytes moved = %lld.\n", "WARNING: Using actual aggregate bytes moved = %lld.\n",
(long long) results->aggFileSizeFromXfer[rep]); (long long) results[rep].aggFileSizeFromXfer);
if(params->deadlineForStonewalling){ if(params->deadlineForStonewalling){
fprintf(out_logfile, fprintf(out_logfile,
"WARNING: maybe caused by deadlineForStonewalling\n"); "WARNING: maybe caused by deadlineForStonewalling\n");
@ -320,7 +320,7 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
} }
} }
} }
results->aggFileSizeForBW[rep] = results->aggFileSizeFromXfer[rep]; results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer;
} }
/* /*
@ -491,54 +491,30 @@ static void aligned_buffer_free(void *buf)
free(*(void **)((char *)buf - sizeof(char *))); free(*(void **)((char *)buf - sizeof(char *)));
} }
/*
 * Allocate `size` bytes on the heap and return them zero-filled.
 *
 * A NULL result from malloc() is reported through ERR() with the message
 * "Could not malloc an array".
 * NOTE(review): ERR() is defined outside this view; presumably it does not
 * return, otherwise the memset() below would run on a NULL pointer - confirm.
 */
static void *safeMalloc(uint64_t size)
{
        void *buffer = malloc(size);

        if (!buffer) {
                ERR("Could not malloc an array");
        }

        /* callers get zero-initialised storage */
        memset(buffer, 0, size);
        return buffer;
}
static void AllocResults(IOR_test_t *test) static void AllocResults(IOR_test_t *test)
{ {
int reps; int reps;
if (test->results != NULL) if (test->results != NULL)
return; return;
reps = test->params.repetitions;
test->results = (IOR_results_t *)malloc(sizeof(IOR_results_t));
if (test->results == NULL)
ERR("malloc of IOR_results_t failed");
test->results->writeTime = (double *)malloc(reps * sizeof(double));
if (test->results->writeTime == NULL)
ERR("malloc of writeTime array failed");
memset(test->results->writeTime, 0, reps * sizeof(double));
test->results->readTime = (double *)malloc(reps * sizeof(double));
if (test->results->readTime == NULL)
ERR("malloc of readTime array failed");
memset(test->results->readTime, 0, reps * sizeof(double));
test->results->aggFileSizeFromStat =
(IOR_offset_t *)malloc(reps * sizeof(IOR_offset_t));
if (test->results->aggFileSizeFromStat == NULL)
ERR("malloc of aggFileSizeFromStat failed");
test->results->aggFileSizeFromXfer =
(IOR_offset_t *)malloc(reps * sizeof(IOR_offset_t));
if (test->results->aggFileSizeFromXfer == NULL)
ERR("malloc of aggFileSizeFromXfer failed");
test->results->aggFileSizeForBW =
(IOR_offset_t *)malloc(reps * sizeof(IOR_offset_t));
if (test->results->aggFileSizeForBW == NULL)
ERR("malloc of aggFileSizeForBW failed");
reps = test->params.repetitions;
test->results = (IOR_results_t *) safeMalloc(sizeof(IOR_results_t) * reps);
} }
void FreeResults(IOR_test_t *test) void FreeResults(IOR_test_t *test)
{ {
if (test->results != NULL) { if (test->results != NULL) {
free(test->results->aggFileSizeFromStat); free(test->results);
free(test->results->aggFileSizeFromXfer); }
free(test->results->aggFileSizeForBW);
free(test->results->readTime);
free(test->results->writeTime);
free(test->results);
}
} }
@ -908,11 +884,11 @@ static void ReduceIterResults(IOR_test_t *test, double **timer, int rep,
} }
if (access == WRITE) { if (access == WRITE) {
totalTime = reduced[5] - reduced[0]; totalTime = reduced[5] - reduced[0];
test->results->writeTime[rep] = totalTime; test->results[rep].writeTime = totalTime;
diff_subset = &diff[0]; diff_subset = &diff[0];
} else { /* READ */ } else { /* READ */
totalTime = reduced[11] - reduced[6]; totalTime = reduced[11] - reduced[6];
test->results->readTime[rep] = totalTime; test->results[rep].readTime = totalTime;
diff_subset = &diff[3]; diff_subset = &diff[3];
} }
@ -920,7 +896,7 @@ static void ReduceIterResults(IOR_test_t *test, double **timer, int rep,
return; return;
} }
bw = (double)test->results->aggFileSizeForBW[rep] / totalTime; bw = (double)test->results[rep].aggFileSizeForBW / totalTime;
PrintReducedResult(test, access, bw, diff_subset, totalTime, rep); PrintReducedResult(test, access, bw, diff_subset, totalTime, rep);
} }
@ -1233,6 +1209,7 @@ static void TestIoSys(IOR_test_t *test)
startTime = GetTimeStamp(); startTime = GetTimeStamp();
/* loop over test iterations */ /* loop over test iterations */
uint64_t params_saved_wearout = params->stoneWallingWearOutIterations;
for (rep = 0; rep < params->repetitions; rep++) { for (rep = 0; rep < params->repetitions; rep++) {
PrintRepeatStart(); PrintRepeatStart();
/* Get iteration start time in seconds in task 0 and broadcast to /* Get iteration start time in seconds in task 0 and broadcast to
@ -1280,6 +1257,8 @@ static void TestIoSys(IOR_test_t *test)
RemoveFile(testFileName, params->filePerProc, RemoveFile(testFileName, params->filePerProc,
params); params);
} }
params->stoneWallingWearOutIterations = params_saved_wearout;
MPI_CHECK(MPI_Barrier(testComm), "barrier error"); MPI_CHECK(MPI_Barrier(testComm), "barrier error");
params->open = WRITE; params->open = WRITE;
timer[0][rep] = GetTimeStamp(); timer[0][rep] = GetTimeStamp();
@ -1294,7 +1273,7 @@ static void TestIoSys(IOR_test_t *test)
CurrentTimeString()); CurrentTimeString());
} }
timer[2][rep] = GetTimeStamp(); timer[2][rep] = GetTimeStamp();
dataMoved = WriteOrRead(params, results, fd, WRITE, &ioBuffers); dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers);
if (params->verbose >= VERBOSE_4) { if (params->verbose >= VERBOSE_4) {
fprintf(out_logfile, "* data moved = %llu\n", dataMoved); fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
fflush(out_logfile); fflush(out_logfile);
@ -1310,7 +1289,7 @@ static void TestIoSys(IOR_test_t *test)
MPI_CHECK(MPI_Barrier(testComm), "barrier error"); MPI_CHECK(MPI_Barrier(testComm), "barrier error");
/* get the size of the file just written */ /* get the size of the file just written */
results->aggFileSizeFromStat[rep] = results[rep].aggFileSizeFromStat =
backend->get_file_size(params, testComm, testFileName); backend->get_file_size(params, testComm, testFileName);
/* check if stat() of file doesn't equal expected file size, /* check if stat() of file doesn't equal expected file size,
@ -1323,6 +1302,11 @@ static void TestIoSys(IOR_test_t *test)
if (params->outlierThreshold) { if (params->outlierThreshold) {
CheckForOutliers(params, timer, rep, WRITE); CheckForOutliers(params, timer, rep, WRITE);
} }
/* check if in this round we run write with stonewalling */
if(params->deadlineForStonewalling > 0){
params->stoneWallingWearOutIterations = results[rep].pairs_accessed;
}
} }
/* /*
@ -1349,7 +1333,7 @@ static void TestIoSys(IOR_test_t *test)
GetTestFileName(testFileName, params); GetTestFileName(testFileName, params);
params->open = WRITECHECK; params->open = WRITECHECK;
fd = backend->open(testFileName, params); fd = backend->open(testFileName, params);
dataMoved = WriteOrRead(params, results, fd, WRITECHECK, &ioBuffers); dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers);
backend->close(fd, params); backend->close(fd, params);
rankOffset = 0; rankOffset = 0;
} }
@ -1357,6 +1341,14 @@ static void TestIoSys(IOR_test_t *test)
* read the file(s), getting timing between I/O calls * read the file(s), getting timing between I/O calls
*/ */
if ((params->readFile || params->checkRead ) && !test_time_elapsed(params, startTime)) { if ((params->readFile || params->checkRead ) && !test_time_elapsed(params, startTime)) {
/* check for stonewall */
if(params->stoneWallingStatusFile){
params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile);
if(params->stoneWallingWearOutIterations == -1 && rank == 0){
fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!");
params->stoneWallingWearOutIterations = 0;
}
}
int operation_flag = READ; int operation_flag = READ;
if ( params->checkRead ){ if ( params->checkRead ){
// actually read and then compare the buffer // actually read and then compare the buffer
@ -1420,7 +1412,7 @@ static void TestIoSys(IOR_test_t *test)
CurrentTimeString()); CurrentTimeString());
} }
timer[8][rep] = GetTimeStamp(); timer[8][rep] = GetTimeStamp();
dataMoved = WriteOrRead(params, results, fd, operation_flag, &ioBuffers); dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers);
timer[9][rep] = GetTimeStamp(); timer[9][rep] = GetTimeStamp();
if (params->intraTestBarriers) if (params->intraTestBarriers)
MPI_CHECK(MPI_Barrier(testComm), MPI_CHECK(MPI_Barrier(testComm),
@ -1430,7 +1422,7 @@ static void TestIoSys(IOR_test_t *test)
timer[11][rep] = GetTimeStamp(); timer[11][rep] = GetTimeStamp();
/* get the size of the file just read */ /* get the size of the file just read */
results->aggFileSizeFromStat[rep] = results[rep].aggFileSizeFromStat =
backend->get_file_size(params, testComm, backend->get_file_size(params, testComm,
testFileName); testFileName);
@ -1863,19 +1855,8 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
offsetArray = GetOffsetArraySequential(test, pretendRank); offsetArray = GetOffsetArraySequential(test, pretendRank);
} }
/* check for stonewall */
startForStonewall = GetTimeStamp(); startForStonewall = GetTimeStamp();
hitStonewall = ((test->deadlineForStonewalling != 0) hitStonewall = 0;
&& ((GetTimeStamp() - startForStonewall)
> test->deadlineForStonewalling));
if(test->stoneWallingStatusFile && (access == READ || access == READCHECK)){
test->stoneWallingWearOutIterations = ReadStoneWallingIterations(test->stoneWallingStatusFile);
if(test->stoneWallingWearOutIterations == -1 && rank == 0){
fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!");
test->stoneWallingWearOutIterations = 0;
}
}
/* loop over offsets to access */ /* loop over offsets to access */
while ((offsetArray[pairCnt] != -1) && !hitStonewall ) { while ((offsetArray[pairCnt] != -1) && !hitStonewall ) {

View File

@ -205,24 +205,24 @@ typedef struct
/* each pointer is to an array, each of length equal to the number of /* each pointer is to an array, each of length equal to the number of
repetitions in the test */ repetitions in the test */
typedef struct { typedef struct {
double *writeTime; double writeTime;
double *readTime; double readTime;
int errors; int errors;
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
double stonewall_time; double stonewall_time;
long long stonewall_min_data_accessed; long long stonewall_min_data_accessed;
long long stonewall_avg_data_accessed; long long stonewall_avg_data_accessed;
IOR_offset_t *aggFileSizeFromStat; IOR_offset_t aggFileSizeFromStat;
IOR_offset_t *aggFileSizeFromXfer; IOR_offset_t aggFileSizeFromXfer;
IOR_offset_t *aggFileSizeForBW; IOR_offset_t aggFileSizeForBW;
} IOR_results_t; } IOR_results_t;
/* define the queuing structure for the test parameters */ /* define the queuing structure for the test parameters */
typedef struct IOR_test_t { typedef struct IOR_test_t {
IOR_param_t params; IOR_param_t params;
IOR_results_t *results; IOR_results_t *results; /* This is an array of reps times IOR_results_t */
struct IOR_test_t *next; struct IOR_test_t *next;
} IOR_test_t; } IOR_test_t;