Merge pull request #90 from DDNStorage/fixstonewalling

Fix erroneous reported write bandwidth with stonewalling.
master
Julian Kunkel 2018-09-21 13:20:55 +01:00 committed by GitHub
commit 78fd92267c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 237 additions and 276 deletions

View File

@ -11,8 +11,6 @@
extern char **environ;
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals);
static struct results *ops_values(int reps, IOR_results_t * measured, int offset, IOR_offset_t transfer_size, double *vals);
static double mean_of_array_of_doubles(double *values, int len);
static void PPDouble(int leftjustify, double number, char *append);
static void PrintNextToken();
@ -387,10 +385,12 @@ void ShowTestStart(IOR_param_t *test)
void ShowTestEnd(IOR_test_t *tptr){
if(rank == 0 && tptr->params.stoneWallingWearOut){
size_t pairs_accessed = tptr->results->write.pairs_accessed;
if (tptr->params.stoneWallingStatusFile){
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, pairs_accessed);
}else{
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %ld\n", pairs_accessed);
}
}
PrintEndSection();
@ -456,14 +456,63 @@ void ShowSetup(IOR_param_t *params)
fflush(out_resultfile);
}
static struct results *bw_ops_values(const int reps, IOR_results_t *measured,
IOR_offset_t transfer_size,
const double *vals, const int access)
{
struct results *r;
int i;
r = (struct results *)malloc(sizeof(struct results)
+ (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];
for (i = 0; i < reps; i++, measured++) {
IOR_point_t *point = (access == WRITE) ? &measured->write :
&measured->read;
r->val[i] = ((double) (point->aggFileSizeForBW))
/ transfer_size / vals[i];
if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;
r->sd = sqrt(r->var);
return r;
}
static struct results *bw_values(const int reps, IOR_results_t *measured,
const double *vals, const int access)
{
return bw_ops_values(reps, measured, 1, vals, access);
}
static struct results *ops_values(const int reps, IOR_results_t *measured,
IOR_offset_t transfer_size,
const double *vals, const int access)
{
return bw_ops_values(reps, measured, transfer_size, vals, access);
}
/*
* Summarize results
*
* operation is typically "write" or "read"
*/
static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation)
static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
{
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
@ -478,14 +527,20 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
double * times = malloc(sizeof(double)* reps);
for(int i=0; i < reps; i++){
times[i] = *(double*)((char*) & results[i] + times_offset);
IOR_point_t *point = (access == WRITE) ? &results[i].write :
&results[i].read;
times[i] = point->time;
}
bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times);
ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times);
bw = bw_values(reps, results, times, access);
ops = ops_values(reps, results, params->transferSize, times, access);
IOR_point_t *point = (access == WRITE) ? &results[0].write :
&results[0].read;
if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-9s ", operation);
fprintf(out_resultfile, "%-9s ", access == WRITE ? "write" : "read");
fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE);
@ -507,13 +562,13 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
fprintf(out_resultfile, "%6lld ", params->segmentCount);
fprintf(out_resultfile, "%8lld ", params->blockSize);
fprintf(out_resultfile, "%8lld ", params->transferSize);
fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%9.1f ", (float)point->aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%3s ", params->api);
fprintf(out_resultfile, "%6d", params->referenceNumber);
fprintf(out_resultfile, "\n");
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("operation", operation);
PrintKeyVal("operation", access == WRITE ? "write" : "read");
PrintKeyVal("API", params->api);
PrintKeyValInt("TestID", params->id);
PrintKeyValInt("ReferenceNumber", params->referenceNumber);
@ -540,7 +595,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
PrintKeyValDouble("OPsMean", ops->mean);
PrintKeyValDouble("OPsSD", ops->sd);
PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps));
PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE);
PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE);
PrintEndSection();
}else if (outputFormat == OUTPUT_CSV){
@ -558,9 +613,9 @@ void PrintLongSummaryOneTest(IOR_test_t *test)
IOR_param_t *params = &test->params;
if (params->writeFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write");
PrintLongSummaryOneOperation(test, WRITE);
if (params->readFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read");
PrintLongSummaryOneOperation(test, READ);
}
void PrintLongSummaryHeader()
@ -626,9 +681,9 @@ void PrintShortSummary(IOR_test_t * test)
reps = params->repetitions;
for (i = 0; i < reps; i++) {
bw = (double)results[i].aggFileSizeForBW / results[i].writeTime;
bw = (double)results[i].write.aggFileSizeForBW / results[i].write.time;
max_write_bw = MAX(bw, max_write_bw);
bw = (double)results[i].aggFileSizeForBW / results[i].readTime;
bw = (double)results[i].read.aggFileSizeForBW / results[i].read.time;
max_read_bw = MAX(bw, max_read_bw);
}
@ -735,78 +790,6 @@ static void PPDouble(int leftjustify, double number, char *append)
fprintf(out_resultfile, format, number, append);
}
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals)
{
struct results *r;
int i;
r = (struct results *) malloc(sizeof(struct results) + (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];
for (i = 0; i < reps; i++, measured++) {
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) / vals[i];
if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;
r->sd = sqrt(r->var);
return r;
}
static struct results *ops_values(int reps, IOR_results_t * measured, int offset,
IOR_offset_t transfer_size,
double *vals)
{
struct results *r;
int i;
r = (struct results *)malloc(sizeof(struct results)
+ (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];
for (i = 0; i < reps; i++, measured++) {
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset))
/ transfer_size / vals[i];
if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;
r->sd = sqrt(r->var);
return r;
}
static double mean_of_array_of_doubles(double *values, int len)
{
double tot = 0.0;

333
src/ior.c
View File

@ -36,6 +36,7 @@
#include "utilities.h"
#include "parse_options.h"
#define IOR_NB_TIMERS 6
/* file scope globals */
extern char **environ;
@ -48,8 +49,9 @@ static char **ParseFileName(char *, int *);
static void InitTests(IOR_test_t * , MPI_Comm);
static void TestIoSys(IOR_test_t *);
static void ValidateTests(IOR_param_t *);
static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers);
static void WriteTimes(IOR_param_t *, double **, int, int);
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
void *fd, const int access,
IOR_io_buffers *ioBuffers);
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out){
IOR_test_t *tests_head;
@ -256,62 +258,56 @@ DisplayOutliers(int numTasks,
/*
* Check for outliers in start/end times and elapsed create/xfer/close times.
*/
static void CheckForOutliers(IOR_param_t * test, double **timer, int rep,
int access)
static void
CheckForOutliers(IOR_param_t *test, const double *timer, const int access)
{
int shift;
if (access == WRITE) {
shift = 0;
} else { /* READ */
shift = 6;
}
DisplayOutliers(test->numTasks, timer[shift + 0][rep],
DisplayOutliers(test->numTasks, timer[0],
"start time", access, test->outlierThreshold);
DisplayOutliers(test->numTasks,
timer[shift + 1][rep] - timer[shift + 0][rep],
timer[1] - timer[0],
"elapsed create time", access, test->outlierThreshold);
DisplayOutliers(test->numTasks,
timer[shift + 3][rep] - timer[shift + 2][rep],
timer[3] - timer[2],
"elapsed transfer time", access,
test->outlierThreshold);
DisplayOutliers(test->numTasks,
timer[shift + 5][rep] - timer[shift + 4][rep],
timer[5] - timer[4],
"elapsed close time", access, test->outlierThreshold);
DisplayOutliers(test->numTasks, timer[shift + 5][rep], "end time",
DisplayOutliers(test->numTasks, timer[5], "end time",
access, test->outlierThreshold);
}
/*
* Check if actual file size equals expected size; if not use actual for
* calculating performance rate.
*/
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep,
const int access)
{
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
IOR_point_t *point = (access == WRITE) ? &results[rep].write :
&results[rep].read;
MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer,
MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer,
1, MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) {
if (verbose >= VERBOSE_0 && rank == 0) {
if ((params->expectedAggFileSize
!= results[rep].aggFileSizeFromXfer)
|| (results[rep].aggFileSizeFromStat
!= results[rep].aggFileSizeFromXfer)) {
!= point->aggFileSizeFromXfer)
|| (point->aggFileSizeFromStat
!= point->aggFileSizeFromXfer)) {
fprintf(out_logfile,
"WARNING: Expected aggregate file size = %lld.\n",
(long long) params->expectedAggFileSize);
fprintf(out_logfile,
"WARNING: Stat() of aggregate file size = %lld.\n",
(long long) results[rep].aggFileSizeFromStat);
(long long) point->aggFileSizeFromStat);
fprintf(out_logfile,
"WARNING: Using actual aggregate bytes moved = %lld.\n",
(long long) results[rep].aggFileSizeFromXfer);
(long long) point->aggFileSizeFromXfer);
if(params->deadlineForStonewalling){
fprintf(out_logfile,
"WARNING: maybe caused by deadlineForStonewalling\n");
@ -319,7 +315,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
}
}
}
results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer;
point->aggFileSizeForBW = point->aggFileSizeFromXfer;
}
/*
@ -848,54 +845,46 @@ static char *PrependDir(IOR_param_t * test, char *rootDir)
/*
* Reduce test results, and show if verbose set.
*/
static void ReduceIterResults(IOR_test_t *test, double **timer, int rep,
int access)
static void
ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int access)
{
double reduced[12] = { 0 };
double diff[6];
double *diff_subset;
double totalTime;
double bw;
int i;
MPI_Op op;
double reduced[IOR_NB_TIMERS] = { 0 };
double diff[IOR_NB_TIMERS / 2 + 1];
double totalTime;
double bw;
int i;
MPI_Op op;
assert(access == WRITE || access == READ);
assert(access == WRITE || access == READ);
/* Find the minimum start time of the even numbered timers, and the
maximum finish time for the odd numbered timers */
for (i = 0; i < 12; i++) {
for (i = 0; i < IOR_NB_TIMERS; i++) {
op = i % 2 ? MPI_MAX : MPI_MIN;
MPI_CHECK(MPI_Reduce(&timer[i][rep], &reduced[i], 1, MPI_DOUBLE,
MPI_CHECK(MPI_Reduce(&timer[i], &reduced[i], 1, MPI_DOUBLE,
op, 0, testComm), "MPI_Reduce()");
}
if (rank != 0) {
/* Only rank 0 tallies and prints the results. */
return;
}
/* Only rank 0 tallies and prints the results. */
if (rank != 0)
return;
/* Calculate elapsed times and throughput numbers */
for (i = 0; i < 6; i++) {
diff[i] = reduced[2 * i + 1] - reduced[2 * i];
}
if (access == WRITE) {
totalTime = reduced[5] - reduced[0];
test->results[rep].writeTime = totalTime;
diff_subset = &diff[0];
} else { /* READ */
totalTime = reduced[11] - reduced[6];
test->results[rep].readTime = totalTime;
diff_subset = &diff[3];
}
/* Calculate elapsed times and throughput numbers */
for (i = 0; i < IOR_NB_TIMERS / 2; i++)
diff[i] = reduced[2 * i + 1] - reduced[2 * i];
if (verbose < VERBOSE_0) {
return;
}
totalTime = reduced[5] - reduced[0];
bw = (double)test->results[rep].aggFileSizeForBW / totalTime;
IOR_point_t *point = (access == WRITE) ? &test->results[rep].write :
&test->results[rep].read;
PrintReducedResult(test, access, bw, diff_subset, totalTime, rep);
point->time = totalTime;
if (verbose < VERBOSE_0)
return;
bw = (double)point->aggFileSizeForBW / totalTime;
PrintReducedResult(test, access, bw, diff, totalTime, rep);
}
/*
@ -1116,7 +1105,72 @@ static void *HogMemory(IOR_param_t *params)
return buf;
}
/*
* Write times taken during each iteration of the test.
*/
static void
WriteTimes(IOR_param_t *test, const double *timer, const int iteration,
const int access)
{
char timerName[MAX_STR];
for (int i = 0; i < IOR_NB_TIMERS; i++) {
if (access == WRITE) {
switch (i) {
case 0:
strcpy(timerName, "write open start");
break;
case 1:
strcpy(timerName, "write open stop");
break;
case 2:
strcpy(timerName, "write start");
break;
case 3:
strcpy(timerName, "write stop");
break;
case 4:
strcpy(timerName, "write close start");
break;
case 5:
strcpy(timerName, "write close stop");
break;
default:
strcpy(timerName, "invalid timer");
break;
}
}
else {
switch (i) {
case 0:
strcpy(timerName, "read open start");
break;
case 1:
strcpy(timerName, "read open stop");
break;
case 2:
strcpy(timerName, "read start");
break;
case 3:
strcpy(timerName, "read stop");
break;
case 4:
strcpy(timerName, "read close start");
break;
case 5:
strcpy(timerName, "read close stop");
break;
default:
strcpy(timerName, "invalid timer");
break;
}
}
fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n",
test->id, iteration, (int)rank, timer[i],
timerName);
}
}
/*
* Using the test parameters, run iteration(s) of single test.
*/
@ -1125,10 +1179,10 @@ static void TestIoSys(IOR_test_t *test)
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
char testFileName[MAX_STR];
double *timer[12];
double timer[IOR_NB_TIMERS];
double startTime;
int pretendRank;
int i, rep;
int rep;
void *fd;
MPI_Group orig_group, new_group;
int range[3];
@ -1175,13 +1229,6 @@ static void TestIoSys(IOR_test_t *test)
}
params->tasksPerNode = CountTasksPerNode(testComm);
/* setup timers */
for (i = 0; i < 12; i++) {
timer[i] = (double *)malloc(params->repetitions * sizeof(double));
if (timer[i] == NULL)
ERR("malloc failed");
}
/* bind I/O calls to specific API */
backend = aiori_select(params->api);
@ -1257,9 +1304,9 @@ static void TestIoSys(IOR_test_t *test)
params->stoneWallingWearOutIterations = params_saved_wearout;
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
params->open = WRITE;
timer[0][rep] = GetTimeStamp();
timer[0] = GetTimeStamp();
fd = backend->create(testFileName, params);
timer[1][rep] = GetTimeStamp();
timer[1] = GetTimeStamp();
if (params->intraTestBarriers)
MPI_CHECK(MPI_Barrier(testComm),
"barrier error");
@ -1268,40 +1315,40 @@ static void TestIoSys(IOR_test_t *test)
"Commencing write performance test: %s",
CurrentTimeString());
}
timer[2][rep] = GetTimeStamp();
dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers);
timer[2] = GetTimeStamp();
dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers);
if (params->verbose >= VERBOSE_4) {
fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
fflush(out_logfile);
}
timer[3][rep] = GetTimeStamp();
timer[3] = GetTimeStamp();
if (params->intraTestBarriers)
MPI_CHECK(MPI_Barrier(testComm),
"barrier error");
timer[4][rep] = GetTimeStamp();
timer[4] = GetTimeStamp();
backend->close(fd, params);
timer[5][rep] = GetTimeStamp();
timer[5] = GetTimeStamp();
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
/* get the size of the file just written */
results[rep].aggFileSizeFromStat =
results[rep].write.aggFileSizeFromStat =
backend->get_file_size(params, testComm, testFileName);
/* check if stat() of file doesn't equal expected file size,
use actual amount of byte moved */
CheckFileSize(test, dataMoved, rep);
CheckFileSize(test, dataMoved, rep, WRITE);
if (verbose >= VERBOSE_3)
WriteTimes(params, timer, rep, WRITE);
ReduceIterResults(test, timer, rep, WRITE);
if (params->outlierThreshold) {
CheckForOutliers(params, timer, rep, WRITE);
CheckForOutliers(params, timer, WRITE);
}
/* check if in this round we run write with stonewalling */
if(params->deadlineForStonewalling > 0){
params->stoneWallingWearOutIterations = results[rep].pairs_accessed;
params->stoneWallingWearOutIterations = results[rep].write.pairs_accessed;
}
}
@ -1329,7 +1376,7 @@ static void TestIoSys(IOR_test_t *test)
GetTestFileName(testFileName, params);
params->open = WRITECHECK;
fd = backend->open(testFileName, params);
dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers);
dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers);
backend->close(fd, params);
rankOffset = 0;
}
@ -1396,9 +1443,9 @@ static void TestIoSys(IOR_test_t *test)
DelaySecs(params->interTestDelay);
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
params->open = READ;
timer[6][rep] = GetTimeStamp();
timer[0] = GetTimeStamp();
fd = backend->open(testFileName, params);
timer[7][rep] = GetTimeStamp();
timer[1] = GetTimeStamp();
if (params->intraTestBarriers)
MPI_CHECK(MPI_Barrier(testComm),
"barrier error");
@ -1407,30 +1454,30 @@ static void TestIoSys(IOR_test_t *test)
"Commencing read performance test: %s",
CurrentTimeString());
}
timer[8][rep] = GetTimeStamp();
dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers);
timer[9][rep] = GetTimeStamp();
timer[2] = GetTimeStamp();
dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers);
timer[3] = GetTimeStamp();
if (params->intraTestBarriers)
MPI_CHECK(MPI_Barrier(testComm),
"barrier error");
timer[10][rep] = GetTimeStamp();
timer[4] = GetTimeStamp();
backend->close(fd, params);
timer[11][rep] = GetTimeStamp();
timer[5] = GetTimeStamp();
/* get the size of the file just read */
results[rep].aggFileSizeFromStat =
results[rep].read.aggFileSizeFromStat =
backend->get_file_size(params, testComm,
testFileName);
/* check if stat() of file doesn't equal expected file size,
use actual amount of byte moved */
CheckFileSize(test, dataMoved, rep);
CheckFileSize(test, dataMoved, rep, READ);
if (verbose >= VERBOSE_3)
WriteTimes(params, timer, rep, READ);
ReduceIterResults(test, timer, rep, READ);
if (params->outlierThreshold) {
CheckForOutliers(params, timer, rep, READ);
CheckForOutliers(params, timer, READ);
}
}
@ -1465,9 +1512,6 @@ static void TestIoSys(IOR_test_t *test)
if (hog_buf != NULL)
free(hog_buf);
for (i = 0; i < 12; i++) {
free(timer[i]);
}
/* Sync with the tasks that did not participate in this test */
MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error");
@ -1831,7 +1875,8 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset
* Write or Read data to file(s). This loops through the strides, writing
* out the data to each block in transfer sizes, until the remainder left is 0.
*/
static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers)
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
void *fd, const int access, IOR_io_buffers *ioBuffers)
{
int errors = 0;
IOR_offset_t transferCount = 0;
@ -1841,6 +1886,8 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
IOR_offset_t dataMoved = 0; /* for data rate calculation */
double startForStonewall;
int hitStonewall;
IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ?
&results->write : &results->read;
/* initialize values */
pretendRank = (rank + rankOffset) % test->numTasks;
@ -1869,35 +1916,35 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
}
long long data_moved_ll = (long long) dataMoved;
long long pairs_accessed_min = 0;
MPI_CHECK(MPI_Allreduce(& pairCnt, &results->pairs_accessed,
MPI_CHECK(MPI_Allreduce(& pairCnt, &point->pairs_accessed,
1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved");
double stonewall_runtime = GetTimeStamp() - startForStonewall;
results->stonewall_time = stonewall_runtime;
point->stonewall_time = stonewall_runtime;
MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min,
1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_min_data_accessed,
MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed,
1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_avg_data_accessed,
MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed,
1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved");
if(rank == 0){
fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n",
pairs_accessed_min, results->pairs_accessed,
results->stonewall_min_data_accessed /1024.0 / 1024 / 1024, results->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , results->stonewall_time);
results->stonewall_min_data_accessed *= test->numTasks;
pairs_accessed_min, point->pairs_accessed,
point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time);
point->stonewall_min_data_accessed *= test->numTasks;
}
if(pairs_accessed_min == pairCnt){
results->stonewall_min_data_accessed = 0;
results->stonewall_avg_data_accessed = 0;
point->stonewall_min_data_accessed = 0;
point->stonewall_avg_data_accessed = 0;
}
if(pairCnt != results->pairs_accessed){
if(pairCnt != point->pairs_accessed){
// some work needs still to be done !
for(; pairCnt < results->pairs_accessed; pairCnt++ ) {
for(; pairCnt < point->pairs_accessed; pairCnt++ ) {
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
}
}
}else{
results->pairs_accessed = pairCnt;
point->pairs_accessed = pairCnt;
}
@ -1910,73 +1957,3 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
}
return (dataMoved);
}
/*
* Write times taken during each iteration of the test.
*/
static void
WriteTimes(IOR_param_t * test, double **timer, int iteration, int writeOrRead)
{
char accessType[MAX_STR];
char timerName[MAX_STR];
int i, start = 0, stop = 0;
if (writeOrRead == WRITE) {
start = 0;
stop = 6;
strcpy(accessType, "WRITE");
} else if (writeOrRead == READ) {
start = 6;
stop = 12;
strcpy(accessType, "READ");
} else {
ERR("incorrect WRITE/READ option");
}
for (i = start; i < stop; i++) {
switch (i) {
case 0:
strcpy(timerName, "write open start");
break;
case 1:
strcpy(timerName, "write open stop");
break;
case 2:
strcpy(timerName, "write start");
break;
case 3:
strcpy(timerName, "write stop");
break;
case 4:
strcpy(timerName, "write close start");
break;
case 5:
strcpy(timerName, "write close stop");
break;
case 6:
strcpy(timerName, "read open start");
break;
case 7:
strcpy(timerName, "read open stop");
break;
case 8:
strcpy(timerName, "read start");
break;
case 9:
strcpy(timerName, "read stop");
break;
case 10:
strcpy(timerName, "read close start");
break;
case 11:
strcpy(timerName, "read close stop");
break;
default:
strcpy(timerName, "invalid timer");
break;
}
fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n",
test->id, iteration, (int)rank, timer[i][iteration],
timerName);
}
}

View File

@ -204,12 +204,9 @@ typedef struct
int intraTestBarriers; /* barriers between open/op and op/close */
} IOR_param_t;
/* each pointer is to an array, each of length equal to the number of
repetitions in the test */
/* each pointer for a single test */
typedef struct {
double writeTime;
double readTime;
int errors;
double time;
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
double stonewall_time;
@ -219,16 +216,21 @@ typedef struct {
IOR_offset_t aggFileSizeFromStat;
IOR_offset_t aggFileSizeFromXfer;
IOR_offset_t aggFileSizeForBW;
} IOR_point_t;
typedef struct {
int errors;
IOR_point_t write;
IOR_point_t read;
} IOR_results_t;
/* define the queuing structure for the test parameters */
typedef struct IOR_test_t {
IOR_param_t params;
IOR_results_t *results; /* This is an array of reps times IOR_results_t */
IOR_results_t *results;
struct IOR_test_t *next;
} IOR_test_t;
IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
void AllocResults(IOR_test_t *test);

View File

@ -96,7 +96,6 @@ enum OutputFormat_t{
#define WRITECHECK 1
#define READ 2
#define READCHECK 3
#define CHECK 4
/* verbosity settings */
#define VERBOSE_0 0