Merge pull request #90 from DDNStorage/fixstonewalling
Fix erroneous reported write bandwidth with stonewalling.master
commit
78fd92267c
163
src/ior-output.c
163
src/ior-output.c
|
@ -11,8 +11,6 @@
|
|||
|
||||
extern char **environ;
|
||||
|
||||
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals);
|
||||
static struct results *ops_values(int reps, IOR_results_t * measured, int offset, IOR_offset_t transfer_size, double *vals);
|
||||
static double mean_of_array_of_doubles(double *values, int len);
|
||||
static void PPDouble(int leftjustify, double number, char *append);
|
||||
static void PrintNextToken();
|
||||
|
@ -387,10 +385,12 @@ void ShowTestStart(IOR_param_t *test)
|
|||
|
||||
void ShowTestEnd(IOR_test_t *tptr){
|
||||
if(rank == 0 && tptr->params.stoneWallingWearOut){
|
||||
|
||||
size_t pairs_accessed = tptr->results->write.pairs_accessed;
|
||||
if (tptr->params.stoneWallingStatusFile){
|
||||
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
|
||||
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, pairs_accessed);
|
||||
}else{
|
||||
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
||||
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %ld\n", pairs_accessed);
|
||||
}
|
||||
}
|
||||
PrintEndSection();
|
||||
|
@ -456,14 +456,63 @@ void ShowSetup(IOR_param_t *params)
|
|||
fflush(out_resultfile);
|
||||
}
|
||||
|
||||
static struct results *bw_ops_values(const int reps, IOR_results_t *measured,
|
||||
IOR_offset_t transfer_size,
|
||||
const double *vals, const int access)
|
||||
{
|
||||
struct results *r;
|
||||
int i;
|
||||
|
||||
r = (struct results *)malloc(sizeof(struct results)
|
||||
+ (reps * sizeof(double)));
|
||||
if (r == NULL)
|
||||
ERR("malloc failed");
|
||||
r->val = (double *)&r[1];
|
||||
|
||||
for (i = 0; i < reps; i++, measured++) {
|
||||
IOR_point_t *point = (access == WRITE) ? &measured->write :
|
||||
&measured->read;
|
||||
|
||||
r->val[i] = ((double) (point->aggFileSizeForBW))
|
||||
/ transfer_size / vals[i];
|
||||
|
||||
if (i == 0) {
|
||||
r->min = r->val[i];
|
||||
r->max = r->val[i];
|
||||
r->sum = 0.0;
|
||||
}
|
||||
r->min = MIN(r->min, r->val[i]);
|
||||
r->max = MAX(r->max, r->val[i]);
|
||||
r->sum += r->val[i];
|
||||
}
|
||||
r->mean = r->sum / reps;
|
||||
r->var = 0.0;
|
||||
for (i = 0; i < reps; i++) {
|
||||
r->var += pow((r->mean - r->val[i]), 2);
|
||||
}
|
||||
r->var = r->var / reps;
|
||||
r->sd = sqrt(r->var);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct results *bw_values(const int reps, IOR_results_t *measured,
|
||||
const double *vals, const int access)
|
||||
{
|
||||
return bw_ops_values(reps, measured, 1, vals, access);
|
||||
}
|
||||
|
||||
static struct results *ops_values(const int reps, IOR_results_t *measured,
|
||||
IOR_offset_t transfer_size,
|
||||
const double *vals, const int access)
|
||||
{
|
||||
return bw_ops_values(reps, measured, transfer_size, vals, access);
|
||||
}
|
||||
|
||||
/*
|
||||
* Summarize results
|
||||
*
|
||||
* operation is typically "write" or "read"
|
||||
*/
|
||||
static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation)
|
||||
static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access)
|
||||
{
|
||||
IOR_param_t *params = &test->params;
|
||||
IOR_results_t *results = test->results;
|
||||
|
@ -478,14 +527,20 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
|
|||
|
||||
double * times = malloc(sizeof(double)* reps);
|
||||
for(int i=0; i < reps; i++){
|
||||
times[i] = *(double*)((char*) & results[i] + times_offset);
|
||||
IOR_point_t *point = (access == WRITE) ? &results[i].write :
|
||||
&results[i].read;
|
||||
times[i] = point->time;
|
||||
}
|
||||
|
||||
bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times);
|
||||
ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times);
|
||||
bw = bw_values(reps, results, times, access);
|
||||
ops = ops_values(reps, results, params->transferSize, times, access);
|
||||
|
||||
IOR_point_t *point = (access == WRITE) ? &results[0].write :
|
||||
&results[0].read;
|
||||
|
||||
|
||||
if(outputFormat == OUTPUT_DEFAULT){
|
||||
fprintf(out_resultfile, "%-9s ", operation);
|
||||
fprintf(out_resultfile, "%-9s ", access == WRITE ? "write" : "read");
|
||||
fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE);
|
||||
fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE);
|
||||
fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE);
|
||||
|
@ -507,13 +562,13 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
|
|||
fprintf(out_resultfile, "%6lld ", params->segmentCount);
|
||||
fprintf(out_resultfile, "%8lld ", params->blockSize);
|
||||
fprintf(out_resultfile, "%8lld ", params->transferSize);
|
||||
fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE);
|
||||
fprintf(out_resultfile, "%9.1f ", (float)point->aggFileSizeForBW / MEBIBYTE);
|
||||
fprintf(out_resultfile, "%3s ", params->api);
|
||||
fprintf(out_resultfile, "%6d", params->referenceNumber);
|
||||
fprintf(out_resultfile, "\n");
|
||||
}else if (outputFormat == OUTPUT_JSON){
|
||||
PrintStartSection();
|
||||
PrintKeyVal("operation", operation);
|
||||
PrintKeyVal("operation", access == WRITE ? "write" : "read");
|
||||
PrintKeyVal("API", params->api);
|
||||
PrintKeyValInt("TestID", params->id);
|
||||
PrintKeyValInt("ReferenceNumber", params->referenceNumber);
|
||||
|
@ -540,7 +595,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, cha
|
|||
PrintKeyValDouble("OPsMean", ops->mean);
|
||||
PrintKeyValDouble("OPsSD", ops->sd);
|
||||
PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps));
|
||||
PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE);
|
||||
PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE);
|
||||
PrintEndSection();
|
||||
}else if (outputFormat == OUTPUT_CSV){
|
||||
|
||||
|
@ -558,9 +613,9 @@ void PrintLongSummaryOneTest(IOR_test_t *test)
|
|||
IOR_param_t *params = &test->params;
|
||||
|
||||
if (params->writeFile)
|
||||
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write");
|
||||
PrintLongSummaryOneOperation(test, WRITE);
|
||||
if (params->readFile)
|
||||
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read");
|
||||
PrintLongSummaryOneOperation(test, READ);
|
||||
}
|
||||
|
||||
void PrintLongSummaryHeader()
|
||||
|
@ -626,9 +681,9 @@ void PrintShortSummary(IOR_test_t * test)
|
|||
reps = params->repetitions;
|
||||
|
||||
for (i = 0; i < reps; i++) {
|
||||
bw = (double)results[i].aggFileSizeForBW / results[i].writeTime;
|
||||
bw = (double)results[i].write.aggFileSizeForBW / results[i].write.time;
|
||||
max_write_bw = MAX(bw, max_write_bw);
|
||||
bw = (double)results[i].aggFileSizeForBW / results[i].readTime;
|
||||
bw = (double)results[i].read.aggFileSizeForBW / results[i].read.time;
|
||||
max_read_bw = MAX(bw, max_read_bw);
|
||||
}
|
||||
|
||||
|
@ -735,78 +790,6 @@ static void PPDouble(int leftjustify, double number, char *append)
|
|||
fprintf(out_resultfile, format, number, append);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals)
|
||||
{
|
||||
struct results *r;
|
||||
int i;
|
||||
|
||||
r = (struct results *) malloc(sizeof(struct results) + (reps * sizeof(double)));
|
||||
if (r == NULL)
|
||||
ERR("malloc failed");
|
||||
r->val = (double *)&r[1];
|
||||
|
||||
for (i = 0; i < reps; i++, measured++) {
|
||||
|
||||
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) / vals[i];
|
||||
if (i == 0) {
|
||||
r->min = r->val[i];
|
||||
r->max = r->val[i];
|
||||
r->sum = 0.0;
|
||||
}
|
||||
r->min = MIN(r->min, r->val[i]);
|
||||
r->max = MAX(r->max, r->val[i]);
|
||||
r->sum += r->val[i];
|
||||
}
|
||||
r->mean = r->sum / reps;
|
||||
r->var = 0.0;
|
||||
for (i = 0; i < reps; i++) {
|
||||
r->var += pow((r->mean - r->val[i]), 2);
|
||||
}
|
||||
r->var = r->var / reps;
|
||||
r->sd = sqrt(r->var);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static struct results *ops_values(int reps, IOR_results_t * measured, int offset,
|
||||
IOR_offset_t transfer_size,
|
||||
double *vals)
|
||||
{
|
||||
struct results *r;
|
||||
int i;
|
||||
|
||||
r = (struct results *)malloc(sizeof(struct results)
|
||||
+ (reps * sizeof(double)));
|
||||
if (r == NULL)
|
||||
ERR("malloc failed");
|
||||
r->val = (double *)&r[1];
|
||||
|
||||
for (i = 0; i < reps; i++, measured++) {
|
||||
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset))
|
||||
/ transfer_size / vals[i];
|
||||
if (i == 0) {
|
||||
r->min = r->val[i];
|
||||
r->max = r->val[i];
|
||||
r->sum = 0.0;
|
||||
}
|
||||
r->min = MIN(r->min, r->val[i]);
|
||||
r->max = MAX(r->max, r->val[i]);
|
||||
r->sum += r->val[i];
|
||||
}
|
||||
r->mean = r->sum / reps;
|
||||
r->var = 0.0;
|
||||
for (i = 0; i < reps; i++) {
|
||||
r->var += pow((r->mean - r->val[i]), 2);
|
||||
}
|
||||
r->var = r->var / reps;
|
||||
r->sd = sqrt(r->var);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
static double mean_of_array_of_doubles(double *values, int len)
|
||||
{
|
||||
double tot = 0.0;
|
||||
|
|
333
src/ior.c
333
src/ior.c
|
@ -36,6 +36,7 @@
|
|||
#include "utilities.h"
|
||||
#include "parse_options.h"
|
||||
|
||||
#define IOR_NB_TIMERS 6
|
||||
|
||||
/* file scope globals */
|
||||
extern char **environ;
|
||||
|
@ -48,8 +49,9 @@ static char **ParseFileName(char *, int *);
|
|||
static void InitTests(IOR_test_t * , MPI_Comm);
|
||||
static void TestIoSys(IOR_test_t *);
|
||||
static void ValidateTests(IOR_param_t *);
|
||||
static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers);
|
||||
static void WriteTimes(IOR_param_t *, double **, int, int);
|
||||
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
|
||||
void *fd, const int access,
|
||||
IOR_io_buffers *ioBuffers);
|
||||
|
||||
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out){
|
||||
IOR_test_t *tests_head;
|
||||
|
@ -256,62 +258,56 @@ DisplayOutliers(int numTasks,
|
|||
/*
|
||||
* Check for outliers in start/end times and elapsed create/xfer/close times.
|
||||
*/
|
||||
static void CheckForOutliers(IOR_param_t * test, double **timer, int rep,
|
||||
int access)
|
||||
static void
|
||||
CheckForOutliers(IOR_param_t *test, const double *timer, const int access)
|
||||
{
|
||||
int shift;
|
||||
|
||||
if (access == WRITE) {
|
||||
shift = 0;
|
||||
} else { /* READ */
|
||||
shift = 6;
|
||||
}
|
||||
|
||||
DisplayOutliers(test->numTasks, timer[shift + 0][rep],
|
||||
DisplayOutliers(test->numTasks, timer[0],
|
||||
"start time", access, test->outlierThreshold);
|
||||
DisplayOutliers(test->numTasks,
|
||||
timer[shift + 1][rep] - timer[shift + 0][rep],
|
||||
timer[1] - timer[0],
|
||||
"elapsed create time", access, test->outlierThreshold);
|
||||
DisplayOutliers(test->numTasks,
|
||||
timer[shift + 3][rep] - timer[shift + 2][rep],
|
||||
timer[3] - timer[2],
|
||||
"elapsed transfer time", access,
|
||||
test->outlierThreshold);
|
||||
DisplayOutliers(test->numTasks,
|
||||
timer[shift + 5][rep] - timer[shift + 4][rep],
|
||||
timer[5] - timer[4],
|
||||
"elapsed close time", access, test->outlierThreshold);
|
||||
DisplayOutliers(test->numTasks, timer[shift + 5][rep], "end time",
|
||||
DisplayOutliers(test->numTasks, timer[5], "end time",
|
||||
access, test->outlierThreshold);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if actual file size equals expected size; if not use actual for
|
||||
* calculating performance rate.
|
||||
*/
|
||||
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
|
||||
static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep,
|
||||
const int access)
|
||||
{
|
||||
IOR_param_t *params = &test->params;
|
||||
IOR_results_t *results = test->results;
|
||||
IOR_point_t *point = (access == WRITE) ? &results[rep].write :
|
||||
&results[rep].read;
|
||||
|
||||
MPI_CHECK(MPI_Allreduce(&dataMoved, & results[rep].aggFileSizeFromXfer,
|
||||
MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer,
|
||||
1, MPI_LONG_LONG_INT, MPI_SUM, testComm),
|
||||
"cannot total data moved");
|
||||
|
||||
if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) {
|
||||
if (verbose >= VERBOSE_0 && rank == 0) {
|
||||
if ((params->expectedAggFileSize
|
||||
!= results[rep].aggFileSizeFromXfer)
|
||||
|| (results[rep].aggFileSizeFromStat
|
||||
!= results[rep].aggFileSizeFromXfer)) {
|
||||
!= point->aggFileSizeFromXfer)
|
||||
|| (point->aggFileSizeFromStat
|
||||
!= point->aggFileSizeFromXfer)) {
|
||||
fprintf(out_logfile,
|
||||
"WARNING: Expected aggregate file size = %lld.\n",
|
||||
(long long) params->expectedAggFileSize);
|
||||
fprintf(out_logfile,
|
||||
"WARNING: Stat() of aggregate file size = %lld.\n",
|
||||
(long long) results[rep].aggFileSizeFromStat);
|
||||
(long long) point->aggFileSizeFromStat);
|
||||
fprintf(out_logfile,
|
||||
"WARNING: Using actual aggregate bytes moved = %lld.\n",
|
||||
(long long) results[rep].aggFileSizeFromXfer);
|
||||
(long long) point->aggFileSizeFromXfer);
|
||||
if(params->deadlineForStonewalling){
|
||||
fprintf(out_logfile,
|
||||
"WARNING: maybe caused by deadlineForStonewalling\n");
|
||||
|
@ -319,7 +315,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep)
|
|||
}
|
||||
}
|
||||
}
|
||||
results[rep].aggFileSizeForBW = results[rep].aggFileSizeFromXfer;
|
||||
|
||||
point->aggFileSizeForBW = point->aggFileSizeFromXfer;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -848,54 +845,46 @@ static char *PrependDir(IOR_param_t * test, char *rootDir)
|
|||
/*
|
||||
* Reduce test results, and show if verbose set.
|
||||
*/
|
||||
|
||||
static void ReduceIterResults(IOR_test_t *test, double **timer, int rep,
|
||||
int access)
|
||||
static void
|
||||
ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int access)
|
||||
{
|
||||
double reduced[12] = { 0 };
|
||||
double diff[6];
|
||||
double *diff_subset;
|
||||
double totalTime;
|
||||
double bw;
|
||||
int i;
|
||||
MPI_Op op;
|
||||
double reduced[IOR_NB_TIMERS] = { 0 };
|
||||
double diff[IOR_NB_TIMERS / 2 + 1];
|
||||
double totalTime;
|
||||
double bw;
|
||||
int i;
|
||||
MPI_Op op;
|
||||
|
||||
assert(access == WRITE || access == READ);
|
||||
assert(access == WRITE || access == READ);
|
||||
|
||||
/* Find the minimum start time of the even numbered timers, and the
|
||||
maximum finish time for the odd numbered timers */
|
||||
for (i = 0; i < 12; i++) {
|
||||
for (i = 0; i < IOR_NB_TIMERS; i++) {
|
||||
op = i % 2 ? MPI_MAX : MPI_MIN;
|
||||
MPI_CHECK(MPI_Reduce(&timer[i][rep], &reduced[i], 1, MPI_DOUBLE,
|
||||
MPI_CHECK(MPI_Reduce(&timer[i], &reduced[i], 1, MPI_DOUBLE,
|
||||
op, 0, testComm), "MPI_Reduce()");
|
||||
}
|
||||
|
||||
if (rank != 0) {
|
||||
/* Only rank 0 tallies and prints the results. */
|
||||
return;
|
||||
}
|
||||
/* Only rank 0 tallies and prints the results. */
|
||||
if (rank != 0)
|
||||
return;
|
||||
|
||||
/* Calculate elapsed times and throughput numbers */
|
||||
for (i = 0; i < 6; i++) {
|
||||
diff[i] = reduced[2 * i + 1] - reduced[2 * i];
|
||||
}
|
||||
if (access == WRITE) {
|
||||
totalTime = reduced[5] - reduced[0];
|
||||
test->results[rep].writeTime = totalTime;
|
||||
diff_subset = &diff[0];
|
||||
} else { /* READ */
|
||||
totalTime = reduced[11] - reduced[6];
|
||||
test->results[rep].readTime = totalTime;
|
||||
diff_subset = &diff[3];
|
||||
}
|
||||
/* Calculate elapsed times and throughput numbers */
|
||||
for (i = 0; i < IOR_NB_TIMERS / 2; i++)
|
||||
diff[i] = reduced[2 * i + 1] - reduced[2 * i];
|
||||
|
||||
if (verbose < VERBOSE_0) {
|
||||
return;
|
||||
}
|
||||
totalTime = reduced[5] - reduced[0];
|
||||
|
||||
bw = (double)test->results[rep].aggFileSizeForBW / totalTime;
|
||||
IOR_point_t *point = (access == WRITE) ? &test->results[rep].write :
|
||||
&test->results[rep].read;
|
||||
|
||||
PrintReducedResult(test, access, bw, diff_subset, totalTime, rep);
|
||||
point->time = totalTime;
|
||||
|
||||
if (verbose < VERBOSE_0)
|
||||
return;
|
||||
|
||||
bw = (double)point->aggFileSizeForBW / totalTime;
|
||||
PrintReducedResult(test, access, bw, diff, totalTime, rep);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1116,7 +1105,72 @@ static void *HogMemory(IOR_param_t *params)
|
|||
|
||||
return buf;
|
||||
}
|
||||
/*
|
||||
* Write times taken during each iteration of the test.
|
||||
*/
|
||||
static void
|
||||
WriteTimes(IOR_param_t *test, const double *timer, const int iteration,
|
||||
const int access)
|
||||
{
|
||||
char timerName[MAX_STR];
|
||||
|
||||
for (int i = 0; i < IOR_NB_TIMERS; i++) {
|
||||
|
||||
if (access == WRITE) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
strcpy(timerName, "write open start");
|
||||
break;
|
||||
case 1:
|
||||
strcpy(timerName, "write open stop");
|
||||
break;
|
||||
case 2:
|
||||
strcpy(timerName, "write start");
|
||||
break;
|
||||
case 3:
|
||||
strcpy(timerName, "write stop");
|
||||
break;
|
||||
case 4:
|
||||
strcpy(timerName, "write close start");
|
||||
break;
|
||||
case 5:
|
||||
strcpy(timerName, "write close stop");
|
||||
break;
|
||||
default:
|
||||
strcpy(timerName, "invalid timer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
switch (i) {
|
||||
case 0:
|
||||
strcpy(timerName, "read open start");
|
||||
break;
|
||||
case 1:
|
||||
strcpy(timerName, "read open stop");
|
||||
break;
|
||||
case 2:
|
||||
strcpy(timerName, "read start");
|
||||
break;
|
||||
case 3:
|
||||
strcpy(timerName, "read stop");
|
||||
break;
|
||||
case 4:
|
||||
strcpy(timerName, "read close start");
|
||||
break;
|
||||
case 5:
|
||||
strcpy(timerName, "read close stop");
|
||||
break;
|
||||
default:
|
||||
strcpy(timerName, "invalid timer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n",
|
||||
test->id, iteration, (int)rank, timer[i],
|
||||
timerName);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Using the test parameters, run iteration(s) of single test.
|
||||
*/
|
||||
|
@ -1125,10 +1179,10 @@ static void TestIoSys(IOR_test_t *test)
|
|||
IOR_param_t *params = &test->params;
|
||||
IOR_results_t *results = test->results;
|
||||
char testFileName[MAX_STR];
|
||||
double *timer[12];
|
||||
double timer[IOR_NB_TIMERS];
|
||||
double startTime;
|
||||
int pretendRank;
|
||||
int i, rep;
|
||||
int rep;
|
||||
void *fd;
|
||||
MPI_Group orig_group, new_group;
|
||||
int range[3];
|
||||
|
@ -1175,13 +1229,6 @@ static void TestIoSys(IOR_test_t *test)
|
|||
}
|
||||
params->tasksPerNode = CountTasksPerNode(testComm);
|
||||
|
||||
/* setup timers */
|
||||
for (i = 0; i < 12; i++) {
|
||||
timer[i] = (double *)malloc(params->repetitions * sizeof(double));
|
||||
if (timer[i] == NULL)
|
||||
ERR("malloc failed");
|
||||
}
|
||||
|
||||
/* bind I/O calls to specific API */
|
||||
backend = aiori_select(params->api);
|
||||
|
||||
|
@ -1257,9 +1304,9 @@ static void TestIoSys(IOR_test_t *test)
|
|||
params->stoneWallingWearOutIterations = params_saved_wearout;
|
||||
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
|
||||
params->open = WRITE;
|
||||
timer[0][rep] = GetTimeStamp();
|
||||
timer[0] = GetTimeStamp();
|
||||
fd = backend->create(testFileName, params);
|
||||
timer[1][rep] = GetTimeStamp();
|
||||
timer[1] = GetTimeStamp();
|
||||
if (params->intraTestBarriers)
|
||||
MPI_CHECK(MPI_Barrier(testComm),
|
||||
"barrier error");
|
||||
|
@ -1268,40 +1315,40 @@ static void TestIoSys(IOR_test_t *test)
|
|||
"Commencing write performance test: %s",
|
||||
CurrentTimeString());
|
||||
}
|
||||
timer[2][rep] = GetTimeStamp();
|
||||
dataMoved = WriteOrRead(params, & results[rep], fd, WRITE, &ioBuffers);
|
||||
timer[2] = GetTimeStamp();
|
||||
dataMoved = WriteOrRead(params, &results[rep], fd, WRITE, &ioBuffers);
|
||||
if (params->verbose >= VERBOSE_4) {
|
||||
fprintf(out_logfile, "* data moved = %llu\n", dataMoved);
|
||||
fflush(out_logfile);
|
||||
}
|
||||
timer[3][rep] = GetTimeStamp();
|
||||
timer[3] = GetTimeStamp();
|
||||
if (params->intraTestBarriers)
|
||||
MPI_CHECK(MPI_Barrier(testComm),
|
||||
"barrier error");
|
||||
timer[4][rep] = GetTimeStamp();
|
||||
timer[4] = GetTimeStamp();
|
||||
backend->close(fd, params);
|
||||
|
||||
timer[5][rep] = GetTimeStamp();
|
||||
timer[5] = GetTimeStamp();
|
||||
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
|
||||
|
||||
/* get the size of the file just written */
|
||||
results[rep].aggFileSizeFromStat =
|
||||
results[rep].write.aggFileSizeFromStat =
|
||||
backend->get_file_size(params, testComm, testFileName);
|
||||
|
||||
/* check if stat() of file doesn't equal expected file size,
|
||||
use actual amount of byte moved */
|
||||
CheckFileSize(test, dataMoved, rep);
|
||||
CheckFileSize(test, dataMoved, rep, WRITE);
|
||||
|
||||
if (verbose >= VERBOSE_3)
|
||||
WriteTimes(params, timer, rep, WRITE);
|
||||
ReduceIterResults(test, timer, rep, WRITE);
|
||||
if (params->outlierThreshold) {
|
||||
CheckForOutliers(params, timer, rep, WRITE);
|
||||
CheckForOutliers(params, timer, WRITE);
|
||||
}
|
||||
|
||||
/* check if in this round we run write with stonewalling */
|
||||
if(params->deadlineForStonewalling > 0){
|
||||
params->stoneWallingWearOutIterations = results[rep].pairs_accessed;
|
||||
params->stoneWallingWearOutIterations = results[rep].write.pairs_accessed;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1329,7 +1376,7 @@ static void TestIoSys(IOR_test_t *test)
|
|||
GetTestFileName(testFileName, params);
|
||||
params->open = WRITECHECK;
|
||||
fd = backend->open(testFileName, params);
|
||||
dataMoved = WriteOrRead(params, & results[rep], fd, WRITECHECK, &ioBuffers);
|
||||
dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers);
|
||||
backend->close(fd, params);
|
||||
rankOffset = 0;
|
||||
}
|
||||
|
@ -1396,9 +1443,9 @@ static void TestIoSys(IOR_test_t *test)
|
|||
DelaySecs(params->interTestDelay);
|
||||
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
|
||||
params->open = READ;
|
||||
timer[6][rep] = GetTimeStamp();
|
||||
timer[0] = GetTimeStamp();
|
||||
fd = backend->open(testFileName, params);
|
||||
timer[7][rep] = GetTimeStamp();
|
||||
timer[1] = GetTimeStamp();
|
||||
if (params->intraTestBarriers)
|
||||
MPI_CHECK(MPI_Barrier(testComm),
|
||||
"barrier error");
|
||||
|
@ -1407,30 +1454,30 @@ static void TestIoSys(IOR_test_t *test)
|
|||
"Commencing read performance test: %s",
|
||||
CurrentTimeString());
|
||||
}
|
||||
timer[8][rep] = GetTimeStamp();
|
||||
dataMoved = WriteOrRead(params, & results[rep], fd, operation_flag, &ioBuffers);
|
||||
timer[9][rep] = GetTimeStamp();
|
||||
timer[2] = GetTimeStamp();
|
||||
dataMoved = WriteOrRead(params, &results[rep], fd, operation_flag, &ioBuffers);
|
||||
timer[3] = GetTimeStamp();
|
||||
if (params->intraTestBarriers)
|
||||
MPI_CHECK(MPI_Barrier(testComm),
|
||||
"barrier error");
|
||||
timer[10][rep] = GetTimeStamp();
|
||||
timer[4] = GetTimeStamp();
|
||||
backend->close(fd, params);
|
||||
timer[11][rep] = GetTimeStamp();
|
||||
timer[5] = GetTimeStamp();
|
||||
|
||||
/* get the size of the file just read */
|
||||
results[rep].aggFileSizeFromStat =
|
||||
results[rep].read.aggFileSizeFromStat =
|
||||
backend->get_file_size(params, testComm,
|
||||
testFileName);
|
||||
|
||||
/* check if stat() of file doesn't equal expected file size,
|
||||
use actual amount of byte moved */
|
||||
CheckFileSize(test, dataMoved, rep);
|
||||
CheckFileSize(test, dataMoved, rep, READ);
|
||||
|
||||
if (verbose >= VERBOSE_3)
|
||||
WriteTimes(params, timer, rep, READ);
|
||||
ReduceIterResults(test, timer, rep, READ);
|
||||
if (params->outlierThreshold) {
|
||||
CheckForOutliers(params, timer, rep, READ);
|
||||
CheckForOutliers(params, timer, READ);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1465,9 +1512,6 @@ static void TestIoSys(IOR_test_t *test)
|
|||
|
||||
if (hog_buf != NULL)
|
||||
free(hog_buf);
|
||||
for (i = 0; i < 12; i++) {
|
||||
free(timer[i]);
|
||||
}
|
||||
|
||||
/* Sync with the tasks that did not participate in this test */
|
||||
MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error");
|
||||
|
@ -1831,7 +1875,8 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset
|
|||
* Write or Read data to file(s). This loops through the strides, writing
|
||||
* out the data to each block in transfer sizes, until the remainder left is 0.
|
||||
*/
|
||||
static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, void *fd, int access, IOR_io_buffers* ioBuffers)
|
||||
static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
|
||||
void *fd, const int access, IOR_io_buffers *ioBuffers)
|
||||
{
|
||||
int errors = 0;
|
||||
IOR_offset_t transferCount = 0;
|
||||
|
@ -1841,6 +1886,8 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
|
|||
IOR_offset_t dataMoved = 0; /* for data rate calculation */
|
||||
double startForStonewall;
|
||||
int hitStonewall;
|
||||
IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ?
|
||||
&results->write : &results->read;
|
||||
|
||||
/* initialize values */
|
||||
pretendRank = (rank + rankOffset) % test->numTasks;
|
||||
|
@ -1869,35 +1916,35 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
|
|||
}
|
||||
long long data_moved_ll = (long long) dataMoved;
|
||||
long long pairs_accessed_min = 0;
|
||||
MPI_CHECK(MPI_Allreduce(& pairCnt, &results->pairs_accessed,
|
||||
MPI_CHECK(MPI_Allreduce(& pairCnt, &point->pairs_accessed,
|
||||
1, MPI_LONG_LONG_INT, MPI_MAX, testComm), "cannot reduce pairs moved");
|
||||
double stonewall_runtime = GetTimeStamp() - startForStonewall;
|
||||
results->stonewall_time = stonewall_runtime;
|
||||
point->stonewall_time = stonewall_runtime;
|
||||
MPI_CHECK(MPI_Reduce(& pairCnt, & pairs_accessed_min,
|
||||
1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
|
||||
MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_min_data_accessed,
|
||||
MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed,
|
||||
1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved");
|
||||
MPI_CHECK(MPI_Reduce(& data_moved_ll, & results->stonewall_avg_data_accessed,
|
||||
MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed,
|
||||
1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved");
|
||||
|
||||
if(rank == 0){
|
||||
fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n",
|
||||
pairs_accessed_min, results->pairs_accessed,
|
||||
results->stonewall_min_data_accessed /1024.0 / 1024 / 1024, results->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , results->stonewall_time);
|
||||
results->stonewall_min_data_accessed *= test->numTasks;
|
||||
pairs_accessed_min, point->pairs_accessed,
|
||||
point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time);
|
||||
point->stonewall_min_data_accessed *= test->numTasks;
|
||||
}
|
||||
if(pairs_accessed_min == pairCnt){
|
||||
results->stonewall_min_data_accessed = 0;
|
||||
results->stonewall_avg_data_accessed = 0;
|
||||
point->stonewall_min_data_accessed = 0;
|
||||
point->stonewall_avg_data_accessed = 0;
|
||||
}
|
||||
if(pairCnt != results->pairs_accessed){
|
||||
if(pairCnt != point->pairs_accessed){
|
||||
// some work needs still to be done !
|
||||
for(; pairCnt < results->pairs_accessed; pairCnt++ ) {
|
||||
for(; pairCnt < point->pairs_accessed; pairCnt++ ) {
|
||||
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
|
||||
}
|
||||
}
|
||||
}else{
|
||||
results->pairs_accessed = pairCnt;
|
||||
point->pairs_accessed = pairCnt;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1910,73 +1957,3 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
|
|||
}
|
||||
return (dataMoved);
|
||||
}
|
||||
|
||||
/*
|
||||
* Write times taken during each iteration of the test.
|
||||
*/
|
||||
static void
|
||||
WriteTimes(IOR_param_t * test, double **timer, int iteration, int writeOrRead)
|
||||
{
|
||||
char accessType[MAX_STR];
|
||||
char timerName[MAX_STR];
|
||||
int i, start = 0, stop = 0;
|
||||
|
||||
if (writeOrRead == WRITE) {
|
||||
start = 0;
|
||||
stop = 6;
|
||||
strcpy(accessType, "WRITE");
|
||||
} else if (writeOrRead == READ) {
|
||||
start = 6;
|
||||
stop = 12;
|
||||
strcpy(accessType, "READ");
|
||||
} else {
|
||||
ERR("incorrect WRITE/READ option");
|
||||
}
|
||||
|
||||
for (i = start; i < stop; i++) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
strcpy(timerName, "write open start");
|
||||
break;
|
||||
case 1:
|
||||
strcpy(timerName, "write open stop");
|
||||
break;
|
||||
case 2:
|
||||
strcpy(timerName, "write start");
|
||||
break;
|
||||
case 3:
|
||||
strcpy(timerName, "write stop");
|
||||
break;
|
||||
case 4:
|
||||
strcpy(timerName, "write close start");
|
||||
break;
|
||||
case 5:
|
||||
strcpy(timerName, "write close stop");
|
||||
break;
|
||||
case 6:
|
||||
strcpy(timerName, "read open start");
|
||||
break;
|
||||
case 7:
|
||||
strcpy(timerName, "read open stop");
|
||||
break;
|
||||
case 8:
|
||||
strcpy(timerName, "read start");
|
||||
break;
|
||||
case 9:
|
||||
strcpy(timerName, "read stop");
|
||||
break;
|
||||
case 10:
|
||||
strcpy(timerName, "read close start");
|
||||
break;
|
||||
case 11:
|
||||
strcpy(timerName, "read close stop");
|
||||
break;
|
||||
default:
|
||||
strcpy(timerName, "invalid timer");
|
||||
break;
|
||||
}
|
||||
fprintf(out_logfile, "Test %d: Iter=%d, Task=%d, Time=%f, %s\n",
|
||||
test->id, iteration, (int)rank, timer[i][iteration],
|
||||
timerName);
|
||||
}
|
||||
}
|
||||
|
|
16
src/ior.h
16
src/ior.h
|
@ -204,12 +204,9 @@ typedef struct
|
|||
int intraTestBarriers; /* barriers between open/op and op/close */
|
||||
} IOR_param_t;
|
||||
|
||||
/* each pointer is to an array, each of length equal to the number of
|
||||
repetitions in the test */
|
||||
/* each pointer for a single test */
|
||||
typedef struct {
|
||||
double writeTime;
|
||||
double readTime;
|
||||
int errors;
|
||||
double time;
|
||||
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
|
||||
|
||||
double stonewall_time;
|
||||
|
@ -219,16 +216,21 @@ typedef struct {
|
|||
IOR_offset_t aggFileSizeFromStat;
|
||||
IOR_offset_t aggFileSizeFromXfer;
|
||||
IOR_offset_t aggFileSizeForBW;
|
||||
} IOR_point_t;
|
||||
|
||||
typedef struct {
|
||||
int errors;
|
||||
IOR_point_t write;
|
||||
IOR_point_t read;
|
||||
} IOR_results_t;
|
||||
|
||||
/* define the queuing structure for the test parameters */
|
||||
typedef struct IOR_test_t {
|
||||
IOR_param_t params;
|
||||
IOR_results_t *results; /* This is an array of reps times IOR_results_t */
|
||||
IOR_results_t *results;
|
||||
struct IOR_test_t *next;
|
||||
} IOR_test_t;
|
||||
|
||||
|
||||
IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
|
||||
void AllocResults(IOR_test_t *test);
|
||||
|
||||
|
|
|
@ -96,7 +96,6 @@ enum OutputFormat_t{
|
|||
#define WRITECHECK 1
|
||||
#define READ 2
|
||||
#define READCHECK 3
|
||||
#define CHECK 4
|
||||
|
||||
/* verbosity settings */
|
||||
#define VERBOSE_0 0
|
||||
|
|
Loading…
Reference in New Issue