diff --git a/src/ior-internal.h b/src/ior-internal.h
index 6b89af1..7daf8de 100644
--- a/src/ior-internal.h
+++ b/src/ior-internal.h
@@ -20,7 +20,8 @@ void PrintLongSummaryOneTest(IOR_test_t *test);
 void DisplayFreespace(IOR_param_t * test);
 void GetTestFileName(char *, IOR_param_t *);
 void PrintRemoveTiming(double start, double finish, int rep);
-void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep);
+void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency,
+                        double *diff_subset, double totalTime, int rep);
 void PrintTestEnds();
 void PrintTableHeader();
 /* End of ior-output */
diff --git a/src/ior-output.c b/src/ior-output.c
index 560d995..c3e0cb2 100644
--- a/src/ior-output.c
+++ b/src/ior-output.c
@@ -18,8 +18,8 @@ static void PrintNextToken();
 void PrintTableHeader(){
   if (outputFormat == OUTPUT_DEFAULT){
     fprintf(out_resultfile, "\n");
-    fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n");
-    fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n");
+    fprintf(out_resultfile, "access bw(MiB/s) IOPS Latency(s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n");
+    fprintf(out_resultfile, "------ --------- ---- ---------- ---------- --------- -------- -------- -------- -------- ----\n");
   }
 }
 
@@ -219,10 +219,13 @@ void PrintTestEnds(){
   PrintEndSection();
 }
 
-void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep){
+void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency,
+                        double *diff_subset, double totalTime, int rep){
   if (outputFormat == OUTPUT_DEFAULT){
     fprintf(out_resultfile, "%-10s", access == WRITE ? "write" : "read");
     PPDouble(1, bw / MEBIBYTE, " ");
+    PPDouble(1, iops, " ");
+    PPDouble(1, latency, " ");
     PPDouble(1, (double)test->params.blockSize / KIBIBYTE, " ");
     PPDouble(1, (double)test->params.transferSize / KIBIBYTE, " ");
     PPDouble(1, diff_subset[0], " ");
@@ -772,7 +775,7 @@ void PrintRemoveTiming(double start, double finish, int rep)
     return;
 
   if (outputFormat == OUTPUT_DEFAULT){
-    fprintf(out_resultfile, "remove - - - - - - ");
+    fprintf(out_resultfile, "remove - - - - - - - - ");
     PPDouble(1, finish-start, " ");
     fprintf(out_resultfile, "%-4d\n", rep);
   }else if (outputFormat == OUTPUT_JSON){
diff --git a/src/ior.c b/src/ior.c
index 2d08234..ef8b44c 100755
--- a/src/ior.c
+++ b/src/ior.c
@@ -841,8 +841,9 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
 {
         double reduced[IOR_NB_TIMERS] = { 0 };
         double diff[IOR_NB_TIMERS / 2 + 1];
-        double totalTime;
-        double bw;
+        double totalTime, accessTime;
+        IOR_param_t *params = &test->params;
+        double bw, iops, latency, minlatency;
         int i;
         MPI_Op op;
 
@@ -856,15 +857,12 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
                                      op, 0, testComm), "MPI_Reduce()");
         }
 
-        /* Only rank 0 tallies and prints the results. */
-        if (rank != 0)
-                return;
-
         /* Calculate elapsed times and throughput numbers */
         for (i = 0; i < IOR_NB_TIMERS / 2; i++)
                 diff[i] = reduced[2 * i + 1] - reduced[2 * i];
 
         totalTime = reduced[5] - reduced[0];
+        accessTime = reduced[3] - reduced[2];
 
         IOR_point_t *point = (access == WRITE) ? &test->results[rep].write :
                                                  &test->results[rep].read;
@@ -875,7 +873,27 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
                 return;
 
         bw = (double)point->aggFileSizeForBW / totalTime;
-        PrintReducedResult(test, access, bw, diff, totalTime, rep);
+
+        /* For IOPS in this iteration, we divide the total amount of IOs from
+         * all ranks over the entire access time (first start -> last end). */
+        iops = (point->aggFileSizeForBW / params->transferSize) / accessTime;
+
+        /* For Latency, we divide the total access time for each task over the
+         * number of I/Os issued from that task; then reduce and display the
+         * minimum (best) latency achieved. So what is reported is the average
+         * latency of all ops from a single task, then taking the minimum of
+         * that between all tasks. */
+        latency = (timer[3] - timer[2]) / (params->blockSize / params->transferSize);
+        MPI_CHECK(MPI_Reduce(&latency, &minlatency, 1, MPI_DOUBLE,
+                             MPI_MIN, 0, testComm), "MPI_Reduce()");
+
+        /* Only rank 0 tallies and prints the results. */
+        if (rank != 0)
+                return;
+
+        /* Report the reduced minimum; the local 'latency' is only rank 0's
+         * own per-task average. */
+        PrintReducedResult(test, access, bw, iops, minlatency, diff, totalTime, rep);
 }
 
 /*