Add latency and IOPS numbers to each iteration.

- The reported latency is computed by taking the average latency of all ops from a single task, then taking the minimum of that value across all tasks.
- IOPS is computed by taking the total number of ops across all tasks and dividing it by the total access time needed to execute those ops.

Signed-off-by: Mohamad Chaarawi <mohamad.chaarawi@intel.com>
2019-09-10 18:39:31 +00:00 · 2019-09-10 18:39:31 +00:00 · 12284ae04a
parent bcd449c360
commit 12284ae04a
3 changed files with 32 additions and 12 deletions
--- a/src/ior-internal.h
+++ b/src/ior-internal.h
@ -20,7 +20,8 @@ void PrintLongSummaryOneTest(IOR_test_t *test);
 void DisplayFreespace(IOR_param_t * test);
 void GetTestFileName(char *, IOR_param_t *);
 void PrintRemoveTiming(double start, double finish, int rep);
-void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep);
+void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency,
+			double *diff_subset, double totalTime, int rep);
 void PrintTestEnds();
 void PrintTableHeader();
 /* End of ior-output */
--- a/src/ior-output.c
+++ b/src/ior-output.c
@ -18,8 +18,8 @@ static void PrintNextToken();
 void PrintTableHeader(){
  if (outputFormat == OUTPUT_DEFAULT){
    fprintf(out_resultfile, "\n");
-    fprintf(out_resultfile, "access    bw(MiB/s)  block(KiB) xfer(KiB)  open(s)    wr/rd(s)   close(s)   total(s) iter\n");
-    fprintf(out_resultfile, "------    ---------  ---------- ---------  --------   --------   --------   -------- ----\n");
+    fprintf(out_resultfile, "access    bw(MiB/s)  IOPS       Latency(s)  block(KiB) xfer(KiB)  open(s)    wr/rd(s)   close(s)   total(s)   iter\n");
+    fprintf(out_resultfile, "------    ---------  ----       ----------  ---------- ---------  --------   --------   --------   --------   ----\n");
  }
 }

@ -219,10 +219,13 @@ void PrintTestEnds(){
  PrintEndSection();
 }

-void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep){
+void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency,
+			double *diff_subset, double totalTime, int rep){
  if (outputFormat == OUTPUT_DEFAULT){
    fprintf(out_resultfile, "%-10s", access == WRITE ? "write" : "read");
    PPDouble(1, bw / MEBIBYTE, " ");
+    PPDouble(1, iops, " ");
+    PPDouble(1, latency, "  ");
    PPDouble(1, (double)test->params.blockSize / KIBIBYTE, " ");
    PPDouble(1, (double)test->params.transferSize / KIBIBYTE, " ");
    PPDouble(1, diff_subset[0], " ");
@ -772,7 +775,7 @@ void PrintRemoveTiming(double start, double finish, int rep)
    return;

  if (outputFormat == OUTPUT_DEFAULT){
-    fprintf(out_resultfile, "remove    -          -          -          -          -          -          ");
+    fprintf(out_resultfile, "remove    -          -          -           -          -          -          -          -          ");
    PPDouble(1, finish-start, " ");
    fprintf(out_resultfile, "%-4d\n", rep);
  }else if (outputFormat == OUTPUT_JSON){
--- a/src/ior.c
+++ b/src/ior.c
@ -841,8 +841,9 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
 {
        double reduced[IOR_NB_TIMERS] = { 0 };
        double diff[IOR_NB_TIMERS / 2 + 1];
-        double totalTime;
-        double bw;
+        double totalTime, accessTime;
+        IOR_param_t *params = &test->params;
+        double bw, iops, latency, minlatency;
        int i;
        MPI_Op op;

@ -856,15 +857,12 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
                                     op, 0, testComm), "MPI_Reduce()");
        }

-        /* Only rank 0 tallies and prints the results. */
-        if (rank != 0)
-                return;
-
        /* Calculate elapsed times and throughput numbers */
        for (i = 0; i < IOR_NB_TIMERS / 2; i++)
                diff[i] = reduced[2 * i + 1] - reduced[2 * i];

        totalTime = reduced[5] - reduced[0];
+        accessTime = reduced[3] - reduced[2];

        IOR_point_t *point = (access == WRITE) ? &test->results[rep].write :
                                                 &test->results[rep].read;
@ -875,7 +873,25 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce
                return;

        bw = (double)point->aggFileSizeForBW / totalTime;
-        PrintReducedResult(test, access, bw, diff, totalTime, rep);
+
+        /* For IOPS in this iteration, we divide the total amount of IOs from
+         * all ranks over the entire access time (first start -> last end). */
+        iops = (point->aggFileSizeForBW / params->transferSize) / accessTime;
+
+        /* For Latency, we divide the total access time for each task over the
+         * number of I/Os issued from that task; then reduce and display the
+         * minimum (best) latency achieved. So what is reported is the average
+         * latency of all ops from a single task, then taking the minimum of
+         * that between all tasks. */ 
+        latency = (timer[3] - timer[2]) / (params->blockSize / params->transferSize);
+        MPI_CHECK(MPI_Reduce(&latency, &minlatency, 1, MPI_DOUBLE,
+                             MPI_MIN, 0, testComm), "MPI_Reduce()");
+
+        /* Only rank 0 tallies and prints the results. */
+        if (rank != 0)
+                return;
+
+        PrintReducedResult(test, access, bw, iops, latency, diff, totalTime, rep);
 }

 /*