Merge remote branch 'daos-stack/daos'

Signed-off-by: Mohamad Chaarawi <mohamad.chaarawi@intel.com>

Conflicts:
	configure.ac
	src/Makefile.am
	src/aiori.c
	src/aiori.h
	src/ior.c
	src/mdtest.c
	src/parse_options.c
master
Mohamad Chaarawi 2018-08-29 21:14:19 +00:00
commit 46ff4e26be
64 changed files with 6689 additions and 6148 deletions

9
.gitignore vendored
View File

@ -1,3 +1,4 @@
tags
Makefile
Makefile.in
aclocal.m4
@ -11,11 +12,13 @@ config/config.sub
config/depcomp
config/install-sh
config/missing
config/test-driver
configure
contrib/.deps/
contrib/cbif
contrib/Makefile
contrib/Makefile.in
contrib/cbif
doc/Makefile
doc/Makefile.in
src/.deps/
@ -32,7 +35,13 @@ contrib/cbif.o
src/*.o
src/*.i
src/*.s
src/*.a
src/ior
src/mdtest
src/testlib
src/test/.deps/
src/test/.dirstamp
src/test/lib.o
doc/doxygen/build
doc/sphinx/_*/

View File

@ -29,7 +29,7 @@ install:
# GPFS
# NOTE: GPFS needs a license and is therefore not testable with Travis.
before_script: ./bootstrap
script: ./configure --with-hdf5 && make
script: mkdir build && cd build && ../configure --with-hdf5 && make && cd .. && ./testing/basic-tests.sh
# notifications:

View File

@ -1,4 +1,5 @@
MAKEFLAGS = --no-print-directory
SUBDIRS = src doc contrib
EXTRA_DIST = META COPYRIGHT README ChangeLog
EXTRA_DIST = META COPYRIGHT README.md ChangeLog
# ACLOCAL_AMFLAGS needed for autoconf < 2.69
ACLOCAL_AMFLAGS = -I config

View File

@ -1,9 +1,9 @@
# HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=master)](https://travis-ci.org/hpc/ior)
This repo now contains both IOR and mdtest.
See also NOTES.txt
[See also NOTES.txt]
Building
--------
# Building
0. If "configure" is missing from the top level directory, you
probably retrieved this code directly from the repository.
@ -21,3 +21,11 @@ Building
3. Optionally, run "make install". The installation prefix
can be changed as an option to the "configure" script.
# Testing
Run "make check" to invoke the unit test framework of Automake.
* To run basic functionality tests that we use for continuous integration, see ./testing/
* There are docker scripts provided to test various distributions at once.
* See ./testing/docker/

48
README_DAOS Normal file
View File

@ -0,0 +1,48 @@
Building with DAOS API
----------------------
At step 1 above, one must specify "--with-daos". If the DAOS
headers and libraries are not installed at respective system
default locations, then one may also need to set CPPFLAGS and
LDFLAGS accordingly.
Running with DAOS API
---------------------
One must specify an existing pool using "-O
daospool=<pool_uuid>". IOR must be launched in a way that
attaches the IOR process group to the DAOS server process group.
One must also specify a container UUID using "-o
<container_uuid>". If the "-E" option is given, then this UUID
shall denote an existing container created by a "matching" IOR
run. Otherwise, IOR will create a new container with this UUID.
In the latter case, one may use uuidgen(1) to generate the UUID
of the new container.
When benchmarking write performance, one likely does not want
"-W", which causes the write phase to do one additional memory
copy for every I/O. This is due to IOR's assumption that when a
DAOS_Xfer() call returns the buffer may be released. Therefore,
random data is written when "-W" is absent, while data is copied
from IOR buffers when "-W" is present.
See doc/USER_GUIDE for all options and directives. Note that not
all combinations of options are supported.
Examples that should work include:
- "ior -a DAOS -w -W -o <container_uuid> -O
daospool=<pool_uuid>,daospoolsvc=<svc_ranks>" writes into a new container
and verifies the data, using default daosRecordSize, transferSize,
daosStripeSize, blockSize, daosAios, etc.
- "ior -a DAOS -w -W -r -R -o <container_uuid> -b 1g -t 4m -C -O
daospool=<pool_uuid>,daospoolsvc=<svc_ranks>,daosrecordsize=1m,
daosstripesize=4m, daosstripecount=256,daosaios=8" does all IOR tests and
shifts ranks during checkWrite and checkRead.
- "ior -a DAOS -w -r -o <container_uuid> -b 8g -t 1m -C -O
daospool=<pool_uuid>,daospoolsvc=<svc_ranks>,daosrecordsize=1m,daosstripesize=4m,
daosstripecount=256,daosaios=8" may be a base to be tuned for performance
benchmarking.

View File

@ -15,7 +15,8 @@ AC_CONFIG_HEADER([src/config.h])
AC_CANONICAL_HOST
# Automake support
AM_INIT_AUTOMAKE([check-news dist-bzip2 gnu no-define])
AM_INIT_AUTOMAKE([check-news dist-bzip2 gnu no-define foreign subdir-objects])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AM_MAINTAINER_MODE
# Checks for programs
@ -25,6 +26,7 @@ AX_PROG_CC_MPI(,,[
AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.])
])
AC_PROG_RANLIB
# No reason not to require modern C at this point
AC_PROG_CC_C99
@ -76,6 +78,17 @@ AS_IF([test "x$with_lustre" != xno], [
])
])
# IME (DDN's Infinite Memory Engine) support
AC_ARG_WITH([ime],
[AS_HELP_STRING([--with-ime],
[support IO with IME backend @<:@default=no@:>@])],
[],
[with_ime=no])
AM_CONDITIONAL([USE_IME_AIORI], [test x$with_ime = xyes])
AM_COND_IF([USE_IME_AIORI],[
AC_DEFINE([USE_IME_AIORI], [], [Build IME backend AIORI])
])
# HDF5 support
AC_ARG_WITH([hdf5],
[AS_HELP_STRING([--with-hdf5],
@ -142,22 +155,21 @@ AM_COND_IF([USE_POSIX_AIORI],[
AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI])
])
AC_ARG_WITH([cart],
[AS_HELP_STRING([--with-cart],
[Build DAOS ROMIO driver[default=no]])],,
[with_cart=no])
# RADOS support
AC_ARG_WITH([rados],
[AS_HELP_STRING([--with-rados],
[support IO with librados backend @<:@default=no@:>@])],
[],
[with_rados=no])
AM_CONDITIONAL([USE_RADOS_AIORI], [test x$with_rados = xyes])
AM_COND_IF([USE_RADOS_AIORI],[
AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI])
])
AS_IF([test "x$with_cart" != xno],
CART="yes"
LDFLAGS="$LDFLAGS -L$with_cart/lib"
CPPFLAGS="$CPPFLAGS -I$with_cart/include/"
AC_CHECK_HEADERS(gurt/common.h,, [unset CART])
AC_CHECK_LIB([gurt], [d_rank_list_alloc],, [unset CART]))
# DFS IO support
# DAOS Backends (DAOS and DFS) IO support
AC_ARG_WITH([daos],
[AS_HELP_STRING([--with-daos],
[support IO with DFS backend @<:@default=no@:>@])],
[support IO with DAOS backends @<:@default=no@:>@])],
[],
[with_daos=no])
@ -171,11 +183,9 @@ AS_IF([test "x$with_daos" != xno],
AC_CHECK_LIB([daos], [daos_init],, [unset DAOS])
AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS]))
AS_IF([test "x$CART" != xyes], [unset DAOS])
AM_CONDITIONAL([USE_DFS_AIORI], [test x$DAOS = xyes])
AM_COND_IF([USE_DFS_AIORI],[
AC_DEFINE([USE_DFS_AIORI], [], [Build DFS backend AIORI])
AM_CONDITIONAL([USE_DAOS_AIORI], [test x$DAOS = xyes])
AM_COND_IF([USE_DAOS_AIORI],[
AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backends AIORI])
])
# aws4c is needed for the S3 backend (see --with-S3, below).

View File

@ -23,10 +23,10 @@ Index:
*******************
* 1. DESCRIPTION *
*******************
IOR can be used for testing performance of parallel file systems using various
interfaces and access patterns. IOR uses MPI for process synchronization.
IOR version 2 is a complete rewrite of the original IOR (Interleaved-Or-Random)
version 1 code.
IOR can be used for testing performance of parallel file systems using various
interfaces and access patterns. IOR uses MPI for process synchronization.
IOR version 2 is a complete rewrite of the original IOR (Interleaved-Or-Random)
version 1 code.
******************
@ -39,7 +39,7 @@ Two ways to run IOR:
E.g., to execute: IOR -w -r -o filename
This performs a write and a read to the file 'filename'.
* Command line with scripts -- any arguments on the command line will
* Command line with scripts -- any arguments on the command line will
establish the default for the test run, but a script may be used in
conjunction with this for varying specific tests during an execution of the
code.
@ -47,12 +47,22 @@ Two ways to run IOR:
E.g., to execute: IOR -W -f script
This defaults all tests in 'script' to use write data checking.
* The command line supports specifying additional parameters for the chosen API.
For example, username and password for the storage.
Available options are listed in the help text after selecting the API when running with -h.
For example, 'IOR -a DUMMY -h' shows the supported options for the DUMMY backend.
The options for the backend must be specified last and are separated by
two dashes '--'. Example: 'IOR -a DUMMY -- -c 1000' defines a delay for the
file creation for the plugin. Currently, it is not possible to set these
backend options using a command line script (-f option).
**************
* 3. OPTIONS *
**************
These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'.
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
-a S api -- API for I/O, e.g., POSIX
-A N refNum -- user reference number to include in long summary
-b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)
-B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers
@ -121,11 +131,11 @@ GENERAL:
* refNum - user supplied reference number, included in
long summary [0]
* api - must be set to one of POSIX, MPIIO, HDF5, HDFS, S3,
S3_EMC, or NCMPI, depending on test [POSIX]
* api - must be set to one of POSIX, MPIIO, HDF5, HDFS, IME,
S3, S3_EMC, or NCMPI, depending on test [POSIX]
* testFile - name of the output file [testFile]
NOTE: with filePerProc set, the tasks can round
NOTE: with filePerProc set, the tasks can round
robin across multiple file names '-o S@S@S'
* hintsFileName - name of the hints file []
@ -267,7 +277,7 @@ GENERAL:
data, this option measures the amount of
data moved in a fixed amount of time. The
objective is to prevent tasks slow to
complete from skewing the performance.
complete from skewing the performance.
* setting this to zero (0) unsets this option
* this option is incompatible w/data checking
@ -280,7 +290,8 @@ GENERAL:
* summaryAlways - Always print the long summary for each test.
Useful for long runs that may be interrupted, preventing
the final long summary for ALL tests to be printed.
* summaryFile=File - Output the summary to the file instead on stdout/stderr.
* summaryFormat=FMT - Choose the output format -- default, JSON, CSV
POSIX-ONLY:
===========
@ -319,7 +330,7 @@ HDF5-ONLY:
NOTE: default IOR creates a dataset the size of
numTasks * blockSize to be accessed by all
tasks
* noFill - no pre-filling of data in HDF5 file creation [0=FALSE]
* setAlignment - HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g) [1]
@ -356,6 +367,43 @@ BeeGFS-SPECIFIC (POSIX only):
* beegfsChunkSize - set the striping chunk size. Must be a power of two,
and greater than 64kiB, (e.g.: 256k, 1M, ...)
DAOS-ONLY:
==========
* daosGroup - group name [NULL]
* daosPool - UUID of the pool []
* daosPoolSvc - pool service replica ranks (e.g., 1:2:3:4:5) []
* daosRecordSize - size (in bytes) of an akey record [256k]
NOTE: must divide transferSize
* daosStripeSize - size (in bytes) of a chunk in a stripe [512k]
NOTE: must be a multiple of transferSize
* daosStripeCount - number of stripes [64 * number of targets]
NOTE: i.e., number of dkeys
* daosStripeMax - max length of each stripe [0]
NOTE: must be a multiple of daosStripeSize
NOTE: for write testing with small storage
NOTE: offsets in a stripe larger than daosStripeMax
are mapped to offset % daosStripeMax
* daosAios - max number of asynchronous I/Os [1]
* daosWriteOnly - skip flushing and committing [0=FALSE]
* daosEpoch - epoch to read or write [0]
NOTE: 0 denotes reading GHCE or writing GHCE + 1
* daosWait - epoch to wait when opening the container [0]
* daosKill - kill a target in the middle of the test [0]
NOTE: must also specify daosObjectClass=repl
* daosObjectClass - object class (tiny, small, large, repl, repl_max)
[large]
***********************
* 5. VERBOSITY LEVELS *
@ -395,7 +443,7 @@ various application codes. Details are included in each script as necessary.
An example of a script:
===============> start script <===============
IOR START
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
api=[POSIX|MPIIO|HDF5|HDFS|IME|S3|S3_EMC|NCMPI]
testFile=testFile
hintsFileName=hintsFile
repetitions=8
@ -483,8 +531,8 @@ zip, gzip, and bzip.
2) gzip: For gzipped files, a transfer size of 1k is sufficient.
3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed).
To avoid compression a transfer size of greater than the bzip block size is required
3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed).
To avoid compression a transfer size of greater than the bzip block size is required
(default = 900KB). I suggest a transfer size of greater than 1MB to avoid bzip2 compression.
Be aware of the block size your compression algorithm will look at, and adjust the transfer size
@ -508,9 +556,9 @@ HOW DO I PERFORM MULTIPLE DATA CHECKS ON AN EXISTING FILE?
and -r implied using both. This semantic has been subsequently altered to be
omitting -w, -r, -W, and -R implied using both -w and -r.)
If you're running new tests to create a file and want repeat data checking on
this file multiple times, there is an undocumented option for this. It's -O
multiReRead=1, and you'd need to have an IOR version compiled with the
If you're running new tests to create a file and want repeat data checking on
this file multiple times, there is an undocumented option for this. It's -O
multiReRead=1, and you'd need to have an IOR version compiled with the
USE_UNDOC_OPT=1 (in iordef.h). The command line would look like this:
IOR -k -E -w -W -i 5 -o file -O multiReRead=1
@ -586,7 +634,7 @@ HOW DO I USE STONEWALLING?
actually reading the same amount from disk in the allotted time, but they
are also reading the cached data from the previous test each time to get the
increased performance. Setting -D high enough so that the cache is
overfilled will prevent this.
overfilled will prevent this.
HOW DO I BYPASS CACHING WHEN READING BACK A FILE I'VE JUST WRITTEN?

View File

@ -17,7 +17,7 @@ normal parameters override each other, so the last one executed.
Command line options
--------------------
These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'.
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS]
-A N refNum -- user reference number to include in long summary
-b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)
-B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers

View File

@ -25,7 +25,7 @@ Syntax:
An example of a script: ::
IOR START
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS]
testFile=testFile
hintsFileName=hintsFile
repetitions=8

View File

@ -1,24 +0,0 @@
IOR START
testFile = /tmp/work/swh13/testfile
filePerProc=1
api=POSIX
repetitions=2
verbose=1
reordertasksrandom=1
reordertasksrandomseed=-113
RUN
verbose = 2
repetitions=1#more foo
reordertasksconstant=1
#foobar
blockSize=10m
transferSize=128k
randomoffset=1
RUN
verbose = 0
#blockSize=
transferSize=64k
RUN
IOR STOP

View File

@ -1,93 +0,0 @@
#!/bin/bash -x
#PBS -N IOR
#PBS -j oe
#PBS -q batch
#PBS -A stf006
#PBS -V
#PBS -l walltime=0:60:00,size=8
VERS=IOR-2.10.1.ornl.16
WORK=/tmp/work/${USER}
echo $PBS_O_WORKDIR
cd /ccs/proj/quadcore
tar -czvf ${WORK}/${VERS}.tar.gz ./${VERS}
cd ${WORK}
rm -fr ./${VERS}
tar -xzvf ${WORK}/${VERS}.tar.gz
cd ${WORK}/${VERS}
gmake clean
gmake mpiio
EXEC=${WORK}/${VERS}/src/C/IOR
IODIR=/tmp/work/swh13/test_files_x
cd ${WORK}/${VERS}/tests
which mpirun
rm -fr $IODIR
mkdir $IODIR
let "w=128"
let "s=1024*1024"
let "i=3"
MPIRUN="aprun -n"
RESULTS="."
let "tid=1"
XFERS="1048576 262144 32768 4096 1024"
XFERS="262144"
for xfer in `echo $XFERS`
do
let "n=8"
until [ "$n" -gt 8 ]
do
let "m=$n/4"
#TESTS="POSIX MPIIO HDF5 NCMPI"
TESTS="POSIX MPIIO"
for test in `echo $TESTS`
do
runid="p$n.$xfer.${test}"
date
V=" "
BLOCKS="1 10 1 10 1 10"
for blocks in `echo $BLOCKS`
do
let "block=${xfer} * ${blocks}"
#fileperproc tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X 13 ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X -13 ${V} -F -o $IODIR/testwrite.${runid} -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#shared tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#test mutually exclusive options
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -Z -C ${V} -o $IODIR/testwrite.${runid} -i${i} -m -t ${xfer} -b ${block} -d 0.0
let "tid=$tid + 17"
V=$V" -v"
done #blocks
date
done #test
let "n = $n * 2"
done #n
done #xfer
exit

View File

@ -1,93 +0,0 @@
#!/bin/bash -x
#PBS -N IOR
#PBS -j oe
#PBS -q batch
#PBS -A stf006
#PBS -V
#PBS -l walltime=0:60:00,nodes=8:ppn=2
VERS=IOR-2.10.1
WORK=/tmp/work/${USER}
echo $PBS_O_WORKDIR
cd /ccs/proj/quadcore
tar -czvf ${WORK}/${VERS}.tar.gz ./${VERS}
cd ${WORK}
rm -fr ./${VERS}
tar -xzvf ${WORK}/${VERS}.tar.gz
cd ${WORK}/${VERS}
gmake clean
gmake mpiio
EXEC=${WORK}/${VERS}/src/C/IOR
IODIR=/tmp/work/swh13/test_files_x
cd ${WORK}/${VERS}/tests
which mpirun
rm -fr $IODIR
mkdir $IODIR
let "w=128"
let "s=1024*1024"
let "i=3"
MPIRUN="mpirun -np"
RESULTS="."
let "tid=1"
XFERS="1048576 262144 32768 4096 1024"
XFERS="262144"
for xfer in `echo $XFERS`
do
let "n=8"
until [ "$n" -gt 8 ]
do
let "m=$n/4"
#TESTS="POSIX MPIIO HDF5 NCMPI"
TESTS="POSIX MPIIO"
for test in `echo $TESTS`
do
runid="p$n.$xfer.${test}"
date
V=" "
BLOCKS="1 10 1 10 1 10"
for blocks in `echo $BLOCKS`
do
let "block=${xfer} * ${blocks}"
#fileperproc tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X 13 ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X -13 ${V} -F -o $IODIR/testwrite.${runid} -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#shared tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#test mutually exclusive options
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -Z -C ${V} -o $IODIR/testwrite.${runid} -i${i} -m -t ${xfer} -b ${block} -d 0.0
let "tid=$tid + 17"
V=$V" -v"
done #blocks
date
done #test
let "n = $n * 2"
done #n
done #xfer
exit

View File

@ -1,23 +1,28 @@
SUBDIRS = .
bin_PROGRAMS = ior mdtest
if USE_CAPS
bin_PROGRAMS += IOR MDTEST
endif
noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h
noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h
extraSOURCES = aiori.c
lib_LIBRARIES = libaiori.a
libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c
extraSOURCES = aiori.c aiori-DUMMY.c
extraLDADD =
extraLDFLAGS =
extraCPPFLAGS =
ior_SOURCES = ior.c utilities.c parse_options.c
ior_SOURCES = ior-main.c
ior_LDFLAGS =
ior_LDADD =
ior_LDADD = libaiori.a
ior_CPPFLAGS =
mdtest_SOURCES = mdtest.c utilities.c
mdtest_SOURCES = mdtest-main.c
mdtest_LDFLAGS =
mdtest_LDADD =
mdtest_LDADD = libaiori.a
mdtest_CPPFLAGS =
if USE_HDFS_AIORI
@ -36,6 +41,13 @@ extraSOURCES += aiori-HDF5.c
extraLDADD += -lhdf5 -lz
endif
if USE_IME_AIORI
extraSOURCES += aiori-IME.c
extraCPPFLAGS += -I/opt/ddn/ime/include
extraLDFLAGS += -L/opt/ddn/ime/lib
extraLDADD += -lim_client
endif
if USE_MPIIO_AIORI
extraSOURCES += aiori-MPIIO.c
endif
@ -53,10 +65,14 @@ if USE_POSIX_AIORI
extraSOURCES += aiori-POSIX.c
endif
if USE_DFS_AIORI
extraSOURCES += aiori-DFS.c
if USE_RADOS_AIORI
extraSOURCES += aiori-RADOS.c
extraLDADD += -lrados
endif
if USE_DAOS_AIORI
extraSOURCES += aiori-DAOS.c aiori-DFS.c list.h
endif
if USE_S3_AIORI
extraSOURCES += aiori-S3.c
@ -70,21 +86,33 @@ extraLDADD += -laws4c
extraLDADD += -laws4c_extra
endif
ior_SOURCES += $(extraSOURCES)
ior_LDFLAGS += $(extraLDFLAGS)
ior_LDADD += $(extraLDADD)
ior_SOURCES += $(extraSOURCES)
ior_LDFLAGS += $(extraLDFLAGS)
ior_LDADD += $(extraLDADD)
ior_CPPFLAGS += $(extraCPPFLAGS)
mdtest_SOURCES += $(extraSOURCES)
mdtest_LDFLAGS += $(extraLDFLAGS)
mdtest_LDADD += $(extraLDADD)
mdtest_SOURCES += $(extraSOURCES)
mdtest_LDFLAGS += $(extraLDFLAGS)
mdtest_LDADD += $(extraLDADD)
mdtest_CPPFLAGS += $(extraCPPFLAGS)
IOR_SOURCES = $(ior_SOURCES)
IOR_LDFLAGS = $(ior_LDFLAGS)
IOR_LDADD = $(ior_LDADD)
IOT_CPPFLAGS = $(ior_CPPFLAGS)
IOR_CPPFLAGS = $(ior_CPPFLAGS)
MDTEST_SOURCES = $(mdtest_SOURCES)
MDTEST_LDFLAGS = $(mdtest_LDFLAGS)
MDTEST_LDADD = $(mdtest_LDADD)
MDTEST_CPPFLAGS = $(mdtest_CPPFLAGS)
libaiori_a_SOURCES += $(extraSOURCES)
libaiori_a_CPPFLAGS = $(extraCPPFLAGS)
TESTS = testlib
bin_PROGRAMS += testlib
testlib_SOURCES = ./test/lib.c
testlib_LDFLAGS = $(extraLDFLAGS)
testlib_LDADD = libaiori.a $(extraLDADD)

939
src/aiori-DAOS.c Normal file
View File

@ -0,0 +1,939 @@
/*
* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/*
* SPECIAL LICENSE RIGHTS-OPEN SOURCE SOFTWARE
* The Government's rights to use, modify, reproduce, release, perform, display,
* or disclose this software are subject to the terms of Contract No. B599860,
* and the terms of the GNU General Public License version 2.
* Any reproduction of computer software, computer software documentation, or
* portions thereof marked with this legend must also reproduce the markings.
*/
/*
* Copyright (c) 2013, 2016 Intel Corporation.
*/
/*
* This file implements the abstract I/O interface for DAOS.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdint.h>
#include <assert.h>
#include <unistd.h>
#include <sys/types.h>
#include <libgen.h>
#include <stdbool.h>
#include <daos.h>
#include <daos_types.h>
#include "ior.h"
#include "aiori.h"
#include "iordef.h"
#include "list.h"
/************************** O P T I O N S *****************************/
struct daos_options{
char *daosPool;
char *daosPoolSvc;
char *daosGroup;
int daosRecordSize;
int daosStripeSize;
uint64_t daosStripeCount;
uint64_t daosStripeMax; /* max length of a stripe */
int daosAios; /* max number of concurrent async I/Os */
int daosWriteOnly; /* write only, no flush and commit */
uint64_t daosEpoch; /* epoch to access */
uint64_t daosWait; /* epoch to wait for before reading */
int daosKill; /* kill a target while running IOR */
char *daosObjectClass; /* object class */
};
static struct daos_options o = {
.daosPool = NULL,
.daosPoolSvc = NULL,
.daosGroup = NULL,
.daosRecordSize = 262144,
.daosStripeSize = 524288,
.daosStripeCount = -1,
.daosStripeMax = 0,
.daosAios = 1,
.daosWriteOnly = 0,
.daosEpoch = 0,
.daosWait = 0,
.daosKill = 0,
.daosObjectClass = NULL,
};
static option_help options [] = {
{'p', "daosPool", "pool uuid", OPTION_REQUIRED_ARGUMENT, 's', &o.daosPool},
{'v', "daosPoolSvc", "pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', &o.daosPoolSvc},
{'g', "daosGroup", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosGroup},
{'r', "daosRecordSize", "Record Size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosRecordSize},
{'s', "daosStripeSize", "Stripe Size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosStripeSize},
{'c', "daosStripeCount", "Stripe Count", OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeCount},
{'m', "daosStripeMax", "Max Stripe",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeMax},
{'a', "daosAios", "Concurrent Async IOs",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosAios},
{'w', "daosWriteOnly", "Write Only, no commit",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosWriteOnly},
{'e', "daosEpoch", "Epoch Number to Access",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosEpoch},
{'t', "daosWait", "Epoch to wait for before read",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosWait},
{'k', "daosKill", "Kill target while running",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosKill},
{'o', "daosObjectClass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosObjectClass},
LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static void DAOS_Init(IOR_param_t *);
static void DAOS_Fini(IOR_param_t *);
static void *DAOS_Create(char *, IOR_param_t *);
static void *DAOS_Open(char *, IOR_param_t *);
static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void DAOS_Close(void *, IOR_param_t *);
static void DAOS_Delete(char *, IOR_param_t *);
static char* DAOS_GetVersion();
static void DAOS_Fsync(void *, IOR_param_t *);
static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t daos_aiori = {
.name = "DAOS",
.create = DAOS_Create,
.open = DAOS_Open,
.xfer = DAOS_Xfer,
.close = DAOS_Close,
.delete = DAOS_Delete,
.get_version = DAOS_GetVersion,
.fsync = DAOS_Fsync,
.get_file_size = DAOS_GetFileSize,
.initialize = DAOS_Init,
.finalize = DAOS_Fini,
};
enum handleType {
POOL_HANDLE,
CONTAINER_HANDLE
};
struct fileDescriptor {
daos_handle_t container;
daos_cont_info_t containerInfo;
daos_handle_t object;
daos_epoch_t epoch;
};
struct aio {
cfs_list_t a_list;
char a_dkeyBuf[32];
daos_key_t a_dkey;
daos_recx_t a_recx;
unsigned char a_csumBuf[32];
daos_csum_buf_t a_csum;
daos_epoch_range_t a_epochRange;
daos_iod_t a_iod;
daos_iov_t a_iov;
daos_sg_list_t a_sgl;
struct daos_event a_event;
};
static daos_handle_t eventQueue;
static struct daos_event **events;
static unsigned char *buffers;
static int nAios;
static daos_handle_t pool;
static daos_pool_info_t poolInfo;
static daos_oclass_id_t objectClass = DAOS_OC_LARGE_RW;
static CFS_LIST_HEAD(aios);
static IOR_offset_t total_size;
/***************************** F U N C T I O N S ******************************/
/* For DAOS methods. */
#define DCHECK(rc, format, ...) \
do { \
int _rc = (rc); \
\
if (_rc < 0) { \
fprintf(stdout, "ior ERROR (%s:%d): %d: %d: " \
format"\n", __FILE__, __LINE__, rank, _rc, \
##__VA_ARGS__); \
fflush(stdout); \
MPI_Abort(MPI_COMM_WORLD, -1); \
} \
} while (0)
#define INFO(level, param, format, ...) \
do { \
if (param->verbose >= level) \
printf("[%d] "format"\n", rank, ##__VA_ARGS__); \
} while (0)
/* For generic errors like invalid command line options. */
#define GERR(format, ...) \
do { \
fprintf(stdout, format"\n", ##__VA_ARGS__); \
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \
} while (0)
/* Distribute process 0's pool or container handle to others. */
static void HandleDistribute(daos_handle_t *handle, enum handleType type,
IOR_param_t *param)
{
daos_iov_t global;
int rc;
assert(type == POOL_HANDLE || !daos_handle_is_inval(pool));
global.iov_buf = NULL;
global.iov_buf_len = 0;
global.iov_len = 0;
if (rank == 0) {
/* Get the global handle size. */
if (type == POOL_HANDLE)
rc = daos_pool_local2global(*handle, &global);
else
rc = daos_cont_local2global(*handle, &global);
DCHECK(rc, "Failed to get global handle size");
}
MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0,
param->testComm),
"Failed to bcast global handle buffer size");
global.iov_buf = malloc(global.iov_buf_len);
if (global.iov_buf == NULL)
ERR("Failed to allocate global handle buffer");
if (rank == 0) {
if (type == POOL_HANDLE)
rc = daos_pool_local2global(*handle, &global);
else
rc = daos_cont_local2global(*handle, &global);
DCHECK(rc, "Failed to create global handle");
}
MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0,
param->testComm),
"Failed to bcast global pool handle");
if (rank != 0) {
/* A larger-than-actual length works just fine. */
global.iov_len = global.iov_buf_len;
if (type == POOL_HANDLE)
rc = daos_pool_global2local(global, handle);
else
rc = daos_cont_global2local(pool, global, handle);
DCHECK(rc, "Failed to get local handle");
}
free(global.iov_buf);
}
/*
 * Open (and, for a fresh write run, first create) the DAOS container named
 * by 'testFileName', which must be a UUID string.  Only rank 0 talks to the
 * DAOS service; the container handle is then fanned out to all ranks via
 * HandleDistribute() and the container info is broadcast.
 *
 * testFileName: container UUID in text form.
 * container:    out; valid global container handle on every rank.
 * info:         out; container info (epoch state) on every rank.
 */
static void ContainerOpen(char *testFileName, IOR_param_t *param,
                          daos_handle_t *container, daos_cont_info_t *info)
{
        int rc;

        if (rank == 0) {
                uuid_t uuid;
                unsigned int dFlags;

                rc = uuid_parse(testFileName, uuid);
                DCHECK(rc, "Failed to parse 'testFile': %s", testFileName);

                /* A write run that is not reusing an existing container
                 * creates it first. */
                if (param->open == WRITE &&
                    param->useExistingTestFile == FALSE) {
                        INFO(VERBOSE_2, param, "Creating container %s",
                             testFileName);

                        rc = daos_cont_create(pool, uuid, NULL /* ev */);
                        DCHECK(rc, "Failed to create container %s",
                               testFileName);
                }

                INFO(VERBOSE_2, param, "Openning container %s", testFileName);

                /* RW for writers, RO for readers. */
                if (param->open == WRITE)
                        dFlags = DAOS_COO_RW;
                else
                        dFlags = DAOS_COO_RO;

                rc = daos_cont_open(pool, uuid, dFlags, container, info,
                                    NULL /* ev */);
                DCHECK(rc, "Failed to open container %s", testFileName);

                /* Dump the epoch state for debugging epoch selection. */
                INFO(VERBOSE_2, param, "Container epoch state:");
                INFO(VERBOSE_2, param, "   HCE: %lu",
                     info->ci_epoch_state.es_hce);
                INFO(VERBOSE_2, param, "   LRE: %lu",
                     info->ci_epoch_state.es_lre);
                INFO(VERBOSE_2, param, "   LHE: %lu (%lx)",
                     info->ci_epoch_state.es_lhe, info->ci_epoch_state.es_lhe);
                INFO(VERBOSE_2, param, "  GHCE: %lu",
                     info->ci_epoch_state.es_ghce);
                INFO(VERBOSE_2, param, "  GLRE: %lu",
                     info->ci_epoch_state.es_glre);
                INFO(VERBOSE_2, param, " GHPCE: %lu",
                     info->ci_epoch_state.es_ghpce);

#if 0
                /* Disabled: epoch wait and object-class registration from an
                 * older DAOS API generation. */
                if (param->open != WRITE && o.daosWait != 0) {
                        daos_epoch_t e;

                        e = o.daosWait;

                        INFO(VERBOSE_2, param, "Waiting for epoch %lu", e);

                        rc = daos_epoch_wait(*container, &e,
                                             NULL /* ignore HLE */,
                                             NULL /* synchronous */);
                        DCHECK(rc, "Failed to wait for epoch %lu",
                               o.daosWait);
                }

                if (param->open == WRITE &&
                    param->useExistingTestFile == FALSE) {
                        daos_oclass_attr_t attr = {
                                .ca_schema              = DAOS_OS_STRIPED,
                                .ca_resil_degree        = 0,
                                .ca_resil               = DAOS_RES_REPL,
                                .ca_grp_nr              = 4,
                                .u.repl                 = {
                                        .r_method       = 0,
                                        .r_num          = 2
                                }
                        };

                        INFO(VERBOSE_2, param, "Registering object class");

                        rc = daos_oclass_register(container, objectClass, &attr,
                                                  NULL /* ev */);
                        DCHECK(rc, "Failed to register object class");
                }
#endif
        }

        /* Share rank 0's handle and info with everyone else. */
        HandleDistribute(container, CONTAINER_HANDLE, param);

        MPI_CHECK(MPI_Bcast(info, sizeof *info, MPI_BYTE, 0, param->testComm),
                  "Failed to broadcast container info");
}
/*
 * Close the container on every rank.  Rank 0 closes last: all other ranks
 * drop their handles first, then everyone synchronizes, and only then does
 * rank 0 (the rank that opened the container) close its handle.
 */
static void ContainerClose(daos_handle_t container, IOR_param_t *param)
{
        if (rank != 0)
                DCHECK(daos_cont_close(container, NULL /* ev */),
                       "Failed to close container");

        /* An MPI_Gather() call would probably be more efficient. */
        MPI_CHECK(MPI_Barrier(param->testComm),
                  "Failed to synchronize processes");

        if (rank == 0)
                DCHECK(daos_cont_close(container, NULL /* ev */),
                       "Failed to close container");
}
/*
 * Open the shared test object inside 'container'.  The object id is fixed
 * (<hi=0, lo=1>) and tagged with the configured object class, so every rank
 * opens the same object.
 */
static void ObjectOpen(daos_handle_t container, daos_handle_t *object,
                       daos_epoch_t epoch, IOR_param_t *param)
{
        daos_obj_id_t oid;
        unsigned int flags;
        int rc;

        /* Fixed oid: all ranks share one object. */
        oid.hi = 0;
        oid.lo = 1;
        daos_obj_id_generate(&oid, 0, objectClass);

#if 0
        /** declaring object not implemented commenting it */
        if (rank == 0 && param->open == WRITE &&
            param->useExistingTestFile == FALSE) {
                INFO(VERBOSE_2, param, "Declaring object");

                rc = daos_obj_declare(container, oid, epoch, NULL /* oa */,
                                      NULL /* ev */);
                DCHECK(rc, "Failed to declare object");
        }
#endif

        /* An MPI_Bcast() call would probably be more efficient. */
        MPI_CHECK(MPI_Barrier(param->testComm),
                  "Failed to synchronize processes");

        /* RW for writers, RO for readers. */
        if (param->open == WRITE)
                flags = DAOS_OO_RW;
        else
                flags = DAOS_OO_RO;

        rc = daos_obj_open(container, oid, epoch, flags, object, NULL /* ev */);
        DCHECK(rc, "Failed to open object");
}
/* Close the shared test object; aborts the job on failure. */
static void ObjectClose(daos_handle_t object)
{
        DCHECK(daos_obj_close(object, NULL /* ev */),
               "Failed to close object");
}
/*
 * Allocate o.daosAios AIO descriptors plus one large page-aligned data
 * buffer (one transferSize slot per descriptor).  Each descriptor's
 * dkey/recx/checksum/iod/sgl is wired up to describe its slot, a DAOS
 * event is attached, and the descriptor is put on the free list 'aios'.
 * Also allocates the completion array consumed by AIOWait().
 */
static void AIOInit(IOR_param_t *param)
{
        struct aio *aio;
        int i;
        int rc;

        /* One contiguous, page-aligned data buffer shared by all AIOs. */
        rc = posix_memalign((void **) &buffers, sysconf(_SC_PAGESIZE),
                            param->transferSize * o.daosAios);
        DCHECK(rc, "Failed to allocate buffer array");

        for (i = 0; i < o.daosAios; i++) {
                aio = malloc(sizeof *aio);
                if (aio == NULL)
                        ERR("Failed to allocate aio array");

                memset(aio, 0, sizeof *aio);

                /* dkey text is filled in per transfer (see DAOS_Xfer). */
                aio->a_dkey.iov_buf = aio->a_dkeyBuf;
                aio->a_dkey.iov_buf_len = sizeof aio->a_dkeyBuf;

                aio->a_recx.rx_nr = 1;

                aio->a_csum.cs_csum = &aio->a_csumBuf;
                aio->a_csum.cs_buf_len = sizeof aio->a_csumBuf;
                aio->a_csum.cs_len = aio->a_csum.cs_buf_len;

                aio->a_epochRange.epr_hi = DAOS_EPOCH_MAX;

                /* All records live under the single akey "data". */
                aio->a_iod.iod_name.iov_buf = "data";
                aio->a_iod.iod_name.iov_buf_len =
                        strlen(aio->a_iod.iod_name.iov_buf) + 1;
                aio->a_iod.iod_name.iov_len = aio->a_iod.iod_name.iov_buf_len;
                aio->a_iod.iod_nr = 1;
                aio->a_iod.iod_type = DAOS_IOD_ARRAY;
                aio->a_iod.iod_recxs = &aio->a_recx;
                aio->a_iod.iod_csums = &aio->a_csum;
                aio->a_iod.iod_eprs = &aio->a_epochRange;
                aio->a_iod.iod_size = param->transferSize;

                /* This AIO's private slot in the shared data buffer. */
                aio->a_iov.iov_buf = buffers + param->transferSize * i;
                aio->a_iov.iov_buf_len = param->transferSize;
                aio->a_iov.iov_len = aio->a_iov.iov_buf_len;

                aio->a_sgl.sg_nr = 1;
                aio->a_sgl.sg_iovs = &aio->a_iov;

                rc = daos_event_init(&aio->a_event, eventQueue,
                                     NULL /* parent */);
                DCHECK(rc, "Failed to initialize event for aio[%d]", i);

                /* Start on the free list. */
                cfs_list_add(&aio->a_list, &aios);

                INFO(VERBOSE_3, param, "Allocated AIO %p: buffer %p", aio,
                     aio->a_iov.iov_buf);
        }

        /* All descriptors begin free. */
        nAios = o.daosAios;

        events = malloc((sizeof *events) * o.daosAios);
        if (events == NULL)
                ERR("Failed to allocate events array");
}
/*
 * Tear down everything AIOInit() created: the completion array, every AIO
 * descriptor (finalizing its DAOS event), and the shared data buffer.
 * Callers must have drained all in-flight AIOs first (see DAOS_Close).
 */
static void AIOFini(IOR_param_t *param)
{
        struct aio *aio;
        struct aio *tmp;

        free(events);

        /* _safe variant: entries are unlinked and freed while iterating. */
        cfs_list_for_each_entry_safe(aio, tmp, &aios, a_list) {
                INFO(VERBOSE_3, param, "Freeing AIO %p: buffer %p", aio,
                     aio->a_iov.iov_buf);

                cfs_list_del_init(&aio->a_list);
                daos_event_fini(&aio->a_event);
                free(aio);
        }

        free(buffers);
}
/*
 * Reap completed asynchronous operations.  Blocks until at least one event
 * completes (DAOS_EQ_WAIT), checks each completed transfer for errors,
 * re-arms its event, and returns the descriptor to the free list 'aios',
 * incrementing the free count nAios.
 */
static void AIOWait(IOR_param_t *param)
{
        struct aio *aio;
        int i;
        int rc;

        /* On success rc is the number of events placed into 'events'. */
        rc = daos_eq_poll(eventQueue, 0, DAOS_EQ_WAIT, o.daosAios,
                          events);
        DCHECK(rc, "Failed to poll event queue");
        /* Cannot reap more events than there are busy AIOs. */
        assert(rc <= o.daosAios - nAios);

        for (i = 0; i < rc; i++) {
                int ret;

                /* Recover the aio that embeds this event (container_of). */
                aio = (struct aio *)
                      ((char *) events[i] -
                       (char *) (&((struct aio *) 0)->a_event));

                DCHECK(aio->a_event.ev_error, "Failed to transfer (%lu, %lu)",
                       aio->a_iod.iod_recxs->rx_idx,
                       aio->a_iod.iod_recxs->rx_nr);

                /* Events are single-shot: re-initialize before reuse. */
                daos_event_fini(&aio->a_event);
                ret = daos_event_init(&aio->a_event, eventQueue,
                                      NULL /* parent */);
                DCHECK(ret, "Failed to reinitialize event for AIO %p", aio);

                /* Back onto the free list. */
                cfs_list_move(&aio->a_list, &aios);
                nAios++;

                if (param->verbose >= VERBOSE_3)
                        INFO(VERBOSE_3, param, "Completed AIO %p: buffer %p", aio,
                             aio->a_iov.iov_buf);
        }

        INFO(VERBOSE_3, param, "Found %d completed AIOs (%d free %d busy)", rc,
             nAios, o.daosAios - nAios);
}
/*
 * Translate the 'daosObjectClass' option string into a DAOS object class
 * and store it in the global 'objectClass'.  Matching is case-insensitive;
 * an unrecognized name is a fatal configuration error.
 */
static void ObjectClassParse(const char *string)
{
        /* Recognized names mapped to their DAOS object-class constants. */
        static const struct {
                const char *name;
                int         oc;
        } classes[] = {
                { "tiny",     DAOS_OC_TINY_RW },
                { "small",    DAOS_OC_SMALL_RW },
                { "large",    DAOS_OC_LARGE_RW },
                { "echo",     DAOS_OC_ECHO_RW },
                { "R2",       DAOS_OC_R2_RW },
                { "R2S",      DAOS_OC_R2S_RW },
                { "R3S",      DAOS_OC_R3S_RW },
                { "R3",       DAOS_OC_R3_RW },
                { "R4",       DAOS_OC_R4_RW },
                { "R4S",      DAOS_OC_R4S_RW },
                { "repl_max", DAOS_OC_REPL_MAX_RW }
        };
        int i;

        for (i = 0; i < (int)(sizeof(classes) / sizeof(classes[0])); i++) {
                if (strcasecmp(string, classes[i].name) == 0) {
                        objectClass = classes[i].oc;
                        return;
                }
        }

        GERR("Invalid 'daosObjectClass' argument: '%s'", string);
}
/*
 * Return the configured DAOS server group, or NULL when the 'daosGroup'
 * option is empty (meaning: use the default group).
 */
static const char *GetGroup(IOR_param_t *param)
{
        return strlen(o.daosGroup) == 0 ? NULL : o.daosGroup;
}
/*
 * Parse the ':'-separated 'daosPoolSvc' option into 'ranks', which must
 * have room for 'max' entries; more than 'max' replicas is a fatal error.
 *
 * BUGFIX: the original reused one pointer for both the strdup()'d buffer
 * and the strtok() cursor.  That freed an interior token pointer (undefined
 * behavior) on the overflow path and leaked the duplicated buffer on the
 * normal path (the final free() always received NULL).  Keep the strdup()
 * result in its own variable and free exactly that.
 */
static void ParseService(IOR_param_t *param, int max, d_rank_list_t *ranks)
{
        char *dup;      /* writable copy consumed by strtok() */
        char *tok;

        dup = strdup(o.daosPoolSvc);
        if (dup == NULL)
                GERR("failed to duplicate argument");

        ranks->rl_nr = 0;
        for (tok = strtok(dup, ":"); tok != NULL; tok = strtok(NULL, ":")) {
                if (ranks->rl_nr >= max) {
                        free(dup);
                        GERR("at most %d pool service replicas supported", max);
                }
                ranks->rl_ranks[ranks->rl_nr] = atoi(tok);
                ranks->rl_nr++;
        }

        free(dup);
}
/*
 * Validate the DAOS-specific options, initialize the DAOS library and the
 * event queue, and connect rank 0 to the pool.  The pool handle is then
 * distributed to every rank via HandleDistribute() and the pool info is
 * broadcast; daosStripeCount defaults to 64 stripes per pool target.
 */
static void DAOS_Init(IOR_param_t *param)
{
        int rc;

        if (strlen(o.daosObjectClass) != 0)
                ObjectClassParse(o.daosObjectClass);

        /* Option sanity checks; all violations are fatal. */
        if (param->filePerProc)
                GERR("'filePerProc' not yet supported");
        if (o.daosStripeMax % o.daosStripeSize != 0)
                GERR("'daosStripeMax' must be a multiple of 'daosStripeSize'");
        if (o.daosStripeSize % param->transferSize != 0)
                GERR("'daosStripeSize' must be a multiple of 'transferSize'");
        if (param->transferSize % o.daosRecordSize != 0)
                GERR("'transferSize' must be a multiple of 'daosRecordSize'");

        /*
         * BUGFIX: the original chained these comparisons with '||', which is
         * true for every possible object class (a value cannot equal all of
         * them), so daosKill was always rejected.  daosKill requires one of
         * the replicated object classes, i.e. all inequalities must hold for
         * the error to fire.
         */
        if (o.daosKill && (objectClass != DAOS_OC_R2_RW) &&
            (objectClass != DAOS_OC_R3_RW) &&
            (objectClass != DAOS_OC_R4_RW) &&
            (objectClass != DAOS_OC_R2S_RW) &&
            (objectClass != DAOS_OC_R3S_RW) &&
            (objectClass != DAOS_OC_R4S_RW) &&
            (objectClass != DAOS_OC_REPL_MAX_RW))
                GERR("'daosKill' only makes sense with 'daosObjectClass=repl'");

        /* BUGFIX: only warn when daosStripeMax is actually in effect. */
        if (rank == 0 && o.daosStripeMax != 0)
                INFO(VERBOSE_0, param, "WARNING: USING daosStripeMax CAUSES READS TO RETURN INVALID DATA");

        rc = daos_init();
        DCHECK(rc, "Failed to initialize daos");

        rc = daos_eq_create(&eventQueue);
        DCHECK(rc, "Failed to create event queue");

        if (rank == 0) {
                uuid_t uuid;
                /* BUGFIX: renamed from 'rank' -- the old name shadowed the
                 * global MPI rank that DCHECK/INFO print. */
                d_rank_t svcRanks[13];
                d_rank_list_t ranks;

                if (strlen(o.daosPool) == 0)
                        GERR("'daosPool' must be specified");
                if (strlen(o.daosPoolSvc) == 0)
                        GERR("'daosPoolSvc' must be specified");

                INFO(VERBOSE_2, param, "Connecting to pool %s %s",
                     o.daosPool, o.daosPoolSvc);

                rc = uuid_parse(o.daosPool, uuid);
                DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool);

                ranks.rl_ranks = svcRanks;
                ParseService(param, sizeof(svcRanks) / sizeof(svcRanks[0]),
                             &ranks);

                rc = daos_pool_connect(uuid, GetGroup(param), &ranks,
                                       DAOS_PC_RW, &pool, &poolInfo,
                                       NULL /* ev */);
                DCHECK(rc, "Failed to connect to pool %s", o.daosPool);
        }

        HandleDistribute(&pool, POOL_HANDLE, param);

        MPI_CHECK(MPI_Bcast(&poolInfo, sizeof poolInfo, MPI_BYTE, 0,
                            param->testComm),
                  "Failed to bcast pool info");

        /* Default: 64 stripes per pool target. */
        if (o.daosStripeCount == -1)
                o.daosStripeCount = poolInfo.pi_ntargets * 64UL;
}
/*
 * Tear down the DAOS backend: disconnect from the pool, destroy the event
 * queue, and finalize the library.  Each step is fatal on failure.
 */
static void DAOS_Fini(IOR_param_t *param)
{
        DCHECK(daos_pool_disconnect(pool, NULL /* ev */),
               "Failed to disconnect from pool %s", o.daosPool);
        DCHECK(daos_eq_destroy(eventQueue, 0 /* flags */),
               "Failed to destroy event queue");
        DCHECK(daos_fini(),
               "Failed to finalize daos");
}
/*
 * "Create" is the same operation as "open" for this backend:
 * ContainerOpen() (reached via DAOS_Open) creates the container when the
 * run is a write and no existing test file is being reused.
 */
static void *DAOS_Create(char *testFileName, IOR_param_t *param)
{
        return DAOS_Open(testFileName, param);
}
/*
 * Open the container named by testFileName and select the epoch to operate
 * in.  Writers use GHCE+1 (the epoch after the globally highest committed
 * one) unless a 'daosEpoch' was requested, which must not already be
 * committed; readers use GHCE, 'daosWait', or the requested epoch, which
 * must already be committed.  Rank 0 additionally holds the write epoch.
 * Finally opens the shared object and sets up the AIO machinery.
 *
 * Returns a malloc'd fileDescriptor, released by DAOS_Close().
 */
static void *DAOS_Open(char *testFileName, IOR_param_t *param)
{
        struct fileDescriptor *fd;
        daos_epoch_t ghce;

        fd = malloc(sizeof *fd);
        if (fd == NULL)
                ERR("Failed to allocate fd");

        ContainerOpen(testFileName, param, &fd->container, &fd->containerInfo);

        /* ghce = globally highest committed epoch. */
        ghce = fd->containerInfo.ci_epoch_state.es_ghce;
        if (param->open == WRITE) {
                if (o.daosEpoch == 0)
                        fd->epoch = ghce + 1;
                else if (o.daosEpoch <= ghce)
                        GERR("Can't modify committed epoch\n");
                else
                        fd->epoch = o.daosEpoch;
        } else {
                if (o.daosEpoch == 0) {
                        if (o.daosWait == 0)
                                fd->epoch = ghce;
                        else
                                fd->epoch = o.daosWait;
                } else if (o.daosEpoch > ghce) {
                        GERR("Can't read uncommitted epoch\n");
                } else {
                        fd->epoch = o.daosEpoch;
                }
        }

        if (rank == 0)
                INFO(VERBOSE_2, param, "Accessing epoch %lu", fd->epoch);

        if (rank == 0 && param->open == WRITE) {
                daos_epoch_t e = fd->epoch;
                int rc;

                INFO(VERBOSE_2, param, "Holding epoch %lu", fd->epoch);

                rc = daos_epoch_hold(fd->container, &fd->epoch,
                                     NULL /* state */, NULL /* ev */);
                DCHECK(rc, "Failed to hold epoch");
                /* The hold must not have moved us to a different epoch. */
                assert(fd->epoch == e);
        }

        ObjectOpen(fd->container, &fd->object, fd->epoch, param);

        AIOInit(param);

        return fd;
}
/*
 * Fault-injection helper: kill one DAOS server (the highest-ranked target
 * still alive) and exclude it from the pool map, to exercise replicated
 * object classes.  Intended to run on MPI rank 0 only (see kill_and_sync()).
 * Does nothing when at most one target would remain alive.
 */
static void
kill_daos_server(IOR_param_t *param)
{
        daos_pool_info_t info;
        d_rank_t rank, svc_ranks[13];
        d_rank_list_t svc, targets;
        uuid_t uuid;
        int rc;

        rc = daos_pool_query(pool, NULL, &info, NULL);
        DCHECK(rc, "Error in querying pool\n");

        /* Need at least two live targets before killing one. */
        if (info.pi_ntargets - info.pi_ndisabled <= 1)
                return;
        /* choose the last alive one */
        rank = info.pi_ntargets - 1 - info.pi_ndisabled;

        rc = uuid_parse(o.daosPool, uuid);
        DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool);

        /*
         * BUGFIX: this message was guarded by 'if (rc != 0)', but DCHECK
         * aborts on failure, so rc is always 0 here and the message could
         * never print.  Print it unconditionally.  (Also removed the unused
         * local 'char *s'.)
         */
        printf("Killing tgt rank: %d (total of %d of %d already disabled)\n",
               rank, info.pi_ndisabled, info.pi_ntargets);
        fflush(stdout);

        rc = daos_mgmt_svc_rip(GetGroup(param), rank, true, NULL);
        DCHECK(rc, "Error in killing server\n");

        targets.rl_nr = 1;
        targets.rl_ranks = &rank;

        svc.rl_ranks = svc_ranks;
        ParseService(param, sizeof(svc_ranks) / sizeof(svc_ranks[0]), &svc);

        rc = daos_pool_exclude(uuid, NULL, &svc, &targets, NULL);
        DCHECK(rc, "Error in excluding pool from poolmap\n");

        rc = daos_pool_query(pool, NULL, &info, NULL);
        DCHECK(rc, "Error in querying pool\n");

        printf("%d targets succesfully disabled\n",
               info.pi_ndisabled);
}
/*
 * Have rank 0 kill and exclude one server, then synchronize all ranks and
 * report (on rank 0) how long the whole fault injection took.
 */
static void
kill_and_sync(IOR_param_t *param)
{
        double begin = MPI_Wtime();

        if (rank == 0) {
                kill_daos_server(param);
                printf("Done killing and excluding\n");
        }

        MPI_CHECK(MPI_Barrier(param->testComm),
                  "Failed to synchronize processes");

        if (rank == 0)
                printf("Time spent inducing failure: %lf\n",
                       (MPI_Wtime() - begin));
}
/*
 * Start one asynchronous transfer of 'length' bytes at param->offset.
 * The offset is striped across daosStripeCount dkeys (one dkey per stripe,
 * named by the stripe number); within a dkey the record index advances by
 * one stripe per "round" over all stripes.  WRITECHECK/READCHECK accesses
 * drain all in-flight AIOs and copy the fetched data back into 'buffer'
 * before returning.  Always returns 'length' (errors abort via DCHECK).
 */
static IOR_offset_t DAOS_Xfer(int access, void *file, IOR_size_t *buffer,
                              IOR_offset_t length, IOR_param_t *param)
{
        struct fileDescriptor *fd = file;
        struct aio *aio;
        uint64_t stripe;
        IOR_offset_t stripeOffset;
        uint64_t round;
        int rc;

        assert(length == param->transferSize);
        assert(param->offset % length == 0);

        /**
         * Currently killing only during writes
         * Kills once when 1/2 of blocksize is
         * written
         **/
        total_size += length;
        if (o.daosKill && (access == WRITE) &&
            ((param->blockSize) / 2) == total_size) {
                /** More than half written lets kill */
                if (rank == 0)
                        /* BUGFIX: dropped stray 'rank' argument that had no
                         * matching conversion in the format string. */
                        printf("Killing and Syncing\n");

                kill_and_sync(param);
                o.daosKill = 0;
        }

        /*
         * Find an available AIO descriptor. If none, wait for one.
         */
        while (nAios == 0)
                AIOWait(param);
        aio = cfs_list_entry(aios.next, struct aio, a_list);
        cfs_list_move_tail(&aio->a_list, &aios);
        nAios--;

        /* Map the global offset to (stripe dkey, offset within stripe). */
        stripe = (param->offset / o.daosStripeSize) %
                 o.daosStripeCount;
        rc = snprintf(aio->a_dkeyBuf, sizeof aio->a_dkeyBuf, "%lu", stripe);
        assert(rc < sizeof aio->a_dkeyBuf);
        aio->a_dkey.iov_len = strlen(aio->a_dkeyBuf) + 1;
        round = param->offset / (o.daosStripeSize * o.daosStripeCount);
        stripeOffset = o.daosStripeSize * round +
                       param->offset % o.daosStripeSize;
        if (o.daosStripeMax != 0)
                stripeOffset %= o.daosStripeMax;
        aio->a_recx.rx_idx = stripeOffset / o.daosRecordSize;
        aio->a_epochRange.epr_lo = fd->epoch;

        /*
         * If the data written will be checked later, we have to copy in valid
         * data instead of writing random bytes. If the data being read is for
         * checking purposes, poison the buffer first.
         */
        if (access == WRITE && param->checkWrite)
                memcpy(aio->a_iov.iov_buf, buffer, length);
        else if (access == WRITECHECK || access == READCHECK)
                memset(aio->a_iov.iov_buf, '#', length);

        /* BUGFIX: the sgl length was printed with %lu while the argument is
         * cast to unsigned long long; use %llu to match. */
        INFO(VERBOSE_3, param, "Starting AIO %p (%d free %d busy): access %d "
             "dkey '%s' iod <%llu, %llu> sgl <%p, %llu>", aio, nAios,
             o.daosAios - nAios, access, (char *) aio->a_dkey.iov_buf,
             (unsigned long long) aio->a_iod.iod_recxs->rx_idx,
             (unsigned long long) aio->a_iod.iod_recxs->rx_nr,
             aio->a_sgl.sg_iovs->iov_buf,
             (unsigned long long) aio->a_sgl.sg_iovs->iov_buf_len);

        if (access == WRITE) {
                rc = daos_obj_update(fd->object, fd->epoch, &aio->a_dkey,
                                     1 /* nr */, &aio->a_iod, &aio->a_sgl,
                                     &aio->a_event);
                DCHECK(rc, "Failed to start update operation");
        } else {
                rc = daos_obj_fetch(fd->object, fd->epoch, &aio->a_dkey,
                                    1 /* nr */, &aio->a_iod, &aio->a_sgl,
                                    NULL /* maps */, &aio->a_event);
                DCHECK(rc, "Failed to start fetch operation");
        }

        /*
         * If this is a WRITECHECK or READCHECK, we are expected to fill data
         * into the buffer before returning. Note that if this is a READ, we
         * don't have to return valid data as WriteOrRead() doesn't care.
         */
        if (access == WRITECHECK || access == READCHECK) {
                while (o.daosAios - nAios > 0)
                        AIOWait(param);
                memcpy(buffer, aio->a_sgl.sg_iovs->iov_buf, length);
        }

        return length;
}
/*
 * Drain outstanding AIOs, tear down the AIO state, and close the object.
 * For a write run (unless daosWriteOnly is set) rank 0 flushes and commits
 * the write epoch once every rank has finished writing.  Finally closes the
 * container and frees the descriptor allocated by DAOS_Open().
 */
static void DAOS_Close(void *file, IOR_param_t *param)
{
        struct fileDescriptor *fd = file;
        int rc;

        /* o.daosAios - nAios = AIOs still in flight. */
        while (o.daosAios - nAios > 0)
                AIOWait(param);
        AIOFini(param);

        ObjectClose(fd->object);

        if (param->open == WRITE && !o.daosWriteOnly) {
                /* Wait for everybody to complete the writes. */
                MPI_CHECK(MPI_Barrier(param->testComm),
                          "Failed to synchronize processes");

                if (rank == 0) {
                        INFO(VERBOSE_2, param, "Flushing epoch %lu", fd->epoch);

                        rc = daos_epoch_flush(fd->container, fd->epoch,
                                              NULL /* state */, NULL /* ev */);
                        DCHECK(rc, "Failed to flush epoch");

                        INFO(VERBOSE_2, param, "Committing epoch %lu",
                             fd->epoch);

                        rc = daos_epoch_commit(fd->container, fd->epoch,
                                               NULL /* state */, NULL /* ev */);
                        DCHECK(rc, "Failed to commit object write");
                }
        }

        ContainerClose(fd->container, param);

        free(fd);
}
/*
 * Destroy the container named by testFileName (a UUID string).  A container
 * that no longer exists is not treated as an error.
 */
static void DAOS_Delete(char *testFileName, IOR_param_t *param)
{
        uuid_t uuid;
        int rc;

        INFO(VERBOSE_2, param, "Deleting container %s", testFileName);

        DCHECK(uuid_parse(testFileName, uuid),
               "Failed to parse 'testFile': %s", testFileName);

        rc = daos_cont_destroy(pool, uuid, 1 /* force */, NULL /* ev */);
        if (rc != -DER_NONEXIST)
                DCHECK(rc, "Failed to destroy container %s", testFileName);
}
/*
 * Report the backend's version string ("DAOS").  The buffer is static, so
 * the returned pointer stays valid; repeated calls rewrite the same bytes.
 */
static char* DAOS_GetVersion()
{
        static char version[1024];

        snprintf(version, sizeof(version), "DAOS");
        return version;
}
/*
 * "fsync" for this backend: drain every in-flight asynchronous DAOS
 * operation so all previously issued transfers have completed on return.
 */
static void DAOS_Fsync(void *file, IOR_param_t *param)
{
        /* o.daosAios - nAios = AIOs still in flight. */
        while (o.daosAios - nAios > 0)
                AIOWait(param);
}
/*
 * Report the "file size" of a container.  DAOS containers have no file
 * size, so this always returns 0; callers must not rely on the value.
 */
static IOR_offset_t DAOS_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
                                     char *testFileName)
{
        /*
         * Sizes are inapplicable to containers at the moment.
         */
        return 0;
}

View File

@ -46,6 +46,99 @@
/* Global DFS mount and the pool/container handles it is layered on. */
dfs_t *dfs;
daos_handle_t poh, coh;

/************************** O P T I O N S *****************************/
/* Command-line options specific to the DFS backend; all values arrive as
 * strings via the option table below. */
struct dfs_options{
        char    * pool;         /* DAOS pool uuid (required) */
        char    * svcl;         /* pool service replica ranks, ':'-separated (required) */
        char    * group;        /* DAOS server group (optional) */
        char    * cont;         /* DFS container uuid (required) */
};

static struct dfs_options o = {
        .pool = NULL,
        .svcl = NULL,
        .group = NULL,
        .cont = NULL,
};

/* Option table consumed by the generic option parser ('s' = string). */
static option_help options [] = {
      {'p', "pool", "DAOS pool uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.pool},
      {'s', "svcl", "DAOS pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', & o.svcl},
      {'g', "group", "DAOS server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group},
      {'c', "cont", "DFS container uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.cont},
      LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static void *DFS_Create(char *, IOR_param_t *);
static void *DFS_Open(char *, IOR_param_t *);
static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void DFS_Close(void *, IOR_param_t *);
static void DFS_Delete(char *, IOR_param_t *);
static char* DFS_GetVersion();
static void DFS_Fsync(void *, IOR_param_t *);
static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *);
static int DFS_Stat (const char *, struct stat *, IOR_param_t *);
static int DFS_Mkdir (const char *, mode_t, IOR_param_t *);
static int DFS_Rmdir (const char *, IOR_param_t *);
static int DFS_Access (const char *, int, IOR_param_t *);
static void DFS_Init(IOR_param_t *param);
static void DFS_Finalize(IOR_param_t *param);
static option_help * DFS_options();
/************************** D E C L A R A T I O N S ***************************/
/*
 * DFS (DAOS File System) backend registration: wires the DFS_* callbacks
 * declared above into IOR's abstract I/O interface.
 */
ior_aiori_t dfs_aiori = {
        .name = "DFS",
        .create = DFS_Create,
        .open = DFS_Open,
        .xfer = DFS_Xfer,
        .close = DFS_Close,
        .delete = DFS_Delete,
        .get_version = DFS_GetVersion,
        .fsync = DFS_Fsync,
        .get_file_size = DFS_GetFileSize,
        .statfs = DFS_Statfs,
        .mkdir = DFS_Mkdir,
        .rmdir = DFS_Rmdir,
        .access = DFS_Access,
        .stat = DFS_Stat,
        .initialize = DFS_Init,
        .finalize = DFS_Finalize,
        .get_options = DFS_options,
};
/***************************** F U N C T I O N S ******************************/
/* For DAOS methods. */
/*
 * DCHECK(rc, format, ...): fatal error check for DAOS/DFS calls.
 * Evaluates rc exactly once; if it is negative, prints
 * "ERROR (file:line): mpi-rank: rc: message" to stderr and aborts the
 * entire MPI job.
 */
#define DCHECK(rc, format, ...)                                         \
do {                                                                    \
        int _rc = (rc);                                                 \
                                                                        \
        if (_rc < 0) {                                                  \
                fprintf(stderr, "ERROR (%s:%d): %d: %d: "               \
                        format"\n", __FILE__, __LINE__, rank, _rc,      \
                        ##__VA_ARGS__);                                 \
                fflush(stderr);                                         \
                MPI_Abort(MPI_COMM_WORLD, -1);                          \
        }                                                               \
} while (0)

/*
 * DERR(rc, format, ...): non-fatal variant of DCHECK.  On a negative rc it
 * prints the same diagnostic and then jumps to the enclosing function's
 * 'out:' label for cleanup instead of aborting, so it may only be used in
 * functions that define such a label.
 */
#define DERR(rc, format, ...)                                           \
do {                                                                    \
        int _rc = (rc);                                                 \
                                                                        \
        if (_rc < 0) {                                                  \
                fprintf(stderr, "ERROR (%s:%d): %d: %d: "               \
                        format"\n", __FILE__, __LINE__, rank, _rc,      \
                        ##__VA_ARGS__);                                 \
                fflush(stderr);                                         \
                goto out;                                               \
        }                                                               \
} while (0)
static int
parse_filename(const char *path, char **_obj_name, char **_cont_name)
{
@ -124,48 +217,11 @@ out:
return rc;
}
/**************************** P R O T O T Y P E S *****************************/
static void *DFS_Create(char *, IOR_param_t *);
static void *DFS_Open(char *, IOR_param_t *);
static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void DFS_Close(void *, IOR_param_t *);
static void DFS_Delete(char *, IOR_param_t *);
static void DFS_SetVersion(IOR_param_t *);
static void DFS_Fsync(void *, IOR_param_t *);
static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *);
static int DFS_Stat (const char *, struct stat *, IOR_param_t *);
static int DFS_Mkdir (const char *, mode_t, IOR_param_t *);
static int DFS_Rmdir (const char *, IOR_param_t *);
static int DFS_Access (const char *, int, IOR_param_t *);
static int DFS_Init(IOR_param_t *param);
static int DFS_Finalize(IOR_param_t *param);
static option_help * DFS_options(){
return options;
}
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t dfs_aiori = {
.name = "DFS",
.create = DFS_Create,
.open = DFS_Open,
.xfer = DFS_Xfer,
.close = DFS_Close,
.delete = DFS_Delete,
.set_version = DFS_SetVersion,
.fsync = DFS_Fsync,
.get_file_size = DFS_GetFileSize,
.statfs = DFS_Statfs,
.mkdir = DFS_Mkdir,
.rmdir = DFS_Rmdir,
.access = DFS_Access,
.stat = DFS_Stat,
.init = DFS_Init,
.finalize = DFS_Finalize,
};
/***************************** F U N C T I O N S ******************************/
static int
static void
DFS_Init(IOR_param_t *param) {
uuid_t pool_uuid, co_uuid;
daos_pool_info_t pool_info;
@ -174,46 +230,37 @@ DFS_Init(IOR_param_t *param) {
bool cont_created = false;
int rc;
if (uuid_parse(param->daosPool, pool_uuid) < 0) {
fprintf(stderr, "Invalid pool uuid\n");
return -1;
}
if (o.pool == NULL || o.svcl == NULL || o.cont == NULL)
ERR("Invalid Arguments to DFS\n");
if (uuid_parse(param->daosCont, co_uuid) < 0) {
fprintf(stderr, "Invalid pool uuid\n");
return -1;
}
rc = uuid_parse(o.pool, pool_uuid);
DCHECK(rc, "Failed to parse 'Pool uuid': %s", o.pool);
svcl = daos_rank_list_parse(param->daosPoolSvc, ":");
if (svcl == NULL) {
fprintf(stderr, "Invalid pool service rank list\n");
return -1;
}
rc = uuid_parse(o.cont, co_uuid);
DCHECK(rc, "Failed to parse 'Cont uuid': %s", o.cont);
printf("Pool uuid = %s, SVCL = %s\n", param->daosPool,
param->daosPoolSvc);
svcl = daos_rank_list_parse(o.svcl, ":");
if (svcl == NULL)
ERR("Failed to allocate svcl");
printf("DFS Container namespace uuid = %s\n", param->daosCont);
if (verbose >= 3) {
printf("Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl);
printf("DFS Container namespace uuid = %s\n", o.cont);
}
rc = daos_init();
if (rc) {
fprintf(stderr, "daos_init() failed with %d\n", rc);
return rc;
}
DCHECK(rc, "Failed to initialize daos");
/** Connect to DAOS pool */
rc = daos_pool_connect(pool_uuid,
strlen(param->daosGroup) ? param->daosGroup : NULL,
svcl, DAOS_PC_RW, &poh, &pool_info, NULL);
if (rc < 0) {
fprintf(stderr, "Failed to connect to pool (%d)\n", rc);
goto err_daos;
}
rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, &poh,
&pool_info, NULL);
DCHECK(rc, "Failed to connect to pool");
rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL);
/* If NOEXIST we create it */
if (rc == -DER_NONEXIST) {
printf("Creating DFS Container ...\n");
if (verbose >= 3)
printf("Creating DFS Container ...\n");
rc = daos_cont_create(poh, co_uuid, NULL);
if (rc == 0) {
cont_created = true;
@ -221,61 +268,28 @@ DFS_Init(IOR_param_t *param) {
&co_info, NULL);
}
}
if (rc) {
fprintf(stderr, "Failed to create container (%d)\n", rc);
goto err_pool;
}
DCHECK(rc, "Failed to create container");
rc = dfs_mount(poh, coh, O_RDWR, &dfs);
if (rc) {
fprintf(stderr, "dfs_mount failed (%d)\n", rc);
goto err_cont;
}
out:
daos_rank_list_free(svcl);
return rc;
err_cont:
daos_cont_close(coh, NULL);
err_pool:
if (cont_created)
daos_cont_destroy(poh, co_uuid, 1, NULL);
daos_pool_disconnect(poh, NULL);
err_daos:
daos_fini();
goto out;
DCHECK(rc, "Failed to mount DFS namespace");
}
int
static void
DFS_Finalize(IOR_param_t *param)
{
int rc;
rc = dfs_umount(dfs, true);
if (rc) {
fprintf(stderr, "dfs_umount() failed (%d)\n", rc);
return -1;
}
DCHECK(rc, "Failed to umount DFS namespace");
rc = daos_cont_close(coh, NULL);
if (rc) {
fprintf(stderr, "daos_cont_close() failed (%d)\n", rc);
return -1;
}
DCHECK(rc, "Failed to close container");
daos_pool_disconnect(poh, NULL);
if (rc) {
fprintf(stderr, "daos_pool_disconnect() failed (%d)\n", rc);
return -1;
}
DCHECK(rc, "Failed to disconnect from pool");
rc = daos_fini();
if (rc) {
fprintf(stderr, "daos_fini() failed (%d)\n", rc);
return -1;
}
return 0;
DCHECK(rc, "Failed to finalize DAOS");
}
/*
@ -296,20 +310,17 @@ DFS_Create(char *testFileName, IOR_param_t *param)
mode = S_IFREG | param->mode;
rc = parse_filename(testFileName, &name, &dir_name);
if (rc)
goto out;
DERR(rc, "Failed to parse path %s", testFileName);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_open(dfs, parent, name, mode, fd_oflag, DAOS_OC_LARGE_RW,
NULL, &obj);
if (rc)
goto out;
DERR(rc, "dfs_open() of %s Failed", name);
out:
if (name)
@ -337,19 +348,16 @@ DFS_Open(char *testFileName, IOR_param_t *param)
fd_oflag |= O_RDWR;
rc = parse_filename(testFileName, &name, &dir_name);
if (rc)
goto out;
DERR(rc, "Failed to parse path %s", testFileName);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_open(dfs, parent, name, S_IFREG, fd_oflag, 0, NULL, &obj);
if (rc)
goto out;
DERR(rc, "dfs_open() of %s Failed", name);
out:
if (name)
@ -391,13 +399,15 @@ DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length,
/* write/read file */
if (access == WRITE) {
rc = dfs_write(dfs, obj, sgl, param->offset);
if (rc)
ERR("write() failed");
if (rc) {
fprintf(stderr, "dfs_write() failed (%d)", rc);
return -1;
}
ret = remaining;
} else {
rc = dfs_read(dfs, obj, sgl, param->offset, &ret);
if (rc || ret == 0)
ERR("read() failed");
fprintf(stderr, "dfs_read() failed(%d)", rc);
}
if (ret < remaining) {
@ -449,19 +459,16 @@ DFS_Delete(char *testFileName, IOR_param_t * param)
int rc;
rc = parse_filename(testFileName, &name, &dir_name);
if (rc)
goto out;
DERR(rc, "Failed to parse path %s", testFileName);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_remove(dfs, parent, name, false);
if (rc)
goto out;
DERR(rc, "dfs_remove() of %s Failed", name);
out:
if (name)
@ -472,13 +479,12 @@ out:
dfs_release(parent);
}
/*
* Determine api version.
*/
static void
DFS_SetVersion(IOR_param_t * test)
static char* DFS_GetVersion()
{
strcpy(test->apiVersion, test->api);
static char ver[1024] = {};
sprintf(ver, "%s", "DAOS");
return ver;
}
/*
@ -492,8 +498,10 @@ DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName)
int rc;
rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL);
if (rc)
if (rc) {
fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc);
return -1;
}
rc = dfs_get_size(dfs, obj, &fsize);
if (rc)
@ -540,19 +548,16 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_mkdir(dfs, parent, name, mode);
if (rc)
goto out;
DERR(rc, "dfs_mkdir() of %s Failed", name);
out:
if (name)
@ -575,19 +580,16 @@ DFS_Rmdir(const char *path, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_remove(dfs, parent, name, false);
if (rc)
goto out;
DERR(rc, "dfs_remove() of %s Failed", name);
out:
if (name)
@ -611,26 +613,19 @@ DFS_Access(const char *path, int mode, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
if (strcmp(name, ".") == 0) {
if (name && strcmp(name, ".") == 0) {
free(name);
name = NULL;
}
rc = dfs_stat(dfs, parent, name, &stbuf);
if (rc) {
rc = -1;
errno = -ENOENT;
goto out;
}
DERR(rc, "dfs_stat() of %s Failed", name);
out:
if (name)
@ -653,19 +648,16 @@ DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDONLY, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_stat(dfs, parent, name, buf);
if (rc)
goto out;
DERR(rc, "dfs_stat() of %s Failed", name);
out:
if (name)

155
src/aiori-DUMMY.c Executable file
View File

@ -0,0 +1,155 @@
/*
* Dummy implementation doesn't do anything besides waiting
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "ior.h"
#include "aiori.h"
#include "utilities.h"
/************************** O P T I O N S *****************************/
/* Tunables for the DUMMY backend: artificial latencies in microseconds. */
struct dummy_options{
        uint64_t delay_creates;         /* usleep() per create, 0 = none */
        uint64_t delay_xfer;            /* usleep() per transfer, 0 = none */
        int delay_rank_0_only;          /* if set, only MPI rank 0 sleeps */
};

static struct dummy_options o = {
        .delay_creates = 0,
        .delay_xfer = 0,
        .delay_rank_0_only = 0,
};

/* Option table consumed by the generic option parser
 * ('l' = integer value, 'd' = flag). */
static option_help options [] = {
        {'c', "delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o.delay_creates},
        {'x', "delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o.delay_xfer},
        {'z', "delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o.delay_rank_0_only},
        LAST_OPTION
};

/* Fake "file handle" source: starts at 1, incremented on every
 * create/open so each handle is distinct.  Never dereferenced. */
static char * current = (char*) 1;
/* Expose the DUMMY option table to the generic option parser. */
static option_help * DUMMY_options(){
        return options;
}
/* Hand out the next fake handle, optionally sleeping to model create cost. */
static void *DUMMY_Create(char *testFileName, IOR_param_t * param)
{
        void *handle = current++;

        if (verbose > 4)
                fprintf(out_logfile, "DUMMY create: %s = %p\n", testFileName, handle);
        /* !A || (A && B)  ==  !A || B */
        if (o.delay_creates && (rank == 0 || ! o.delay_rank_0_only))
                usleep(o.delay_creates);
        return handle;
}
/* Hand out the next fake handle for an "open"; no delay is applied here. */
static void *DUMMY_Open(char *testFileName, IOR_param_t * param)
{
        void *handle = current++;

        if (verbose > 4)
                fprintf(out_logfile, "DUMMY open: %s = %p\n", testFileName, handle);
        return handle;
}
/* Nothing to flush; just trace the call at high verbosity. */
static void DUMMY_Fsync(void *fd, IOR_param_t * param)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY fsync %p\n", fd);
}
/* Nothing to release; just trace the call at high verbosity. */
static void DUMMY_Close(void *fd, IOR_param_t * param)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY close %p\n", fd);
}
/* Nothing to delete; just trace the call at high verbosity. */
static void DUMMY_Delete(char *testFileName, IOR_param_t * param)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY delete: %s\n", testFileName);
}
/* Report the (made-up) version of the DUMMY backend. */
static char * DUMMY_getVersion()
{
        static char *version = "0.5";

        return version;
}
/* File sizes are meaningless for the DUMMY backend; always report zero. */
static IOR_offset_t DUMMY_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY getFileSize: %s\n", testFileName);
        return 0;
}
/* Pretend the whole transfer completed, optionally sleeping to model latency. */
static IOR_offset_t DUMMY_Xfer(int access, void *file, IOR_size_t * buffer, IOR_offset_t length, IOR_param_t * param){
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY xfer: %p\n", file);
        /* !A || (A && B)  ==  !A || B */
        if (o.delay_xfer && (rank == 0 || ! o.delay_rank_0_only))
                usleep(o.delay_xfer);
        return length;
}
/* Report a tiny but internally consistent fake filesystem (all counts 1). */
static int DUMMY_statfs (const char * path, ior_aiori_statfs_t * stat, IOR_param_t * param){
        stat->f_bsize  = 1;
        stat->f_blocks = stat->f_bfree = stat->f_bavail = 1;
        stat->f_files  = stat->f_ffree = 1;
        return 0;
}
/* Directory creation is a no-op for DUMMY; always succeeds. */
static int DUMMY_mkdir (const char *path, mode_t mode, IOR_param_t * param){
        return 0;
}
/* Directory removal is a no-op for DUMMY; always succeeds. */
static int DUMMY_rmdir (const char *path, IOR_param_t * param){
        return 0;
}
/* Access checks always succeed: every path "exists" for DUMMY. */
static int DUMMY_access (const char *path, int mode, IOR_param_t * param){
        return 0;
}
/* stat() stub: reports success but leaves *buf untouched. */
static int DUMMY_stat (const char *path, struct stat *buf, IOR_param_t * param){
        return 0;
}
ior_aiori_t dummy_aiori = {
"DUMMY",
DUMMY_Create,
DUMMY_Open,
DUMMY_Xfer,
DUMMY_Close,
DUMMY_Delete,
DUMMY_getVersion,
DUMMY_Fsync,
DUMMY_GetFileSize,
DUMMY_statfs,
DUMMY_mkdir,
DUMMY_rmdir,
DUMMY_access,
DUMMY_stat,
NULL,
NULL,
DUMMY_options
};

View File

@ -89,9 +89,10 @@ static IOR_offset_t HDF5_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void HDF5_Close(void *, IOR_param_t *);
static void HDF5_Delete(char *, IOR_param_t *);
static void HDF5_SetVersion(IOR_param_t *);
static char* HDF5_GetVersion();
static void HDF5_Fsync(void *, IOR_param_t *);
static IOR_offset_t HDF5_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int HDF5_Access(const char *, int, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
@ -102,9 +103,14 @@ ior_aiori_t hdf5_aiori = {
.xfer = HDF5_Xfer,
.close = HDF5_Close,
.delete = HDF5_Delete,
.set_version = HDF5_SetVersion,
.get_version = HDF5_GetVersion,
.fsync = HDF5_Fsync,
.get_file_size = HDF5_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = HDF5_Access,
.stat = aiori_posix_stat,
};
static hid_t xferPropList; /* xfer property list */
@ -435,27 +441,29 @@ static void HDF5_Close(void *fd, IOR_param_t * param)
*/
static void HDF5_Delete(char *testFileName, IOR_param_t * param)
{
if (unlink(testFileName) != 0)
WARN("cannot delete file");
return(MPIIO_Delete(testFileName, param));
}
/*
* Determine api version.
*/
static void HDF5_SetVersion(IOR_param_t * test)
static char * HDF5_GetVersion()
{
static char version[1024] = {0};
if(version[0]) return version;
unsigned major, minor, release;
if (H5get_libversion(&major, &minor, &release) < 0) {
WARN("cannot get HDF5 library version");
} else {
sprintf(test->apiVersion, "%s-%u.%u.%u",
test->api, major, minor, release);
sprintf(version, "%u.%u.%u", major, minor, release);
}
#ifndef H5_HAVE_PARALLEL
strcat(test->apiVersion, " (Serial)");
strcat(version, " (Serial)");
#else /* H5_HAVE_PARALLEL */
strcat(test->apiVersion, " (Parallel)");
strcat(version, " (Parallel)");
#endif /* not H5_HAVE_PARALLEL */
return version;
}
/*
@ -565,5 +573,13 @@ static void SetupDataSet(void *fd, IOR_param_t * param)
static IOR_offset_t
HDF5_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName)
{
return (MPIIO_GetFileSize(test, testComm, testFileName));
return(MPIIO_GetFileSize(test, testComm, testFileName));
}
/*
* Use MPIIO call to check for access.
*/
static int HDF5_Access(const char *path, int mode, IOR_param_t *param)
{
return(MPIIO_Access(path, mode, param));
}

357
src/aiori-IME.c Executable file
View File

@ -0,0 +1,357 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
* *
* Copyright (c) 2003, The Regents of the University of California. *
* Copyright (c) 2018, DataDirect Networks. *
* See the file COPYRIGHT for a complete copyright notice and license. *
* *
********************************************************************************
*
* Implement abstract I/O interface for DDN Infinite Memory Engine (IME).
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <errno.h> /* sys_errlist */
#include <fcntl.h> /* IO operations */
#include "ior.h"
#include "iordef.h"
#include "aiori.h"
#include "utilities.h"
#include "ime_native.h"
#ifndef O_BINARY /* Required on Windows */
# define O_BINARY 0
#endif
/**************************** P R O T O T Y P E S *****************************/
static void *IME_Create(char *, IOR_param_t *);
static void *IME_Open(char *, IOR_param_t *);
static void IME_Close(void *, IOR_param_t *);
static void IME_Delete(char *, IOR_param_t *);
static char *IME_GetVersion();
static void IME_Fsync(void *, IOR_param_t *);
static int IME_Access(const char *, int, IOR_param_t *);
static IOR_offset_t IME_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static IOR_offset_t IME_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static int IME_StatFS(const char *, ior_aiori_statfs_t *,
IOR_param_t *);
static int IME_RmDir(const char *, IOR_param_t *);
static int IME_MkDir(const char *, mode_t, IOR_param_t *);
static int IME_Stat(const char *, struct stat *, IOR_param_t *);
static void IME_Initialize();
static void IME_Finalize();
/************************** D E C L A R A T I O N S ***************************/
extern int rank;
extern int rankOffset;
extern int verbose;
extern MPI_Comm testComm;
ior_aiori_t ime_aiori = {
.name = "IME",
.create = IME_Create,
.open = IME_Open,
.xfer = IME_Xfer,
.close = IME_Close,
.delete = IME_Delete,
.get_version = IME_GetVersion,
.fsync = IME_Fsync,
.get_file_size = IME_GetFileSize,
.access = IME_Access,
.statfs = IME_StatFS,
.rmdir = IME_RmDir,
.mkdir = IME_MkDir,
.stat = IME_Stat,
.initialize = IME_Initialize,
.finalize = IME_Finalize,
};
/***************************** F U N C T I O N S ******************************/
/*
* Initialize IME (before MPI is started).
*/
/* Backend hook: bring up the IME native client library.
 * Registered as .initialize in ime_aiori; per the comment above, it runs
 * before MPI is started, so it must not rely on MPI being available. */
static void IME_Initialize()
{
ime_native_init();
}
/*
* Finalize IME (after MPI is shut down).
*/
/* Backend hook: shut down the IME native client library.
 * The return value of ime_native_finalize() is deliberately discarded —
 * at teardown there is nothing useful to do on failure. */
static void IME_Finalize()
{
(void)ime_native_finalize();
}
/*
* Try to access a file through the IME interface.
*/
/*
 * Check accessibility of a path via the IME native interface.
 * Returns ime_native_access()'s result directly (0 on success).
 */
static int IME_Access(const char *path, int mode, IOR_param_t *param)
{
        int rc;

        (void)param;            /* unused by this backend */
        rc = ime_native_access(path, mode);
        return rc;
}
/*
* Creat and open a file through the IME interface.
*/
/* Create a file via the IME interface by delegating to IME_Open().
 * NOTE(review): creation relies on the caller having set IOR_CREAT in
 * param->openFlags (IME_Open maps it to O_CREAT) — confirm callers do. */
static void *IME_Create(char *testFileName, IOR_param_t *param)
{
return IME_Open(testFileName, param);
}
/*
* Open a file through the IME interface.
*/
/*
 * Open a file through the IME native interface.
 * Translates IOR's portable open flags into POSIX-style flags, honors
 * O_DIRECT when requested, and returns a heap-allocated fd handle.
 * Aborts via ERR() on allocation or open failure.
 */
static void *IME_Open(char *testFileName, IOR_param_t *param)
{
        /* IOR flag -> POSIX flag translation table. */
        static const struct {
                int ior_flag;
                int posix_flag;
        } flag_map[] = {
                { IOR_RDONLY, O_RDONLY },
                { IOR_WRONLY, O_WRONLY },
                { IOR_RDWR,   O_RDWR   },
                { IOR_APPEND, O_APPEND },
                { IOR_CREAT,  O_CREAT  },
                { IOR_EXCL,   O_EXCL   },
                { IOR_TRUNC,  O_TRUNC  },
        };
        int oflags = O_BINARY;
        size_t i;
        int *fd = (int *)malloc(sizeof(int));

        if (fd == NULL)
                ERR("Unable to malloc file descriptor");

        if (param->useO_DIRECT)
                set_o_direct_flag(&oflags);

        for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++) {
                if (param->openFlags & flag_map[i].ior_flag)
                        oflags |= flag_map[i].posix_flag;
        }

        *fd = ime_native_open(testFileName, oflags, 0664);
        if (*fd < 0) {
                free(fd);       /* don't leak the handle on failure */
                ERR("cannot open file");
        }

        return (void *)fd;
}
/*
* Write or read access to file using the IME interface.
*/
static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer,
                             IOR_offset_t length, IOR_param_t *param)
{
        int xferRetries = 0;
        long long remaining = (long long)length;
        char *ptr = (char *)buffer;
        int fd = *(int *)file;
        long long rc;

        /* Move `length` bytes starting at param->offset, retrying on
         * partial transfers up to MAX_RETRY times. */
        while (remaining > 0) {
                /* BUGFIX: the offset must advance with ptr across partial
                 * transfers.  The original passed the fixed param->offset to
                 * every pwrite/pread, so after a short transfer the next
                 * attempt re-targeted the start of the region while the data
                 * pointer had already advanced, corrupting the file/buffer.
                 * (The verbose prints below already computed this value.) */
                IOR_offset_t cur_offset = param->offset + length - remaining;

                if (access == WRITE) { /* WRITE */
                        if (verbose >= VERBOSE_4) {
                                fprintf(stdout, "task %d writing to offset %lld\n",
                                        rank, cur_offset);
                        }
                        rc = ime_native_pwrite(fd, ptr, remaining, cur_offset);
                        /* BUGFIX: the original never checked write errors;
                         * a failed write (rc < 0) only tripped the assert. */
                        if (rc < 0)
                                ERR("write failed");
                        if (param->fsyncPerWrite)
                                IME_Fsync(&fd, param);
                } else { /* READ or CHECK */
                        if (verbose >= VERBOSE_4) {
                                fprintf(stdout, "task %d reading from offset %lld\n",
                                        rank, cur_offset);
                        }
                        rc = ime_native_pread(fd, ptr, remaining, cur_offset);
                        if (rc == 0)
                                ERR("hit EOF prematurely");
                        else if (rc < 0)
                                ERR("read failed");
                }

                if (rc < remaining) {
                        fprintf(stdout, "WARNING: Task %d, partial %s, %lld of "
                                "%lld bytes at offset %lld\n",
                                rank, access == WRITE ? "write" : "read", rc,
                                remaining, cur_offset);
                        if (param->singleXferAttempt) {
                                MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
                                          "barrier error");
                        }
                        if (xferRetries > MAX_RETRY) {
                                ERR( "too many retries -- aborting" );
                        }
                } else if (rc > remaining) /* this should never happen */
                        ERR("too many bytes transferred!?!");

                assert(rc >= 0);
                assert(rc <= remaining);
                remaining -= rc;
                ptr += rc;
                xferRetries++;
        }

        return(length);
}
/*
* Perform fsync().
*/
/*
 * Flush a file to stable storage via ime_native_fsync().
 * Failure is non-fatal: a warning is printed and the run continues.
 */
static void IME_Fsync(void *fd, IOR_param_t *param)
{
        int rc;

        (void)param;            /* unused by this backend */
        rc = ime_native_fsync(*(int *)fd);
        if (rc != 0)
                WARN("cannot perform fsync on file");
}
/*
* Close a file through the IME interface.
*/
/*
 * Close a file through the IME interface and release the heap-allocated
 * fd handle created by IME_Open().  (The original freed the handle on
 * both the success and failure paths; this keeps that behavior with a
 * single free.)
 */
static void IME_Close(void *fd, IOR_param_t *param)
{
        int native_fd = *(int *)fd;

        (void)param;            /* unused by this backend */
        free(fd);
        if (ime_native_close(native_fd) != 0)
                ERR("cannot close file");
}
/*
* Delete a file through the IME interface.
*/
/*
 * Delete a file through the IME interface.
 * Failure is non-fatal: a warning naming the rank and file is printed.
 */
static void IME_Delete(char *testFileName, IOR_param_t *param)
{
        if (ime_native_unlink(testFileName) != 0) {
                char errmsg[256];

                /* BUGFIX: use snprintf — testFileName is caller-controlled
                 * and the original unbounded sprintf could overflow errmsg.
                 * Also build the message only when it is actually needed. */
                snprintf(errmsg, sizeof(errmsg),
                         "[RANK %03d]:cannot delete file %s\n",
                         rank, testFileName);
                WARN(errmsg);
        }
}
/*
* Determine API version.
*/
/*
 * Report the IME native API version string.
 * Returns a pointer to an internal static buffer (do not free).
 * On APIs older than 1.2.0 the library exposes no version call, so a
 * placeholder string is returned instead.
 */
static char *IME_GetVersion()
{
        /* FIX: "= {}" (empty brace initializer) is a GNU/C23 extension;
         * "{0}" is portable C.  Also bound the copy: the library-provided
         * version string length is not under our control. */
        static char ver[1024] = {0};
#if (IME_NATIVE_API_VERSION >= 120)
        strncpy(ver, ime_native_version(), sizeof(ver) - 1);
#else
        strcpy(ver, "not supported");
#endif
        return ver;
}
/*
* XXX: statfs call is currently not exposed by IME native interface.
*/
/* Unsupported operation stub: IME native exposes no statfs; warn and
 * return -1 so callers can detect the missing capability. */
static int IME_StatFS(const char *oid, ior_aiori_statfs_t *stat_buf,
IOR_param_t *param)
{
(void)oid;
(void)stat_buf;
(void)param;
WARN("statfs is currently not supported in IME backend!");
return -1;
}
/*
* XXX: mkdir call is currently not exposed by IME native interface.
*/
/* Unsupported operation stub: IME native exposes no mkdir; warn and
 * return -1 so callers can detect the missing capability. */
static int IME_MkDir(const char *oid, mode_t mode, IOR_param_t *param)
{
(void)oid;
(void)mode;
(void)param;
WARN("mkdir is currently not supported in IME backend!");
return -1;
}
/*
* XXX: rmdir call is currently not exposed by IME native interface.
*/
/* Unsupported operation stub: IME native exposes no rmdir; warn and
 * return -1 so callers can detect the missing capability. */
static int IME_RmDir(const char *oid, IOR_param_t *param)
{
(void)oid;
(void)param;
WARN("rmdir is currently not supported in IME backend!");
return -1;
}
/*
* Perform stat() through the IME interface.
*/
/* stat() a path through the IME native interface; fills *buf and returns
 * ime_native_stat()'s result directly (0 on success). */
static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param)
{
(void)param;
return ime_native_stat(path, buf);
}
/*
* Use IME stat() to return aggregate file size.
*/
/* Return the aggregate size of the written data across all tasks.
 * file-per-process mode: sum of each task's file size (MPI_SUM reduce).
 * shared-file mode: all tasks should see the same size; if the min and
 * max across tasks differ, warn (rank 0 only) and use the minimum so the
 * result is at least consistent.  Aborts via ERR() if stat fails.
 * NOTE: collective — every task in testComm must call this. */
static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
char *testFileName)
{
struct stat stat_buf;
IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
if (ime_native_stat(testFileName, &stat_buf) != 0) {
ERR("cannot get status of written file");
}
aggFileSizeFromStat = stat_buf.st_size;
if (test->filePerProc) {
/* N files: total moved = sum of per-task sizes */
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
aggFileSizeFromStat = tmpSum;
} else {
/* 1 shared file: verify every task observed the same size */
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
MPI_LONG_LONG_INT, MPI_MIN, testComm),
"cannot total data moved");
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
MPI_LONG_LONG_INT, MPI_MAX, testComm),
"cannot total data moved");
if (tmpMin != tmpMax) {
if (rank == 0) {
WARN("inconsistent file size by different tasks");
}
/* incorrect, but now consistent across tasks */
aggFileSizeFromStat = tmpMin;
}
}
return(aggFileSizeFromStat);
}

View File

@ -42,7 +42,7 @@ ior_aiori_t mmap_aiori = {
.xfer = MMAP_Xfer,
.close = MMAP_Close,
.delete = POSIX_Delete,
.set_version = POSIX_SetVersion,
.get_version = aiori_get_version,
.fsync = MMAP_Fsync,
.get_file_size = POSIX_GetFileSize,
};

View File

@ -38,10 +38,9 @@ static void *MPIIO_Open(char *, IOR_param_t *);
static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void MPIIO_Close(void *, IOR_param_t *);
static void MPIIO_Delete(char *, IOR_param_t *);
static void MPIIO_SetVersion(IOR_param_t *);
static char* MPIIO_GetVersion();
static void MPIIO_Fsync(void *, IOR_param_t *);
static int MPIIO_Access(const char *, int, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
@ -52,10 +51,14 @@ ior_aiori_t mpiio_aiori = {
.xfer = MPIIO_Xfer,
.close = MPIIO_Close,
.delete = MPIIO_Delete,
.set_version = MPIIO_SetVersion,
.get_version = MPIIO_GetVersion,
.fsync = MPIIO_Fsync,
.get_file_size = MPIIO_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = MPIIO_Access,
.stat = aiori_posix_stat,
};
/***************************** F U N C T I O N S ******************************/
@ -63,7 +66,7 @@ ior_aiori_t mpiio_aiori = {
/*
* Try to access a file through the MPIIO interface.
*/
static int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
{
MPI_File fd;
int mpi_mode = MPI_MODE_UNIQUE_OPEN;
@ -268,10 +271,14 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer,
/* point functions to appropriate MPIIO calls */
if (access == WRITE) { /* WRITE */
Access = MPI_File_write;
Access_at = MPI_File_write_at;
Access_all = MPI_File_write_all;
Access_at_all = MPI_File_write_at_all;
Access = (int (MPIAPI *)(MPI_File, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write;
Access_at = (int (MPIAPI *)(MPI_File, MPI_Offset, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write_at;
Access_all = (int (MPIAPI *) (MPI_File, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write_all;
Access_at_all = (int (MPIAPI *) (MPI_File, MPI_Offset, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write_at_all;
/*
* this needs to be properly implemented:
*
@ -364,15 +371,18 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer,
}
}
}
if((access == WRITE) && (param->fsyncPerWrite == TRUE))
MPIIO_Fsync(fd, param);
return (length);
}
/*
* Perform fsync().
*/
static void MPIIO_Fsync(void *fd, IOR_param_t * param)
static void MPIIO_Fsync(void *fdp, IOR_param_t * param)
{
;
if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS)
EWARN("fsync() failed");
}
/*
@ -396,7 +406,7 @@ static void MPIIO_Close(void *fd, IOR_param_t * param)
/*
* Delete a file through the MPIIO interface.
*/
static void MPIIO_Delete(char *testFileName, IOR_param_t * param)
void MPIIO_Delete(char *testFileName, IOR_param_t * param)
{
MPI_CHECK(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL),
"cannot delete file");
@ -405,13 +415,13 @@ static void MPIIO_Delete(char *testFileName, IOR_param_t * param)
/*
* Determine api version.
*/
static void MPIIO_SetVersion(IOR_param_t * test)
static char* MPIIO_GetVersion()
{
int version, subversion;
MPI_CHECK(MPI_Get_version(&version, &subversion),
"cannot get MPI version");
sprintf(test->apiVersion, "%s (version=%d, subversion=%d)",
test->api, version, subversion);
static char ver[1024] = {};
int version, subversion;
MPI_CHECK(MPI_Get_version(&version, &subversion), "cannot get MPI version");
sprintf(ver, "(%d.%d)", version, subversion);
return ver;
}
/*
@ -438,7 +448,7 @@ static IOR_offset_t SeekOffset(MPI_File fd, IOR_offset_t offset,
if (param->filePerProc) {
tempOffset = tempOffset / param->transferSize;
} else {
/*
/*
* this formula finds a file view offset for a task
* from an absolute offset
*/

View File

@ -56,6 +56,7 @@ static void NCMPI_Delete(char *, IOR_param_t *);
static void NCMPI_SetVersion(IOR_param_t *);
static void NCMPI_Fsync(void *, IOR_param_t *);
static IOR_offset_t NCMPI_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int NCMPI_Access(const char *, int, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
@ -66,9 +67,14 @@ ior_aiori_t ncmpi_aiori = {
.xfer = NCMPI_Xfer,
.close = NCMPI_Close,
.delete = NCMPI_Delete,
.set_version = NCMPI_SetVersion,
.get_version = NCMPI_GetVersion,
.fsync = NCMPI_Fsync,
.get_file_size = NCMPI_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = NCMPI_Access,
.stat = aiori_posix_stat,
};
/***************************** F U N C T I O N S ******************************/
@ -329,16 +335,15 @@ static void NCMPI_Close(void *fd, IOR_param_t * param)
*/
static void NCMPI_Delete(char *testFileName, IOR_param_t * param)
{
if (unlink(testFileName) != 0)
WARN("unlink() failed");
return(MPIIO_Delete(testFileName, param));
}
/*
* Determine api version.
*/
static void NCMPI_SetVersion(IOR_param_t * test)
static char* NCMPI_GetVersion()
{
sprintf(test->apiVersion, "%s (%s)", test->api, ncmpi_inq_libvers());
return ncmpi_inq_libvers();
}
/*
@ -387,5 +392,13 @@ static int GetFileMode(IOR_param_t * param)
static IOR_offset_t NCMPI_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName)
{
return (MPIIO_GetFileSize(test, testComm, testFileName));
return(MPIIO_GetFileSize(test, testComm, testFileName));
}
/*
* Use MPIIO call to check for access.
*/
static int NCMPI_Access(const char *path, int mode, IOR_param_t *param)
{
return(MPIIO_Access(path, mode, param));
}

View File

@ -78,9 +78,14 @@ ior_aiori_t posix_aiori = {
.xfer = POSIX_Xfer,
.close = POSIX_Close,
.delete = POSIX_Delete,
.set_version = POSIX_SetVersion,
.get_version = aiori_get_version,
.fsync = POSIX_Fsync,
.get_file_size = POSIX_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = aiori_posix_access,
.stat = aiori_posix_stat,
};
/***************************** F U N C T I O N S ******************************/
@ -507,14 +512,6 @@ void POSIX_Delete(char *testFileName, IOR_param_t * param)
EWARN(errmsg);
}
/*
* Determine api version.
*/
void POSIX_SetVersion(IOR_param_t * test)
{
strcpy(test->apiVersion, test->api);
}
/*
* Use POSIX stat() to return aggregate file size.
*/

361
src/aiori-RADOS.c Executable file
View File

@ -0,0 +1,361 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
* *
* (C) 2015 The University of Chicago *
* *
* See COPYRIGHT in top-level directory. *
* *
********************************************************************************
*
* Implement abstract I/O interface for RADOS.
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <rados/librados.h>
#include "ior.h"
#include "iordef.h"
#include "aiori.h"
#include "utilities.h"
/************************** O P T I O N S *****************************/
struct rados_options{
char * user;
char * conf;
char * pool;
};
static struct rados_options o = {
.user = NULL,
.conf = NULL,
.pool = NULL,
};
static option_help options [] = {
{'u', "user", "Username for the RADOS cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.user},
{'c', "conf", "Config file for the RADOS cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.conf},
{'p', "pool", "RADOS pool to use for I/O", OPTION_REQUIRED_ARGUMENT, 's', & o.pool},
LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static void *RADOS_Create(char *, IOR_param_t *);
static void *RADOS_Open(char *, IOR_param_t *);
static IOR_offset_t RADOS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void RADOS_Close(void *, IOR_param_t *);
static void RADOS_Delete(char *, IOR_param_t *);
static void RADOS_Fsync(void *, IOR_param_t *);
static IOR_offset_t RADOS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int RADOS_StatFS(const char *, ior_aiori_statfs_t *, IOR_param_t *);
static int RADOS_MkDir(const char *, mode_t, IOR_param_t *);
static int RADOS_RmDir(const char *, IOR_param_t *);
static int RADOS_Access(const char *, int, IOR_param_t *);
static int RADOS_Stat(const char *, struct stat *, IOR_param_t *);
static option_help * RADOS_options();
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t rados_aiori = {
.name = "RADOS",
.create = RADOS_Create,
.open = RADOS_Open,
.xfer = RADOS_Xfer,
.close = RADOS_Close,
.delete = RADOS_Delete,
.get_version = aiori_get_version,
.fsync = RADOS_Fsync,
.get_file_size = RADOS_GetFileSize,
.statfs = RADOS_StatFS,
.mkdir = RADOS_MkDir,
.rmdir = RADOS_RmDir,
.access = RADOS_Access,
.stat = RADOS_Stat,
.get_options = RADOS_options,
};
#define RADOS_ERR(__err_str, __ret) do { \
errno = -__ret; \
ERR(__err_str); \
} while(0)
/***************************** F U N C T I O N S ******************************/
/* Backend hook: expose the RADOS-specific command-line options
 * (user, conf, pool) declared in the file-scope `options` table. */
static option_help * RADOS_options(){
  return options;
}
/*
 * Establish a connection to the RADOS cluster and create an I/O context
 * for the configured pool, storing both handles in *param.
 * Any failure aborts the run via RADOS_ERR() (which sets errno from the
 * negative librados return code).
 */
static void RADOS_Cluster_Init(IOR_param_t * param)
{
        int rc;

        /* cluster handle for the configured user */
        rc = rados_create(&param->rados_cluster, o.user);
        if (rc)
                RADOS_ERR("unable to create RADOS cluster handle", rc);

        /* apply settings from the Ceph config file */
        rc = rados_conf_read_file(param->rados_cluster, o.conf);
        if (rc)
                RADOS_ERR("unable to read RADOS config file", rc);

        rc = rados_connect(param->rados_cluster);
        if (rc)
                RADOS_ERR("unable to connect to the RADOS cluster", rc);

        /* I/O context scoped to the pool all objects live in */
        rc = rados_ioctx_create(param->rados_cluster, o.pool, &param->rados_ioctx);
        if (rc)
                RADOS_ERR("unable to create an I/O context for the RADOS cluster", rc);
}
/* Tear down the connection made by RADOS_Cluster_Init(): destroy the
 * pool I/O context first, then shut the cluster handle down. */
static void RADOS_Cluster_Finalize(IOR_param_t * param)
{
/* ioctx destroy */
rados_ioctx_destroy(param->rados_ioctx);
/* shutdown */
rados_shutdown(param->rados_cluster);
}
/* Common path for RADOS_Create()/RADOS_Open().
 * Connects to the cluster, then (if create_flag) issues a write-op that
 * creates the object — exclusively when IOR_EXCL is set, idempotently
 * otherwise.  The returned "file handle" is simply a strdup'd object id;
 * RADOS_Close() frees it.  Aborts via ERR()/RADOS_ERR() on failure. */
static void *RADOS_Create_Or_Open(char *testFileName, IOR_param_t * param, int create_flag)
{
int ret;
char *oid;
RADOS_Cluster_Init(param);
if (param->useO_DIRECT == TRUE)
WARN("direct I/O mode is not implemented in RADOS\n");
/* the oid string doubles as the aiori file handle */
oid = strdup(testFileName);
if (!oid)
ERR("unable to allocate RADOS oid");
if (create_flag)
{
rados_write_op_t create_op;
int rados_create_flag;
if (param->openFlags & IOR_EXCL)
rados_create_flag = LIBRADOS_CREATE_EXCLUSIVE;
else
rados_create_flag = LIBRADOS_CREATE_IDEMPOTENT;
/* create a RADOS "write op" for creating the object */
create_op = rados_create_write_op();
rados_write_op_create(create_op, rados_create_flag, NULL);
ret = rados_write_op_operate(create_op, param->rados_ioctx, oid,
NULL, 0);
rados_release_write_op(create_op);
if (ret)
RADOS_ERR("unable to create RADOS object", ret);
}
else
{
/* XXX actually, we should probably assert oid existence here? */
}
return (void *)oid;
}
/* Create (and open) a RADOS object; delegates with create_flag = TRUE. */
static void *RADOS_Create(char *testFileName, IOR_param_t * param)
{
return RADOS_Create_Or_Open(testFileName, param, TRUE);
}
/*
 * Open a RADOS object; honors IOR_CREAT by routing through the create
 * path, otherwise opens the (assumed existing) object.
 */
static void *RADOS_Open(char *testFileName, IOR_param_t * param)
{
        int create_flag = (param->openFlags & IOR_CREAT) ? TRUE : FALSE;

        return RADOS_Create_Or_Open(testFileName, param, create_flag);
}
/* Transfer `length` bytes to/from the object at param->offset using a
 * single librados write-op or read-op.  The whole request is issued in
 * one operation (no retry loop); a short read or any librados error
 * aborts via RADOS_ERR().  Returns the full length on success. */
static IOR_offset_t RADOS_Xfer(int access, void *fd, IOR_size_t * buffer,
IOR_offset_t length, IOR_param_t * param)
{
int ret;
char *oid = (char *)fd;
if (access == WRITE)
{
rados_write_op_t write_op;
write_op = rados_create_write_op();
rados_write_op_write(write_op, (const char *)buffer,
length, param->offset);
ret = rados_write_op_operate(write_op, param->rados_ioctx,
oid, NULL, 0);
rados_release_write_op(write_op);
if (ret)
RADOS_ERR("unable to write RADOS object", ret);
}
else /* READ */
{
int read_ret;
size_t bytes_read;
rados_read_op_t read_op;
read_op = rados_create_read_op();
rados_read_op_read(read_op, param->offset, length, (char *)buffer,
&bytes_read, &read_ret);
ret = rados_read_op_operate(read_op, param->rados_ioctx, oid, 0);
rados_release_read_op(read_op);
/* treat a short read the same as a failure */
if (ret || read_ret || ((IOR_offset_t)bytes_read != length))
RADOS_ERR("unable to read RADOS object", ret);
}
return length;
}
/*
 * No-op: a completed librados write op is already durable, so there is
 * nothing to flush.
 */
static void RADOS_Fsync(void *fd, IOR_param_t * param)
{
        (void)fd;
        (void)param;
}
/*
 * "Close" a RADOS object: objects need no close, so this just tears down
 * the cluster connection and frees the strdup'd oid handed out by
 * RADOS_Create_Or_Open().
 */
static void RADOS_Close(void *fd, IOR_param_t * param)
{
        RADOS_Cluster_Finalize(param);
        free(fd);
}
/* Remove a RADOS object by name.  Because delete is called outside an
 * open/close pair, the cluster connection is re-established for the
 * duration of the call.  Aborts via RADOS_ERR() on failure. */
static void RADOS_Delete(char *testFileName, IOR_param_t * param)
{
int ret;
char *oid = testFileName;
rados_write_op_t remove_op;
/* we have to reestablish cluster connection here... */
RADOS_Cluster_Init(param);
/* remove the object */
remove_op = rados_create_write_op();
rados_write_op_remove(remove_op);
ret = rados_write_op_operate(remove_op, param->rados_ioctx,
oid, NULL, 0);
rados_release_write_op(remove_op);
if (ret)
RADOS_ERR("unable to remove RADOS object", ret);
RADOS_Cluster_Finalize(param);
return;
}
/* Aggregate object size across tasks, mirroring the other backends:
 * file-per-process sums per-task sizes; shared-object mode cross-checks
 * min vs max and falls back to the minimum on disagreement.
 * Re-establishes the cluster connection for the duration of the call.
 * NOTE: collective — every task in testComm must call this. */
static IOR_offset_t RADOS_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName)
{
int ret;
char *oid = testFileName;
rados_read_op_t stat_op;
uint64_t oid_size;
int stat_ret;
IOR_offset_t aggSizeFromStat, tmpMin, tmpMax, tmpSum;
/* we have to reestablish cluster connection here... */
RADOS_Cluster_Init(test);
/* stat the object */
stat_op = rados_create_read_op();
rados_read_op_stat(stat_op, &oid_size, NULL, &stat_ret);
ret = rados_read_op_operate(stat_op, test->rados_ioctx, oid, 0);
rados_release_read_op(stat_op);
if (ret || stat_ret)
RADOS_ERR("unable to stat RADOS object", stat_ret);
aggSizeFromStat = oid_size;
if (test->filePerProc == TRUE)
{
/* N objects: total moved = sum of per-task sizes */
MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpSum, 1,
MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
aggSizeFromStat = tmpSum;
}
else
{
/* 1 shared object: verify every task observed the same size */
MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpMin, 1,
MPI_LONG_LONG_INT, MPI_MIN, testComm),
"cannot total data moved");
MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpMax, 1,
MPI_LONG_LONG_INT, MPI_MAX, testComm),
"cannot total data moved");
if (tmpMin != tmpMax)
{
if (rank == 0)
WARN("inconsistent file size by different tasks");
/* incorrect, but now consistent across tasks */
aggSizeFromStat = tmpMin;
}
}
RADOS_Cluster_Finalize(test);
return aggSizeFromStat;
}
/* Unsupported: RADOS has no filesystem-level statfs; warn and fail. */
static int RADOS_StatFS(const char *oid, ior_aiori_statfs_t *stat_buf,
                        IOR_param_t *param)
{
        (void)oid;
        (void)stat_buf;
        (void)param;
        WARN("statfs not supported in RADOS backend!");
        return -1;
}
/* Unsupported: RADOS objects live in a flat pool — no directories. */
static int RADOS_MkDir(const char *oid, mode_t mode, IOR_param_t *param)
{
        (void)oid;
        (void)mode;
        (void)param;
        WARN("mkdir not supported in RADOS backend!");
        return -1;
}
/* Unsupported: RADOS objects live in a flat pool — no directories. */
static int RADOS_RmDir(const char *oid, IOR_param_t *param)
{
        (void)oid;
        (void)param;
        WARN("rmdir not supported in RADOS backend!");
        return -1;
}
/*
 * Check object existence via a librados stat read-op.
 * Returns 0 if the object exists, -1 otherwise.  The `mode` argument is
 * ignored: RADOS objects carry no permission bits, so existence is the
 * only meaningful test.  Re-establishes the cluster connection for the
 * duration of the call.
 */
static int RADOS_Access(const char *oid, int mode, IOR_param_t *param)
{
        rados_read_op_t read_op;
        int ret;
        int prval;
        uint64_t oid_size;

        (void)mode;

        /* we have to reestablish cluster connection here... */
        RADOS_Cluster_Init(param);

        /* use read_op stat to check for oid existence */
        read_op = rados_create_read_op();
        rados_read_op_stat(read_op, &oid_size, NULL, &prval);
        ret = rados_read_op_operate(read_op, param->rados_ioctx, oid, 0);
        rados_release_read_op(read_op);

        RADOS_Cluster_Finalize(param);

        /* FIX: logical OR — the original used bitwise '|', which happened
         * to give the right answer but obscured the intent (fail if either
         * the op or the per-op stat reported an error). */
        if (ret || prval)
                return -1;
        else
                return 0;
}
/* Unsupported: no POSIX-style stat mapping for RADOS objects here
 * (size queries go through RADOS_GetFileSize instead). */
static int RADOS_Stat(const char *oid, struct stat *buf, IOR_param_t *param)
{
        (void)oid;
        (void)buf;
        (void)param;
        WARN("stat not supported in RADOS backend!");
        return -1;
}

View File

@ -110,6 +110,39 @@
#include "aws4c_extra.h" // utilities, e.g. for parsing XML in responses
/* buffer is used to generate URLs, err_msgs, etc */
#define BUFF_SIZE 1024
static char buff[BUFF_SIZE];
const int ETAG_SIZE = 32;
CURLcode rc;
/* Any objects we create or delete will be under this bucket */
const char* bucket_name = "ior";
/* TODO: The following stuff goes into options! */
/* REST/S3 variables */
// CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */
# define IOR_CURL_INIT 0x01 /* curl top-level inits were perfomed once? */
# define IOR_CURL_NOCONTINUE 0x02
# define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */
#ifdef USE_S3_AIORI
# include <curl/curl.h>
# include "aws4c.h"
#else
typedef void CURL; /* unused, but needs a type */
typedef void IOBuf; /* unused, but needs a type */
#endif
IOBuf* io_buf; /* aws4c places parsed header values here */
IOBuf* etags; /* accumulate ETags for N:1 parts */
///////////////////////////////////////////////
/**************************** P R O T O T Y P E S *****************************/
static void* S3_Create(char*, IOR_param_t*);
static void* S3_Open(char*, IOR_param_t*);
@ -122,9 +155,10 @@ static IOR_offset_t EMC_Xfer(int, void*, IOR_size_t*, IOR_offset_t, IOR_param_t*
static void EMC_Close(void*, IOR_param_t*);
static void S3_Delete(char*, IOR_param_t*);
static void S3_SetVersion(IOR_param_t*);
static void S3_Fsync(void*, IOR_param_t*);
static IOR_offset_t S3_GetFileSize(IOR_param_t*, MPI_Comm, char*);
static void S3_init();
static void S3_finalize();
/************************** D E C L A R A T I O N S ***************************/
@ -138,9 +172,11 @@ ior_aiori_t s3_aiori = {
.xfer = S3_Xfer,
.close = S3_Close,
.delete = S3_Delete,
.set_version = S3_SetVersion,
.get_version = aiori_get_version,
.fsync = S3_Fsync,
.get_file_size = S3_GetFileSize,
.initialize = S3_init,
.finalize = S3_finalize
};
// "S3", plus EMC-extensions enabled
@ -156,6 +192,8 @@ ior_aiori_t s3_plus_aiori = {
.set_version = S3_SetVersion,
.fsync = S3_Fsync,
.get_file_size = S3_GetFileSize,
.initialize = S3_init,
.finalize = S3_finalize
};
// Use EMC-extensions for N:1 write, as well
@ -171,9 +209,24 @@ ior_aiori_t s3_emc_aiori = {
.set_version = S3_SetVersion,
.fsync = S3_Fsync,
.get_file_size = S3_GetFileSize,
.initialize = S3_init,
.finalize = S3_finalize
};
static void S3_init(){
/* This is supposed to be done before *any* threads are created.
* Could MPI_Init() create threads (or call multi-threaded
* libraries)? We'll assume so. */
AWS4C_CHECK( aws_init() );
}
static void S3_finalize(){
/* done once per program, after exiting all threads.
* NOTE: This fn doesn't return a value that can be checked for success. */
aws_cleanup();
}
/* modelled on similar macros in iordef.h */
#define CURL_ERR(MSG, CURL_ERRNO, PARAM) \
do { \
@ -183,7 +236,7 @@ ior_aiori_t s3_emc_aiori = {
fflush(stdout); \
MPI_Abort((PARAM)->testComm, -1); \
} while (0)
#define CURL_WARN(MSG, CURL_ERRNO) \
do { \
@ -192,20 +245,6 @@ ior_aiori_t s3_emc_aiori = {
__FILE__, __LINE__); \
fflush(stdout); \
} while (0)
/* buffer is used to generate URLs, err_msgs, etc */
#define BUFF_SIZE 1024
static char buff[BUFF_SIZE];
const int ETAG_SIZE = 32;
CURLcode rc;
/* Any objects we create or delete will be under this bucket */
const char* bucket_name = "ior";
//const char* bucket_name = "brettk";
/***************************** F U N C T I O N S ******************************/
@ -232,9 +271,8 @@ const char* bucket_name = "ior";
* ---------------------------------------------------------------------------
*/
static
void
s3_connect( IOR_param_t* param ) {
static void s3_connect( IOR_param_t* param ) {
if (param->verbose >= VERBOSE_2) {
printf("-> s3_connect\n"); /* DEBUGGING */
}
@ -446,7 +484,7 @@ S3_Create_Or_Open_internal(char* testFileName,
if ( n_to_n || (rank == 0) ) {
// rank0 handles truncate
if ( needs_reset) {
if ( needs_reset) {
aws_iobuf_reset(param->io_buf);
AWS4C_CHECK( s3_put(param->io_buf, testFileName) ); /* 0-length write */
AWS4C_CHECK_OK( param->io_buf );
@ -510,7 +548,7 @@ S3_Create_Or_Open_internal(char* testFileName,
fprintf( stdout, "rank %d resetting\n",
rank);
}
aws_iobuf_reset(param->io_buf);
AWS4C_CHECK( s3_put(param->io_buf, testFileName) );
AWS4C_CHECK_OK( param->io_buf );
@ -641,7 +679,7 @@ EMC_Open( char *testFileName, IOR_param_t * param ) {
/* In the EMC case, instead of Multi-Part Upload we can use HTTP
* "byte-range" headers to write parts of a single object. This appears to
* have several advantages over the S3 MPU spec:
*
*
* (a) no need for a special "open" operation, to capture an "UploadID".
* Instead we simply write byte-ranges, and the server-side resolves
* any races, producing a single winner. In the IOR case, there should
@ -808,7 +846,7 @@ S3_Xfer_internal(int access,
printf("rank %d: part %d = ETag %s\n", rank, part_number, param->io_buf->eTag);
}
// drop ptrs to <data_ptr>, in param->io_buf
// drop ptrs to <data_ptr>, in param->io_buf
aws_iobuf_reset(param->io_buf);
}
else { // use EMC's byte-range write-support, instead of MPU
@ -830,7 +868,7 @@ S3_Xfer_internal(int access,
AWS4C_CHECK ( s3_put(param->io_buf, file) );
AWS4C_CHECK_OK( param->io_buf );
// drop ptrs to <data_ptr>, in param->io_buf
// drop ptrs to <data_ptr>, in param->io_buf
aws_iobuf_reset(param->io_buf);
}
@ -867,7 +905,7 @@ S3_Xfer_internal(int access,
ERR_SIMPLE(buff);
}
// drop refs to <data_ptr>, in param->io_buf
// drop refs to <data_ptr>, in param->io_buf
aws_iobuf_reset(param->io_buf);
}
@ -1126,7 +1164,7 @@ S3_Close_internal( void* fd,
start_multiplier = ETAG_SIZE; /* one ETag */
stride = etag_data_size; /* one rank's-worth of Etag data */
}
xml = aws_iobuf_new();
aws_iobuf_growth_size(xml, 1024 * 8);
@ -1305,7 +1343,7 @@ S3_Delete( char *testFileName, IOR_param_t * param ) {
#if 0
// EMC BUG: If file was written with appends, and is deleted,
// Then any future recreation will result in an object that can't be read.
// this
// this
AWS4C_CHECK( s3_delete(param->io_buf, testFileName) );
#else
// just replace with a zero-length object for now
@ -1334,7 +1372,7 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) {
#if 0
// EMC BUG: If file was written with appends, and is deleted,
// Then any future recreation will result in an object that can't be read.
// this
// this
AWS4C_CHECK( s3_delete(param->io_buf, testFileName) );
#else
// just replace with a zero-length object for now
@ -1353,25 +1391,6 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) {
/*
* Determine API version.
*/
static
void
S3_SetVersion( IOR_param_t * param ) {
if (param->verbose >= VERBOSE_2) {
printf("-> S3_SetVersion\n");
}
strcpy( param->apiVersion, param->api );
if (param->verbose >= VERBOSE_2) {
printf("<- S3_SetVersion\n");
}
}
/*
* HTTP HEAD returns meta-data for a "file".
*

View File

@ -28,22 +28,25 @@
*/
ior_aiori_t *available_aiori[] = {
#ifdef USE_POSIX_AIORI
&posix_aiori,
#endif
& dummy_aiori,
#ifdef USE_HDF5_AIORI
&hdf5_aiori,
#endif
#ifdef USE_HDFS_AIORI
&hdfs_aiori,
#endif
#ifdef USE_IME_AIORI
&ime_aiori,
#endif
#ifdef USE_MPIIO_AIORI
&mpiio_aiori,
#endif
#ifdef USE_NCMPI_AIORI
&ncmpi_aiori,
#endif
#ifdef USE_POSIX_AIORI
&posix_aiori,
#endif
#ifdef USE_MMAP_AIORI
&mmap_aiori,
#endif
@ -52,12 +55,27 @@ ior_aiori_t *available_aiori[] = {
&s3_plus_aiori,
&s3_emc_aiori,
#endif
#ifdef USE_DFS_AIORI
#ifdef USE_RADOS_AIORI
&rados_aiori,
#endif
#ifdef USE_DAOS_AIORI
&daos_aiori,
&dfs_aiori,
#endif
NULL
};
/*
 * Build a '|'-separated list of all compiled-in backend names into the
 * caller-supplied buffer.
 *
 * APIs - destination buffer; caller must size it to hold every backend
 *        name plus separators (no bounds checking is done here).
 *
 * Fix: the buffer is now always NUL-terminated, even when no backend is
 * available; previously it was left uninitialized in that case and the
 * caller could read garbage.
 */
void aiori_supported_apis(char * APIs){
        ior_aiori_t **tmp = available_aiori;

        /* guarantee a valid (empty) string even if the list is empty */
        APIs[0] = '\0';

        if (*tmp != NULL) {
                APIs += sprintf(APIs, "%s", (*tmp)->name);
                tmp++;
                for (; *tmp != NULL; ++tmp) {
                        APIs += sprintf(APIs, "|%s", (*tmp)->name);
                }
        }
}
/**
* Default statfs implementation.
*
@ -67,7 +85,7 @@ ior_aiori_t *available_aiori[] = {
* This function provides a AIORI statfs for POSIX-compliant filesystems. It
* uses statvfs is available and falls back on statfs.
*/
static int aiori_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param)
int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param)
{
int ret;
#if defined(HAVE_STATVFS)
@ -92,44 +110,95 @@ static int aiori_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_par
return 0;
}
static int aiori_mkdir (const char *path, mode_t mode, IOR_param_t * param)
int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param)
{
return mkdir (path, mode);
}
static int aiori_rmdir (const char *path, IOR_param_t * param)
int aiori_posix_rmdir (const char *path, IOR_param_t * param)
{
return rmdir (path);
}
static int aiori_access (const char *path, int mode, IOR_param_t * param)
int aiori_posix_access (const char *path, int mode, IOR_param_t * param)
{
return access (path, mode);
}
static int aiori_stat (const char *path, struct stat *buf, IOR_param_t * param)
int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param)
{
return stat (path, buf);
}
/* Default backend version string: empty when a backend reports none. */
char* aiori_get_version()
{
        static char version_string[] = "";
        return version_string;
}
/* Guard so the backend initialize/finalize hooks run at most once. */
static int is_initialized = FALSE;

/*
 * Run the initialize() hook of every compiled-in backend.
 * Per the header comment in aiori.h this is called once per program,
 * before MPI is started.  tests_head may be NULL; hooks then receive
 * NULL instead of a params pointer.
 */
void aiori_initialize(IOR_test_t *tests_head){
if (is_initialized) return;
is_initialized = TRUE;

/* Sanity check, we were compiled with SOME backend, right? */
if (0 == aiori_count ()) {
ERR("No IO backends compiled into aiori. "
"Run 'configure --with-<backend>', and recompile.");
}

/* invoke each backend's optional initialize hook */
for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) {
if((*tmp)->initialize){
(*tmp)->initialize(tests_head ? &tests_head->params : NULL);
}
}
}
/*
 * Run the finalize() hook of every compiled-in backend; the mirror of
 * aiori_initialize().  Does nothing unless aiori_initialize() ran first.
 * tests_head may be NULL; hooks then receive NULL instead of params.
 */
void aiori_finalize(IOR_test_t *tests_head){
if (! is_initialized) return;
is_initialized = FALSE;

for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) {
if((*tmp)->finalize){
(*tmp)->finalize(tests_head ? &tests_head->params : NULL);
}
}
}
const ior_aiori_t *aiori_select (const char *api)
{
char warn_str[256] = {0};
for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) {
if (NULL == api || strcasecmp(api, (*tmp)->name) == 0) {
if (NULL == (*tmp)->statfs) {
(*tmp)->statfs = aiori_statfs;
(*tmp)->statfs = aiori_posix_statfs;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s statfs call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->mkdir) {
(*tmp)->mkdir = aiori_mkdir;
(*tmp)->mkdir = aiori_posix_mkdir;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s mkdir call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->rmdir) {
(*tmp)->rmdir = aiori_rmdir;
(*tmp)->rmdir = aiori_posix_rmdir;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s rmdir call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->access) {
(*tmp)->access = aiori_access;
(*tmp)->access = aiori_posix_access;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s access call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->stat) {
(*tmp)->stat = aiori_stat;
(*tmp)->stat = aiori_posix_stat;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s stat call", api);
WARN(warn_str);
}
return *tmp;
}

View File

@ -25,6 +25,7 @@
#include "ior.h"
#include "iordef.h" /* IOR Definitions */
#include "option.h"
/*************************** D E F I N I T I O N S ****************************/
@ -61,6 +62,7 @@ typedef struct ior_aiori_statfs {
uint64_t f_ffree;
} ior_aiori_statfs_t;
typedef struct ior_aiori {
char *name;
void *(*create)(char *, IOR_param_t *);
@ -69,7 +71,7 @@ typedef struct ior_aiori {
IOR_offset_t, IOR_param_t *);
void (*close)(void *, IOR_param_t *);
void (*delete)(char *, IOR_param_t *);
void (*set_version)(IOR_param_t *);
char* (*get_version)();
void (*fsync)(void *, IOR_param_t *);
IOR_offset_t (*get_file_size)(IOR_param_t *, MPI_Comm, char *);
int (*statfs) (const char *, ior_aiori_statfs_t *, IOR_param_t * param);
@ -77,12 +79,15 @@ typedef struct ior_aiori {
int (*rmdir) (const char *path, IOR_param_t * param);
int (*access) (const char *path, int mode, IOR_param_t * param);
int (*stat) (const char *path, struct stat *buf, IOR_param_t * param);
int (*init)(IOR_param_t *);
int (*finalize)(IOR_param_t *);
void (*initialize)(IOR_param_t *); /* called once per program before MPI is started */
void (*finalize)(IOR_param_t *); /* called once per program after MPI is shutdown */
option_help * (*get_options)();
} ior_aiori_t;
extern ior_aiori_t dummy_aiori;
extern ior_aiori_t hdf5_aiori;
extern ior_aiori_t hdfs_aiori;
extern ior_aiori_t ime_aiori;
extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori;
@ -90,21 +95,35 @@ extern ior_aiori_t mmap_aiori;
extern ior_aiori_t s3_aiori;
extern ior_aiori_t s3_plus_aiori;
extern ior_aiori_t s3_emc_aiori;
extern ior_aiori_t rados_aiori;
extern ior_aiori_t daos_aiori;
extern ior_aiori_t dfs_aiori;
void aiori_initialize(IOR_test_t *th);
void aiori_finalize(IOR_test_t *th);
const ior_aiori_t *aiori_select (const char *api);
int aiori_count (void);
void aiori_supported_apis(char * APIs);
const char *aiori_default (void);
/* some generic POSIX-based backend calls */
char * aiori_get_version();
int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param);
int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param);
int aiori_posix_rmdir (const char *path, IOR_param_t * param);
int aiori_posix_access (const char *path, int mode, IOR_param_t * param);
int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param);
void *POSIX_Create(char *testFileName, IOR_param_t * param);
void *POSIX_Open(char *testFileName, IOR_param_t * param);
IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName);
void POSIX_Delete(char *testFileName, IOR_param_t * param);
void POSIX_Close(void *fd, IOR_param_t * param);
/* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */
void MPIIO_Delete(char *testFileName, IOR_param_t * param);
IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName);
void *POSIX_Create(char *testFileName, IOR_param_t *test);
void *POSIX_Open(char *testFileName, IOR_param_t *test);
void POSIX_Close(void *fd, IOR_param_t *test);
void POSIX_Delete(char *testFileName, IOR_param_t *test);
void POSIX_SetVersion(IOR_param_t *test);
IOR_offset_t POSIX_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
char *testFileName);
int MPIIO_Access(const char *, int, IOR_param_t *);
#endif /* not _AIORI_H */

40
src/ior-internal.h Normal file
View File

@ -0,0 +1,40 @@
/*
* This file contains header information for support code that is only used within IOR.
* For code shared across benchmarks, see utilities.h
*/
#ifndef _IOR_INTERNAL_H
#define _IOR_INTERNAL_H
/* Part of ior-output.c — internal output/reporting helpers shared only
 * within the ior executable (see utilities.h for cross-benchmark code). */
void PrintEarlyHeader();
void PrintHeader(int argc, char **argv);
void ShowTestStart(IOR_param_t *params);
void ShowTestEnd(IOR_test_t *tptr);
void ShowSetup(IOR_param_t *params);
void PrintRepeatEnd();
void PrintRepeatStart();
void PrintShortSummary(IOR_test_t * test);
void PrintLongSummaryAllTests(IOR_test_t *tests_head);
void PrintLongSummaryHeader();
void PrintLongSummaryOneTest(IOR_test_t *test);
void DisplayFreespace(IOR_param_t * test);
void GetTestFileName(char *, IOR_param_t *);
void PrintRemoveTiming(double start, double finish, int rep);
void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep);
void PrintTestEnds();
void PrintTableHeader();
/* End of ior-output */
/* Aggregate statistics over per-repetition measurements (see
 * bw_values()/ops_values() in ior-output.c, which allocate the per-rep
 * array in the same malloc block and point val at it). */
struct results {
double min;     /* smallest per-repetition value */
double max;     /* largest per-repetition value */
double mean;    /* arithmetic mean over repetitions */
double var;     /* population variance */
double sd;      /* standard deviation (sqrt of var) */
double sum;     /* running sum used to compute mean */
double *val;    /* per-repetition values; storage follows the struct */
};
#endif

6
src/ior-main.c Normal file
View File

@ -0,0 +1,6 @@
#include "ior.h"
/* Thin executable entry point: all program logic lives in ior_main(). */
int main(int argc, char **argv)
{
return ior_main(argc, argv);
}

823
src/ior-output.c Normal file
View File

@ -0,0 +1,823 @@
#ifndef _WIN32
# include <sys/utsname.h> /* uname() */
#endif
#include <math.h>
#include <stddef.h> /* needed for offsetof on some compilers */
#include "ior.h"
#include "ior-internal.h"
#include "utilities.h"
extern char **environ;
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals);
static struct results *ops_values(int reps, IOR_results_t * measured, int offset, IOR_offset_t transfer_size, double *vals);
static double mean_of_array_of_doubles(double *values, int len);
static void PPDouble(int leftjustify, double number, char *append);
static void PrintNextToken();
/* Emit the column header for per-repetition results; only the
 * human-readable (default) output format has a table header. */
void PrintTableHeader(){
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n");
fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n");
fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n");
}
}
/* Current JSON nesting depth (one two-space level per section). */
static int indent = 0;
/* Set after a value is emitted; tells the next emitter that a JSON
 * separator (", \n") is needed before it prints. */
static int needNextToken = 0;

/* Emit leading whitespace for the current nesting depth (JSON only). */
static void PrintIndent(){
if(outputFormat != OUTPUT_JSON){
return;
}
for(int i=0; i < indent; i++){
fprintf(out_resultfile, " ");
}
}
/* Begin a key/value pair whose value the caller will print directly to
 * out_resultfile; must be closed with PrintKeyValEnd().
 * CSV format intentionally emits nothing here. */
static void PrintKeyValStart(char * key){
PrintNextToken();
if (outputFormat == OUTPUT_DEFAULT){
PrintIndent();
fprintf(out_resultfile, "%-20s: ", key);
return;
}
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": \"", key);
}else if(outputFormat == OUTPUT_CSV){
}
}
/* Emit the pending JSON separator (comma + newline) if the previous
 * emitter flagged one, then indent to the current nesting depth. */
static void PrintNextToken(){
if(needNextToken){
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, ", \n");
}
}
PrintIndent();
}
/* Close a pair opened with PrintKeyValStart(): closing quote for JSON,
 * newline for default format; flags that a separator is now pending. */
static void PrintKeyValEnd(){
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"");
}
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n");
}
needNextToken = 1;
}
/* Emit a complete string key/value pair in the active output format.
 * NOTE: mutates the caller's buffer — a single trailing '\n' in value
 * is stripped in place so it cannot break the one-line formats. */
static void PrintKeyVal(char * key, char * value){
if(value != NULL && value[0] != 0 && value[strlen(value) -1 ] == '\n'){
// remove \n
value[strlen(value) -1 ] = 0;
}
PrintNextToken();
needNextToken = 1;
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-20s: %s\n", key, value);
return;
}
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": \"%s\"", key, value);
}else if(outputFormat == OUTPUT_CSV){
fprintf(out_resultfile, "%s", value);
}
}
/* Emit a key with a floating-point value (4 decimal places) in the
 * active output format; unknown formats emit nothing. */
static void PrintKeyValDouble(char * key, double value){
        PrintNextToken();
        needNextToken = 1;

        switch (outputFormat) {
        case OUTPUT_DEFAULT:
                fprintf(out_resultfile, "%-20s: %.4f\n", key, value);
                break;
        case OUTPUT_JSON:
                fprintf(out_resultfile, "\"%s\": %.4f", key, value);
                break;
        case OUTPUT_CSV:
                fprintf(out_resultfile, "%.4f", value);
                break;
        default:
                break;
        }
}
/* Emit a key with a 64-bit integer value in the active output format;
 * cast through long long so the %lld specifier is always valid. */
static void PrintKeyValInt(char * key, int64_t value){
PrintNextToken();
needNextToken = 1;
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-20s: %lld\n", key, (long long) value);
return;
}
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": %lld", key, (long long) value);
}else if(outputFormat == OUTPUT_CSV){
fprintf(out_resultfile, "%lld", (long long) value);
}
}
/* Open an anonymous section ("{" in JSON) and deepen the indent level;
 * pair with PrintEndSection(). */
static void PrintStartSection(){
PrintNextToken();
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
PrintIndent();
fprintf(out_resultfile, "{\n");
}
indent++;
}
/* Open a named section: a JSON object keyed by 'key', or a plain
 * heading line in the default format; pair with PrintEndSection(). */
static void PrintNamedSectionStart(char * key){
PrintNextToken();
needNextToken = 0;
indent++;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": {\n", key);
}else if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n%s: \n", key);
}
}
/* Open a named array: a JSON array keyed by 'key', or a plain heading
 * line in the default format; pair with PrintArrayEnd(). */
static void PrintNamedArrayStart(char * key){
PrintNextToken();
needNextToken = 0;
indent++;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": [\n", key);
}else if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n%s: \n", key);
}
}
/* Close the innermost section ("}" in JSON), pop one indent level and
 * flag that a separator is pending before the next sibling. */
static void PrintEndSection(){
indent--;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\n");
PrintIndent();
fprintf(out_resultfile, "}\n");
}
needNextToken = 1;
}
/* Open an anonymous JSON array; no-op for the other formats.
 * NOTE(review): unlike PrintNamedArrayStart(), this does not bump
 * 'indent' even though PrintArrayEnd() decrements it — verify the
 * pairing used by callers keeps the indent counter balanced. */
static void PrintArrayStart(){
PrintNextToken();
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "[ ");
}
}
/* Open a named JSON array without emitting a default-format heading
 * (contrast with the non-static PrintNamedArrayStart() above).
 * NOTE(review): does not increment 'indent'; see PrintArrayStart(). */
static void PrintArrayNamedStart(char * key){
PrintNextToken();
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": [\n", key);
}
}
/* Close the innermost array ("]" in JSON), pop one indent level and
 * flag that a separator is pending. */
static void PrintArrayEnd(){
indent--;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "]\n");
}
needNextToken = 1;
}
/* Close the per-repetition results array (see PrintRepeatStart()). */
void PrintRepeatEnd(){
PrintArrayEnd();
}
/* Open the per-repetition results array; the default (tabular) format
 * needs no array delimiters, so it returns early. */
void PrintRepeatStart(){
if( outputFormat == OUTPUT_DEFAULT){
return;
}
PrintArrayStart();
}
/* Close the current test's output section; only rank 0 (at sufficient
 * verbosity) also records the finish timestamp before closing. */
void PrintTestEnds(){
if (rank != 0 || verbose < VERBOSE_0) {
PrintEndSection();
return;
}

PrintKeyVal("Finished", CurrentTimeString());
PrintEndSection();
}
/* Print one reduced (across-tasks) result row for a single repetition.
 * access       - WRITE or read phase indicator
 * bw           - aggregate bandwidth in bytes/sec
 * diff_subset  - [0]=open, [1]=write/read, [2]=close elapsed seconds
 * totalTime    - total elapsed seconds for the phase
 * rep          - repetition number */
void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep){
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-10s", access == WRITE ? "write" : "read");
PPDouble(1, bw / MEBIBYTE, " ");
PPDouble(1, (double)test->params.blockSize / KIBIBYTE, " ");
PPDouble(1, (double)test->params.transferSize / KIBIBYTE, " ");
PPDouble(1, diff_subset[0], " ");
PPDouble(1, diff_subset[1], " ");
PPDouble(1, diff_subset[2], " ");
PPDouble(1, totalTime, " ");
fprintf(out_resultfile, "%-4d\n", rep);
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("access", access == WRITE ? "write" : "read");
PrintKeyValDouble("bwMiB", bw / MEBIBYTE);
PrintKeyValDouble("blockKiB", (double)test->params.blockSize / KIBIBYTE);
PrintKeyValDouble("xferKiB", (double)test->params.transferSize / KIBIBYTE);
PrintKeyValDouble("openTime", diff_subset[0]);
PrintKeyValDouble("wrRdTime", diff_subset[1]);
PrintKeyValDouble("closeTime", diff_subset[2]);
PrintKeyValDouble("totalTime", totalTime);
PrintEndSection();
}
fflush(out_resultfile);
}
/*
* Message to print immediately after MPI_Init so we know that
* ior has started.
*/
/* Print the version banner immediately after MPI_Init so a hang later
 * on is still preceded by visible output; rank 0 only. */
void PrintEarlyHeader()
{
if (rank != 0)
return;

fprintf(out_resultfile, "IOR-" META_VERSION ": MPI Coordinated Test of Parallel I/O\n");
fflush(out_resultfile);
}
/* Print the run header (command line, machine, timer info, optional
 * environment dump) and open the "tests" output array; rank 0 only. */
void PrintHeader(int argc, char **argv)
{
struct utsname unamebuf;
int i;

if (rank != 0)
return;

PrintStartSection();
PrintKeyVal("Began", CurrentTimeString());
/* reproduce the full command line verbatim */
PrintKeyValStart("Command line");
fprintf(out_resultfile, "%s", argv[0]);
for (i = 1; i < argc; i++) {
fprintf(out_resultfile, " %s", argv[i]);
}
PrintKeyValEnd();
if (uname(&unamebuf) != 0) {
EWARN("uname failed");
PrintKeyVal("Machine", "Unknown");
} else {
PrintKeyValStart("Machine");
fprintf(out_resultfile, "%s %s", unamebuf.sysname,
unamebuf.nodename);
if (verbose >= VERBOSE_2) {
fprintf(out_resultfile, " %s %s %s", unamebuf.release,
unamebuf.version, unamebuf.machine);
}
PrintKeyValEnd();
}

/* report which clock is used; skew matters when comparing ranks */
#ifdef _NO_MPI_TIMER
if (verbose >= VERBOSE_2)
fprintf(out_logfile, "Using unsynchronized POSIX timer\n");
#else /* not _NO_MPI_TIMER */
if (MPI_WTIME_IS_GLOBAL) {
if (verbose >= VERBOSE_2)
fprintf(out_logfile, "Using synchronized MPI timer\n");
} else {
if (verbose >= VERBOSE_2)
fprintf(out_logfile, "Using unsynchronized MPI timer\n");
}
#endif /* _NO_MPI_TIMER */
if (verbose >= VERBOSE_1) {
fprintf(out_logfile, "Start time skew across all tasks: %.02f sec\n",
wall_clock_deviation);
}
if (verbose >= VERBOSE_3) { /* show env */
fprintf(out_logfile, "STARTING ENVIRON LOOP\n");
for (i = 0; environ[i] != NULL; i++) {
fprintf(out_logfile, "%s\n", environ[i]);
}
fprintf(out_logfile, "ENDING ENVIRON LOOP\n");
}

PrintArrayNamedStart("tests");
fflush(out_resultfile);
fflush(out_logfile);
}
/*
* Print header information for test output.
*/
/* Print header information for one test: ID, start time, free space on
 * the target filesystem, and (verbose >= 3 or JSON) a full dump of the
 * test parameters. */
void ShowTestStart(IOR_param_t *test)
{
PrintStartSection();
PrintKeyValInt("TestID", test->id);
PrintKeyVal("StartTime", CurrentTimeString());
/* if pvfs2:, then skip */
if (strcasecmp(test->api, "DFS") &&
Regex(test->testFileName, "^[a-z][a-z].*:") == 0) {
DisplayFreespace(test);
}

if (verbose >= VERBOSE_3 || outputFormat == OUTPUT_JSON) {
/* one-letter codes for dataPacketType: generic/timestamp/offset/incompressible */
char* data_packets[] = {"g","t","o","i"};

PrintNamedSectionStart("Parameters");
PrintKeyValInt("testID", test->id);
PrintKeyValInt("refnum", test->referenceNumber);
PrintKeyVal("api", test->api);
PrintKeyVal("platform", test->platform);
PrintKeyVal("testFileName", test->testFileName);
PrintKeyVal("hintsFileName", test->hintsFileName);
PrintKeyValInt("deadlineForStonewall", test->deadlineForStonewalling);
PrintKeyValInt("stoneWallingWearOut", test->stoneWallingWearOut);
PrintKeyValInt("maxTimeDuration", test->maxTimeDuration);
PrintKeyValInt("outlierThreshold", test->outlierThreshold);
PrintKeyVal("options", test->options);
PrintKeyValInt("nodes", test->nodes);
PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask);
PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode);
PrintKeyValInt("tasksPerNode", tasksPerNode);
PrintKeyValInt("repetitions", test->repetitions);
PrintKeyValInt("multiFile", test->multiFile);
PrintKeyValInt("interTestDelay", test->interTestDelay);
PrintKeyValInt("fsync", test->fsync);
PrintKeyValInt("fsyncperwrite", test->fsyncPerWrite);
PrintKeyValInt("useExistingTestFile", test->useExistingTestFile);
PrintKeyValInt("showHints", test->showHints);
PrintKeyValInt("uniqueDir", test->uniqueDir);
PrintKeyValInt("individualDataSets", test->individualDataSets);
PrintKeyValInt("singleXferAttempt", test->singleXferAttempt);
PrintKeyValInt("readFile", test->readFile);
PrintKeyValInt("writeFile", test->writeFile);
PrintKeyValInt("filePerProc", test->filePerProc);
PrintKeyValInt("reorderTasks", test->reorderTasks);
PrintKeyValInt("reorderTasksRandom", test->reorderTasksRandom);
PrintKeyValInt("reorderTasksRandomSeed", test->reorderTasksRandomSeed);
PrintKeyValInt("randomOffset", test->randomOffset);
PrintKeyValInt("checkWrite", test->checkWrite);
PrintKeyValInt("checkRead", test->checkRead);
PrintKeyValInt("preallocate", test->preallocate);
PrintKeyValInt("useFileView", test->useFileView);
PrintKeyValInt("setAlignment", test->setAlignment);
PrintKeyValInt("storeFileOffset", test->storeFileOffset);
PrintKeyValInt("useSharedFilePointer", test->useSharedFilePointer);
PrintKeyValInt("useO_DIRECT", test->useO_DIRECT);
PrintKeyValInt("useStridedDatatype", test->useStridedDatatype);
PrintKeyValInt("keepFile", test->keepFile);
PrintKeyValInt("keepFileWithError", test->keepFileWithError);
PrintKeyValInt("quitOnError", test->quitOnError);
PrintKeyValInt("verbose", verbose);
PrintKeyVal("data packet type", data_packets[test->dataPacketType]);
PrintKeyValInt("setTimeStampSignature/incompressibleSeed", test->setTimeStampSignature); /* Seed value was copied into setTimeStampSignature as well */
PrintKeyValInt("collective", test->collective);
PrintKeyValInt("segmentCount", test->segmentCount);
#ifdef HAVE_GPFS_FCNTL_H
PrintKeyValInt("gpfsHintAccess", test->gpfs_hint_access);
PrintKeyValInt("gpfsReleaseToken", test->gpfs_release_token);
#endif
PrintKeyValInt("transferSize", test->transferSize);
PrintKeyValInt("blockSize", test->blockSize);
PrintEndSection();
}
fflush(out_resultfile);
}
/* Report stonewalling wear-out state at test end: rank 0 either stores
 * the accessed-pair count to the status file or logs it. */
void ShowTestEnd(IOR_test_t *tptr){
if(rank == 0 && tptr->params.stoneWallingWearOut){
if (tptr->params.stoneWallingStatusFile){
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
}else{
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
}
}
PrintEndSection();
}
/*
* Show simple test output with max results for iterations.
*/
/* Print the "Options" section describing the configured test setup,
 * then open the "Results" output array. */
void ShowSetup(IOR_param_t *params)
{
if (params->debug) {
fprintf(out_logfile, "\n*** DEBUG MODE ***\n");
fprintf(out_logfile, "*** %s ***\n\n", params->debug);
}
PrintNamedSectionStart("Options");
PrintKeyVal("api", params->api);
PrintKeyVal("apiVersion", params->apiVersion);
PrintKeyVal("test filename", params->testFileName);
PrintKeyVal("access", params->filePerProc ? "file-per-process" : "single-shared-file");
PrintKeyVal("type", params->collective ? "collective" : "independent");
PrintKeyValInt("segments", params->segmentCount);
PrintKeyVal("ordering in a file", params->randomOffset ? "random" : "sequential");
/* exactly one of the three inter-file ordering modes is reported */
if (params->reorderTasks == FALSE && params->reorderTasksRandom == FALSE) {
PrintKeyVal("ordering inter file", "no tasks offsets");
}
if (params->reorderTasks == TRUE) {
PrintKeyVal("ordering inter file", "constant task offset");
PrintKeyValInt("task offset", params->taskPerNodeOffset);
}
if (params->reorderTasksRandom == TRUE) {
PrintKeyVal("ordering inter file", "random task offset");
PrintKeyValInt("task offset", params->taskPerNodeOffset);
PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed);
}
PrintKeyValInt("tasks", params->numTasks);
PrintKeyValInt("clients per node", params->tasksPerNode);
if (params->memoryPerTask != 0){
PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO));
}
if (params->memoryPerNode != 0){
PrintKeyVal("memoryPerNode", HumanReadable(params->memoryPerNode, BASE_TWO));
}
PrintKeyValInt("repetitions", params->repetitions);
PrintKeyVal("xfersize", HumanReadable(params->transferSize, BASE_TWO));
PrintKeyVal("blocksize", HumanReadable(params->blockSize, BASE_TWO));
PrintKeyVal("aggregate filesize", HumanReadable(params->expectedAggFileSize, BASE_TWO));
#ifdef HAVE_LUSTRE_LUSTRE_USER_H
if (params->lustre_set_striping) {
PrintKeyVal("Lustre stripe size", ((params->lustre_stripe_size == 0) ? "Use default" :
HumanReadable(params->lustre_stripe_size, BASE_TWO)));
PrintKeyVal("stripe count", (params->lustre_stripe_count == 0 ? "Use default" : HumanReadable(params->lustre_stripe_count, BASE_TWO)));
}
#endif /* HAVE_LUSTRE_LUSTRE_USER_H */
if (params->deadlineForStonewalling > 0) {
PrintKeyValInt("stonewallingTime", params->deadlineForStonewalling);
PrintKeyValInt("stoneWallingWearOut", params->stoneWallingWearOut );
}
PrintEndSection();

PrintNamedArrayStart("Results");
fflush(out_resultfile);
}
/*
* Summarize results
*
* operation is typically "write" or "read"
*/
/*
 * Summarize results
 *
 * operation is typically "write" or "read".
 * times_offset is a byte offset into IOR_results_t (from offsetof) that
 * selects which elapsed-time field (writeTime/readTime) to aggregate.
 * Rank 0 only; output format follows outputFormat.
 */
static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation)
{
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
struct results *bw;
struct results *ops;
int reps;

if (rank != 0 || verbose < VERBOSE_0)
return;

reps = params->repetitions;

/* gather the selected per-repetition time via the byte offset */
/* NOTE(review): malloc return is not checked here */
double * times = malloc(sizeof(double)* reps);
for(int i=0; i < reps; i++){
times[i] = *(double*)((char*) & results[i] + times_offset);
}

bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times);
ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times);

if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-9s ", operation);
fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->sd / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", ops->max);
fprintf(out_resultfile, "%10.2f ", ops->min);
fprintf(out_resultfile, "%10.2f ", ops->mean);
fprintf(out_resultfile, "%10.2f ", ops->sd);
fprintf(out_resultfile, "%10.5f ", mean_of_array_of_doubles(times, reps));
fprintf(out_resultfile, "%5d ", params->id);
fprintf(out_resultfile, "%6d ", params->numTasks);
fprintf(out_resultfile, "%3d ", params->tasksPerNode);
fprintf(out_resultfile, "%4d ", params->repetitions);
fprintf(out_resultfile, "%3d ", params->filePerProc);
fprintf(out_resultfile, "%5d ", params->reorderTasks);
fprintf(out_resultfile, "%8d ", params->taskPerNodeOffset);
fprintf(out_resultfile, "%9d ", params->reorderTasksRandom);
fprintf(out_resultfile, "%4d ", params->reorderTasksRandomSeed);
fprintf(out_resultfile, "%6lld ", params->segmentCount);
fprintf(out_resultfile, "%8lld ", params->blockSize);
fprintf(out_resultfile, "%8lld ", params->transferSize);
fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%3s ", params->api);
fprintf(out_resultfile, "%6d", params->referenceNumber);
fprintf(out_resultfile, "\n");
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("operation", operation);
PrintKeyVal("API", params->api);
PrintKeyValInt("TestID", params->id);
PrintKeyValInt("ReferenceNumber", params->referenceNumber);
PrintKeyValInt("segmentCount", params->segmentCount);
PrintKeyValInt("blockSize", params->blockSize);
PrintKeyValInt("transferSize", params->transferSize);
PrintKeyValInt("numTasks", params->numTasks);
PrintKeyValInt("tasksPerNode", params->tasksPerNode);
PrintKeyValInt("repetitions", params->repetitions);
PrintKeyValInt("filePerProc", params->filePerProc);
PrintKeyValInt("reorderTasks", params->reorderTasks);
PrintKeyValInt("taskPerNodeOffset", params->taskPerNodeOffset);
PrintKeyValInt("reorderTasksRandom", params->reorderTasksRandom);
PrintKeyValInt("reorderTasksRandomSeed", params->reorderTasksRandomSeed);
PrintKeyValInt("segmentCount", params->segmentCount);
PrintKeyValInt("blockSize", params->blockSize);
PrintKeyValInt("transferSize", params->transferSize);
PrintKeyValDouble("bwMaxMIB", bw->max / MEBIBYTE);
PrintKeyValDouble("bwMinMIB", bw->min / MEBIBYTE);
PrintKeyValDouble("bwMeanMIB", bw->mean / MEBIBYTE);
PrintKeyValDouble("bwStdMIB", bw->sd / MEBIBYTE);
PrintKeyValDouble("OPsMax", ops->max);
PrintKeyValDouble("OPsMin", ops->min);
PrintKeyValDouble("OPsMean", ops->mean);
PrintKeyValDouble("OPsSD", ops->sd);
PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps));
PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE);
PrintEndSection();
}else if (outputFormat == OUTPUT_CSV){

}

fflush(out_resultfile);

free(bw);
free(ops);
free(times);
}
/* Print the long summary rows for one test: a write row and/or a read
 * row depending on which phases were enabled. */
void PrintLongSummaryOneTest(IOR_test_t *test)
{
IOR_param_t *params = &test->params;

if (params->writeFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write");
if (params->readFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read");
}
/* Print the column header for the long summary table (default format
 * only; JSON/CSV need no header). Rank 0 only. */
void PrintLongSummaryHeader()
{
if (rank != 0 || verbose < VERBOSE_0)
return;
if(outputFormat != OUTPUT_DEFAULT){
return;
}

fprintf(out_resultfile, "\n");
fprintf(out_resultfile, "%-9s %10s %10s %10s %10s %10s %10s %10s %10s %10s",
"Operation", "Max(MiB)", "Min(MiB)", "Mean(MiB)", "StdDev",
"Max(OPs)", "Min(OPs)", "Mean(OPs)", "StdDev",
"Mean(s)");
fprintf(out_resultfile, " Test# #Tasks tPN reps fPP reord reordoff reordrand seed"
" segcnt ");
fprintf(out_resultfile, "%8s %8s %9s %5s", " blksiz", "xsize","aggs(MiB)", "API");
fprintf(out_resultfile, " RefNum\n");
}
/* Walk the linked list of tests and print the long summary table for
 * all of them; rank 0 only. */
void PrintLongSummaryAllTests(IOR_test_t *tests_head)
{
IOR_test_t *tptr;
if (rank != 0 || verbose < VERBOSE_0)
return;

PrintArrayEnd();

if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n");
fprintf(out_resultfile, "Summary of all tests:");
}else if (outputFormat == OUTPUT_JSON){
PrintNamedArrayStart("summary");
}else if (outputFormat == OUTPUT_CSV){

}

PrintLongSummaryHeader();
for (tptr = tests_head; tptr != NULL; tptr = tptr->next) {
PrintLongSummaryOneTest(tptr);
}

PrintArrayEnd();
}
/*
 * Print the short "Max Write / Max Read" bandwidth summary for a test.
 * Rank 0 only; bandwidth is aggFileSizeForBW / elapsed time per rep.
 *
 * Fix: max_write/max_read were re-seeded with results[0].writeTime /
 * readTime — elapsed times (seconds), not bandwidths (bytes/sec) — so
 * a mismatched unit could inflate the reported maximum.  They now stay
 * at 0.0 so only computed bandwidths are considered; the loop already
 * covers repetition 0.
 */
void PrintShortSummary(IOR_test_t * test)
{
        IOR_param_t *params = &test->params;
        IOR_results_t *results = test->results;
        double max_write = 0.0;   /* best write bandwidth seen, bytes/sec */
        double max_read = 0.0;    /* best read bandwidth seen, bytes/sec */
        double bw;
        int reps;
        int i;

        if (rank != 0 || verbose < VERBOSE_0)
                return;

        PrintArrayEnd();

        reps = params->repetitions;
        for (i = 0; i < reps; i++) {
                bw = (double)results[i].aggFileSizeForBW / results[i].writeTime;
                max_write = MAX(bw, max_write);
                bw = (double)results[i].aggFileSizeForBW / results[i].readTime;
                max_read = MAX(bw, max_read);
        }

        if(outputFormat == OUTPUT_DEFAULT){
                if (params->writeFile) {
                        fprintf(out_resultfile, "Max Write: %.2f MiB/sec (%.2f MB/sec)\n",
                                max_write/MEBIBYTE, max_write/MEGABYTE);
                }
                if (params->readFile) {
                        fprintf(out_resultfile, "Max Read: %.2f MiB/sec (%.2f MB/sec)\n",
                                max_read/MEBIBYTE, max_read/MEGABYTE);
                }
        }else if (outputFormat == OUTPUT_JSON){
                PrintNamedSectionStart("max");
                if (params->writeFile) {
                        PrintKeyValDouble("writeMiB", max_write/MEBIBYTE);
                        PrintKeyValDouble("writeMB", max_write/MEGABYTE);
                }
                if (params->readFile) {
                        PrintKeyValDouble("readMiB", max_read/MEBIBYTE);
                        PrintKeyValDouble("readMB", max_read/MEGABYTE);
                }
                PrintEndSection();
        }
}
/*
* Display freespace (df).
*/
/*
 * Display freespace (df) for the directory that will hold the test
 * file: strips the last path component of the generated file name and
 * falls back to "." when the name has no directory part.
 */
void DisplayFreespace(IOR_param_t * test)
{
char fileName[MAX_STR] = { 0 };
int i;
int directoryFound = FALSE;

/* get outfile name */
GetTestFileName(fileName, test);

/* get directory for outfile */
i = strlen(fileName);
while (i-- > 0) {
if (fileName[i] == '/') {
fileName[i] = '\0';
directoryFound = TRUE;
break;
}
}

/* if no directory/, use '.' */
if (directoryFound == FALSE) {
strcpy(fileName, ".");
}

ShowFileSystemSize(fileName);
}
/* Print the timing row for the file-removal phase of repetition 'rep';
 * rank 0 only.  Default format pads the bandwidth/size columns with
 * dashes since removal has no such metrics. */
void PrintRemoveTiming(double start, double finish, int rep)
{
if (rank != 0 || verbose < VERBOSE_0)
return;

if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "remove - - - - - - ");
PPDouble(1, finish-start, " ");
fprintf(out_resultfile, "%-4d\n", rep);
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("access", "remove");
PrintKeyValDouble("totalTime", finish - start);
PrintEndSection();
}
}
/*
* Pretty Print a Double. The First parameter is a flag determining if left
* justification should be used. The third parameter a null-terminated string
* that should be appended to the number field.
*/
/*
 * Pretty Print a Double. The First parameter is a flag determining if left
 * justification should be used. The third parameter a null-terminated string
 * that should be appended to the number field.
 * Precision adapts to magnitude (6 digits below 1, 2 up to an hour,
 * none beyond); negative values print as a dash placeholder.
 */
static void PPDouble(int leftjustify, double number, char *append)
{
char format[16];
int width = 10;
int precision;

if (number < 0) {
fprintf(out_resultfile, " - %s", append);
return;
}

if (number < 1)
precision = 6;
else if (number < 3600)
precision = 2;
else
precision = 0;

/* build e.g. "%-10.2f%s" at runtime from width/precision */
sprintf(format, "%%%s%d.%df%%s",
leftjustify ? "-" : "",
width, precision);

fprintf(out_resultfile, format, number, append);
}
/*
 * Compute per-repetition bandwidths (bytes transferred / elapsed time)
 * and their min/max/mean/variance/sd.
 *
 * offset - byte offset (from offsetof) of the IOR_offset_t size field
 *          inside IOR_results_t to read from each repetition
 * vals   - per-repetition elapsed times (seconds)
 *
 * Returns a malloc'd struct results; the val array lives in the same
 * allocation, so a single free() releases everything.
 */
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals)
{
struct results *r;
int i;

/* struct and per-rep array share one allocation */
r = (struct results *) malloc(sizeof(struct results) + (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];

for (i = 0; i < reps; i++, measured++) {
/* read the size field selected by 'offset' from this repetition */
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) / vals[i];

if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;   /* population variance */
r->sd = sqrt(r->var);

return r;
}
/*
 * Compute per-repetition operation rates (bytes / transfer_size /
 * elapsed time) and their min/max/mean/variance/sd; the ops-rate twin
 * of bw_values() above.
 *
 * Returns a malloc'd struct results; the val array lives in the same
 * allocation, so a single free() releases everything.
 */
static struct results *ops_values(int reps, IOR_results_t * measured, int offset,
IOR_offset_t transfer_size,
double *vals)
{
struct results *r;
int i;

/* struct and per-rep array share one allocation */
r = (struct results *)malloc(sizeof(struct results)
+ (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];

for (i = 0; i < reps; i++, measured++) {
/* ops/sec = (bytes / bytes-per-op) / seconds */
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset))
/ transfer_size / vals[i];

if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;   /* population variance */
r->sd = sqrt(r->var);

return r;
}
/*
 * Arithmetic mean of the first 'len' entries of 'values'.
 *
 * Fix: returns 0.0 for len <= 0 instead of dividing by zero (which
 * previously yielded NaN/inf and poisoned downstream summary output).
 */
static double mean_of_array_of_doubles(double *values, int len)
{
        double tot = 0.0;
        int i;

        if (len <= 0)
                return 0.0;

        for (i = 0; i < len; i++) {
                tot += values[i];
        }
        return tot / len;
}

1491
src/ior.c

File diff suppressed because it is too large Load Diff

View File

@ -29,26 +29,14 @@
typedef void* hdfsFS; /* unused, but needs a type */
#endif
#ifdef USE_S3_AIORI
# include <curl/curl.h>
# include "aws4c.h"
#ifdef USE_RADOS_AIORI
# include <rados/librados.h>
#else
typedef void CURL; /* unused, but needs a type */
typedef void IOBuf; /* unused, but needs a type */
typedef void *rados_t;
typedef void *rados_ioctx_t;
#endif
#include "iordef.h"
extern int numTasksWorld;
extern int rank;
extern int rankOffset;
extern int tasksPerNode;
extern int verbose;
extern MPI_Comm testComm;
/******************** DATA Packet Type ***************************************/
/* Holds the types of data packets: generic, offset, timestamp, incompressible */
@ -91,17 +79,18 @@ typedef struct IO_BUFFERS
typedef struct
{
char debug[MAX_STR]; /* debug info string */
const void * backend;
char * debug; /* debug info string */
unsigned int mode; /* file permissions */
unsigned int openFlags; /* open flags (see also <open>) */
int referenceNumber; /* user supplied reference number */
char api[MAX_STR]; /* API for I/O */
char apiVersion[MAX_STR]; /* API version */
char platform[MAX_STR]; /* platform type */
char testFileName[MAXPATHLEN]; /* full name for test */
char testFileName_fppReadCheck[MAXPATHLEN];/* filename for fpp read check */
char hintsFileName[MAXPATHLEN]; /* full name for hints file */
char options[MAXPATHLEN]; /* options string */
char * api; /* API for I/O */
char * apiVersion; /* API version */
char * platform; /* platform type */
char * testFileName; /* full name for test */
char * testFileName_fppReadCheck;/* filename for fpp read check */
char * hintsFileName; /* full name for hints file */
char * options; /* options string */
int numTasks; /* number of tasks for test */
int nodes; /* number of nodes for test */
int tasksPerNode; /* number of tasks per node */
@ -135,14 +124,15 @@ typedef struct
int useStridedDatatype; /* put strided access into datatype */
int useO_DIRECT; /* use O_DIRECT, bypassing I/O buffers */
int showHints; /* show hints */
int showHelp; /* show options and help */
int summary_every_test; /* flag to print summary every test, not just at end */
int uniqueDir; /* use unique directory for each fpp */
int useExistingTestFile; /* do not delete test file before access */
int storeFileOffset; /* use file offset as stored signature */
int deadlineForStonewalling; /* max time in seconds to run any test phase */
int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */
int stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
char * stoneWallingStatusFile;
int maxTimeDuration; /* max time in minutes to run each test */
int outlierThreshold; /* warn on outlier N seconds from mean */
int verbose; /* verbosity */
@ -150,7 +140,7 @@ typedef struct
unsigned int timeStampSignatureValue; /* value for time stamp signature */
void * fd_fppReadCheck; /* additional fd for fpp read check */
int randomSeed; /* random seed for write/read check */
int incompressibleSeed; /* random seed for incompressible file creation */
unsigned int incompressibleSeed; /* random seed for incompressible file creation */
int randomOffset; /* access is to random offsets */
size_t memoryPerTask; /* additional memory used per task */
size_t memoryPerNode; /* additional memory used per node */
@ -175,25 +165,20 @@ typedef struct
IOR_offset_t setAlignment; /* alignment in bytes */
/* HDFS variables */
char hdfs_user[MAX_STR]; /* copied from ENV, for now */
char * hdfs_user; /* copied from ENV, for now */
const char* hdfs_name_node;
tPort hdfs_name_node_port; /* (uint16_t) */
hdfsFS hdfs_fs; /* file-system handle */
int hdfs_replicas; /* n block replicas. (0 gets default) */
int hdfs_block_size; /* internal blk-size. (0 gets default) */
/* REST/S3 variables */
// CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */
# define IOR_CURL_INIT 0x01 /* curl top-level inits were perfomed once? */
# define IOR_CURL_NOCONTINUE 0x02
# define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */
char curl_flags;
char* URI; /* "path" to target object */
IOBuf* io_buf; /* aws4c places parsed header values here */
IOBuf* etags; /* accumulate ETags for N:1 parts */
size_t part_number; /* multi-part upload increment (PER-RANK!) */
# define MAX_UPLOAD_ID_SIZE 256 /* seems to be 32, actually */
char UploadId[MAX_UPLOAD_ID_SIZE +1]; /* key for multi-part-uploads */
char* UploadId; /* key for multi-part-uploads */
/* RADOS variables */
rados_t rados_cluster; /* RADOS cluster handle */
rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */
/* NCMPI variables */
int var_id; /* variable id handle for data set */
@ -213,12 +198,6 @@ typedef struct
int beegfs_numTargets; /* number storage targets to use */
int beegfs_chunkSize; /* stripe pattern for new files */
/* daos variables */
char daosGroup[MAX_STR]; /* group name */
char daosPool[37]; /* pool UUID */
char daosPoolSvc[MAX_STR]; /* pool service ranks */
char daosCont[37]; /* Container UUID */
int id; /* test's unique ID */
int intraTestBarriers; /* barriers between open/op and op/close */
} IOR_param_t;
@ -226,25 +205,38 @@ typedef struct
/* each pointer is to an array, each of length equal to the number of
repetitions in the test */
typedef struct {
double *writeTime;
double *readTime;
double writeTime;
double readTime;
int errors;
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
IOR_offset_t *aggFileSizeFromStat;
IOR_offset_t *aggFileSizeFromXfer;
IOR_offset_t *aggFileSizeForBW;
double stonewall_time;
long long stonewall_min_data_accessed;
long long stonewall_avg_data_accessed;
IOR_offset_t aggFileSizeFromStat;
IOR_offset_t aggFileSizeFromXfer;
IOR_offset_t aggFileSizeForBW;
} IOR_results_t;
/* define the queuing structure for the test parameters */
typedef struct IOR_test_t {
IOR_param_t params;
IOR_results_t *results;
IOR_results_t *results; /* This is an array of reps times IOR_results_t */
struct IOR_test_t *next;
} IOR_test_t;
IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
void AllocResults(IOR_test_t *test);
void GetPlatformName(char *);
char * GetPlatformName();
void init_IOR_Param_t(IOR_param_t *p);
/*
* This function runs IOR given by command line, useful for testing
*/
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
/* Actual IOR Main function, renamed to allow library usage */
int ior_main(int argc, char **argv);
#endif /* !_IOR_H */

View File

@ -31,9 +31,7 @@
# include <Windows.h>
# include <io.h>
# include <direct.h>
# include "win/getopt.h"
# define MAXPATHLEN 1024
# define F_OK 00
# define W_OK 02
# define R_OK 04
@ -41,7 +39,7 @@
# define lseek _lseeki64
# define fsync _commit
# define mkdir(dir, mode) _mkdir(dir)
# define mkdir(dir, mode) _mkdir(dir)
# define strcasecmp _stricmp
# define strncasecmp _strnicmp
# define srandom srand
@ -63,6 +61,12 @@ extern int verbose; /* verbose output */
/*************************** D E F I N I T I O N S ****************************/
enum OutputFormat_t{
OUTPUT_DEFAULT,
OUTPUT_CSV,
OUTPUT_JSON
};
#ifndef FALSE
# define FALSE 0
#endif /* not FALSE */
@ -102,8 +106,8 @@ extern int verbose; /* verbose output */
#define VERBOSE_4 4
#define VERBOSE_5 5
#define MAX_STR 1024 /* max string length */
#define MAX_HINTS 16 /* max number of hints */
#define MAX_STR 1024 /* max string length */
#define MAX_HINTS 16 /* max number of hints */
#define MAX_RETRY 10000 /* max retries for POSIX xfer */
#ifndef PATH_MAX
#define PATH_MAX 4096
@ -211,7 +215,7 @@ struct utsname {
char nodename[257];
char release [16];
char version [16];
char machine [16];
char machine [16];
};
extern int uname(struct utsname *name);

556
src/list.h Normal file
View File

@ -0,0 +1,556 @@
/**
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* GPL HEADER END
*/
#ifndef __DAOS_LIST_H__
#define __DAOS_LIST_H__
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/
#define prefetch(a) ((void)a)
struct cfs_list_head {
struct cfs_list_head *next, *prev;
};
typedef struct cfs_list_head cfs_list_t;
#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) }
#define CFS_LIST_HEAD(name) \
cfs_list_t name = CFS_LIST_HEAD_INIT(name)
#define CFS_INIT_LIST_HEAD(ptr) do { \
(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)
/**
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
/**
 * Link @new in between @prev and @next.
 *
 * Internal helper: the caller must already know the two neighbouring
 * entries.  All four pointer stores are independent, so their order is
 * irrelevant.
 */
static inline void __cfs_list_add(cfs_list_t * new,
                                  cfs_list_t * prev,
                                  cfs_list_t * next)
{
        new->prev = prev;
        new->next = next;
        prev->next = new;
        next->prev = new;
}
/**
* Insert an entry at the start of a list.
* \param new new entry to be inserted
* \param head list to add it to
*
* Insert a new entry after the specified head.
* This is good for implementing stacks.
*/
static inline void cfs_list_add(cfs_list_t *new,
cfs_list_t *head)
{
__cfs_list_add(new, head, head->next);
}
/**
* Insert an entry at the end of a list.
* \param new new entry to be inserted
* \param head list to add it to
*
* Insert a new entry before the specified head.
* This is useful for implementing queues.
*/
static inline void cfs_list_add_tail(cfs_list_t *new,
cfs_list_t *head)
{
__cfs_list_add(new, head->prev, head);
}
/*
* Delete a list entry by making the prev/next entries
* point to each other.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
/*
 * Unlink the entry sitting between @prev and @next by wiring the two
 * neighbours directly to each other.  Internal helper: the caller must
 * already know both neighbours.
 */
static inline void __cfs_list_del(cfs_list_t *prev,
                                  cfs_list_t *next)
{
        prev->next = next;
        next->prev = prev;
}
/**
* Remove an entry from the list it is currently in.
* \param entry the entry to remove
* Note: list_empty(entry) does not return true after this, the entry is in an
* undefined state.
*/
static inline void cfs_list_del(cfs_list_t *entry)
{
__cfs_list_del(entry->prev, entry->next);
}
/**
* Remove an entry from the list it is currently in and reinitialize it.
* \param entry the entry to remove.
*/
static inline void cfs_list_del_init(cfs_list_t *entry)
{
__cfs_list_del(entry->prev, entry->next);
CFS_INIT_LIST_HEAD(entry);
}
/**
* Remove an entry from the list it is currently in and insert it at the start
* of another list.
* \param list the entry to move
* \param head the list to move it to
*/
static inline void cfs_list_move(cfs_list_t *list,
cfs_list_t *head)
{
__cfs_list_del(list->prev, list->next);
cfs_list_add(list, head);
}
/**
* Remove an entry from the list it is currently in and insert it at the end of
* another list.
* \param list the entry to move
* \param head the list to move it to
*/
static inline void cfs_list_move_tail(cfs_list_t *list,
cfs_list_t *head)
{
__cfs_list_del(list->prev, list->next);
cfs_list_add_tail(list, head);
}
/**
* Test whether a list is empty
* \param head the list to test.
*/
static inline int cfs_list_empty(cfs_list_t *head)
{
return head->next == head;
}
/**
* Test whether a list is empty and not being modified
* \param head the list to test
*
* Tests whether a list is empty _and_ checks that no other CPU might be
* in the process of modifying either member (next or prev)
*
* NOTE: using cfs_list_empty_careful() without synchronization
* can only be safe if the only activity that can happen
* to the list entry is cfs_list_del_init(). Eg. it cannot be used
* if another CPU could re-list_add() it.
*/
static inline int cfs_list_empty_careful(const cfs_list_t *head)
{
cfs_list_t *next = head->next;
return (next == head) && (next == head->prev);
}
/*
 * Internal helper for cfs_list_splice(): insert the whole chain of
 * @list (which must be non-empty) immediately after @head.  @list's own
 * head node is left dangling; callers reinitialise it if needed.
 */
static inline void __cfs_list_splice(cfs_list_t *list,
                                     cfs_list_t *head)
{
        cfs_list_t *first = list->next;   /* first real element of the donor chain */
        cfs_list_t *last  = list->prev;   /* last real element of the donor chain  */
        cfs_list_t *at    = head->next;   /* element the chain is spliced before   */

        head->next  = first;
        first->prev = head;
        last->next  = at;
        at->prev    = last;
}
/**
* Join two lists
* \param list the new list to add.
* \param head the place to add it in the first list.
*
* The contents of \a list are added at the start of \a head. \a list is in an
* undefined state on return.
*/
static inline void cfs_list_splice(cfs_list_t *list,
cfs_list_t *head)
{
if (!cfs_list_empty(list))
__cfs_list_splice(list, head);
}
/**
* Join two lists and reinitialise the emptied list.
* \param list the new list to add.
* \param head the place to add it in the first list.
*
* The contents of \a list are added at the start of \a head. \a list is empty
* on return.
*/
static inline void cfs_list_splice_init(cfs_list_t *list,
cfs_list_t *head)
{
if (!cfs_list_empty(list)) {
__cfs_list_splice(list, head);
CFS_INIT_LIST_HEAD(list);
}
}
/**
* Get the container of a list
* \param ptr the embedded list.
* \param type the type of the struct this is embedded in.
* \param member the member name of the list within the struct.
*/
#define cfs_list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
/**
* Iterate over a list
* \param pos the iterator
* \param head the list to iterate over
*
* Behaviour is undefined if \a pos is removed from the list in the body of the
* loop.
*/
#define cfs_list_for_each(pos, head) \
for (pos = (head)->next, prefetch(pos->next); pos != (head); \
pos = pos->next, prefetch(pos->next))
/**
* Iterate over a list safely
* \param pos the iterator
* \param n temporary storage
* \param head the list to iterate over
*
* This is safe to use if \a pos could be removed from the list in the body of
* the loop.
*/
#define cfs_list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)
/**
* Iterate over a list continuing after existing point
* \param pos the type * to use as a loop counter
* \param head the list head
* \param member the name of the list_struct within the struct
*/
#define cfs_list_for_each_entry_continue(pos, head, member) \
for (pos = cfs_list_entry(pos->member.next, typeof(*pos), member); \
prefetch(pos->member.next), &pos->member != (head); \
pos = cfs_list_entry(pos->member.next, typeof(*pos), member))
/**
* \defgroup hlist Hash List
* Double linked lists with a single pointer list head.
* Mostly useful for hash tables where the two pointer list head is too
* wasteful. You lose the ability to access the tail in O(1).
* @{
*/
typedef struct cfs_hlist_node {
struct cfs_hlist_node *next, **pprev;
} cfs_hlist_node_t;
typedef struct cfs_hlist_head {
cfs_hlist_node_t *first;
} cfs_hlist_head_t;
/* @} */
/*
* "NULL" might not be defined at this point
*/
#ifdef NULL
#define NULL_P NULL
#else
#define NULL_P ((void *)0)
#endif
/**
* \addtogroup hlist
* @{
*/
#define CFS_HLIST_HEAD_INIT { NULL_P }
#define CFS_HLIST_HEAD(name) cfs_hlist_head_t name = { NULL_P }
#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P)
#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P)
static inline int cfs_hlist_unhashed(const cfs_hlist_node_t *h)
{
return !h->pprev;
}
static inline int cfs_hlist_empty(const cfs_hlist_head_t *h)
{
return !h->first;
}
/*
 * Internal helper: unlink @n from its hash list.  The pprev back-pointer
 * lets this work for the first element without knowing the list head.
 * @n itself is left in an undefined state.
 */
static inline void __cfs_hlist_del(cfs_hlist_node_t *n)
{
        cfs_hlist_node_t **back = n->pprev;
        cfs_hlist_node_t *after = n->next;

        *back = after;
        if (after)
                after->pprev = back;
}
static inline void cfs_hlist_del(cfs_hlist_node_t *n)
{
__cfs_hlist_del(n);
}
static inline void cfs_hlist_del_init(cfs_hlist_node_t *n)
{
if (n->pprev) {
__cfs_hlist_del(n);
CFS_INIT_HLIST_NODE(n);
}
}
/*
 * Push @n onto the front of hash list @h.  The previous first element
 * (if any) gets its pprev repointed at @n's next field.
 */
static inline void cfs_hlist_add_head(cfs_hlist_node_t *n,
                                      cfs_hlist_head_t *h)
{
        cfs_hlist_node_t *old_first = h->first;

        n->next  = old_first;
        n->pprev = &h->first;
        if (old_first)
                old_first->pprev = &n->next;
        h->first = n;
}
/* next must be != NULL */
static inline void cfs_hlist_add_before(cfs_hlist_node_t *n,
cfs_hlist_node_t *next)
{
n->pprev = next->pprev;
n->next = next;
next->pprev = &n->next;
*(n->pprev) = n;
}
static inline void cfs_hlist_add_after(cfs_hlist_node_t *n,
cfs_hlist_node_t *next)
{
next->next = n->next;
n->next = next;
next->pprev = &n->next;
if(next->next)
next->next->pprev = &next->next;
}
#define cfs_hlist_entry(ptr, type, member) container_of(ptr,type,member)
#define cfs_hlist_for_each(pos, head) \
for (pos = (head)->first; pos && (prefetch(pos->next), 1); \
pos = pos->next)
#define cfs_hlist_for_each_safe(pos, n, head) \
for (pos = (head)->first; pos && (n = pos->next, 1); \
pos = n)
/**
* Iterate over an hlist of given type
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param head the head for your list.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry(tpos, pos, head, member) \
for (pos = (head)->first; \
pos && ({ prefetch(pos->next); 1;}) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* Iterate over an hlist continuing after existing point
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry_continue(tpos, pos, member) \
for (pos = (pos)->next; \
pos && ({ prefetch(pos->next); 1;}) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* Iterate over an hlist continuing from an existing point
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry_from(tpos, pos, member) \
for (; pos && ({ prefetch(pos->next); 1;}) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* Iterate over an hlist of given type safe against removal of list entry
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param n another &struct hlist_node to use as temporary storage
* \param head the head for your list.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry_safe(tpos, pos, n, head, member) \
for (pos = (head)->first; \
pos && ({ n = pos->next; 1; }) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = n)
/* @} */
#ifndef cfs_list_for_each_prev
/**
* Iterate over a list in reverse order
* \param pos the &struct list_head to use as a loop counter.
* \param head the head for your list.
*/
#define cfs_list_for_each_prev(pos, head) \
for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
pos = pos->prev, prefetch(pos->prev))
#endif /* cfs_list_for_each_prev */
#ifndef cfs_list_for_each_entry
/**
* Iterate over a list of given type
* \param pos the type * to use as a loop counter.
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*/
#define cfs_list_for_each_entry(pos, head, member) \
for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \
prefetch(pos->member.next); \
&pos->member != (head); \
pos = cfs_list_entry(pos->member.next, typeof(*pos), member), \
prefetch(pos->member.next))
#endif /* cfs_list_for_each_entry */
/* Userspace fallback: RCU iteration degrades to plain list iteration.
 * (An identical, redundant copy of this #ifndef block was removed.) */
#ifndef cfs_list_for_each_entry_rcu
#define cfs_list_for_each_entry_rcu(pos, head, member) \
list_for_each_entry(pos, head, member)
#endif
#ifndef cfs_list_for_each_entry_reverse
/**
* Iterate backwards over a list of given type.
* \param pos the type * to use as a loop counter.
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*/
#define cfs_list_for_each_entry_reverse(pos, head, member) \
for (pos = cfs_list_entry((head)->prev, typeof(*pos), member); \
prefetch(pos->member.prev), &pos->member != (head); \
pos = cfs_list_entry(pos->member.prev, typeof(*pos), member))
#endif /* cfs_list_for_each_entry_reverse */
#ifndef cfs_list_for_each_entry_safe
/**
* Iterate over a list of given type safe against removal of list entry
* \param pos the type * to use as a loop counter.
* \param n another type * to use as temporary storage
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*/
#define cfs_list_for_each_entry_safe(pos, n, head, member) \
for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \
n = cfs_list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member))
#endif /* cfs_list_for_each_entry_safe */
#ifndef cfs_list_for_each_entry_safe_from
/**
* Iterate over a list continuing from an existing point
* \param pos the type * to use as a loop cursor.
* \param n another type * to use as temporary storage
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*
* Iterate over list of given type from current point, safe against
* removal of list entry.
*/
#define cfs_list_for_each_entry_safe_from(pos, n, head, member) \
for (n = cfs_list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member))
#endif /* cfs_list_for_each_entry_safe_from */
#define cfs_list_for_each_entry_typed(pos, head, type, member) \
for (pos = cfs_list_entry((head)->next, type, member), \
prefetch(pos->member.next); \
&pos->member != (head); \
pos = cfs_list_entry(pos->member.next, type, member), \
prefetch(pos->member.next))
#define cfs_list_for_each_entry_reverse_typed(pos, head, type, member) \
for (pos = cfs_list_entry((head)->prev, type, member); \
prefetch(pos->member.prev), &pos->member != (head); \
pos = cfs_list_entry(pos->member.prev, type, member))
#define cfs_list_for_each_entry_safe_typed(pos, n, head, type, member) \
for (pos = cfs_list_entry((head)->next, type, member), \
n = cfs_list_entry(pos->member.next, type, member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, type, member))
#define cfs_list_for_each_entry_safe_from_typed(pos, n, head, type, member) \
for (n = cfs_list_entry(pos->member.next, type, member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, type, member))
#define cfs_hlist_for_each_entry_typed(tpos, pos, head, type, member) \
for (pos = (head)->first; \
pos && (prefetch(pos->next), 1) && \
(tpos = cfs_hlist_entry(pos, type, member), 1); \
pos = pos->next)
#define cfs_hlist_for_each_entry_safe_typed(tpos, pos, n, head, type, member) \
for (pos = (head)->first; \
pos && (n = pos->next, 1) && \
(tpos = cfs_hlist_entry(pos, type, member), 1); \
pos = n)
#endif /* __DAOS_LIST_H__ */

11
src/mdtest-main.c Normal file
View File

@ -0,0 +1,11 @@
#include "mdtest.h"
#include "aiori.h"
/*
 * Stand-alone mdtest launcher: initialise MPI, run mdtest with the full
 * command line on MPI_COMM_WORLD (log output to stdout), then shut MPI
 * down.  The mdtest_results_t* returned by mdtest_run() is ignored here.
 */
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
mdtest_run(argc, argv, MPI_COMM_WORLD, stdout);
MPI_Finalize();
return 0;
}

File diff suppressed because it is too large Load Diff

37
src/mdtest.h Normal file
View File

@ -0,0 +1,37 @@
/*
 * Public interface of mdtest: the per-phase result slots and the
 * library entry point invoked by mdtest-main.c.
 */
#ifndef _MDTEST_H
#define _MDTEST_H
#include <mpi.h>
#include <stdio.h>
#include <stdint.h>
/* Index of each measured phase inside the result arrays below. */
typedef enum {
MDTEST_DIR_CREATE_NUM = 0,
MDTEST_DIR_STAT_NUM = 1,
MDTEST_DIR_READ_NUM = 1, /* NOTE(review): shares index 1 with DIR_STAT and index 2 is unused -- confirm this aliasing is intentional */
MDTEST_DIR_REMOVE_NUM = 3,
MDTEST_FILE_CREATE_NUM = 4,
MDTEST_FILE_STAT_NUM = 5,
MDTEST_FILE_READ_NUM = 6,
MDTEST_FILE_REMOVE_NUM = 7,
MDTEST_TREE_CREATE_NUM = 8,
MDTEST_TREE_REMOVE_NUM = 9,
MDTEST_LAST_NUM /* number of slots; sizes the arrays below */
} mdtest_test_num_t;
/* One entry per phase (indexed by mdtest_test_num_t). */
typedef struct
{
double rate[MDTEST_LAST_NUM]; /* Calculated throughput */
double time[MDTEST_LAST_NUM]; /* Time */
uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done */
/* Statistics when hitting the stonewall */
double stonewall_time[MDTEST_LAST_NUM]; /* runtime until completion / hit of the stonewall */
uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* Max number of items a process has accessed */
uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items a process has accessed */
uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed until stonewall */
} mdtest_results_t;
/* Run mdtest with the given command line on world_com, logging to out_logfile. */
mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
#endif

406
src/option.c Normal file
View File

@ -0,0 +1,406 @@
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <limits.h>
#include <option.h>
/*
* Takes a string of the form 64, 8m, 128k, 4g, etc. and converts to bytes.
*/
/*
 * Convert a size string such as "64", "8m", "128K" or "4g" into a byte
 * count.  Accepted suffixes (either case) are k, m, g, t and p; an
 * unrecognised suffix leaves the number unscaled.  Returns -1 when the
 * leading number cannot be parsed at all; an empty string yields 0.
 */
int64_t string_to_bytes(char *size_str)
{
        static const char units[] = "kKmMgGtTpP";
        int64_t size = 0;
        char suffix;
        int matched;

        matched = sscanf(size_str, " %lld %c ", (long long *)&size, &suffix);
        if (matched == 2) {
                /* each pair of letters in units[] is one power of 1024 */
                const char *hit = strchr(units, suffix);
                if (hit != NULL && suffix != '\0')
                        size <<= 10 * (1 + (hit - units) / 2);
        } else if (matched == 0) {
                size = -1;      /* no number at the front at all */
        }
        return size;
}
/*
* Initial revision by JK
*/
/*
 * Print the value of one option for the help output (e.g. "=42" or
 * "=STRING" as a placeholder).  Returns the number of characters
 * written so the caller can pad the help text into an aligned column.
 * The type codes match option_help.type ('p' callback, 'F'/'f' float,
 * 'd' int, 'H'/'s' string, 'c' char, 'l' long long, 'u' uint64_t).
 */
static int print_value(option_help * o){
int pos = 0;
if (o->arg == OPTION_OPTIONAL_ARGUMENT || o->arg == OPTION_REQUIRED_ARGUMENT){
assert(o->variable != NULL);
switch(o->type){
case('p'):{
pos += printf("=STRING");
break;
}
case('F'):{
pos += printf("=%.14f ", *(double*) o->variable);
break;
}
case('f'):{
pos += printf("=%.6f ", (double) *(float*) o->variable);
break;
}
case('d'):{
pos += printf("=%d ", *(int*) o->variable);
break;
}
case('H'):
case('s'):{
/* print the current string only when it is set and non-empty */
if ( *(char**) o->variable != NULL && ((char**) o->variable)[0][0] != 0 ){
pos += printf("=%s", *(char**) o->variable);
}else{
pos += printf("=STRING");
}
break;
}
case('c'):{
pos += printf("=%c", *(char*) o->variable);
break;
}
case('l'):{
pos += printf("=%lld", *(long long*) o->variable);
break;
}
case('u'):{
pos += printf("=%lu", *(uint64_t*) o->variable);
break;
}
}
}
/* flags: show their current count when non-zero */
if (o->arg == OPTION_FLAG && (*(int*)o->variable) != 0){
pos += printf(" (%d)", (*(int*)o->variable));
}
return pos;
}
/*
 * Print every option of the given argument class (type) under the
 * heading name.  Table rows that set only the help string (no short or
 * long name) are printed verbatim as section headers.  The heading is
 * only emitted once the first matching option is found.
 */
static void print_help_section(option_help * args, option_value_type type, char * name){
int first;
first = 1;
option_help * o;
for(o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++){
if (o->arg == type){
if( o->shortVar == 0 && o->longVar == 0 && o->help != NULL){
printf("%s\n", o->help);
continue;
}
if (first){
printf("\n%s\n", name);
first = 0;
}
printf("  ");
int pos = 0;
if(o->shortVar != 0 && o->longVar != 0){
pos += printf("-%c, --%s", o->shortVar, o->longVar);
}else if(o->shortVar != 0){
pos += printf("-%c", o->shortVar);
}else if(o->longVar != 0){
pos += printf("--%s", o->longVar);
}
pos += print_value(o);
/* pad the help text to (roughly) column 30 */
if(o->help != NULL){
for(int i = 0 ; i < (30 - pos); i++){
printf(" ");
}
printf("%s", o->help);
}
printf("\n");
}
}
}
/*
 * Print the full help: a one-line usage summary ("[-x] " for optional
 * options and flags, "-y " for required ones) followed by the detailed
 * sections for required arguments, flags and optional arguments.
 * When is_plugin is false a trailing "--" separator hint is printed.
 */
void option_print_help(option_help * args, int is_plugin){
option_help * o;
int optionalArgs = 0;
for(o = args; o->shortVar != 0 || o->longVar != 0 ; o++){
if(o->arg != OPTION_REQUIRED_ARGUMENT){
optionalArgs = 1;
}
switch(o->arg){
case (OPTION_OPTIONAL_ARGUMENT):
case (OPTION_FLAG):{
if(o->shortVar != 0){
printf("[-%c] ", o->shortVar);
}else if(o->longVar != 0){
printf("[--%s] ", o->longVar);
}
break;
}case (OPTION_REQUIRED_ARGUMENT):{
if(o->shortVar != 0){
printf("-%c ", o->shortVar);
}else if(o->longVar != 0){
printf("--%s ", o->longVar);
}
break;
}
}
}
if (optionalArgs){
//printf(" [Optional Args]");
}
if (! is_plugin){
printf(" -- <Plugin options, see below>\n");
}
print_help_section(args, OPTION_REQUIRED_ARGUMENT, "Required arguments");
print_help_section(args, OPTION_FLAG, "Flags");
print_help_section(args, OPTION_OPTIONAL_ARGUMENT, "Optional arguments");
}
/*
 * Print the current value of one option for the settings dump (used by
 * option_print_current()).  Unlike print_value(), hidden values
 * (type 'H') are masked as "=HIDDEN" and unset strings print a bare
 * "=".  Returns the number of characters written.
 */
static int print_option_value(option_help * o){
int pos = 0;
if (o->arg == OPTION_OPTIONAL_ARGUMENT || o->arg == OPTION_REQUIRED_ARGUMENT){
assert(o->variable != NULL);
switch(o->type){
case('F'):{
pos += printf("=%.14f ", *(double*) o->variable);
break;
}
case('f'):{
pos += printf("=%.6f ", (double) *(float*) o->variable);
break;
}
case('d'):{
pos += printf("=%d ", *(int*) o->variable);
break;
}
case('H'):{
pos += printf("=HIDDEN");
break;
}
case('s'):{
if ( *(char**) o->variable != NULL && ((char**) o->variable)[0][0] != 0 ){
pos += printf("=%s", *(char**) o->variable);
}else{
pos += printf("=");
}
break;
}
case('c'):{
pos += printf("=%c", *(char*) o->variable);
break;
}
case('l'):{
pos += printf("=%lld", *(long long*) o->variable);
break;
}
case('u'):{
pos += printf("=%lu", *(uint64_t*) o->variable);
break;
}
}
}else{
//printf(" ");
}
return pos;
}
/*
 * Print the current value of every option of the given argument class,
 * one tab-indented "name=value" line each.  Flags whose counter is
 * still 0 are skipped; the long name is preferred over the short one.
 */
static void print_current_option_section(option_help * args, option_value_type type){
option_help * o;
for(o = args; o->shortVar != 0 || o->longVar != 0 ; o++){
if (o->arg == type){
int pos = 0;
if (o->arg == OPTION_FLAG && (*(int*)o->variable) == 0){
continue;
}
printf("\t");
if(o->shortVar != 0 && o->longVar != 0){
pos += printf("%s", o->longVar);
}else if(o->shortVar != 0){
pos += printf("%c", o->shortVar);
}else if(o->longVar != 0){
pos += printf("%s", o->longVar);
}
pos += print_option_value(o);
printf("\n");
}
}
}
/*
 * Dump the current value of every option, grouped in a fixed order:
 * required arguments first, then optional arguments, then flags.
 */
void option_print_current(option_help * args){
        const option_value_type order[] = {
                OPTION_REQUIRED_ARGUMENT,
                OPTION_OPTIONAL_ARGUMENT,
                OPTION_FLAG,
        };
        for (int idx = 0; idx < 3; idx++)
                print_current_option_section(args, order[idx]);
}
/*
 * Parse argv[1..] against the option table args.
 *
 * Stops at a literal "--" (everything after it is left for plugin
 * parsing) and returns the index of the first unconsumed argv element.
 * Parsed values are written through option_help.variable according to
 * the option's type code.  *printhelp is set to 1 on -h/--help and to
 * -1 on an invalid argument or when required arguments are missing.
 *
 * NOTE: "opt=value" arguments are split in place (the '=' is replaced
 * by NUL and restored after the option is processed), so argv strings
 * are temporarily modified.
 */
int option_parse(int argc, char ** argv, option_help * args, int * printhelp){
int error = 0;
int requiredArgsSeen = 0;
int requiredArgsNeeded = 0;
int i;
/* count how many required arguments the table demands */
for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 ; o++ ){
if(o->arg == OPTION_REQUIRED_ARGUMENT){
requiredArgsNeeded++;
}
}
for(i=1; i < argc; i++){
char * txt = argv[i];
int foundOption = 0;
char * arg = strstr(txt, "=");
int replaced_equal = 0;
/* "opt=value": terminate the name part; remember to undo it below */
if(arg != NULL){
arg[0] = 0;
arg++;
replaced_equal = 1;
}
if(strcmp(txt, "--") == 0){
// we found plugin options
break;
}
// try to find matching option help
for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++ ){
if( o->shortVar == 0 && o->longVar == 0 ){
// section
continue;
}
/* match "-x" on the short name or "--name" on the long name */
if ( (txt[0] == '-' && o->shortVar == txt[1]) || (strlen(txt) > 2 && txt[0] == '-' && txt[1] == '-' && o->longVar != NULL && strcmp(txt + 2, o->longVar) == 0)){
foundOption = 1;
// now process the option.
switch(o->arg){
case (OPTION_FLAG):{
assert(o->type == 'd');
(*(int*) o->variable)++;
break;
}
case (OPTION_OPTIONAL_ARGUMENT):
case (OPTION_REQUIRED_ARGUMENT):{
// check if next is an argument
if(arg == NULL){
/* "-xVALUE" form, otherwise consume the following argv word */
if(o->shortVar == txt[1] && txt[2] != 0){
arg = & txt[2];
}else{
// simply take the next value as argument
i++;
arg = argv[i];
}
}
if(arg == NULL){
const char str[] = {o->shortVar, 0};
printf("Error, argument missing for option %s\n", (o->longVar != NULL) ? o->longVar : str);
exit(1);
}
switch(o->type){
case('p'):{
// call the function in the variable
void(*fp)() = o->variable;
fp(arg);
break;
}
case('F'):{
*(double*) o->variable = atof(arg);
break;
}
case('f'):{
*(float*) o->variable = atof(arg);
break;
}
case('d'):{
int64_t val = string_to_bytes(arg);
if (val > INT_MAX || val < INT_MIN){
printf("WARNING: parsing the number %s to integer, this produced an overflow!\n", arg);
}
*(int*) o->variable = val;
break;
}
case('H'):
case('s'):{
(*(char **) o->variable) = strdup(arg);
break;
}
case('c'):{
(*(char *)o->variable) = arg[0];
if(strlen(arg) > 1){
printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar);
}
break;
}
case('l'):{
*(long long*) o->variable = string_to_bytes(arg);
break;
}
case('u'):{
*(uint64_t*) o->variable = string_to_bytes(arg);
break;
}
default:
printf("ERROR: Unknown option type %c\n", o->type);
}
}
}
/* restore the '=' we overwrote when splitting "opt=value" */
if(replaced_equal){
arg[-1] = '=';
}
if(o->arg == OPTION_REQUIRED_ARGUMENT){
requiredArgsSeen++;
}
break;
}
}
if (! foundOption){
if(strcmp(txt, "-h") == 0 || strcmp(txt, "--help") == 0){
*printhelp=1;
}else{
printf("Error invalid argument: %s\n", txt);
error = 1;
}
}
}
if( requiredArgsSeen != requiredArgsNeeded ){
printf("Error: Missing some required arguments\n\n");
*printhelp = -1;
}
if(error != 0){
printf("Invalid options\n");
*printhelp = -1;
}
return i;
}

35
src/option.h Normal file
View File

@ -0,0 +1,35 @@
#ifndef _IOR_OPTION_H
#define _IOR_OPTION_H
#include <stdint.h>
/*
* Initial revision by JK
*/
typedef enum{
OPTION_FLAG,
OPTION_OPTIONAL_ARGUMENT,
OPTION_REQUIRED_ARGUMENT
} option_value_type;
typedef struct{
char shortVar;
char * longVar;
char * help;
option_value_type arg;
char type; // data type, H = hidden string
void * variable;
} option_help;
#define LAST_OPTION {0, 0, 0, (option_value_type) 0, 0, NULL}
int64_t string_to_bytes(char *size_str);
void option_print_help(option_help * args, int is_plugin);
void option_print_current(option_help * args);
//@return the number of parsed arguments
int option_parse(int argc, char ** argv, option_help * args, int * print_help);
#endif

View File

@ -21,44 +21,18 @@
#include <ctype.h>
#include <string.h>
#include "utilities.h"
#include "ior.h"
#include "aiori.h"
#include "parse_options.h"
#include "option.h"
#include "aiori.h"
#define ISPOWEROFTWO(x) ((x != 0) && !(x & (x - 1)))
IOR_param_t initialTestParams;
/*
* Takes a string of the form 64, 8m, 128k, 4g, etc. and converts to bytes.
*/
/*
 * Convert a size string of the form 64, 8m, 128k, 4g, etc. into a byte
 * count.  A trailing k/K, m/M or g/G suffix scales the number by 2^10,
 * 2^20 or 2^30 respectively; any other suffix leaves it unscaled.
 * Returns -1 when no leading number could be parsed.
 */
static IOR_offset_t StringToBytes(char *size_str)
{
        IOR_offset_t bytes = 0;
        char suffix;
        int matched = sscanf(size_str, " %lld %c ", &bytes, &suffix);

        if (matched == 0)
                return -1;      /* a token was present but no number matched */
        if (matched == 2) {
                if (suffix == 'k' || suffix == 'K')
                        bytes <<= 10;
                else if (suffix == 'm' || suffix == 'M')
                        bytes <<= 20;
                else if (suffix == 'g' || suffix == 'G')
                        bytes <<= 30;
        }
        /* matched == 1 (plain number) or EOF (empty input, bytes == 0) */
        return bytes;
}
static size_t NodeMemoryStringToBytes(char *size_str)
{
@ -70,7 +44,7 @@ static size_t NodeMemoryStringToBytes(char *size_str)
rc = sscanf(size_str, " %d %% ", &percent);
if (rc == 0)
return (size_t)StringToBytes(size_str);
return (size_t) string_to_bytes(size_str);
if (percent > 100 || percent < 0)
ERR("percentage must be between 0 and 100");
@ -87,11 +61,6 @@ static size_t NodeMemoryStringToBytes(char *size_str)
return mem / 100 * percent;
}
static void RecalculateExpectedFileSize(IOR_param_t *params)
{
params->expectedAggFileSize =
params->blockSize * params->segmentCount * params->numTasks;
}
/*
* Check and correct all settings of each test in queue for correctness.
@ -100,7 +69,6 @@ static void CheckRunSettings(IOR_test_t *tests)
{
IOR_test_t *ptr;
IOR_param_t *params;
int needRead, needWrite;
for (ptr = tests; ptr != NULL; ptr = ptr->next) {
params = &ptr->params;
@ -119,16 +87,13 @@ static void CheckRunSettings(IOR_test_t *tests)
* of HDFS, which doesn't support opening RDWR.
* (We assume int-valued params are exclusively 0 or 1.)
*/
needRead = params->readFile |
params->checkRead |
params->checkWrite; /* checkWrite reads the file */
needWrite = params->writeFile;
if ((params->openFlags & IOR_RDWR)
&& (needRead ^ needWrite))
{
/* need to either read or write, but not both */
&& ((params->readFile | params->checkRead)
^ (params->writeFile | params->checkWrite))
&& (params->openFlags & IOR_RDWR)) {
params->openFlags &= ~(IOR_RDWR);
if (needRead) {
if (params->readFile | params->checkRead) {
params->openFlags |= IOR_RDONLY;
params->openFlags &= ~(IOR_CREAT|IOR_EXCL);
}
@ -136,13 +101,6 @@ static void CheckRunSettings(IOR_test_t *tests)
params->openFlags |= IOR_WRONLY;
}
/* If numTasks set to 0, use all tasks */
if (params->numTasks == 0) {
MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD,
&params->numTasks),
"MPI_Comm_size() error");
RecalculateExpectedFileSize(params);
}
}
}
@ -157,28 +115,48 @@ void DecodeDirective(char *line, IOR_param_t *params)
rc = sscanf(line, " %[^=# \t\r\n] = %[^# \t\r\n] ", option, value);
if (rc != 2 && rank == 0) {
fprintf(stdout, "Syntax error in configuration options: %s\n",
fprintf(out_logfile, "Syntax error in configuration options: %s\n",
line);
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
if (strcasecmp(option, "api") == 0) {
strcpy(params->api, value);
params->api = strdup(value);
} else if (strcasecmp(option, "summaryFile") == 0) {
if (rank == 0){
out_resultfile = fopen(value, "w");
if (out_resultfile == NULL){
FAIL("Cannot open output file for writes!");
}
printf("Writing output to %s\n", value);
}
} else if (strcasecmp(option, "summaryFormat") == 0) {
if(strcasecmp(value, "default") == 0){
outputFormat = OUTPUT_DEFAULT;
}else if(strcasecmp(value, "JSON") == 0){
outputFormat = OUTPUT_JSON;
}else if(strcasecmp(value, "CSV") == 0){
outputFormat = OUTPUT_CSV;
}else{
FAIL("Unknown summaryFormat");
}
} else if (strcasecmp(option, "refnum") == 0) {
params->referenceNumber = atoi(value);
} else if (strcasecmp(option, "debug") == 0) {
strcpy(params->debug, value);
params->debug = strdup(value);
} else if (strcasecmp(option, "platform") == 0) {
strcpy(params->platform, value);
params->platform = strdup(value);
} else if (strcasecmp(option, "testfile") == 0) {
strcpy(params->testFileName, value);
params->testFileName = strdup(value);
} else if (strcasecmp(option, "hintsfilename") == 0) {
strcpy(params->hintsFileName, value);
params->hintsFileName = strdup(value);
} else if (strcasecmp(option, "deadlineforstonewalling") == 0) {
params->deadlineForStonewalling = atoi(value);
} else if (strcasecmp(option, "stoneWallingWearOut") == 0) {
params->stoneWallingWearOut = atoi(value);
} else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) {
params->stoneWallingWearOutIterations = atoi(value);
params->stoneWallingWearOutIterations = atoll(value);
} else if (strcasecmp(option, "stoneWallingStatusFile") == 0) {
params->stoneWallingStatusFile = strdup(value);
} else if (strcasecmp(option, "maxtimeduration") == 0) {
params->maxTimeDuration = atoi(value);
} else if (strcasecmp(option, "outlierthreshold") == 0) {
@ -220,15 +198,13 @@ void DecodeDirective(char *line, IOR_param_t *params)
} else if (strcasecmp(option, "quitonerror") == 0) {
params->quitOnError = atoi(value);
} else if (strcasecmp(option, "segmentcount") == 0) {
params->segmentCount = StringToBytes(value);
RecalculateExpectedFileSize(params);
params->segmentCount = string_to_bytes(value);
} else if (strcasecmp(option, "blocksize") == 0) {
params->blockSize = StringToBytes(value);
RecalculateExpectedFileSize(params);
params->blockSize = string_to_bytes(value);
} else if (strcasecmp(option, "transfersize") == 0) {
params->transferSize = StringToBytes(value);
params->transferSize = string_to_bytes(value);
} else if (strcasecmp(option, "setalignment") == 0) {
params->setAlignment = StringToBytes(value);
params->setAlignment = string_to_bytes(value);
} else if (strcasecmp(option, "singlexferattempt") == 0) {
params->singleXferAttempt = atoi(value);
} else if (strcasecmp(option, "individualdatasets") == 0) {
@ -257,8 +233,6 @@ void DecodeDirective(char *line, IOR_param_t *params)
params->useStridedDatatype = atoi(value);
} else if (strcasecmp(option, "showhints") == 0) {
params->showHints = atoi(value);
} else if (strcasecmp(option, "showhelp") == 0) {
params->showHelp = atoi(value);
} else if (strcasecmp(option, "uniqueDir") == 0) {
params->uniqueDir = atoi(value);
} else if (strcasecmp(option, "useexistingtestfile") == 0) {
@ -270,7 +244,7 @@ void DecodeDirective(char *line, IOR_param_t *params)
} else if (strcasecmp(option, "randomoffset") == 0) {
params->randomOffset = atoi(value);
} else if (strcasecmp(option, "memoryPerTask") == 0) {
params->memoryPerTask = StringToBytes(value);
params->memoryPerTask = string_to_bytes(value);
params->memoryPerNode = 0;
} else if (strcasecmp(option, "memoryPerNode") == 0) {
params->memoryPerNode = NodeMemoryStringToBytes(value);
@ -285,7 +259,7 @@ void DecodeDirective(char *line, IOR_param_t *params)
#ifndef HAVE_LUSTRE_LUSTRE_USER_H
ERR("ior was not compiled with Lustre support");
#endif
params->lustre_stripe_size = StringToBytes(value);
params->lustre_stripe_size = string_to_bytes(value);
params->lustre_set_striping = 1;
} else if (strcasecmp(option, "lustrestartost") == 0) {
#ifndef HAVE_LUSTRE_LUSTRE_USER_H
@ -317,26 +291,18 @@ void DecodeDirective(char *line, IOR_param_t *params)
ERR("beegfsNumTargets must be >= 1");
} else if (strcasecmp(option, "beegfsChunkSize") == 0) {
#ifndef HAVE_BEEGFS_BEEGFS_H
ERR("ior was not compiled with BeeGFS support");
ERR("ior was not compiled with BeeGFS support");
#endif
params->beegfs_chunkSize = StringToBytes(value);
params->beegfs_chunkSize = string_to_bytes(value);
if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16))
ERR("beegfsChunkSize must be a power of two and >64k");
} else if (strcasecmp(option, "numtasks") == 0) {
params->numTasks = atoi(value);
RecalculateExpectedFileSize(params);
} else if (strcasecmp(option, "summaryalways") == 0) {
params->summary_every_test = atoi(value);
} else if (strcasecmp(option, "daospool") == 0) {
strcpy(params->daosPool, value);
} else if (strcasecmp(option, "daospoolsvc") == 0) {
strcpy(params->daosPoolSvc, value);
} else if (strcasecmp(option, "daosgroup") == 0) {
strcpy(params->daosGroup, value);
}
else {
} else {
if (rank == 0)
fprintf(stdout, "Unrecognized parameter \"%s\"\n",
fprintf(out_logfile, "Unrecognized parameter \"%s\"\n",
option);
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
@ -345,11 +311,13 @@ void DecodeDirective(char *line, IOR_param_t *params)
/*
* Parse a single line, which may contain multiple comma-separated directives
*/
void ParseLine(char *line, IOR_param_t * test)
void ParseLine(const char *line, IOR_param_t * test)
{
char *start, *end;
start = line;
start = strdup(line);
if (start == NULL)
ERR("failed to duplicate line");
do {
end = strchr(start, ',');
if (end != NULL)
@ -357,7 +325,6 @@ void ParseLine(char *line, IOR_param_t * test)
DecodeDirective(start, test);
start = end + 1;
} while (end != NULL);
}
/*
@ -425,7 +392,6 @@ IOR_test_t *ReadConfigScript(char *scriptName)
if (sscanf(linebuf, " #%s", empty) == 1)
continue;
if (contains_only(linebuf, "ior stop")) {
AllocResults(tail);
break;
} else if (contains_only(linebuf, "run")) {
if (runflag) {
@ -434,7 +400,6 @@ IOR_test_t *ReadConfigScript(char *scriptName)
tail->next = CreateTest(&tail->params, test_num++);
tail = tail->next;
}
AllocResults(tail);
runflag = 1;
} else if (runflag) {
/* If this directive was preceded by a "run" line, then
@ -455,215 +420,153 @@ IOR_test_t *ReadConfigScript(char *scriptName)
return head;
}
static IOR_param_t * parameters;
static void decodeDirectiveWrapper(char *line){
ParseLine(line, parameters);
}
/*
* Parse Commandline.
*/
IOR_test_t *ParseCommandLine(int argc, char **argv)
{
static const char *opts =
"a:A:b:BcCd:D:eEf:FgG:hHi:Ij:J:kKl:mM:nN:o:O:pPqQ:rRs:St:T:uU:vVwWxX:YzZ";
int c, i;
static IOR_test_t *tests = NULL;
char * testscripts = NULL;
int toggleG = FALSE;
char * buffer_type = "";
char * memoryPerNode = NULL;
init_IOR_Param_t(& initialTestParams);
parameters = & initialTestParams;
/* suppress getopt() error message when a character is unrecognized */
opterr = 0;
char APIs[1024];
aiori_supported_apis(APIs);
char apiStr[1024];
sprintf(apiStr, "API for I/O [%s]", APIs);
option_help options [] = {
{'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.api},
{'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.referenceNumber},
{'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & initialTestParams.blockSize},
{'B', NULL, "useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers", OPTION_FLAG, 'd', & initialTestParams.useO_DIRECT},
{'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & initialTestParams.collective},
{'C', NULL, "reorderTasks -- changes task ordering to n+1 ordering for readback", OPTION_FLAG, 'd', & initialTestParams.reorderTasks},
{'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.interTestDelay},
{'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.deadlineForStonewalling},
{.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT},
{.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT},
{.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT},
{'e', NULL, "fsync -- perform sync operation after each block write", OPTION_FLAG, 'd', & initialTestParams.fsync},
{'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & initialTestParams.useExistingTestFile},
{'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & testscripts},
{'F', NULL, "filePerProc -- file-per-process", OPTION_FLAG, 'd', & initialTestParams.filePerProc},
{'g', NULL, "intraTestBarriers -- use barriers between open, write/read, and close", OPTION_FLAG, 'd', & initialTestParams.intraTestBarriers},
/* This option toggles between Incompressible Seed and Time stamp sig based on -l,
* so we'll toss the value in both for now, and sort it out in initialization
* after all the arguments are in and we know which to keep.
*/
{'G', NULL, "setTimeStampSignature -- set value for time stamp signature/random seed", OPTION_OPTIONAL_ARGUMENT, 'd', & toggleG},
{'H', NULL, "showHints -- show hints", OPTION_FLAG, 'd', & initialTestParams.showHints},
{'i', NULL, "repetitions -- number of repetitions of test", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.repetitions},
{'I', NULL, "individualDataSets -- datasets not shared by all procs [not working]", OPTION_FLAG, 'd', & initialTestParams.individualDataSets},
{'j', NULL, "outlierThreshold -- warn on outlier N seconds from mean", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.outlierThreshold},
{'J', NULL, "setAlignment -- HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.setAlignment},
{'k', NULL, "keepFile -- don't remove the test file(s) on program exit", OPTION_FLAG, 'd', & initialTestParams.keepFile},
{'K', NULL, "keepFileWithError -- keep error-filled file(s) after data-checking", OPTION_FLAG, 'd', & initialTestParams.keepFileWithError},
{'l', NULL, "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & buffer_type},
{'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & initialTestParams.multiFile},
{'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & memoryPerNode},
{'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & initialTestParams.noFill},
{'N', NULL, "numTasks -- number of tasks that should participate in the test", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.numTasks},
{'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.testFileName},
{'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper},
{'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & initialTestParams.preallocate},
{'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & initialTestParams.useSharedFilePointer},
{'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & initialTestParams.quitOnError},
{'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.taskPerNodeOffset},
{'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & initialTestParams.readFile},
{'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & initialTestParams.checkRead},
{'s', NULL, "segmentCount -- number of segments", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.segmentCount},
{'S', NULL, "useStridedDatatype -- put strided access into datatype [not working]", OPTION_FLAG, 'd', & initialTestParams.useStridedDatatype},
{'t', NULL, "transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & initialTestParams.transferSize},
{'T', NULL, "maxTimeDuration -- max time in minutes executing repeated test; it aborts only between iterations and not within a test!", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.maxTimeDuration},
{'u', NULL, "uniqueDir -- use unique directory name for each file-per-process", OPTION_FLAG, 'd', & initialTestParams.uniqueDir},
{'U', NULL, "hintsFileName -- full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.hintsFileName},
{'v', NULL, "verbose -- output information (repeating flag increases level)", OPTION_FLAG, 'd', & initialTestParams.verbose},
{'V', NULL, "useFileView -- use MPI_File_set_view", OPTION_FLAG, 'd', & initialTestParams.useFileView},
{'w', NULL, "writeFile -- write file", OPTION_FLAG, 'd', & initialTestParams.writeFile},
{'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & initialTestParams.checkWrite},
{'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & initialTestParams.singleXferAttempt},
{'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.reorderTasksRandomSeed},
{'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & initialTestParams.fsyncPerWrite},
{'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & initialTestParams.randomOffset},
{'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & initialTestParams.reorderTasksRandom},
{.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT},
{.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT},
LAST_OPTION,
};
IOR_test_t *tests = NULL;
init_IOR_Param_t(&initialTestParams);
GetPlatformName(initialTestParams.platform);
initialTestParams.writeFile = initialTestParams.readFile = FALSE;
initialTestParams.checkWrite = initialTestParams.checkRead = FALSE;
int printhelp = 0;
int parsed_options = option_parse(argc, argv, options, & printhelp);
while ((c = getopt(argc, argv, opts)) != -1) {
switch (c) {
case 'a':
strcpy(initialTestParams.api, optarg);
break;
case 'A':
initialTestParams.referenceNumber = atoi(optarg);
break;
case 'b':
initialTestParams.blockSize = StringToBytes(optarg);
RecalculateExpectedFileSize(&initialTestParams);
break;
case 'B':
initialTestParams.useO_DIRECT = TRUE;
break;
case 'c':
initialTestParams.collective = TRUE;
break;
case 'C':
initialTestParams.reorderTasks = TRUE;
break;
case 'd':
initialTestParams.interTestDelay = atoi(optarg);
break;
case 'D':
initialTestParams.deadlineForStonewalling =
atoi(optarg);
break;
case 'e':
initialTestParams.fsync = TRUE;
break;
case 'E':
initialTestParams.useExistingTestFile = TRUE;
break;
case 'f':
tests = ReadConfigScript(optarg);
break;
case 'F':
initialTestParams.filePerProc = TRUE;
break;
case 'g':
initialTestParams.intraTestBarriers = TRUE;
break;
case 'G':
/* This option toggles between Incompressible Seed and Time stamp sig based on -l,
* so we'll toss the value in both for now, and sort it out in initialization
* after all the arguments are in and we know which it keep.
*/
initialTestParams.setTimeStampSignature = atoi(optarg);
initialTestParams.incompressibleSeed = atoi(optarg);
break;
case 'h':
initialTestParams.showHelp = TRUE;
break;
case 'H':
initialTestParams.showHints = TRUE;
break;
case 'i':
initialTestParams.repetitions = atoi(optarg);
break;
case 'I':
initialTestParams.individualDataSets = TRUE;
break;
case 'j':
initialTestParams.outlierThreshold = atoi(optarg);
break;
case 'J':
initialTestParams.setAlignment = StringToBytes(optarg);
break;
case 'k':
initialTestParams.keepFile = TRUE;
break;
case 'K':
initialTestParams.keepFileWithError = TRUE;
break;
case 'l':
switch(*optarg) {
case 'i': /* Incompressible */
initialTestParams.dataPacketType = incompressible;
break;
case 't': /* timestamp */
initialTestParams.dataPacketType = timestamp;
break;
case 'o': /* offset packet */
initialTestParams.storeFileOffset = TRUE;
initialTestParams.dataPacketType = offset;
break;
default:
fprintf(stdout,
"Unknown arguement for -l %s generic assumed\n", optarg);
break;
}
break;
case 'm':
initialTestParams.multiFile = TRUE;
break;
case 'M':
initialTestParams.memoryPerNode =
NodeMemoryStringToBytes(optarg);
break;
case 'n':
initialTestParams.noFill = TRUE;
break;
case 'N':
initialTestParams.numTasks = atoi(optarg);
RecalculateExpectedFileSize(&initialTestParams);
break;
case 'o':
strcpy(initialTestParams.testFileName, optarg);
break;
case 'O':
ParseLine(optarg, &initialTestParams);
break;
case 'p':
initialTestParams.preallocate = TRUE;
break;
case 'P':
initialTestParams.useSharedFilePointer = TRUE;
break;
case 'q':
initialTestParams.quitOnError = TRUE;
break;
case 'Q':
initialTestParams.taskPerNodeOffset = atoi(optarg);
break;
case 'r':
initialTestParams.readFile = TRUE;
break;
case 'R':
initialTestParams.checkRead = TRUE;
break;
case 's':
initialTestParams.segmentCount = atoi(optarg);
RecalculateExpectedFileSize(&initialTestParams);
break;
case 'S':
initialTestParams.useStridedDatatype = TRUE;
break;
case 't':
initialTestParams.transferSize = StringToBytes(optarg);
break;
case 'T':
initialTestParams.maxTimeDuration = atoi(optarg);
break;
case 'u':
initialTestParams.uniqueDir = TRUE;
break;
case 'U':
strcpy(initialTestParams.hintsFileName, optarg);
break;
case 'v':
initialTestParams.verbose++;
break;
case 'V':
initialTestParams.useFileView = TRUE;
break;
case 'w':
initialTestParams.writeFile = TRUE;
break;
case 'W':
initialTestParams.checkWrite = TRUE;
break;
case 'x':
initialTestParams.singleXferAttempt = TRUE;
break;
case 'X':
initialTestParams.reorderTasksRandomSeed = atoi(optarg);
break;
case 'Y':
initialTestParams.fsyncPerWrite = TRUE;
break;
case 'z':
initialTestParams.randomOffset = TRUE;
break;
case 'Z':
initialTestParams.reorderTasksRandom = TRUE;
break;
default:
fprintf(stdout,
"ParseCommandLine: unknown option `-%c'.\n",
optopt);
}
if (toggleG){
initialTestParams.setTimeStampSignature = toggleG;
initialTestParams.incompressibleSeed = toggleG;
}
for (i = optind; i < argc; i++)
fprintf(stdout, "non-option argument: %s\n", argv[i]);
if (buffer_type[0] != 0){
switch(buffer_type[0]) {
case 'i': /* Incompressible */
initialTestParams.dataPacketType = incompressible;
break;
case 't': /* timestamp */
initialTestParams.dataPacketType = timestamp;
break;
case 'o': /* offset packet */
initialTestParams.storeFileOffset = TRUE;
initialTestParams.dataPacketType = offset;
break;
default:
fprintf(out_logfile,
"Unknown arguement for -l %s; generic assumed\n", buffer_type);
break;
}
}
if (memoryPerNode){
initialTestParams.memoryPerNode = NodeMemoryStringToBytes(optarg);
}
/* If an IOR script was not used, initialize test queue to the defaults */
if (tests == NULL) {
tests = CreateTest(&initialTestParams, 0);
AllocResults(tests);
const ior_aiori_t * backend = aiori_select(initialTestParams.api);
initialTestParams.backend = backend;
initialTestParams.apiVersion = backend->get_version();
if(backend->get_options != NULL){
option_parse(argc - parsed_options, argv + parsed_options, backend->get_options(), & printhelp);
}
if(printhelp != 0){
printf("Usage: %s ", argv[0]);
option_print_help(options, 0);
if(backend->get_options != NULL){
printf("\nPlugin options for backend %s (%s)\n", initialTestParams.api, backend->get_version());
option_print_help(backend->get_options(), 1);
}
if(printhelp == 1){
exit(0);
}else{
exit(1);
}
}
if (testscripts){
tests = ReadConfigScript(testscripts);
}else{
tests = CreateTest(&initialTestParams, 0);
}
CheckRunSettings(tests);

32
src/test/lib.c Normal file
View File

@ -0,0 +1,32 @@
#include "../ior.h"
#include "../mdtest.h"
/*
 * Library smoke test: drives both IOR and mdtest through their library
 * entry points with the DUMMY backend.  Only rank 0 runs the benchmarks;
 * the process exit status is non-zero if either invocation returns NULL.
 */
int main(int argc, char ** argv){
  int world_rank;
  int exit_code = 0;

  MPI_Init(& argc, & argv);
  MPI_Comm_rank(MPI_COMM_WORLD, & world_rank);

  if (world_rank == 0){
    char * ior_argv[]    = {"./ior", "-a", "DUMMY"};
    char * mdtest_argv[] = {"./mdtest", "-a", "DUMMY"};

    IOR_test_t * ior_res = ior_run(3, ior_argv, MPI_COMM_SELF, stdout);
    if (ior_res == NULL)
    {
      fprintf(stderr, "Could not run ior\n");
      exit_code = 1;
    }

    mdtest_results_t * mdtest_res = mdtest_run(3, mdtest_argv, MPI_COMM_SELF, stdout);
    if (mdtest_res == NULL)
    {
      fprintf(stderr, "Could not run mdtest\n");
      exit_code = 1;
    }
  }

  MPI_Finalize();
  return exit_code;
}

View File

@ -16,10 +16,6 @@
# include "config.h"
#endif
#ifdef __linux__
# define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */
#endif /* __linux__ */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
@ -51,9 +47,18 @@
extern int errno;
extern int numTasks;
extern int rank;
extern int rankOffset;
extern int verbose;
/* globals used by other files, also defined "extern" in ior.h */
int numTasksWorld = 0;
int rank = 0;
int rankOffset = 0;
int tasksPerNode = 0; /* tasks per node */
int verbose = VERBOSE_0; /* verbose output */
MPI_Comm testComm;
MPI_Comm mpi_comm_world;
FILE * out_logfile;
FILE * out_resultfile;
enum OutputFormat_t outputFormat;
/***************************** F U N C T I O N S ******************************/
@ -120,84 +125,86 @@ void DumpBuffer(void *buffer,
to assume that it must always be */
for (i = 0; i < ((size / sizeof(IOR_size_t)) / 4); i++) {
for (j = 0; j < 4; j++) {
fprintf(stdout, IOR_format" ", dumpBuf[4 * i + j]);
fprintf(out_logfile, IOR_format" ", dumpBuf[4 * i + j]);
}
fprintf(stdout, "\n");
fprintf(out_logfile, "\n");
}
return;
} /* DumpBuffer() */
/*
* Sends all strings to root nodes and displays.
*/
void OutputToRoot(int numTasks, MPI_Comm comm, char *stringToDisplay)
{
int i;
int swapNeeded = TRUE;
int pairsToSwap;
char **stringArray;
char tmpString[MAX_STR];
MPI_Status status;
#if MPI_VERSION >= 3
int CountTasksPerNode(MPI_Comm comm) {
/* modern MPI provides a simple way to get the local process count */
MPI_Comm shared_comm;
int count;
/* malloc string array */
stringArray = (char **)malloc(sizeof(char *) * numTasks);
if (stringArray == NULL)
ERR("out of memory");
for (i = 0; i < numTasks; i++) {
stringArray[i] = (char *)malloc(sizeof(char) * MAX_STR);
if (stringArray[i] == NULL)
ERR("out of memory");
}
MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
MPI_Comm_size (shared_comm, &count);
MPI_Comm_free (&shared_comm);
strcpy(stringArray[rank], stringToDisplay);
if (rank == 0) {
/* MPI_receive all strings */
for (i = 1; i < numTasks; i++) {
MPI_CHECK(MPI_Recv(stringArray[i], MAX_STR, MPI_CHAR,
MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
&status), "MPI_Recv() error");
}
} else {
/* MPI_send string to root node */
MPI_CHECK(MPI_Send
(stringArray[rank], MAX_STR, MPI_CHAR, 0, 0, comm),
"MPI_Send() error");
}
MPI_CHECK(MPI_Barrier(comm), "barrier error");
/* sort strings using bubblesort */
if (rank == 0) {
pairsToSwap = numTasks - 1;
while (swapNeeded) {
swapNeeded = FALSE;
for (i = 0; i < pairsToSwap; i++) {
if (strcmp(stringArray[i], stringArray[i + 1]) >
0) {
strcpy(tmpString, stringArray[i]);
strcpy(stringArray[i],
stringArray[i + 1]);
strcpy(stringArray[i + 1], tmpString);
swapNeeded = TRUE;
}
}
pairsToSwap--;
}
}
/* display strings */
if (rank == 0) {
for (i = 0; i < numTasks; i++) {
fprintf(stdout, "%s\n", stringArray[i]);
}
}
/* free strings */
for (i = 0; i < numTasks; i++) {
free(stringArray[i]);
}
free(stringArray);
return count;
}
#else
/*
* Count the number of tasks that share a host.
*
* This function employs the gethostname() call, rather than using
* MPI_Get_processor_name(). We are interested in knowing the number
* of tasks that share a file system client (I/O node, compute node,
* whatever that may be). However on machines like BlueGene/Q,
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
* not the node where the I/O is function shipped to. gethostname()
* is assumed to identify the shared filesystem client in more situations.
*
* NOTE: This also assumes that the task count on all nodes is equal
* to the task count on the host running MPI task 0.
*/
/* Pre-MPI-3 fallback: count the ranks whose hostname equals rank 0's
 * hostname, then broadcast that count so every rank returns the same
 * value.  Assumes all nodes run the same number of tasks as rank 0's
 * node (see the comment block above). */
int CountTasksPerNode(MPI_Comm comm) {
        int size;
        MPI_Comm_size(comm, & size);
        /* for debugging and testing: override via environment variable */
        if (getenv("IOR_FAKE_TASK_PER_NODES")){
                int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES"));
                int rank;
                MPI_Comm_rank(comm, & rank);
                if(rank == 0){
                        printf("Fake tasks per node: using %d\n", tasksPerNode);
                }
                return tasksPerNode;
        }
        char localhost[MAX_PATHLEN],
             hostname[MAX_PATHLEN];
        int count = 1,          /* rank 0 counts itself */
            i;
        MPI_Status status;

        /* NOTE(review): uses the file-scope globals rank/verbose/out_logfile,
         * not a rank derived from `comm` — confirm comm rank 0 is global rank 0
         * for all callers */
        if (( rank == 0 ) && ( verbose >= 1 )) {
                fprintf( out_logfile, "V-1: Entering count_tasks_per_node...\n" );
                fflush( out_logfile );
        }

        if (gethostname(localhost, MAX_PATHLEN) != 0) {
                FAIL("gethostname()");
        }
        if (rank == 0) {
                /* MPI_receive all hostnames, and compares them to the local hostname */
                for (i = 0; i < size-1; i++) {
                        MPI_Recv(hostname, MAX_PATHLEN, MPI_CHAR, MPI_ANY_SOURCE,
                                 MPI_ANY_TAG, comm, &status);
                        if (strcmp(hostname, localhost) == 0) {
                                count++;
                        }
                }
        } else {
                /* MPI_send hostname to root node */
                MPI_Send(localhost, MAX_PATHLEN, MPI_CHAR, 0, 0, comm);
        }
        /* distribute rank 0's count to all ranks */
        MPI_Bcast(&count, 1, MPI_INT, 0, comm);
        return(count);
}
#endif
/*
* Extract key/value pair from hint string.
@ -217,7 +224,7 @@ void ExtractHint(char *settingVal, char *valueVal, char *hintString)
tmpPtr2 = (char *)strstr(settingPtr, "IOR_HINT__GPFS__");
if (tmpPtr1 == tmpPtr2) {
settingPtr += strlen("IOR_HINT__GPFS__");
fprintf(stdout,
fprintf(out_logfile,
"WARNING: Unable to set GPFS hints (not implemented.)\n");
}
}
@ -259,7 +266,7 @@ void SetHints(MPI_Info * mpiHints, char *hintsFileName)
}
/* get hints from hints file */
if (strcmp(hintsFileName, "") != 0) {
if (hintsFileName != NULL && strcmp(hintsFileName, "") != 0) {
/* open the hint file */
fd = fopen(hintsFileName, "r");
@ -304,7 +311,7 @@ void ShowHints(MPI_Info * mpiHints)
MPI_CHECK(MPI_Info_get(*mpiHints, key, MPI_MAX_INFO_VAL - 1,
value, &flag),
"cannot get info object value");
fprintf(stdout, "\t%s = %s\n", key, value);
fprintf(out_logfile, "\t%s = %s\n", key, value);
}
}
@ -399,14 +406,28 @@ void ShowFileSystemSize(char *fileSystem)
if (realpath(fileSystem, realPath) == NULL) {
ERR("unable to use realpath()");
}
fprintf(stdout, "Path: %s\n", realPath);
fprintf(stdout, "FS: %.1f %s Used FS: %2.1f%% ",
totalFileSystemSizeHR, fileSystemUnitStr,
usedFileSystemPercentage);
fprintf(stdout, "Inodes: %.1f Mi Used Inodes: %2.1f%%\n",
(double)totalInodes / (double)(1<<20),
usedInodePercentage);
fflush(stdout);
if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-20s: %s\n", "Path", realPath);
fprintf(out_resultfile, "%-20s: %.1f %s Used FS: %2.1f%% ",
"FS", totalFileSystemSizeHR, fileSystemUnitStr,
usedFileSystemPercentage);
fprintf(out_resultfile, "Inodes: %.1f Mi Used Inodes: %2.1f%%\n",
(double)totalInodes / (double)(1<<20),
usedInodePercentage);
fflush(out_logfile);
}else if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, " , \"Path\": \"%s\",", realPath);
fprintf(out_resultfile, "\"Capacity\": \"%.1f %s\", \"Used Capacity\": \"%2.1f%%\",",
totalFileSystemSizeHR, fileSystemUnitStr,
usedFileSystemPercentage);
fprintf(out_resultfile, "\"Inodes\": \"%.1f Mi\", \"Used Inodes\" : \"%2.1f%%\"\n",
(double)totalInodes / (double)(1<<20),
usedInodePercentage);
}else if(outputFormat == OUTPUT_CSV){
}
#endif /* !_WIN32 */
return;
@ -474,3 +495,181 @@ int uname(struct utsname *name)
return 0;
}
#endif /* _WIN32 */
/* spread (max-min) between the ranks' clocks, measured by init_clock() */
double wall_clock_deviation;
/* offset of this rank's clock from rank 0's clock; 0 until measured */
double wall_clock_delta = 0;
/*
 * Get time stamp.  Use MPI_Timer() unless _NO_MPI_TIMER is defined,
 * in which case use gettimeofday().  The result is shifted by
 * wall_clock_delta so that all ranks report times relative to the
 * root rank's clock (wall_clock_delta is set by TimeDeviation()).
 */
double GetTimeStamp(void)
{
        double timeVal;
#ifdef _NO_MPI_TIMER
        /* fallback path: seconds plus microseconds from gettimeofday() */
        struct timeval timer;
        if (gettimeofday(&timer, (struct timezone *)NULL) != 0)
                ERR("cannot use gettimeofday()");
        timeVal = (double)timer.tv_sec + ((double)timer.tv_usec / 1000000);
#else /* not _NO_MPI_TIMER */
        timeVal = MPI_Wtime(); /* no MPI_CHECK(), just check return value */
        if (timeVal < 0)
                ERR("cannot use MPI_Wtime()");
#endif /* _NO_MPI_TIMER */
        /* wall_clock_delta is difference from root node's time */
        timeVal -= wall_clock_delta;
        return (timeVal);
}
/*
 * Determine any spread (range) between node times.  Must be called
 * collectively on mpi_comm_world.  As a side effect it sets
 * wall_clock_delta on every rank to the offset of the local clock from
 * the root rank's clock, which GetTimeStamp() later subtracts out.
 */
static double TimeDeviation(void)
{
        double timestamp;
        double min = 0;
        double max = 0;
        double roottimestamp;
        /* align all ranks before sampling so the samples are comparable */
        MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error");
        timestamp = GetTimeStamp();
        MPI_CHECK(MPI_Reduce(&timestamp, &min, 1, MPI_DOUBLE,
                             MPI_MIN, 0, mpi_comm_world),
                  "cannot reduce tasks' times");
        MPI_CHECK(MPI_Reduce(&timestamp, &max, 1, MPI_DOUBLE,
                             MPI_MAX, 0, mpi_comm_world),
                  "cannot reduce tasks' times");
        /* delta between individual nodes' time and root node's time */
        roottimestamp = timestamp;
        MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, mpi_comm_world),
                  "cannot broadcast root's time");
        wall_clock_delta = timestamp - roottimestamp;
        /* NOTE(review): min/max are reduced onto rank 0 only, so the
         * returned spread is meaningful on rank 0; other ranks return
         * their initialized 0 values. */
        return max - min;
}
/*
 * Measure the clock skew between tasks and remember it in
 * wall_clock_deviation; TimeDeviation() also initializes
 * wall_clock_delta as a side effect.  Collective call: all ranks of
 * mpi_comm_world must enter it.
 */
void init_clock(){
        /* check for skew between tasks' start times */
        wall_clock_deviation = TimeDeviation();
}
/*
 * Return the current local time formatted as "MM/DD/YYYY HH:MM:SS".
 * Uses a statically-allocated buffer: not re-entrant, and every call
 * overwrites the previous result.
 */
char * PrintTimestamp() {
        static char datestring[80];
        time_t cur_timestamp;

        if (( rank == 0 ) && ( verbose >= 1 )) {
                fprintf( out_logfile, "V-1: Entering PrintTimestamp...\n" );
                /* flush only when something was printed, matching the
                 * guard style used elsewhere in this file */
                fflush(out_logfile);
        }
        cur_timestamp = time(NULL);
        /* derive the buffer size instead of repeating the magic 80 */
        strftime(datestring, sizeof(datestring), "%m/%d/%Y %T",
                 localtime(&cur_timestamp));
        return datestring;
}
/*
 * Read the stonewalling iteration count from <filename> on rank 0 and
 * broadcast it to all ranks of mpi_comm_world.
 * Returns -1 (on all ranks) if the file cannot be opened or parsed.
 * Collective call: every rank must enter it, since every rank
 * participates in the broadcast.
 */
int64_t ReadStoneWallingIterations(char * const filename){
        long long data;
        if(rank != 0){
                /* non-root ranks just receive the value rank 0 determined */
                MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
                return data;
        }else{
                FILE * out = fopen(filename, "r");
                if (out == NULL){
                        data = -1;
                }else{
                        int ret = fscanf(out, "%lld", & data);
                        if (ret != 1){
                                /* unparsable content: report failure instead
                                 * of returning an uninitialized value */
                                data = -1;
                        }
                        /* close in every case: the old code leaked the
                         * handle on a parse failure */
                        fclose(out);
                }
                /* always broadcast; returning early here would leave the
                 * other ranks deadlocked in their MPI_Bcast above */
                MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
                return data;
        }
}
/*
 * Persist the stonewalling iteration count to <filename>.
 * Only rank 0 writes the file; every other rank is a no-op.
 */
void StoreStoneWallingIterations(char * const filename, int64_t count){
        if (rank != 0)
                return;
        FILE *fh = fopen(filename, "w");
        if (fh == NULL)
                FAIL("Cannot write to the stonewalling file!");
        fprintf(fh, "%lld", (long long) count);
        fclose(fh);
}
/*
 * Sleep for 'delay' seconds.  Only rank 0 actually delays, and only for
 * a positive delay value.
 */
void DelaySecs(int delay){
        if (rank != 0 || delay <= 0)
                return;
        if (verbose >= VERBOSE_1)
                fprintf(out_logfile, "delaying %d seconds . . .\n", delay);
        sleep(delay);
}
/*
 * Convert IOR_offset_t value to human readable string.  This routine uses a
 * statically-allocated buffer internally and so is not re-entrant.
 * 'base' selects BASE_TWO (MiB/GiB/TiB) or BASE_TEN (MB/GB/TB) units;
 * any other base falls back to plain bytes.
 */
char *HumanReadable(IOR_offset_t value, int base)
{
        static char valueStr[MAX_STR];
        IOR_offset_t m = 0, g = 0, t = 0;
        char m_str[8], g_str[8], t_str[8];

        if (base == BASE_TWO) {
                m = MEBIBYTE;
                g = GIBIBYTE;
                t = GIBIBYTE * 1024llu;
                strcpy(m_str, "MiB");
                strcpy(g_str, "GiB");
                strcpy(t_str, "TiB");
        } else if (base == BASE_TEN) {
                m = MEGABYTE;
                g = GIGABYTE;
                t = GIGABYTE * 1000llu;
                strcpy(m_str, "MB");
                strcpy(g_str, "GB");
                strcpy(t_str, "TB");
        } else {
                /* Unknown base: the unit divisors would stay 0 and the
                 * "value % t" below would divide by zero (undefined
                 * behavior), so report raw bytes instead. */
                if (value >= 0)
                        snprintf(valueStr, MAX_STR-1, "%d bytes", (int)value);
                else
                        snprintf(valueStr, MAX_STR-1, "-");
                return valueStr;
        }

        if (value >= t) {
                if (value % t) {
                        /* fractional amount: two decimal places */
                        snprintf(valueStr, MAX_STR-1, "%.2f %s",
                                 (double)((double)value / t), t_str);
                } else {
                        snprintf(valueStr, MAX_STR-1, "%d %s", (int)(value / t), t_str);
                }
        } else if (value >= g) {
                if (value % g) {
                        snprintf(valueStr, MAX_STR-1, "%.2f %s",
                                 (double)((double)value / g), g_str);
                } else {
                        snprintf(valueStr, MAX_STR-1, "%d %s", (int)(value / g), g_str);
                }
        } else if (value >= m) {
                if (value % m) {
                        snprintf(valueStr, MAX_STR-1, "%.2f %s",
                                 (double)((double)value / m), m_str);
                } else {
                        snprintf(valueStr, MAX_STR-1, "%d %s", (int)(value / m), m_str);
                }
        } else if (value >= 0) {
                snprintf(valueStr, MAX_STR-1, "%d bytes", (int)value);
        } else {
                snprintf(valueStr, MAX_STR-1, "-");
        }
        return valueStr;
}

View File

@ -18,15 +18,62 @@
#include <mpi.h>
#include "ior.h"
extern int numTasksWorld;
extern int rank;
extern int rankOffset;
extern int tasksPerNode;
extern int verbose;
extern MPI_Comm testComm;
extern MPI_Comm mpi_comm_world;
extern FILE * out_logfile;
extern FILE * out_resultfile;
extern enum OutputFormat_t outputFormat; /* format of the output */
/*
* Try using the system's PATH_MAX, which is what realpath and such use.
*/
#define MAX_PATHLEN PATH_MAX
#ifdef __linux__
#define FAIL(msg) do { \
fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \
PrintTimestamp(), rank, __func__, \
msg, strerror(errno)); \
fflush(out_logfile); \
MPI_Abort(testComm, 1); \
} while(0)
#else
#define FAIL(msg) do { \
fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \
PrintTimestamp(), rank, __LINE__, \
msg, strerror(errno)); \
fflush(out_logfile); \
MPI_Abort(testComm, 1); \
} while(0)
#endif
void set_o_direct_flag(int *fd);
char *CurrentTimeString(void);
void OutputToRoot(int, MPI_Comm, char *);
int Regex(char *, char *);
void ShowFileSystemSize(char *);
void DumpBuffer(void *, size_t);
void SeedRandGen(MPI_Comm);
void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *);
char *HumanReadable(IOR_offset_t value, int base);
int CountTasksPerNode(MPI_Comm comm);
void DelaySecs(int delay);
/* Returns -1, if cannot be read */
int64_t ReadStoneWallingIterations(char * const filename);
void StoreStoneWallingIterations(char * const filename, int64_t count);
void init_clock(void);
double GetTimeStamp(void);
char * PrintTimestamp(); // TODO remove this function
extern double wall_clock_deviation;
extern double wall_clock_delta;
#endif /* !_UTILITIES_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,192 +0,0 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/* getopt.h */
/* Declarations for getopt.
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
Foundation, Inc. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute
it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software
Foundation; either version 2.1 of the License, or
(at your option) any later version.
The GNU C Library is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General
Public License along with the GNU C Library; if not, write
to the Free Software Foundation, Inc., 59 Temple Place,
Suite 330, Boston, MA 02111-1307 USA. */
#ifndef _GETOPT_H
#ifndef __need_getopt
# define _GETOPT_H 1
#endif
/* If __GNU_LIBRARY__ is not already defined, either we are being used
standalone, or this is the first header included in the source file.
If we are being used with glibc, we need to include <features.h>, but
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
not defined, include <ctype.h>, which will pull in <features.h> for us
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
doesn't flood the namespace with stuff the way some other headers do.) */
#if !defined __GNU_LIBRARY__
# include <ctype.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
#ifndef __need_getopt
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
struct option
{
# if (defined __STDC__ && __STDC__) || defined __cplusplus
const char *name;
# else
char *name;
# endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg;
int *flag;
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */
# define no_argument 0
# define required_argument 1
# define optional_argument 2
#endif /* need getopt */
/* Get definitions and prototypes for functions to process the
arguments in ARGV (ARGC of them, minus the program name) for
options given in OPTS.
Return the option character from OPTS just read. Return -1 when
there are no more options. For unrecognized options, or options
missing arguments, `optopt' is set to the option letter, and '?' is
returned.
The OPTS string is a list of characters which are recognized option
letters, optionally followed by colons, specifying that that letter
takes an argument, to be placed in `optarg'.
If a letter in OPTS is followed by two colons, its argument is
optional. This behavior is specific to the GNU `getopt'.
The argument `--' causes premature termination of argument
scanning, explicitly telling `getopt' that there are no more
options.
If OPTS begins with `--', then non-option arguments are treated as
arguments to the option '\0'. This behavior is specific to the GNU
`getopt'. */
#if (defined __STDC__ && __STDC__) || defined __cplusplus
# ifdef __cplusplus // __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
# else /* not __GNU_LIBRARY__ */
extern int getopt ();
# endif /* __GNU_LIBRARY__ */
# ifndef __need_getopt
extern int getopt_long (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
extern int getopt_long_only (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind,
int __long_only);
# endif
#else /* not __STDC__ */
extern int getopt ();
# ifndef __need_getopt
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
# endif
#endif /* __STDC__ */
#ifdef __cplusplus
}
#endif
/* Make sure we later can get all the definitions and declarations. */
#undef __need_getopt
#endif /* getopt.h */

View File

@ -1,46 +0,0 @@
/******************************************************************************\
* *
* Copyright (c) 2003, The Regents of the University of California *
* See the file COPYRIGHT for a complete copyright notice and license. *
* *
\******************************************************************************/
The IOR-tester runs a series of tests to check and maintain the existing
functionality of the source code as code is modified. The IOR-tester creates
a default test, then modifies it to run test scripts. It runs a large number
of tests, most which are expected to pass, but some with an expectation of
failure.
To run the code, modify the 'DefaultTest' dictionary in the source code to
reflect the test file location, the executable location, etc. Then, run
the code using './IOR-tester.py'.
The expected-pass, pattern-independent tests include:
POSIX only:
o retry transfer
MPIIO only:
o hints
o preallocation
Both POSIX and MPIIO:
o repetition count
o intertest delay
o test file removal
o verbosity
The expected-pass, pattern-dependent tests include:
POSIX:
o write-only, read-only, write/read, and write/read check
o fpp and single file
o segmented, strided
o zero-length, 4-byte, and larger file, block, and transfer sizes
MPIIO (same as POSIX, but using MPIIO access):
o noncollective
o noncollective, file view
o collective
o collective, file view
The expected-fail tests include:
Both POSIX and MPIIO:
o repetition count

File diff suppressed because it is too large Load Diff

26
testing/basic-tests.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Test script for basic IOR functionality testing various patterns
# It is kept as simple as possible and outputs the parameters used such that any test can be rerun easily.
# You can override the defaults by setting the variables before invoking the script, or simply set them here...
# Example: export IOR_EXTRA="-v -v -v"

# directory containing this script
ROOT=${0%/*}

# IOR(), MDTEST() and END() helpers plus the defaults come from the shared library
source $ROOT/test-lib.sh

# mdtest metadata runs with one and two ranks
MDTEST 1 -a POSIX
MDTEST 2 -a POSIX -W 2

# ior data runs over the POSIX and MMAP backends with varying rank counts and options
IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 1000k
IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 100k
IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k

# print summary; the exit code is the number of failed tests (see END in test-lib.sh)
END

View File

@ -0,0 +1,18 @@
# IOR script reproducing a multi-node bug; driven by
# testing/complex-tests.sh with IOR_FAKE_TASK_PER_NODES=1 set.
# NOTE(review): no api/testFile are set here, so IOR's defaults apply —
# confirm against the option parser if this script is run standalone.
IOR START
reorderTasksConstant=1
repetitions=1
segmentCount=1
verbose=0
fsync=0
checkWrite=1
blockSize=16
setTimeStampSignature=1511817315
checkRead=0
readFile=1
filePerProc=0
writeFile=1
transferSize=16
intraTestBarriers=0
RUN
IOR STOP

34
testing/complex-tests.sh Executable file
View File

@ -0,0 +1,34 @@
#!/bin/bash
# Test script for more complex IOR functionality testing various patterns
# You can override the defaults by setting the variables before invoking the script, or simply set them here...
# Example: export IOR_EXTRA="-v -v -v"

ROOT=${0%/*}
source $ROOT/test-lib.sh

#stonewalling tests
IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
MDTEST 2 -I 20 -a DUMMY -W 1 -x stonewall-md.log -C
MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -T -v
MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -D -v

#shared tests
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k
# fixed typo: "-z-k" was missing the space between the two flags
IOR 2 -a POSIX -r -z -k -e -i1 -m -t 100k -b 100k

#test mutually exclusive options
IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -Z -i1 -m -t 100k -b 100k -d 0.1

# Now set the num tasks per node to 1:
export IOR_FAKE_TASK_PER_NODES=1
IOR 2 -a POSIX -f $ROOT/bug-multi-node.conf

END

10
testing/docker/README.md Normal file
View File

@ -0,0 +1,10 @@
# Docker enabled testing
This directory contains scripts to run the IOR benchmark testing in various Docker images.
This allows for testing several distributions on a developer machine.
To set up your test systems, run:
./prepare.sh
To run all tests for all variants, use:
./run-all-tests.sh

View File

@ -0,0 +1,5 @@
FROM centos:6
WORKDIR /data
# build toolchain plus both MPI stacks (mpich and openmpi) exercised by run-test.sh
RUN yum install -y mpich openmpi git pkg-config nano gcc bzip2 patch gcc-c++ make mpich-devel openmpi-devel
# sudo is needed by run-test.sh to drop privileges to "testuser"
RUN yum install -y sudo

View File

@ -0,0 +1 @@
../centos7/run-test.sh

View File

@ -0,0 +1,5 @@
FROM centos:7
WORKDIR /data
# build toolchain plus both MPI stacks (mpich and openmpi) exercised by run-test.sh
RUN yum install -y mpich openmpi git pkg-config nano gcc bzip2 patch gcc-c++ make mpich-devel openmpi-devel
# sudo is needed by run-test.sh to drop privileges to "testuser"
RUN yum install -y sudo

View File

@ -0,0 +1,35 @@
#!/bin/bash
# Configure and build IOR inside $1 (the build root) and run the basic
# test suite once per MPI flavor, as the unprivileged "testuser".
# Arguments: $1 = build directory, $2 = uid, $3 = gid of the invoking user.
BUILD="$1"

# create a user/group matching the caller so build artifacts are owned correctly
groupadd -g $3 testuser
useradd -r -u $2 -g testuser testuser

ERROR=0

function runTest(){
        P=$PATH                 # remember the original PATH so each flavor starts clean
        FLAVOR="$1"
        MPI_DIR="$2"
        echo $FLAVOR in $BUILD/$FLAVOR
        sudo -u testuser mkdir -p $BUILD/$FLAVOR
        pushd $BUILD/$FLAVOR > /dev/null

        # put the chosen MPI flavor first on PATH for configure/make
        export PATH=$MPI_DIR/bin:$PATH
        # out-of-tree configure + build; abort the whole script on build failure
        sudo -u testuser PATH=$PATH /data/configure || exit 1
        sudo -u testuser PATH=$PATH make || exit 1

        cd /data/
        sudo -u testuser PATH=$PATH IOR_BIN_DIR=$BUILD/$FLAVOR/src IOR_OUT=$BUILD/$FLAVOR/test ./testing/basic-tests.sh
        ERROR=$(($ERROR + $?))  # accumulate failed-test counts across flavors
        popd > /dev/null
        PATH=$P
}

runTest openmpi /usr/lib64/openmpi/
runTest mpich /usr/lib64/mpich

exit $ERROR

35
testing/docker/ceph/NOTES Normal file
View File

@ -0,0 +1,35 @@
Following are basic notes on how to deploy the 'ceph/demo' docker container. The 'ceph/demo' container bootstraps a complete Ceph cluster with all necessary daemons already running, providing a convenient environment for evaluating the correctness of the RADOS backend for IOR, in our case.
##########################
# Pull 'ceph/demo' image #
##########################
Run `docker pull ceph/demo` to download the image to your system.
################################
# Deploy 'ceph/demo' container #
################################
To deploy the Ceph cluster, execute the following command:
`docker run -it --net=host -v /etc/ceph:/etc/ceph -e MON_IP=10.0.0.1 -e CEPH_PUBLIC_NETWORK=10.0.0.0/24 ceph/demo`
The only necessary modification to the above command is to provide the correct network IP address for MON_IP and to provide the corresponding CIDR notation of this IP for CEPH_PUBLIC_NETWORK, as illustrated.
NOTE: The above command starts the docker container in interactive mode. Replace '-it' with '-d' to run in the background as a daemon.
###############################
# Run IOR against 'ceph/demo' #
###############################
With a Ceph cluster now deployed, running IOR against it is straightforward:
`./ior -a RADOS -- -u admin -c /etc/ceph/ceph.conf -p cephfs_data`
All command line arguments following the '--' are required.
-u is the Ceph username (e.g., admin)
-c is the Ceph config file (typically found in /etc/ceph/ceph.conf)
-p is the Ceph pool to perform I/O to (e.g., cephfs_data)
NOTE: Permissions of the various config files, keyrings, etc. inside of /etc/ceph may need to be modified to be readable by the user running IOR (e.g., `sudo chmod 644 /etc/ceph/*`). These various files are created internally within the docker container and may not be readable by other users.

24
testing/docker/prepare.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# Build a docker image (tagged hpc/ior:<dir>) for every subdirectory of
# testing/docker, so the full matrix can be run via run-all-tests.sh.

cd "${0%/*}"
if [[ ! -e run-all-tests.sh ]] ; then
        echo "Error, this script must run from the ./testing/docker directory"
        exit 1
fi

echo "Checking docker"
docker ps
if [ $? != 0 ] ; then
        echo "Error, cannot run docker commands"
        groups |grep docker || echo "You are not in the docker group !"
        exit 1
fi

echo "Building docker containers"
# "find ." instead of bare "find": POSIX find requires an explicit path
# operand (GNU find merely defaults to "."); output is identical.
# cut -b 3- strips the leading "./"; grep drops the entry for "." itself.
for IMAGE in $(find . -type d | cut -b 3- |grep -v "^$") ; do
        docker build -t hpc/ior:$IMAGE $IMAGE
        if [ $? != 0 ] ; then
                echo "Error building image $IMAGE"
                exit 1
        fi
done

52
testing/docker/run-all-tests.sh Executable file
View File

@ -0,0 +1,52 @@
#!/bin/bash
# This script runs the testscript for all supported docker images
# (one image per subdirectory, previously built by prepare.sh).

cd "${0%/*}"
if [[ ! -e run-all-tests.sh ]] ; then
        echo "Error, this script must run from the ./testing/docker directory"
        exit 1
fi

# per-image build/output directories are created under the repo root
TARGET=../../build-docker
mkdir -p $TARGET

ARGS="$@"
# group id of the invoking user, forwarded into the container so the
# test user there owns the build artifacts
GID=$(id -g $USER)
OPT="-it --rm -v $PWD/../../:/data/:z"
ERROR=0
VERBOSE=0

set -- `getopt -u -l "clean" -l verbose -o "" -- "$ARGS"`
test $# -lt 1 && exit 1
while test $# -gt 0
do
        case "$1" in
                --clean) echo "Cleaning build dirs!"; rm -rf $TARGET/* ;;
                --verbose) VERBOSE=1 ;;
                --) ;;
                *) echo "Unknown option $1"; exit 1;;
        esac
        shift
done

# "find ." instead of bare "find": POSIX find requires an explicit path
# operand (GNU find merely defaults to "."); output is identical.
for IMAGE in $(find . -type d | cut -b 3- |grep -v "^$") ; do
        echo "RUNNING $IMAGE"
        mkdir -p $TARGET/$IMAGE
        WHAT="docker run $OPT -h $IMAGE hpc/ior:$IMAGE /data/testing/docker/$IMAGE/run-test.sh /data/build-docker/$IMAGE $UID $GID"
        if [[ $VERBOSE == 1 ]] ; then
                echo $WHAT
        fi
        $WHAT 2>$TARGET/$IMAGE/LastTest.log 1>&2
        ERR=$?
        ERROR=$(($ERROR+$ERR))
        if [[ $ERR != 0 ]]; then
                echo $WHAT
                echo "Error, see $TARGET/$IMAGE/LastTest.log"
        fi
done

if [[ $ERROR != 0 ]] ; then
        echo "Errors occured!"
else
        echo "OK: all tests passed!"
fi

View File

@ -0,0 +1,7 @@
FROM ubuntu:14.04
WORKDIR /data
RUN apt-get update
# compilers and both MPI stacks used by run-test.sh
RUN apt-get install -y libopenmpi-dev openmpi-bin mpich git pkg-config gcc-4.7 nano make
# sudo for the privilege drop in run-test.sh; MPI-enabled HDF5 for the
# --with-hdf5 configure run in that script
RUN apt-get install -y sudo hdf5-tools libhdf5-mpi-dev

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Configure and build IOR (with HDF5 support) in $1 and run the basic
# test suite once per MPI flavor, as the unprivileged "testuser".
# Arguments: $1 = build directory, $2 = uid, $3 = gid of the invoking user.
BUILD="$1"

# create a user/group matching the caller so build artifacts are owned correctly
groupadd -g $3 testuser
useradd -r -u $2 -g testuser testuser

ERROR=0

function runTest(){
        FLAVOR="$1"
        MPI_DIR="$2"
        echo $FLAVOR in $BUILD/$FLAVOR

        # select the MPI flavor system-wide via the alternatives mechanism
        update-alternatives --set mpi $MPI_DIR

        sudo -u testuser mkdir -p $BUILD/$FLAVOR
        pushd $BUILD/$FLAVOR > /dev/null

        # out-of-tree build against the distribution's parallel HDF5;
        # abort the whole script on configure/build failure
        sudo -u testuser /data/configure --with-hdf5 CFLAGS=-I/usr/lib/x86_64-linux-gnu/hdf5/openmpi/include LDFLAGS=-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi/lib|| exit 1
        sudo -u testuser make V=1 || exit 1

        #define the alias
        ln -sf $(which mpiexec.$FLAVOR) /usr/bin/mpiexec

        cd /data/
        sudo -u testuser IOR_BIN_DIR=$BUILD/$FLAVOR/src IOR_OUT=$BUILD/$FLAVOR/test ./testing/basic-tests.sh
        ERROR=$(($ERROR + $?))  # accumulate failed-test counts across flavors
        popd > /dev/null
}

runTest openmpi /usr/lib/openmpi/include
runTest mpich /usr/include/mpich

exit $ERROR

View File

@ -0,0 +1,6 @@
FROM ubuntu:16.04
WORKDIR /data
RUN apt-get update
# compilers and both MPI stacks used by run-test.sh
RUN apt-get install -y libopenmpi-dev openmpi-bin mpich git pkg-config gcc-5 gcc-4.8 nano
# sudo is needed by run-test.sh to drop privileges to "testuser"
RUN apt-get install -y sudo

View File

@ -0,0 +1,35 @@
#!/bin/bash
# Configure and build IOR in $1 and run the basic test suite once per
# MPI flavor, as the unprivileged "testuser".
# Arguments: $1 = build directory, $2 = uid, $3 = gid of the invoking user.
BUILD="$1"

# create a user/group matching the caller so build artifacts are owned correctly
groupadd -g $3 testuser
useradd -r -u $2 -g testuser testuser

ERROR=0

function runTest(){
        FLAVOR="$1"
        MPI_DIR="$2"
        # launcher command consumed by testing/test-lib.sh
        export IOR_MPIRUN="$3"
        echo $FLAVOR in $BUILD/$FLAVOR

        # select the MPI flavor system-wide via the alternatives mechanism
        update-alternatives --set mpi $MPI_DIR

        sudo -u testuser mkdir -p $BUILD/$FLAVOR
        pushd $BUILD/$FLAVOR > /dev/null

        # out-of-tree configure + build; abort the whole script on failure
        sudo -u testuser /data/configure || exit 1
        sudo -u testuser make || exit 1

        #define the alias
        ln -sf $(which mpiexec.$FLAVOR) /usr/bin/mpiexec

        cd /data/
        sudo -u testuser IOR_BIN_DIR=$BUILD/$FLAVOR/src IOR_OUT=$BUILD/$FLAVOR/test ./testing/basic-tests.sh
        ERROR=$(($ERROR + $?))  # accumulate failed-test counts across flavors
        popd > /dev/null
}

export MPI_ARGS=""
runTest openmpi /usr/lib/openmpi/include "mpiexec -n"
runTest mpich /usr/include/mpich "mpiexec -n"

exit $ERROR

View File

@ -1,2 +0,0 @@
IOR_HINT__MPI__unrecognizedHint=true
IOR_HINT__MPI__IBM_largeblock_io=true

74
testing/test-lib.sh Normal file
View File

@ -0,0 +1,74 @@
# Shared library for the IOR/mdtest shell test suites (sourced, not executed).
# It is kept as simple as possible and outputs the parameters used such that any test can be rerun easily.
# You can override the defaults by setting the variables before invoking the script, or simply set them here...
# Example: export IOR_EXTRA="-v -v -v"

IOR_MPIRUN=${IOR_MPIRUN:-mpiexec -np}   # MPI launcher prefix; the rank count is appended
IOR_BIN_DIR=${IOR_BIN_DIR:-./build/src} # location of the ior/mdtest binaries
IOR_OUT=${IOR_OUT:-./build/test}        # per-test output logs go here
IOR_EXTRA=${IOR_EXTRA:-} # Add global options like verbosity
MDTEST_EXTRA=${MDTEST_EXTRA:-}

################################################################################
mkdir -p ${IOR_OUT}
mkdir -p /dev/shm/mdest

## Sanity check
if [[ ! -e ${IOR_OUT} ]]; then
        echo "Could not create output dir ${IOR_OUT}"
        exit 1
fi

if [[ ! -e ${IOR_BIN_DIR}/ior ]]; then
        echo "IOR Executable \"${IOR_BIN_DIR}/ior\" does not exist! Call me from the root directory!"
        exit 1
fi

if [[ ! -e ${IOR_BIN_DIR}/mdtest ]]; then
        echo "MDTest Executable \"${IOR_BIN_DIR}/mdtest\" does not exist! Call me from the root directory!"
        exit 1
fi

ERRORS=0 # Number of errors detected while running
I=0      # sequence number of the next test; names its log file

# Internal helper shared by IOR() and MDTEST(): run one command line,
# log its output to ${IOR_OUT}/$I, print OK/ERR followed by the exact
# command (so any test can be rerun by hand), and count failures.
function RUN_TEST_CASE(){
        WHAT="$1"
        $WHAT 1>${IOR_OUT}/$I 2>&1
        if [[ $? != 0 ]]; then
                echo -n "ERR"
                ERRORS=$(($ERRORS + 1))
        else
                echo -n "OK "
        fi
        echo " $WHAT"
        I=$((${I}+1))
}

# Run ior with $1 ranks; all remaining arguments are passed through.
function IOR(){
        RANKS=$1
        shift
        RUN_TEST_CASE "${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/ior ${@} ${IOR_EXTRA} -o /dev/shm/ior"
}

# Run mdtest with $1 ranks; all remaining arguments are passed through.
function MDTEST(){
        RANKS=$1
        shift
        RUN_TEST_CASE "${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} ${MDTEST_EXTRA} -d /dev/shm/mdest"
}

# Print the summary and exit with the number of failed tests.
function END(){
        if [[ ${ERRORS} == 0 ]] ; then
                echo "PASSED"
        else
                echo "Error, check the output files!"
        fi
        exit ${ERRORS}
}

View File

@ -1,28 +0,0 @@
# This tests the checks of read and write for correctness
# Run with multiple processes, at least 3
# You may set the environment variable IOR_FAKE_TASK_PER_NODES=3 and run it on one node
# Example: IOR_FAKE_TASK_PER_NODES=3 mpiexec -n 3 ./src/ior -f testing/timestamp.cfg
IOR START
reorderTasksConstant=1
repetitions=1
storeFileOffset=0
segmentCount=1
verbose=0
fsync=0
checkWrite=1
blockSize=16
setTimeStampSignature=1511817315
checkRead=1
readFile=1
filePerProc=0
writeFile=1
api=POSIX
transferSize=16
intraTestBarriers=0
testFile=/tmp/test
RUN
storeFileOffset=1
RUN
IOR STOP