Merge remote branch 'daos-stack/daos'

Signed-off-by: Mohamad Chaarawi <mohamad.chaarawi@intel.com>

Conflicts:
	configure.ac
	src/Makefile.am
	src/aiori.c
	src/aiori.h
	src/ior.c
	src/mdtest.c
	src/parse_options.c
master
Mohamad Chaarawi 2018-08-29 21:14:19 +00:00
commit 46ff4e26be
64 changed files with 6689 additions and 6148 deletions

9
.gitignore vendored
View File

@ -1,3 +1,4 @@
tags
Makefile
Makefile.in
aclocal.m4
@ -11,11 +12,13 @@ config/config.sub
config/depcomp
config/install-sh
config/missing
config/test-driver
configure
contrib/.deps/
contrib/cbif
contrib/Makefile
contrib/Makefile.in
contrib/cbif
doc/Makefile
doc/Makefile.in
src/.deps/
@ -32,7 +35,13 @@ contrib/cbif.o
src/*.o
src/*.i
src/*.s
src/*.a
src/ior
src/mdtest
src/testlib
src/test/.deps/
src/test/.dirstamp
src/test/lib.o
doc/doxygen/build
doc/sphinx/_*/

View File

@ -29,7 +29,7 @@ install:
# GPFS
# NOTE: GPFS needs a license and is therefore not testable with Travis.
before_script: ./bootstrap
script: ./configure --with-hdf5 && make
script: mkdir build && cd build && ../configure --with-hdf5 && make && cd .. && ./testing/basic-tests.sh
# notifications:

View File

@ -1,4 +1,5 @@
MAKEFLAGS = --no-print-directory
SUBDIRS = src doc contrib
EXTRA_DIST = META COPYRIGHT README ChangeLog
EXTRA_DIST = META COPYRIGHT README.md ChangeLog
# ACLOCAL_AMFLAGS needed for autoconf < 2.69
ACLOCAL_AMFLAGS = -I config

View File

@ -1,9 +1,9 @@
# HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=master)](https://travis-ci.org/hpc/ior)
This repo now contains both IOR and mdtest.
See also NOTES.txt
[See also NOTES.txt]
Building
--------
# Building
0. If "configure" is missing from the top level directory, you
probably retrieved this code directly from the repository.
@ -21,3 +21,11 @@ Building
3. Optionally, run "make install". The installation prefix
can be changed as an option to the "configure" script.
# Testing
Run "make check" to invoke the unit test framework of Automake.
* To run basic functionality tests that we use for continuous integration, see ./testing/
* There are docker scripts provided to test various distributions at once.
* See ./testing/docker/

48
README_DAOS Normal file
View File

@ -0,0 +1,48 @@
Building with DAOS API
----------------------
At step 1 above, one must specify "--with-daos". If the DAOS
headers and libraries are not installed at respective system
default locations, then one may also need to set CPPFLAGS and
LDFLAGS accordingly.
Running with DAOS API
---------------------
One must specify an existing pool using "-O
daospool=<pool_uuid>". IOR must be launched in a way that
attaches the IOR process group to the DAOS server process group.
One must also specify a container UUID using "-o
<container_uuid>". If the "-E" option is given, then this UUID
shall denote an existing container created by a "matching" IOR
run. Otherwise, IOR will create a new container with this UUID.
In the latter case, one may use uuidgen(1) to generate the UUID
of the new container.
When benchmarking write performance, one likely does not want
"-W", which causes the write phase to do one additional memory
copy for every I/O. This is due to IOR's assumption that when a
DAOS_Xfer() call returns the buffer may be released. Therefore,
random data is written when "-W" is absent, while data is copied
from IOR buffers when "-W" is present.
See doc/USER_GUIDE for all options and directives. Note that not
all combinations of options are supported.
Examples that should work include:
- "ior -a DAOS -w -W -o <container_uuid> -O
daospool=<pool_uuid>,daospoolsvc=<svc_ranks>" writes into a new container
and verifies the data, using default daosRecordSize, transferSize,
daosStripeSize, blockSize, daosAios, etc.
- "ior -a DAOS -w -W -r -R -o <container_uuid> -b 1g -t 4m -C -O
daospool=<pool_uuid>,daospoolsvc=<svc_ranks>,daosrecordsize=1m,
daosstripesize=4m, daosstripecount=256,daosaios=8" does all IOR tests and
shifts ranks during checkWrite and checkRead.
- "ior -a DAOS -w -r -o <container_uuid> -b 8g -t 1m -C -O
daospool=<pool_uuid>,daospoolsvc=<svc_ranks>,daosrecordsize=1m,daosstripesize=4m,
daosstripecount=256,daosaios=8" may be a base to be tuned for performance
benchmarking.

View File

@ -15,7 +15,8 @@ AC_CONFIG_HEADER([src/config.h])
AC_CANONICAL_HOST
# Automake support
AM_INIT_AUTOMAKE([check-news dist-bzip2 gnu no-define])
AM_INIT_AUTOMAKE([check-news dist-bzip2 gnu no-define foreign subdir-objects])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AM_MAINTAINER_MODE
# Checks for programs
@ -25,6 +26,7 @@ AX_PROG_CC_MPI(,,[
AC_MSG_FAILURE([MPI compiler requested, but couldn't use MPI.])
])
AC_PROG_RANLIB
# No reason not to require modern C at this point
AC_PROG_CC_C99
@ -76,6 +78,17 @@ AS_IF([test "x$with_lustre" != xno], [
])
])
# IME (DDN's Infinite Memory Engine) support
AC_ARG_WITH([ime],
[AS_HELP_STRING([--with-ime],
[support IO with IME backend @<:@default=no@:>@])],
[],
[with_ime=no])
AM_CONDITIONAL([USE_IME_AIORI], [test x$with_ime = xyes])
AM_COND_IF([USE_IME_AIORI],[
AC_DEFINE([USE_IME_AIORI], [], [Build IME backend AIORI])
])
# HDF5 support
AC_ARG_WITH([hdf5],
[AS_HELP_STRING([--with-hdf5],
@ -142,22 +155,21 @@ AM_COND_IF([USE_POSIX_AIORI],[
AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI])
])
AC_ARG_WITH([cart],
[AS_HELP_STRING([--with-cart],
[Build DAOS ROMIO driver[default=no]])],,
[with_cart=no])
# RADOS support
AC_ARG_WITH([rados],
[AS_HELP_STRING([--with-rados],
[support IO with librados backend @<:@default=no@:>@])],
[],
[with_rados=no])
AM_CONDITIONAL([USE_RADOS_AIORI], [test x$with_rados = xyes])
AM_COND_IF([USE_RADOS_AIORI],[
AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI])
])
AS_IF([test "x$with_cart" != xno],
CART="yes"
LDFLAGS="$LDFLAGS -L$with_cart/lib"
CPPFLAGS="$CPPFLAGS -I$with_cart/include/"
AC_CHECK_HEADERS(gurt/common.h,, [unset CART])
AC_CHECK_LIB([gurt], [d_rank_list_alloc],, [unset CART]))
# DFS IO support
# DAOS Backends (DAOS and DFS) IO support
AC_ARG_WITH([daos],
[AS_HELP_STRING([--with-daos],
[support IO with DFS backend @<:@default=no@:>@])],
[support IO with DAOS backends @<:@default=no@:>@])],
[],
[with_daos=no])
@ -171,11 +183,9 @@ AS_IF([test "x$with_daos" != xno],
AC_CHECK_LIB([daos], [daos_init],, [unset DAOS])
AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS]))
AS_IF([test "x$CART" != xyes], [unset DAOS])
AM_CONDITIONAL([USE_DFS_AIORI], [test x$DAOS = xyes])
AM_COND_IF([USE_DFS_AIORI],[
AC_DEFINE([USE_DFS_AIORI], [], [Build DFS backend AIORI])
AM_CONDITIONAL([USE_DAOS_AIORI], [test x$DAOS = xyes])
AM_COND_IF([USE_DAOS_AIORI],[
AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backends AIORI])
])
# aws4c is needed for the S3 backend (see --with-S3, below).

View File

@ -23,10 +23,10 @@ Index:
*******************
* 1. DESCRIPTION *
*******************
IOR can be used for testing performance of parallel file systems using various
interfaces and access patterns. IOR uses MPI for process synchronization.
IOR version 2 is a complete rewrite of the original IOR (Interleaved-Or-Random)
version 1 code.
IOR can be used for testing performance of parallel file systems using various
interfaces and access patterns. IOR uses MPI for process synchronization.
IOR version 2 is a complete rewrite of the original IOR (Interleaved-Or-Random)
version 1 code.
******************
@ -39,7 +39,7 @@ Two ways to run IOR:
E.g., to execute: IOR -w -r -o filename
This performs a write and a read to the file 'filename'.
* Command line with scripts -- any arguments on the command line will
* Command line with scripts -- any arguments on the command line will
establish the default for the test run, but a script may be used in
conjunction with this for varying specific tests during an execution of the
code.
@ -47,12 +47,22 @@ Two ways to run IOR:
E.g., to execute: IOR -W -f script
This defaults all tests in 'script' to use write data checking.
* The command line supports specifying additional parameters for the chosen API.
For example, username and password for the storage.
Available options are listed in the help text after selecting the API when running with -h.
For example, 'IOR -a DUMMY -h' shows the supported options for the DUMMY backend.
The options for the backend must be specified last and are separated by
two dashes '--'. Example: 'IOR -a DUMMY -- -c 1000' defines a delay for the
file creation for the plugin. Currently, it is not possible to set these
backend options using a command line script (-f option).
**************
* 3. OPTIONS *
**************
These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'.
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
-a S api -- API for I/O, e.g., POSIX
-A N refNum -- user reference number to include in long summary
-b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)
-B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers
@ -121,11 +131,11 @@ GENERAL:
* refNum - user supplied reference number, included in
long summary [0]
* api - must be set to one of POSIX, MPIIO, HDF5, HDFS, S3,
S3_EMC, or NCMPI, depending on test [POSIX]
* api - must be set to one of POSIX, MPIIO, HDF5, HDFS, IME,
S3, S3_EMC, or NCMPI, depending on test [POSIX]
* testFile - name of the output file [testFile]
NOTE: with filePerProc set, the tasks can round
NOTE: with filePerProc set, the tasks can round
robin across multiple file names '-o S@S@S'
* hintsFileName - name of the hints file []
@ -267,7 +277,7 @@ GENERAL:
data, this option measures the amount of
data moved in a fixed amount of time. The
objective is to prevent tasks slow to
complete from skewing the performance.
complete from skewing the performance.
* setting this to zero (0) unsets this option
* this option is incompatible w/data checking
@ -280,7 +290,8 @@ GENERAL:
* summaryAlways - Always print the long summary for each test.
Useful for long runs that may be interrupted, preventing
the final long summary for ALL tests to be printed.
* summaryFile=File - Output the summary to the file instead on stdout/stderr.
* summaryFormat=FMT - Choose the output format -- default, JSON, CSV
POSIX-ONLY:
===========
@ -319,7 +330,7 @@ HDF5-ONLY:
NOTE: default IOR creates a dataset the size of
numTasks * blockSize to be accessed by all
tasks
* noFill - no pre-filling of data in HDF5 file creation [0=FALSE]
* setAlignment - HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g) [1]
@ -356,6 +367,43 @@ BeeGFS-SPECIFIC (POSIX only):
* beegfsChunkSize - set the striping chunk size. Must be a power of two,
and greater than 64kiB, (e.g.: 256k, 1M, ...)
DAOS-ONLY:
==========
* daosGroup - group name [NULL]
* daosPool - UUID of the pool []
* daosPoolSvc - pool service replica ranks (e.g., 1:2:3:4:5) []
* daosRecordSize - size (in bytes) of an akey record [256k]
NOTE: must divide transferSize
* daosStripeSize - size (in bytes) of a chunk in a stripe [512k]
NOTE: must be a multiple of transferSize
* daosStripeCount - number of stripes [64 * number of targets]
NOTE: i.e., number of dkeys
* daosStripeMax - max length of each stripe [0]
NOTE: must be a multiple of daosStripeSize
NOTE: for write testing with small storage
NOTE: offsets in a stripe larger than daosStripeMax
are mapped to offset % daosStripeMax
* daosAios - max number of asynchronous I/Os [1]
* daosWriteOnly - skip flushing and committing [0=FALSE]
* daosEpoch - epoch to read or write [0]
NOTE: 0 denotes reading GHCE or writing GHCE + 1
* daosWait - epoch to wait when opening the container [0]
* daosKill - kill a target in the middle of the test [0]
NOTE: must also specify daosObjectClass=repl
* daosObjectClass - object class (tiny, small, large, repl, repl_max)
[large]
***********************
* 5. VERBOSITY LEVELS *
@ -395,7 +443,7 @@ various application codes. Details are included in each script as necessary.
An example of a script:
===============> start script <===============
IOR START
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
api=[POSIX|MPIIO|HDF5|HDFS|IME|S3|S3_EMC|NCMPI]
testFile=testFile
hintsFileName=hintsFile
repetitions=8
@ -483,8 +531,8 @@ zip, gzip, and bzip.
2) gzip: For gzipped files, a transfer size of 1k is sufficient.
3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed).
To avoid compression a transfer size of greater than the bzip block size is required
3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed).
To avoid compression a transfer size of greater than the bzip block size is required
(default = 900KB). I suggest a transfer size of greater than 1MB to avoid bzip2 compression.
Be aware of the block size your compression algorithm will look at, and adjust the transfer size
@ -508,9 +556,9 @@ HOW DO I PERFORM MULTIPLE DATA CHECKS ON AN EXISTING FILE?
and -r implied using both. This semantic has been subsequently altered to be
omitting -w, -r, -W, and -R implied using both -w and -r.)
If you're running new tests to create a file and want repeat data checking on
this file multiple times, there is an undocumented option for this. It's -O
multiReRead=1, and you'd need to have an IOR version compiled with the
If you're running new tests to create a file and want repeat data checking on
this file multiple times, there is an undocumented option for this. It's -O
multiReRead=1, and you'd need to have an IOR version compiled with the
USE_UNDOC_OPT=1 (in iordef.h). The command line would look like this:
IOR -k -E -w -W -i 5 -o file -O multiReRead=1
@ -586,7 +634,7 @@ HOW DO I USE STONEWALLING?
actually reading the same amount from disk in the allotted time, but they
are also reading the cached data from the previous test each time to get the
increased performance. Setting -D high enough so that the cache is
overfilled will prevent this.
overfilled will prevent this.
HOW DO I BYPASS CACHING WHEN READING BACK A FILE I'VE JUST WRITTEN?

View File

@ -17,7 +17,7 @@ normal parameters override each other, so the last one executed.
Command line options
--------------------
These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'.
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS]
-A N refNum -- user reference number to include in long summary
-b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)
-B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers

View File

@ -25,7 +25,7 @@ Syntax:
An example of a script: ::
IOR START
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI|RADOS]
testFile=testFile
hintsFileName=hintsFile
repetitions=8

View File

@ -1,24 +0,0 @@
IOR START
testFile = /tmp/work/swh13/testfile
filePerProc=1
api=POSIX
repetitions=2
verbose=1
reordertasksrandom=1
reordertasksrandomseed=-113
RUN
verbose = 2
repetitions=1#more foo
reordertasksconstant=1
#foobar
blockSize=10m
transferSize=128k
randomoffset=1
RUN
verbose = 0
#blockSize=
transferSize=64k
RUN
IOR STOP

View File

@ -1,93 +0,0 @@
#!/bin/bash -x
#PBS -N IOR
#PBS -j oe
#PBS -q batch
#PBS -A stf006
#PBS -V
#PBS -l walltime=0:60:00,size=8
VERS=IOR-2.10.1.ornl.16
WORK=/tmp/work/${USER}
echo $PBS_O_WORKDIR
cd /ccs/proj/quadcore
tar -czvf ${WORK}/${VERS}.tar.gz ./${VERS}
cd ${WORK}
rm -fr ./${VERS}
tar -xzvf ${WORK}/${VERS}.tar.gz
cd ${WORK}/${VERS}
gmake clean
gmake mpiio
EXEC=${WORK}/${VERS}/src/C/IOR
IODIR=/tmp/work/swh13/test_files_x
cd ${WORK}/${VERS}/tests
which mpirun
rm -fr $IODIR
mkdir $IODIR
let "w=128"
let "s=1024*1024"
let "i=3"
MPIRUN="aprun -n"
RESULTS="."
let "tid=1"
XFERS="1048576 262144 32768 4096 1024"
XFERS="262144"
for xfer in `echo $XFERS`
do
let "n=8"
until [ "$n" -gt 8 ]
do
let "m=$n/4"
#TESTS="POSIX MPIIO HDF5 NCMPI"
TESTS="POSIX MPIIO"
for test in `echo $TESTS`
do
runid="p$n.$xfer.${test}"
date
V=" "
BLOCKS="1 10 1 10 1 10"
for blocks in `echo $BLOCKS`
do
let "block=${xfer} * ${blocks}"
#fileperproc tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X 13 ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X -13 ${V} -F -o $IODIR/testwrite.${runid} -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#shared tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#test mutually exclusive options
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -Z -C ${V} -o $IODIR/testwrite.${runid} -i${i} -m -t ${xfer} -b ${block} -d 0.0
let "tid=$tid + 17"
V=$V" -v"
done #blocks
date
done #test
let "n = $n * 2"
done #n
done #xfer
exit

View File

@ -1,93 +0,0 @@
#!/bin/bash -x
#PBS -N IOR
#PBS -j oe
#PBS -q batch
#PBS -A stf006
#PBS -V
#PBS -l walltime=0:60:00,nodes=8:ppn=2
VERS=IOR-2.10.1
WORK=/tmp/work/${USER}
echo $PBS_O_WORKDIR
cd /ccs/proj/quadcore
tar -czvf ${WORK}/${VERS}.tar.gz ./${VERS}
cd ${WORK}
rm -fr ./${VERS}
tar -xzvf ${WORK}/${VERS}.tar.gz
cd ${WORK}/${VERS}
gmake clean
gmake mpiio
EXEC=${WORK}/${VERS}/src/C/IOR
IODIR=/tmp/work/swh13/test_files_x
cd ${WORK}/${VERS}/tests
which mpirun
rm -fr $IODIR
mkdir $IODIR
let "w=128"
let "s=1024*1024"
let "i=3"
MPIRUN="mpirun -np"
RESULTS="."
let "tid=1"
XFERS="1048576 262144 32768 4096 1024"
XFERS="262144"
for xfer in `echo $XFERS`
do
let "n=8"
until [ "$n" -gt 8 ]
do
let "m=$n/4"
#TESTS="POSIX MPIIO HDF5 NCMPI"
TESTS="POSIX MPIIO"
for test in `echo $TESTS`
do
runid="p$n.$xfer.${test}"
date
V=" "
BLOCKS="1 10 1 10 1 10"
for blocks in `echo $BLOCKS`
do
let "block=${xfer} * ${blocks}"
#fileperproc tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X 13 ${V} -F -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z -Q $m -X -13 ${V} -F -o $IODIR/testwrite.${runid} -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#shared tests
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w -z ${V} -o $IODIR/testwrite.${runid} -Y -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -w ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
#test mutually exclusive options
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -C ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -z -Z ${V} -o $IODIR/testwrite.${runid} -k -e -i${i} -m -t ${xfer} -b ${block} -d 0.1
${MPIRUN} $n ${EXEC} -A ${tid} -a ${test} -r -Z -C ${V} -o $IODIR/testwrite.${runid} -i${i} -m -t ${xfer} -b ${block} -d 0.0
let "tid=$tid + 17"
V=$V" -v"
done #blocks
date
done #test
let "n = $n * 2"
done #n
done #xfer
exit

View File

@ -1,23 +1,28 @@
SUBDIRS = .
bin_PROGRAMS = ior mdtest
if USE_CAPS
bin_PROGRAMS += IOR MDTEST
endif
noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h
noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h
extraSOURCES = aiori.c
lib_LIBRARIES = libaiori.a
libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c
extraSOURCES = aiori.c aiori-DUMMY.c
extraLDADD =
extraLDFLAGS =
extraCPPFLAGS =
ior_SOURCES = ior.c utilities.c parse_options.c
ior_SOURCES = ior-main.c
ior_LDFLAGS =
ior_LDADD =
ior_LDADD = libaiori.a
ior_CPPFLAGS =
mdtest_SOURCES = mdtest.c utilities.c
mdtest_SOURCES = mdtest-main.c
mdtest_LDFLAGS =
mdtest_LDADD =
mdtest_LDADD = libaiori.a
mdtest_CPPFLAGS =
if USE_HDFS_AIORI
@ -36,6 +41,13 @@ extraSOURCES += aiori-HDF5.c
extraLDADD += -lhdf5 -lz
endif
if USE_IME_AIORI
extraSOURCES += aiori-IME.c
extraCPPFLAGS += -I/opt/ddn/ime/include
extraLDFLAGS += -L/opt/ddn/ime/lib
extraLDADD += -lim_client
endif
if USE_MPIIO_AIORI
extraSOURCES += aiori-MPIIO.c
endif
@ -53,10 +65,14 @@ if USE_POSIX_AIORI
extraSOURCES += aiori-POSIX.c
endif
if USE_DFS_AIORI
extraSOURCES += aiori-DFS.c
if USE_RADOS_AIORI
extraSOURCES += aiori-RADOS.c
extraLDADD += -lrados
endif
if USE_DAOS_AIORI
extraSOURCES += aiori-DAOS.c aiori-DFS.c list.h
endif
if USE_S3_AIORI
extraSOURCES += aiori-S3.c
@ -70,21 +86,33 @@ extraLDADD += -laws4c
extraLDADD += -laws4c_extra
endif
ior_SOURCES += $(extraSOURCES)
ior_LDFLAGS += $(extraLDFLAGS)
ior_LDADD += $(extraLDADD)
ior_SOURCES += $(extraSOURCES)
ior_LDFLAGS += $(extraLDFLAGS)
ior_LDADD += $(extraLDADD)
ior_CPPFLAGS += $(extraCPPFLAGS)
mdtest_SOURCES += $(extraSOURCES)
mdtest_LDFLAGS += $(extraLDFLAGS)
mdtest_LDADD += $(extraLDADD)
mdtest_SOURCES += $(extraSOURCES)
mdtest_LDFLAGS += $(extraLDFLAGS)
mdtest_LDADD += $(extraLDADD)
mdtest_CPPFLAGS += $(extraCPPFLAGS)
IOR_SOURCES = $(ior_SOURCES)
IOR_LDFLAGS = $(ior_LDFLAGS)
IOR_LDADD = $(ior_LDADD)
IOT_CPPFLAGS = $(ior_CPPFLAGS)
IOR_CPPFLAGS = $(ior_CPPFLAGS)
MDTEST_SOURCES = $(mdtest_SOURCES)
MDTEST_LDFLAGS = $(mdtest_LDFLAGS)
MDTEST_LDADD = $(mdtest_LDADD)
MDTEST_CPPFLAGS = $(mdtest_CPPFLAGS)
libaiori_a_SOURCES += $(extraSOURCES)
libaiori_a_CPPFLAGS = $(extraCPPFLAGS)
TESTS = testlib
bin_PROGRAMS += testlib
testlib_SOURCES = ./test/lib.c
testlib_LDFLAGS = $(extraLDFLAGS)
testlib_LDADD = libaiori.a $(extraLDADD)

939
src/aiori-DAOS.c Normal file
View File

@ -0,0 +1,939 @@
/*
* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/*
* SPECIAL LICENSE RIGHTS-OPEN SOURCE SOFTWARE
* The Government's rights to use, modify, reproduce, release, perform, display,
* or disclose this software are subject to the terms of Contract No. B599860,
* and the terms of the GNU General Public License version 2.
* Any reproduction of computer software, computer software documentation, or
* portions thereof marked with this legend must also reproduce the markings.
*/
/*
* Copyright (c) 2013, 2016 Intel Corporation.
*/
/*
* This file implements the abstract I/O interface for DAOS.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdint.h>
#include <assert.h>
#include <unistd.h>
#include <sys/types.h>
#include <libgen.h>
#include <stdbool.h>
#include <daos.h>
#include <daos_types.h>
#include "ior.h"
#include "aiori.h"
#include "iordef.h"
#include "list.h"
/************************** O P T I O N S *****************************/
struct daos_options{
char *daosPool;
char *daosPoolSvc;
char *daosGroup;
int daosRecordSize;
int daosStripeSize;
uint64_t daosStripeCount;
uint64_t daosStripeMax; /* max length of a stripe */
int daosAios; /* max number of concurrent async I/Os */
int daosWriteOnly; /* write only, no flush and commit */
uint64_t daosEpoch; /* epoch to access */
uint64_t daosWait; /* epoch to wait for before reading */
int daosKill; /* kill a target while running IOR */
char *daosObjectClass; /* object class */
};
static struct daos_options o = {
.daosPool = NULL,
.daosPoolSvc = NULL,
.daosGroup = NULL,
.daosRecordSize = 262144,
.daosStripeSize = 524288,
.daosStripeCount = -1,
.daosStripeMax = 0,
.daosAios = 1,
.daosWriteOnly = 0,
.daosEpoch = 0,
.daosWait = 0,
.daosKill = 0,
.daosObjectClass = NULL,
};
static option_help options [] = {
{'p', "daosPool", "pool uuid", OPTION_REQUIRED_ARGUMENT, 's', &o.daosPool},
{'v', "daosPoolSvc", "pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', &o.daosPoolSvc},
{'g', "daosGroup", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosGroup},
{'r', "daosRecordSize", "Record Size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosRecordSize},
{'s', "daosStripeSize", "Stripe Size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosStripeSize},
{'c', "daosStripeCount", "Stripe Count", OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeCount},
{'m', "daosStripeMax", "Max Stripe",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeMax},
{'a', "daosAios", "Concurrent Async IOs",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosAios},
{'w', "daosWriteOnly", "Write Only, no commit",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosWriteOnly},
{'e', "daosEpoch", "Epoch Number to Access",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosEpoch},
{'t', "daosWait", "Epoch to wait for before read",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosWait},
{'k', "daosKill", "Kill target while running",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosKill},
{'o', "daosObjectClass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosObjectClass},
LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static void DAOS_Init(IOR_param_t *);
static void DAOS_Fini(IOR_param_t *);
static void *DAOS_Create(char *, IOR_param_t *);
static void *DAOS_Open(char *, IOR_param_t *);
static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void DAOS_Close(void *, IOR_param_t *);
static void DAOS_Delete(char *, IOR_param_t *);
static char* DAOS_GetVersion();
static void DAOS_Fsync(void *, IOR_param_t *);
static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t daos_aiori = {
.name = "DAOS",
.create = DAOS_Create,
.open = DAOS_Open,
.xfer = DAOS_Xfer,
.close = DAOS_Close,
.delete = DAOS_Delete,
.get_version = DAOS_GetVersion,
.fsync = DAOS_Fsync,
.get_file_size = DAOS_GetFileSize,
.initialize = DAOS_Init,
.finalize = DAOS_Fini,
};
enum handleType {
POOL_HANDLE,
CONTAINER_HANDLE
};
struct fileDescriptor {
daos_handle_t container;
daos_cont_info_t containerInfo;
daos_handle_t object;
daos_epoch_t epoch;
};
struct aio {
cfs_list_t a_list;
char a_dkeyBuf[32];
daos_key_t a_dkey;
daos_recx_t a_recx;
unsigned char a_csumBuf[32];
daos_csum_buf_t a_csum;
daos_epoch_range_t a_epochRange;
daos_iod_t a_iod;
daos_iov_t a_iov;
daos_sg_list_t a_sgl;
struct daos_event a_event;
};
static daos_handle_t eventQueue;
static struct daos_event **events;
static unsigned char *buffers;
static int nAios;
static daos_handle_t pool;
static daos_pool_info_t poolInfo;
static daos_oclass_id_t objectClass = DAOS_OC_LARGE_RW;
static CFS_LIST_HEAD(aios);
static IOR_offset_t total_size;
/***************************** F U N C T I O N S ******************************/
/* For DAOS methods. */
#define DCHECK(rc, format, ...) \
do { \
int _rc = (rc); \
\
if (_rc < 0) { \
fprintf(stdout, "ior ERROR (%s:%d): %d: %d: " \
format"\n", __FILE__, __LINE__, rank, _rc, \
##__VA_ARGS__); \
fflush(stdout); \
MPI_Abort(MPI_COMM_WORLD, -1); \
} \
} while (0)
#define INFO(level, param, format, ...) \
do { \
if (param->verbose >= level) \
printf("[%d] "format"\n", rank, ##__VA_ARGS__); \
} while (0)
/* For generic errors like invalid command line options. */
#define GERR(format, ...) \
do { \
fprintf(stdout, format"\n", ##__VA_ARGS__); \
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \
} while (0)
/* Distribute process 0's pool or container handle to others. */
static void HandleDistribute(daos_handle_t *handle, enum handleType type,
IOR_param_t *param)
{
daos_iov_t global;
int rc;
assert(type == POOL_HANDLE || !daos_handle_is_inval(pool));
global.iov_buf = NULL;
global.iov_buf_len = 0;
global.iov_len = 0;
if (rank == 0) {
/* Get the global handle size. */
if (type == POOL_HANDLE)
rc = daos_pool_local2global(*handle, &global);
else
rc = daos_cont_local2global(*handle, &global);
DCHECK(rc, "Failed to get global handle size");
}
MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0,
param->testComm),
"Failed to bcast global handle buffer size");
global.iov_buf = malloc(global.iov_buf_len);
if (global.iov_buf == NULL)
ERR("Failed to allocate global handle buffer");
if (rank == 0) {
if (type == POOL_HANDLE)
rc = daos_pool_local2global(*handle, &global);
else
rc = daos_cont_local2global(*handle, &global);
DCHECK(rc, "Failed to create global handle");
}
MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0,
param->testComm),
"Failed to bcast global pool handle");
if (rank != 0) {
/* A larger-than-actual length works just fine. */
global.iov_len = global.iov_buf_len;
if (type == POOL_HANDLE)
rc = daos_pool_global2local(global, handle);
else
rc = daos_cont_global2local(pool, global, handle);
DCHECK(rc, "Failed to get local handle");
}
free(global.iov_buf);
}
/*
 * Open (and, for a fresh write run, first create) the DAOS container named
 * by 'testFileName', which must be a UUID string.  Only rank 0 talks to the
 * DAOS service; the container handle is then fanned out to all ranks via
 * HandleDistribute() and the container info is broadcast.
 *
 * testFileName: container UUID in text form.
 * container:    out; valid global container handle on every rank.
 * info:         out; container info (epoch state) on every rank.
 */
static void ContainerOpen(char *testFileName, IOR_param_t *param,
                          daos_handle_t *container, daos_cont_info_t *info)
{
        int rc;

        if (rank == 0) {
                uuid_t uuid;
                unsigned int dFlags;

                rc = uuid_parse(testFileName, uuid);
                DCHECK(rc, "Failed to parse 'testFile': %s", testFileName);

                /* A write run that is not reusing an existing container
                 * creates it first. */
                if (param->open == WRITE &&
                    param->useExistingTestFile == FALSE) {
                        INFO(VERBOSE_2, param, "Creating container %s",
                             testFileName);

                        rc = daos_cont_create(pool, uuid, NULL /* ev */);
                        DCHECK(rc, "Failed to create container %s",
                               testFileName);
                }

                INFO(VERBOSE_2, param, "Openning container %s", testFileName);

                /* RW for writers, RO for readers. */
                if (param->open == WRITE)
                        dFlags = DAOS_COO_RW;
                else
                        dFlags = DAOS_COO_RO;

                rc = daos_cont_open(pool, uuid, dFlags, container, info,
                                    NULL /* ev */);
                DCHECK(rc, "Failed to open container %s", testFileName);

                /* Dump the epoch state for debugging epoch selection. */
                INFO(VERBOSE_2, param, "Container epoch state:");
                INFO(VERBOSE_2, param, "   HCE: %lu",
                     info->ci_epoch_state.es_hce);
                INFO(VERBOSE_2, param, "   LRE: %lu",
                     info->ci_epoch_state.es_lre);
                INFO(VERBOSE_2, param, "   LHE: %lu (%lx)",
                     info->ci_epoch_state.es_lhe, info->ci_epoch_state.es_lhe);
                INFO(VERBOSE_2, param, "  GHCE: %lu",
                     info->ci_epoch_state.es_ghce);
                INFO(VERBOSE_2, param, "  GLRE: %lu",
                     info->ci_epoch_state.es_glre);
                INFO(VERBOSE_2, param, " GHPCE: %lu",
                     info->ci_epoch_state.es_ghpce);

#if 0
                /* Disabled: epoch wait and object-class registration from an
                 * older DAOS API generation. */
                if (param->open != WRITE && o.daosWait != 0) {
                        daos_epoch_t e;

                        e = o.daosWait;

                        INFO(VERBOSE_2, param, "Waiting for epoch %lu", e);

                        rc = daos_epoch_wait(*container, &e,
                                             NULL /* ignore HLE */,
                                             NULL /* synchronous */);
                        DCHECK(rc, "Failed to wait for epoch %lu",
                               o.daosWait);
                }

                if (param->open == WRITE &&
                    param->useExistingTestFile == FALSE) {
                        daos_oclass_attr_t attr = {
                                .ca_schema              = DAOS_OS_STRIPED,
                                .ca_resil_degree        = 0,
                                .ca_resil               = DAOS_RES_REPL,
                                .ca_grp_nr              = 4,
                                .u.repl                 = {
                                        .r_method       = 0,
                                        .r_num          = 2
                                }
                        };

                        INFO(VERBOSE_2, param, "Registering object class");

                        rc = daos_oclass_register(container, objectClass, &attr,
                                                  NULL /* ev */);
                        DCHECK(rc, "Failed to register object class");
                }
#endif
        }

        /* Share rank 0's handle and info with everyone else. */
        HandleDistribute(container, CONTAINER_HANDLE, param);

        MPI_CHECK(MPI_Bcast(info, sizeof *info, MPI_BYTE, 0, param->testComm),
                  "Failed to broadcast container info");
}
/*
 * Close the container on every rank.  Rank 0 closes last: all other ranks
 * drop their handles first, then everyone synchronizes, and only then does
 * rank 0 (the rank that opened the container) close its handle.
 */
static void ContainerClose(daos_handle_t container, IOR_param_t *param)
{
        if (rank != 0)
                DCHECK(daos_cont_close(container, NULL /* ev */),
                       "Failed to close container");

        /* An MPI_Gather() call would probably be more efficient. */
        MPI_CHECK(MPI_Barrier(param->testComm),
                  "Failed to synchronize processes");

        if (rank == 0)
                DCHECK(daos_cont_close(container, NULL /* ev */),
                       "Failed to close container");
}
/*
 * Open the shared test object inside 'container'.  The object id is fixed
 * (<hi=0, lo=1>) and tagged with the configured object class, so every rank
 * opens the same object.
 */
static void ObjectOpen(daos_handle_t container, daos_handle_t *object,
                       daos_epoch_t epoch, IOR_param_t *param)
{
        daos_obj_id_t oid;
        unsigned int flags;
        int rc;

        /* Fixed oid: all ranks share one object. */
        oid.hi = 0;
        oid.lo = 1;
        daos_obj_id_generate(&oid, 0, objectClass);

#if 0
        /** declaring object not implemented commenting it */
        if (rank == 0 && param->open == WRITE &&
            param->useExistingTestFile == FALSE) {
                INFO(VERBOSE_2, param, "Declaring object");

                rc = daos_obj_declare(container, oid, epoch, NULL /* oa */,
                                      NULL /* ev */);
                DCHECK(rc, "Failed to declare object");
        }
#endif

        /* An MPI_Bcast() call would probably be more efficient. */
        MPI_CHECK(MPI_Barrier(param->testComm),
                  "Failed to synchronize processes");

        /* RW for writers, RO for readers. */
        if (param->open == WRITE)
                flags = DAOS_OO_RW;
        else
                flags = DAOS_OO_RO;

        rc = daos_obj_open(container, oid, epoch, flags, object, NULL /* ev */);
        DCHECK(rc, "Failed to open object");
}
/* Close the shared test object; aborts the job on failure. */
static void ObjectClose(daos_handle_t object)
{
        DCHECK(daos_obj_close(object, NULL /* ev */),
               "Failed to close object");
}
/*
 * Allocate o.daosAios AIO descriptors plus one large page-aligned data
 * buffer (one transferSize slot per descriptor).  Each descriptor's
 * dkey/recx/checksum/iod/sgl is wired up to describe its slot, a DAOS
 * event is attached, and the descriptor is put on the free list 'aios'.
 * Also allocates the completion array consumed by AIOWait().
 */
static void AIOInit(IOR_param_t *param)
{
        struct aio *aio;
        int i;
        int rc;

        /* One contiguous, page-aligned data buffer shared by all AIOs. */
        rc = posix_memalign((void **) &buffers, sysconf(_SC_PAGESIZE),
                            param->transferSize * o.daosAios);
        DCHECK(rc, "Failed to allocate buffer array");

        for (i = 0; i < o.daosAios; i++) {
                aio = malloc(sizeof *aio);
                if (aio == NULL)
                        ERR("Failed to allocate aio array");

                memset(aio, 0, sizeof *aio);

                /* dkey text is filled in per transfer (see DAOS_Xfer). */
                aio->a_dkey.iov_buf = aio->a_dkeyBuf;
                aio->a_dkey.iov_buf_len = sizeof aio->a_dkeyBuf;

                aio->a_recx.rx_nr = 1;

                aio->a_csum.cs_csum = &aio->a_csumBuf;
                aio->a_csum.cs_buf_len = sizeof aio->a_csumBuf;
                aio->a_csum.cs_len = aio->a_csum.cs_buf_len;

                aio->a_epochRange.epr_hi = DAOS_EPOCH_MAX;

                /* All records live under the single akey "data". */
                aio->a_iod.iod_name.iov_buf = "data";
                aio->a_iod.iod_name.iov_buf_len =
                        strlen(aio->a_iod.iod_name.iov_buf) + 1;
                aio->a_iod.iod_name.iov_len = aio->a_iod.iod_name.iov_buf_len;
                aio->a_iod.iod_nr = 1;
                aio->a_iod.iod_type = DAOS_IOD_ARRAY;
                aio->a_iod.iod_recxs = &aio->a_recx;
                aio->a_iod.iod_csums = &aio->a_csum;
                aio->a_iod.iod_eprs = &aio->a_epochRange;
                aio->a_iod.iod_size = param->transferSize;

                /* This AIO's private slot in the shared data buffer. */
                aio->a_iov.iov_buf = buffers + param->transferSize * i;
                aio->a_iov.iov_buf_len = param->transferSize;
                aio->a_iov.iov_len = aio->a_iov.iov_buf_len;

                aio->a_sgl.sg_nr = 1;
                aio->a_sgl.sg_iovs = &aio->a_iov;

                rc = daos_event_init(&aio->a_event, eventQueue,
                                     NULL /* parent */);
                DCHECK(rc, "Failed to initialize event for aio[%d]", i);

                /* Start on the free list. */
                cfs_list_add(&aio->a_list, &aios);

                INFO(VERBOSE_3, param, "Allocated AIO %p: buffer %p", aio,
                     aio->a_iov.iov_buf);
        }

        /* All descriptors begin free. */
        nAios = o.daosAios;

        events = malloc((sizeof *events) * o.daosAios);
        if (events == NULL)
                ERR("Failed to allocate events array");
}
/*
 * Tear down everything AIOInit() created: the completion array, every AIO
 * descriptor (finalizing its DAOS event), and the shared data buffer.
 * Callers must have drained all in-flight AIOs first (see DAOS_Close).
 */
static void AIOFini(IOR_param_t *param)
{
        struct aio *aio;
        struct aio *tmp;

        free(events);

        /* _safe variant: entries are unlinked and freed while iterating. */
        cfs_list_for_each_entry_safe(aio, tmp, &aios, a_list) {
                INFO(VERBOSE_3, param, "Freeing AIO %p: buffer %p", aio,
                     aio->a_iov.iov_buf);

                cfs_list_del_init(&aio->a_list);
                daos_event_fini(&aio->a_event);
                free(aio);
        }

        free(buffers);
}
/*
 * Reap completed asynchronous operations.  Blocks until at least one event
 * completes (DAOS_EQ_WAIT), checks each completed transfer for errors,
 * re-arms its event, and returns the descriptor to the free list 'aios',
 * incrementing the free count nAios.
 */
static void AIOWait(IOR_param_t *param)
{
        struct aio *aio;
        int i;
        int rc;

        /* On success rc is the number of events placed into 'events'. */
        rc = daos_eq_poll(eventQueue, 0, DAOS_EQ_WAIT, o.daosAios,
                          events);
        DCHECK(rc, "Failed to poll event queue");
        /* Cannot reap more events than there are busy AIOs. */
        assert(rc <= o.daosAios - nAios);

        for (i = 0; i < rc; i++) {
                int ret;

                /* Recover the aio that embeds this event (container_of). */
                aio = (struct aio *)
                      ((char *) events[i] -
                       (char *) (&((struct aio *) 0)->a_event));

                DCHECK(aio->a_event.ev_error, "Failed to transfer (%lu, %lu)",
                       aio->a_iod.iod_recxs->rx_idx,
                       aio->a_iod.iod_recxs->rx_nr);

                /* Events are single-shot: re-initialize before reuse. */
                daos_event_fini(&aio->a_event);
                ret = daos_event_init(&aio->a_event, eventQueue,
                                      NULL /* parent */);
                DCHECK(ret, "Failed to reinitialize event for AIO %p", aio);

                /* Back onto the free list. */
                cfs_list_move(&aio->a_list, &aios);
                nAios++;

                if (param->verbose >= VERBOSE_3)
                        INFO(VERBOSE_3, param, "Completed AIO %p: buffer %p", aio,
                             aio->a_iov.iov_buf);
        }

        INFO(VERBOSE_3, param, "Found %d completed AIOs (%d free %d busy)", rc,
             nAios, o.daosAios - nAios);
}
/*
 * Translate the 'daosObjectClass' option string into a DAOS object class
 * and store it in the global 'objectClass'.  Matching is case-insensitive;
 * an unrecognized name is a fatal configuration error.
 */
static void ObjectClassParse(const char *string)
{
        /* Recognized names mapped to their DAOS object-class constants. */
        static const struct {
                const char *name;
                int         oc;
        } classes[] = {
                { "tiny",     DAOS_OC_TINY_RW },
                { "small",    DAOS_OC_SMALL_RW },
                { "large",    DAOS_OC_LARGE_RW },
                { "echo",     DAOS_OC_ECHO_RW },
                { "R2",       DAOS_OC_R2_RW },
                { "R2S",      DAOS_OC_R2S_RW },
                { "R3S",      DAOS_OC_R3S_RW },
                { "R3",       DAOS_OC_R3_RW },
                { "R4",       DAOS_OC_R4_RW },
                { "R4S",      DAOS_OC_R4S_RW },
                { "repl_max", DAOS_OC_REPL_MAX_RW }
        };
        int i;

        for (i = 0; i < (int)(sizeof(classes) / sizeof(classes[0])); i++) {
                if (strcasecmp(string, classes[i].name) == 0) {
                        objectClass = classes[i].oc;
                        return;
                }
        }

        GERR("Invalid 'daosObjectClass' argument: '%s'", string);
}
/*
 * Return the configured DAOS server group, or NULL when the 'daosGroup'
 * option is empty (meaning: use the default group).
 */
static const char *GetGroup(IOR_param_t *param)
{
        return strlen(o.daosGroup) == 0 ? NULL : o.daosGroup;
}
/*
 * Parse the ':'-separated 'daosPoolSvc' option into 'ranks', which must
 * have room for 'max' entries; more than 'max' replicas is a fatal error.
 *
 * BUGFIX: the original reused one pointer for both the strdup()'d buffer
 * and the strtok() cursor.  That freed an interior token pointer (undefined
 * behavior) on the overflow path and leaked the duplicated buffer on the
 * normal path (the final free() always received NULL).  Keep the strdup()
 * result in its own variable and free exactly that.
 */
static void ParseService(IOR_param_t *param, int max, d_rank_list_t *ranks)
{
        char *dup;      /* writable copy consumed by strtok() */
        char *tok;

        dup = strdup(o.daosPoolSvc);
        if (dup == NULL)
                GERR("failed to duplicate argument");

        ranks->rl_nr = 0;
        for (tok = strtok(dup, ":"); tok != NULL; tok = strtok(NULL, ":")) {
                if (ranks->rl_nr >= max) {
                        free(dup);
                        GERR("at most %d pool service replicas supported", max);
                }
                ranks->rl_ranks[ranks->rl_nr] = atoi(tok);
                ranks->rl_nr++;
        }

        free(dup);
}
/*
 * Validate the DAOS-specific options, initialize the DAOS library and the
 * event queue, and connect rank 0 to the pool.  The pool handle is then
 * distributed to every rank via HandleDistribute() and the pool info is
 * broadcast; daosStripeCount defaults to 64 stripes per pool target.
 */
static void DAOS_Init(IOR_param_t *param)
{
        int rc;

        if (strlen(o.daosObjectClass) != 0)
                ObjectClassParse(o.daosObjectClass);

        /* Option sanity checks; all violations are fatal. */
        if (param->filePerProc)
                GERR("'filePerProc' not yet supported");
        if (o.daosStripeMax % o.daosStripeSize != 0)
                GERR("'daosStripeMax' must be a multiple of 'daosStripeSize'");
        if (o.daosStripeSize % param->transferSize != 0)
                GERR("'daosStripeSize' must be a multiple of 'transferSize'");
        if (param->transferSize % o.daosRecordSize != 0)
                GERR("'transferSize' must be a multiple of 'daosRecordSize'");

        /*
         * BUGFIX: the original chained these comparisons with '||', which is
         * true for every possible object class (a value cannot equal all of
         * them), so daosKill was always rejected.  daosKill requires one of
         * the replicated object classes, i.e. all inequalities must hold for
         * the error to fire.
         */
        if (o.daosKill && (objectClass != DAOS_OC_R2_RW) &&
            (objectClass != DAOS_OC_R3_RW) &&
            (objectClass != DAOS_OC_R4_RW) &&
            (objectClass != DAOS_OC_R2S_RW) &&
            (objectClass != DAOS_OC_R3S_RW) &&
            (objectClass != DAOS_OC_R4S_RW) &&
            (objectClass != DAOS_OC_REPL_MAX_RW))
                GERR("'daosKill' only makes sense with 'daosObjectClass=repl'");

        /* BUGFIX: only warn when daosStripeMax is actually in effect. */
        if (rank == 0 && o.daosStripeMax != 0)
                INFO(VERBOSE_0, param, "WARNING: USING daosStripeMax CAUSES READS TO RETURN INVALID DATA");

        rc = daos_init();
        DCHECK(rc, "Failed to initialize daos");

        rc = daos_eq_create(&eventQueue);
        DCHECK(rc, "Failed to create event queue");

        if (rank == 0) {
                uuid_t uuid;
                /* BUGFIX: renamed from 'rank' -- the old name shadowed the
                 * global MPI rank that DCHECK/INFO print. */
                d_rank_t svcRanks[13];
                d_rank_list_t ranks;

                if (strlen(o.daosPool) == 0)
                        GERR("'daosPool' must be specified");
                if (strlen(o.daosPoolSvc) == 0)
                        GERR("'daosPoolSvc' must be specified");

                INFO(VERBOSE_2, param, "Connecting to pool %s %s",
                     o.daosPool, o.daosPoolSvc);

                rc = uuid_parse(o.daosPool, uuid);
                DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool);

                ranks.rl_ranks = svcRanks;
                ParseService(param, sizeof(svcRanks) / sizeof(svcRanks[0]),
                             &ranks);

                rc = daos_pool_connect(uuid, GetGroup(param), &ranks,
                                       DAOS_PC_RW, &pool, &poolInfo,
                                       NULL /* ev */);
                DCHECK(rc, "Failed to connect to pool %s", o.daosPool);
        }

        HandleDistribute(&pool, POOL_HANDLE, param);

        MPI_CHECK(MPI_Bcast(&poolInfo, sizeof poolInfo, MPI_BYTE, 0,
                            param->testComm),
                  "Failed to bcast pool info");

        /* Default: 64 stripes per pool target. */
        if (o.daosStripeCount == -1)
                o.daosStripeCount = poolInfo.pi_ntargets * 64UL;
}
/*
 * Tear down the DAOS backend: disconnect from the pool, destroy the event
 * queue, and finalize the library.  Each step is fatal on failure.
 */
static void DAOS_Fini(IOR_param_t *param)
{
        DCHECK(daos_pool_disconnect(pool, NULL /* ev */),
               "Failed to disconnect from pool %s", o.daosPool);
        DCHECK(daos_eq_destroy(eventQueue, 0 /* flags */),
               "Failed to destroy event queue");
        DCHECK(daos_fini(),
               "Failed to finalize daos");
}
/*
 * "Create" is the same operation as "open" for this backend:
 * ContainerOpen() (reached via DAOS_Open) creates the container when the
 * run is a write and no existing test file is being reused.
 */
static void *DAOS_Create(char *testFileName, IOR_param_t *param)
{
        return DAOS_Open(testFileName, param);
}
/*
 * Open the container named by testFileName and select the epoch to operate
 * in.  Writers use GHCE+1 (the epoch after the globally highest committed
 * one) unless a 'daosEpoch' was requested, which must not already be
 * committed; readers use GHCE, 'daosWait', or the requested epoch, which
 * must already be committed.  Rank 0 additionally holds the write epoch.
 * Finally opens the shared object and sets up the AIO machinery.
 *
 * Returns a malloc'd fileDescriptor, released by DAOS_Close().
 */
static void *DAOS_Open(char *testFileName, IOR_param_t *param)
{
        struct fileDescriptor *fd;
        daos_epoch_t ghce;

        fd = malloc(sizeof *fd);
        if (fd == NULL)
                ERR("Failed to allocate fd");

        ContainerOpen(testFileName, param, &fd->container, &fd->containerInfo);

        /* ghce = globally highest committed epoch. */
        ghce = fd->containerInfo.ci_epoch_state.es_ghce;
        if (param->open == WRITE) {
                if (o.daosEpoch == 0)
                        fd->epoch = ghce + 1;
                else if (o.daosEpoch <= ghce)
                        GERR("Can't modify committed epoch\n");
                else
                        fd->epoch = o.daosEpoch;
        } else {
                if (o.daosEpoch == 0) {
                        if (o.daosWait == 0)
                                fd->epoch = ghce;
                        else
                                fd->epoch = o.daosWait;
                } else if (o.daosEpoch > ghce) {
                        GERR("Can't read uncommitted epoch\n");
                } else {
                        fd->epoch = o.daosEpoch;
                }
        }

        if (rank == 0)
                INFO(VERBOSE_2, param, "Accessing epoch %lu", fd->epoch);

        if (rank == 0 && param->open == WRITE) {
                daos_epoch_t e = fd->epoch;
                int rc;

                INFO(VERBOSE_2, param, "Holding epoch %lu", fd->epoch);

                rc = daos_epoch_hold(fd->container, &fd->epoch,
                                     NULL /* state */, NULL /* ev */);
                DCHECK(rc, "Failed to hold epoch");
                /* The hold must not have moved us to a different epoch. */
                assert(fd->epoch == e);
        }

        ObjectOpen(fd->container, &fd->object, fd->epoch, param);

        AIOInit(param);

        return fd;
}
/*
 * Fault-injection helper: kill one DAOS server (the highest-ranked target
 * still alive) and exclude it from the pool map, to exercise replicated
 * object classes.  Intended to run on MPI rank 0 only (see kill_and_sync()).
 * Does nothing when at most one target would remain alive.
 */
static void
kill_daos_server(IOR_param_t *param)
{
        daos_pool_info_t info;
        d_rank_t rank, svc_ranks[13];
        d_rank_list_t svc, targets;
        uuid_t uuid;
        int rc;

        rc = daos_pool_query(pool, NULL, &info, NULL);
        DCHECK(rc, "Error in querying pool\n");

        /* Need at least two live targets before killing one. */
        if (info.pi_ntargets - info.pi_ndisabled <= 1)
                return;
        /* choose the last alive one */
        rank = info.pi_ntargets - 1 - info.pi_ndisabled;

        rc = uuid_parse(o.daosPool, uuid);
        DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool);

        /*
         * BUGFIX: this message was guarded by 'if (rc != 0)', but DCHECK
         * aborts on failure, so rc is always 0 here and the message could
         * never print.  Print it unconditionally.  (Also removed the unused
         * local 'char *s'.)
         */
        printf("Killing tgt rank: %d (total of %d of %d already disabled)\n",
               rank, info.pi_ndisabled, info.pi_ntargets);
        fflush(stdout);

        rc = daos_mgmt_svc_rip(GetGroup(param), rank, true, NULL);
        DCHECK(rc, "Error in killing server\n");

        targets.rl_nr = 1;
        targets.rl_ranks = &rank;

        svc.rl_ranks = svc_ranks;
        ParseService(param, sizeof(svc_ranks) / sizeof(svc_ranks[0]), &svc);

        rc = daos_pool_exclude(uuid, NULL, &svc, &targets, NULL);
        DCHECK(rc, "Error in excluding pool from poolmap\n");

        rc = daos_pool_query(pool, NULL, &info, NULL);
        DCHECK(rc, "Error in querying pool\n");

        printf("%d targets succesfully disabled\n",
               info.pi_ndisabled);
}
/*
 * Have rank 0 kill and exclude one server, then synchronize all ranks and
 * report (on rank 0) how long the whole fault injection took.
 */
static void
kill_and_sync(IOR_param_t *param)
{
        double begin = MPI_Wtime();

        if (rank == 0) {
                kill_daos_server(param);
                printf("Done killing and excluding\n");
        }

        MPI_CHECK(MPI_Barrier(param->testComm),
                  "Failed to synchronize processes");

        if (rank == 0)
                printf("Time spent inducing failure: %lf\n",
                       (MPI_Wtime() - begin));
}
/*
 * Start one asynchronous transfer of 'length' bytes at param->offset.
 * The offset is striped across daosStripeCount dkeys (one dkey per stripe,
 * named by the stripe number); within a dkey the record index advances by
 * one stripe per "round" over all stripes.  WRITECHECK/READCHECK accesses
 * drain all in-flight AIOs and copy the fetched data back into 'buffer'
 * before returning.  Always returns 'length' (errors abort via DCHECK).
 */
static IOR_offset_t DAOS_Xfer(int access, void *file, IOR_size_t *buffer,
                              IOR_offset_t length, IOR_param_t *param)
{
        struct fileDescriptor *fd = file;
        struct aio *aio;
        uint64_t stripe;
        IOR_offset_t stripeOffset;
        uint64_t round;
        int rc;

        assert(length == param->transferSize);
        assert(param->offset % length == 0);

        /**
         * Currently killing only during writes
         * Kills once when 1/2 of blocksize is
         * written
         **/
        total_size += length;
        if (o.daosKill && (access == WRITE) &&
            ((param->blockSize) / 2) == total_size) {
                /** More than half written lets kill */
                if (rank == 0)
                        /* BUGFIX: dropped stray 'rank' argument that had no
                         * matching conversion in the format string. */
                        printf("Killing and Syncing\n");

                kill_and_sync(param);
                o.daosKill = 0;
        }

        /*
         * Find an available AIO descriptor. If none, wait for one.
         */
        while (nAios == 0)
                AIOWait(param);
        aio = cfs_list_entry(aios.next, struct aio, a_list);
        cfs_list_move_tail(&aio->a_list, &aios);
        nAios--;

        /* Map the global offset to (stripe dkey, offset within stripe). */
        stripe = (param->offset / o.daosStripeSize) %
                 o.daosStripeCount;
        rc = snprintf(aio->a_dkeyBuf, sizeof aio->a_dkeyBuf, "%lu", stripe);
        assert(rc < sizeof aio->a_dkeyBuf);
        aio->a_dkey.iov_len = strlen(aio->a_dkeyBuf) + 1;
        round = param->offset / (o.daosStripeSize * o.daosStripeCount);
        stripeOffset = o.daosStripeSize * round +
                       param->offset % o.daosStripeSize;
        if (o.daosStripeMax != 0)
                stripeOffset %= o.daosStripeMax;
        aio->a_recx.rx_idx = stripeOffset / o.daosRecordSize;
        aio->a_epochRange.epr_lo = fd->epoch;

        /*
         * If the data written will be checked later, we have to copy in valid
         * data instead of writing random bytes. If the data being read is for
         * checking purposes, poison the buffer first.
         */
        if (access == WRITE && param->checkWrite)
                memcpy(aio->a_iov.iov_buf, buffer, length);
        else if (access == WRITECHECK || access == READCHECK)
                memset(aio->a_iov.iov_buf, '#', length);

        /* BUGFIX: the sgl length was printed with %lu while the argument is
         * cast to unsigned long long; use %llu to match. */
        INFO(VERBOSE_3, param, "Starting AIO %p (%d free %d busy): access %d "
             "dkey '%s' iod <%llu, %llu> sgl <%p, %llu>", aio, nAios,
             o.daosAios - nAios, access, (char *) aio->a_dkey.iov_buf,
             (unsigned long long) aio->a_iod.iod_recxs->rx_idx,
             (unsigned long long) aio->a_iod.iod_recxs->rx_nr,
             aio->a_sgl.sg_iovs->iov_buf,
             (unsigned long long) aio->a_sgl.sg_iovs->iov_buf_len);

        if (access == WRITE) {
                rc = daos_obj_update(fd->object, fd->epoch, &aio->a_dkey,
                                     1 /* nr */, &aio->a_iod, &aio->a_sgl,
                                     &aio->a_event);
                DCHECK(rc, "Failed to start update operation");
        } else {
                rc = daos_obj_fetch(fd->object, fd->epoch, &aio->a_dkey,
                                    1 /* nr */, &aio->a_iod, &aio->a_sgl,
                                    NULL /* maps */, &aio->a_event);
                DCHECK(rc, "Failed to start fetch operation");
        }

        /*
         * If this is a WRITECHECK or READCHECK, we are expected to fill data
         * into the buffer before returning. Note that if this is a READ, we
         * don't have to return valid data as WriteOrRead() doesn't care.
         */
        if (access == WRITECHECK || access == READCHECK) {
                while (o.daosAios - nAios > 0)
                        AIOWait(param);
                memcpy(buffer, aio->a_sgl.sg_iovs->iov_buf, length);
        }

        return length;
}
/*
 * Drain outstanding AIOs, tear down the AIO state, and close the object.
 * For a write run (unless daosWriteOnly is set) rank 0 flushes and commits
 * the write epoch once every rank has finished writing.  Finally closes the
 * container and frees the descriptor allocated by DAOS_Open().
 */
static void DAOS_Close(void *file, IOR_param_t *param)
{
        struct fileDescriptor *fd = file;
        int rc;

        /* o.daosAios - nAios = AIOs still in flight. */
        while (o.daosAios - nAios > 0)
                AIOWait(param);
        AIOFini(param);

        ObjectClose(fd->object);

        if (param->open == WRITE && !o.daosWriteOnly) {
                /* Wait for everybody to complete the writes. */
                MPI_CHECK(MPI_Barrier(param->testComm),
                          "Failed to synchronize processes");

                if (rank == 0) {
                        INFO(VERBOSE_2, param, "Flushing epoch %lu", fd->epoch);

                        rc = daos_epoch_flush(fd->container, fd->epoch,
                                              NULL /* state */, NULL /* ev */);
                        DCHECK(rc, "Failed to flush epoch");

                        INFO(VERBOSE_2, param, "Committing epoch %lu",
                             fd->epoch);

                        rc = daos_epoch_commit(fd->container, fd->epoch,
                                               NULL /* state */, NULL /* ev */);
                        DCHECK(rc, "Failed to commit object write");
                }
        }

        ContainerClose(fd->container, param);

        free(fd);
}
/*
 * Destroy the container named by testFileName (a UUID string).  A container
 * that no longer exists is not treated as an error.
 */
static void DAOS_Delete(char *testFileName, IOR_param_t *param)
{
        uuid_t uuid;
        int rc;

        INFO(VERBOSE_2, param, "Deleting container %s", testFileName);

        DCHECK(uuid_parse(testFileName, uuid),
               "Failed to parse 'testFile': %s", testFileName);

        rc = daos_cont_destroy(pool, uuid, 1 /* force */, NULL /* ev */);
        if (rc != -DER_NONEXIST)
                DCHECK(rc, "Failed to destroy container %s", testFileName);
}
/*
 * Report the backend's version string ("DAOS").  The buffer is static, so
 * the returned pointer stays valid; repeated calls rewrite the same bytes.
 */
static char* DAOS_GetVersion()
{
        static char version[1024];

        snprintf(version, sizeof(version), "DAOS");
        return version;
}
/*
 * "fsync" for this backend: drain every in-flight asynchronous DAOS
 * operation so all previously issued transfers have completed on return.
 */
static void DAOS_Fsync(void *file, IOR_param_t *param)
{
        /* o.daosAios - nAios = AIOs still in flight. */
        while (o.daosAios - nAios > 0)
                AIOWait(param);
}
/*
 * Report the "file size" of a container.  DAOS containers have no file
 * size, so this always returns 0; callers must not rely on the value.
 */
static IOR_offset_t DAOS_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
                                     char *testFileName)
{
        /*
         * Sizes are inapplicable to containers at the moment.
         */
        return 0;
}

View File

@ -46,6 +46,99 @@
/* Global DFS mount and the pool/container handles it is layered on. */
dfs_t *dfs;
daos_handle_t poh, coh;

/************************** O P T I O N S *****************************/
/* Command-line options specific to the DFS backend; all values arrive as
 * strings via the option table below. */
struct dfs_options{
        char    * pool;         /* DAOS pool uuid (required) */
        char    * svcl;         /* pool service replica ranks, ':'-separated (required) */
        char    * group;        /* DAOS server group (optional) */
        char    * cont;         /* DFS container uuid (required) */
};

static struct dfs_options o = {
        .pool = NULL,
        .svcl = NULL,
        .group = NULL,
        .cont = NULL,
};

/* Option table consumed by the generic option parser ('s' = string). */
static option_help options [] = {
      {'p', "pool", "DAOS pool uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.pool},
      {'s', "svcl", "DAOS pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', & o.svcl},
      {'g', "group", "DAOS server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group},
      {'c', "cont", "DFS container uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.cont},
      LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static void *DFS_Create(char *, IOR_param_t *);
static void *DFS_Open(char *, IOR_param_t *);
static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void DFS_Close(void *, IOR_param_t *);
static void DFS_Delete(char *, IOR_param_t *);
static char* DFS_GetVersion();
static void DFS_Fsync(void *, IOR_param_t *);
static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *);
static int DFS_Stat (const char *, struct stat *, IOR_param_t *);
static int DFS_Mkdir (const char *, mode_t, IOR_param_t *);
static int DFS_Rmdir (const char *, IOR_param_t *);
static int DFS_Access (const char *, int, IOR_param_t *);
static void DFS_Init(IOR_param_t *param);
static void DFS_Finalize(IOR_param_t *param);
static option_help * DFS_options();
/************************** D E C L A R A T I O N S ***************************/
/*
 * DFS (DAOS File System) backend registration: wires the DFS_* callbacks
 * declared above into IOR's abstract I/O interface.
 */
ior_aiori_t dfs_aiori = {
        .name = "DFS",
        .create = DFS_Create,
        .open = DFS_Open,
        .xfer = DFS_Xfer,
        .close = DFS_Close,
        .delete = DFS_Delete,
        .get_version = DFS_GetVersion,
        .fsync = DFS_Fsync,
        .get_file_size = DFS_GetFileSize,
        .statfs = DFS_Statfs,
        .mkdir = DFS_Mkdir,
        .rmdir = DFS_Rmdir,
        .access = DFS_Access,
        .stat = DFS_Stat,
        .initialize = DFS_Init,
        .finalize = DFS_Finalize,
        .get_options = DFS_options,
};
/***************************** F U N C T I O N S ******************************/
/* For DAOS methods. */
/*
 * DCHECK(rc, format, ...): fatal error check for DAOS/DFS calls.
 * Evaluates rc exactly once; if it is negative, prints
 * "ERROR (file:line): mpi-rank: rc: message" to stderr and aborts the
 * entire MPI job.
 */
#define DCHECK(rc, format, ...)                                         \
do {                                                                    \
        int _rc = (rc);                                                 \
                                                                        \
        if (_rc < 0) {                                                  \
                fprintf(stderr, "ERROR (%s:%d): %d: %d: "               \
                        format"\n", __FILE__, __LINE__, rank, _rc,      \
                        ##__VA_ARGS__);                                 \
                fflush(stderr);                                         \
                MPI_Abort(MPI_COMM_WORLD, -1);                          \
        }                                                               \
} while (0)

/*
 * DERR(rc, format, ...): non-fatal variant of DCHECK.  On a negative rc it
 * prints the same diagnostic and then jumps to the enclosing function's
 * 'out:' label for cleanup instead of aborting, so it may only be used in
 * functions that define such a label.
 */
#define DERR(rc, format, ...)                                           \
do {                                                                    \
        int _rc = (rc);                                                 \
                                                                        \
        if (_rc < 0) {                                                  \
                fprintf(stderr, "ERROR (%s:%d): %d: %d: "               \
                        format"\n", __FILE__, __LINE__, rank, _rc,      \
                        ##__VA_ARGS__);                                 \
                fflush(stderr);                                         \
                goto out;                                               \
        }                                                               \
} while (0)
static int
parse_filename(const char *path, char **_obj_name, char **_cont_name)
{
@ -124,48 +217,11 @@ out:
return rc;
}
/**************************** P R O T O T Y P E S *****************************/
static void *DFS_Create(char *, IOR_param_t *);
static void *DFS_Open(char *, IOR_param_t *);
static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void DFS_Close(void *, IOR_param_t *);
static void DFS_Delete(char *, IOR_param_t *);
static void DFS_SetVersion(IOR_param_t *);
static void DFS_Fsync(void *, IOR_param_t *);
static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *);
static int DFS_Stat (const char *, struct stat *, IOR_param_t *);
static int DFS_Mkdir (const char *, mode_t, IOR_param_t *);
static int DFS_Rmdir (const char *, IOR_param_t *);
static int DFS_Access (const char *, int, IOR_param_t *);
static int DFS_Init(IOR_param_t *param);
static int DFS_Finalize(IOR_param_t *param);
static option_help * DFS_options(){
return options;
}
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t dfs_aiori = {
.name = "DFS",
.create = DFS_Create,
.open = DFS_Open,
.xfer = DFS_Xfer,
.close = DFS_Close,
.delete = DFS_Delete,
.set_version = DFS_SetVersion,
.fsync = DFS_Fsync,
.get_file_size = DFS_GetFileSize,
.statfs = DFS_Statfs,
.mkdir = DFS_Mkdir,
.rmdir = DFS_Rmdir,
.access = DFS_Access,
.stat = DFS_Stat,
.init = DFS_Init,
.finalize = DFS_Finalize,
};
/***************************** F U N C T I O N S ******************************/
static int
static void
DFS_Init(IOR_param_t *param) {
uuid_t pool_uuid, co_uuid;
daos_pool_info_t pool_info;
@ -174,46 +230,37 @@ DFS_Init(IOR_param_t *param) {
bool cont_created = false;
int rc;
if (uuid_parse(param->daosPool, pool_uuid) < 0) {
fprintf(stderr, "Invalid pool uuid\n");
return -1;
}
if (o.pool == NULL || o.svcl == NULL || o.cont == NULL)
ERR("Invalid Arguments to DFS\n");
if (uuid_parse(param->daosCont, co_uuid) < 0) {
fprintf(stderr, "Invalid pool uuid\n");
return -1;
}
rc = uuid_parse(o.pool, pool_uuid);
DCHECK(rc, "Failed to parse 'Pool uuid': %s", o.pool);
svcl = daos_rank_list_parse(param->daosPoolSvc, ":");
if (svcl == NULL) {
fprintf(stderr, "Invalid pool service rank list\n");
return -1;
}
rc = uuid_parse(o.cont, co_uuid);
DCHECK(rc, "Failed to parse 'Cont uuid': %s", o.cont);
printf("Pool uuid = %s, SVCL = %s\n", param->daosPool,
param->daosPoolSvc);
svcl = daos_rank_list_parse(o.svcl, ":");
if (svcl == NULL)
ERR("Failed to allocate svcl");
printf("DFS Container namespace uuid = %s\n", param->daosCont);
if (verbose >= 3) {
printf("Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl);
printf("DFS Container namespace uuid = %s\n", o.cont);
}
rc = daos_init();
if (rc) {
fprintf(stderr, "daos_init() failed with %d\n", rc);
return rc;
}
DCHECK(rc, "Failed to initialize daos");
/** Connect to DAOS pool */
rc = daos_pool_connect(pool_uuid,
strlen(param->daosGroup) ? param->daosGroup : NULL,
svcl, DAOS_PC_RW, &poh, &pool_info, NULL);
if (rc < 0) {
fprintf(stderr, "Failed to connect to pool (%d)\n", rc);
goto err_daos;
}
rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, &poh,
&pool_info, NULL);
DCHECK(rc, "Failed to connect to pool");
rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL);
/* If NOEXIST we create it */
if (rc == -DER_NONEXIST) {
printf("Creating DFS Container ...\n");
if (verbose >= 3)
printf("Creating DFS Container ...\n");
rc = daos_cont_create(poh, co_uuid, NULL);
if (rc == 0) {
cont_created = true;
@ -221,61 +268,28 @@ DFS_Init(IOR_param_t *param) {
&co_info, NULL);
}
}
if (rc) {
fprintf(stderr, "Failed to create container (%d)\n", rc);
goto err_pool;
}
DCHECK(rc, "Failed to create container");
rc = dfs_mount(poh, coh, O_RDWR, &dfs);
if (rc) {
fprintf(stderr, "dfs_mount failed (%d)\n", rc);
goto err_cont;
}
out:
daos_rank_list_free(svcl);
return rc;
err_cont:
daos_cont_close(coh, NULL);
err_pool:
if (cont_created)
daos_cont_destroy(poh, co_uuid, 1, NULL);
daos_pool_disconnect(poh, NULL);
err_daos:
daos_fini();
goto out;
DCHECK(rc, "Failed to mount DFS namespace");
}
int
static void
DFS_Finalize(IOR_param_t *param)
{
int rc;
rc = dfs_umount(dfs, true);
if (rc) {
fprintf(stderr, "dfs_umount() failed (%d)\n", rc);
return -1;
}
DCHECK(rc, "Failed to umount DFS namespace");
rc = daos_cont_close(coh, NULL);
if (rc) {
fprintf(stderr, "daos_cont_close() failed (%d)\n", rc);
return -1;
}
DCHECK(rc, "Failed to close container");
daos_pool_disconnect(poh, NULL);
if (rc) {
fprintf(stderr, "daos_pool_disconnect() failed (%d)\n", rc);
return -1;
}
DCHECK(rc, "Failed to disconnect from pool");
rc = daos_fini();
if (rc) {
fprintf(stderr, "daos_fini() failed (%d)\n", rc);
return -1;
}
return 0;
DCHECK(rc, "Failed to finalize DAOS");
}
/*
@ -296,20 +310,17 @@ DFS_Create(char *testFileName, IOR_param_t *param)
mode = S_IFREG | param->mode;
rc = parse_filename(testFileName, &name, &dir_name);
if (rc)
goto out;
DERR(rc, "Failed to parse path %s", testFileName);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_open(dfs, parent, name, mode, fd_oflag, DAOS_OC_LARGE_RW,
NULL, &obj);
if (rc)
goto out;
DERR(rc, "dfs_open() of %s Failed", name);
out:
if (name)
@ -337,19 +348,16 @@ DFS_Open(char *testFileName, IOR_param_t *param)
fd_oflag |= O_RDWR;
rc = parse_filename(testFileName, &name, &dir_name);
if (rc)
goto out;
DERR(rc, "Failed to parse path %s", testFileName);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_open(dfs, parent, name, S_IFREG, fd_oflag, 0, NULL, &obj);
if (rc)
goto out;
DERR(rc, "dfs_open() of %s Failed", name);
out:
if (name)
@ -391,13 +399,15 @@ DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length,
/* write/read file */
if (access == WRITE) {
rc = dfs_write(dfs, obj, sgl, param->offset);
if (rc)
ERR("write() failed");
if (rc) {
fprintf(stderr, "dfs_write() failed (%d)", rc);
return -1;
}
ret = remaining;
} else {
rc = dfs_read(dfs, obj, sgl, param->offset, &ret);
if (rc || ret == 0)
ERR("read() failed");
fprintf(stderr, "dfs_read() failed(%d)", rc);
}
if (ret < remaining) {
@ -449,19 +459,16 @@ DFS_Delete(char *testFileName, IOR_param_t * param)
int rc;
rc = parse_filename(testFileName, &name, &dir_name);
if (rc)
goto out;
DERR(rc, "Failed to parse path %s", testFileName);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_remove(dfs, parent, name, false);
if (rc)
goto out;
DERR(rc, "dfs_remove() of %s Failed", name);
out:
if (name)
@ -472,13 +479,12 @@ out:
dfs_release(parent);
}
/*
* Determine api version.
*/
static void
DFS_SetVersion(IOR_param_t * test)
static char* DFS_GetVersion()
{
strcpy(test->apiVersion, test->api);
static char ver[1024] = {};
sprintf(ver, "%s", "DAOS");
return ver;
}
/*
@ -492,8 +498,10 @@ DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName)
int rc;
rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL);
if (rc)
if (rc) {
fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc);
return -1;
}
rc = dfs_get_size(dfs, obj, &fsize);
if (rc)
@ -540,19 +548,16 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_mkdir(dfs, parent, name, mode);
if (rc)
goto out;
DERR(rc, "dfs_mkdir() of %s Failed", name);
out:
if (name)
@ -575,19 +580,16 @@ DFS_Rmdir(const char *path, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_remove(dfs, parent, name, false);
if (rc)
goto out;
DERR(rc, "dfs_remove() of %s Failed", name);
out:
if (name)
@ -611,26 +613,19 @@ DFS_Access(const char *path, int mode, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
if (strcmp(name, ".") == 0) {
if (name && strcmp(name, ".") == 0) {
free(name);
name = NULL;
}
rc = dfs_stat(dfs, parent, name, &stbuf);
if (rc) {
rc = -1;
errno = -ENOENT;
goto out;
}
DERR(rc, "dfs_stat() of %s Failed", name);
out:
if (name)
@ -653,19 +648,16 @@ DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param)
int rc;
rc = parse_filename(path, &name, &dir_name);
if (rc)
return rc;
DERR(rc, "Failed to parse path %s", path);
assert(dir_name);
assert(name);
rc = dfs_lookup(dfs, dir_name, O_RDONLY, &parent, &pmode);
if (rc || !S_ISDIR(pmode))
goto out;
DERR(rc, "dfs_lookup() of %s Failed", dir_name);
rc = dfs_stat(dfs, parent, name, buf);
if (rc)
goto out;
DERR(rc, "dfs_stat() of %s Failed", name);
out:
if (name)

155
src/aiori-DUMMY.c Executable file
View File

@ -0,0 +1,155 @@
/*
* Dummy implementation doesn't do anything besides waiting
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "ior.h"
#include "aiori.h"
#include "utilities.h"
/************************** O P T I O N S *****************************/
/* Tunables for the DUMMY backend: artificial latencies in microseconds. */
struct dummy_options{
        uint64_t delay_creates;         /* usleep() per create, 0 = none */
        uint64_t delay_xfer;            /* usleep() per transfer, 0 = none */
        int delay_rank_0_only;          /* if set, only MPI rank 0 sleeps */
};

static struct dummy_options o = {
        .delay_creates = 0,
        .delay_xfer = 0,
        .delay_rank_0_only = 0,
};

/* Option table consumed by the generic option parser
 * ('l' = integer value, 'd' = flag). */
static option_help options [] = {
        {'c', "delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o.delay_creates},
        {'x', "delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o.delay_xfer},
        {'z', "delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o.delay_rank_0_only},
        LAST_OPTION
};

/* Fake "file handle" source: starts at 1, incremented on every
 * create/open so each handle is distinct.  Never dereferenced. */
static char * current = (char*) 1;
/* Expose the DUMMY option table to the generic option parser. */
static option_help * DUMMY_options(){
        return options;
}
/* Hand out the next fake handle, optionally sleeping to model create cost. */
static void *DUMMY_Create(char *testFileName, IOR_param_t * param)
{
        void *handle = current++;

        if (verbose > 4)
                fprintf(out_logfile, "DUMMY create: %s = %p\n", testFileName, handle);
        /* !A || (A && B)  ==  !A || B */
        if (o.delay_creates && (rank == 0 || ! o.delay_rank_0_only))
                usleep(o.delay_creates);
        return handle;
}
/* Hand out the next fake handle for an "open"; no delay is applied here. */
static void *DUMMY_Open(char *testFileName, IOR_param_t * param)
{
        void *handle = current++;

        if (verbose > 4)
                fprintf(out_logfile, "DUMMY open: %s = %p\n", testFileName, handle);
        return handle;
}
/* Nothing to flush; just trace the call at high verbosity. */
static void DUMMY_Fsync(void *fd, IOR_param_t * param)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY fsync %p\n", fd);
}
/* Nothing to release; just trace the call at high verbosity. */
static void DUMMY_Close(void *fd, IOR_param_t * param)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY close %p\n", fd);
}
/* Nothing to delete; just trace the call at high verbosity. */
static void DUMMY_Delete(char *testFileName, IOR_param_t * param)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY delete: %s\n", testFileName);
}
/* Report the (made-up) version of the DUMMY backend. */
static char * DUMMY_getVersion()
{
        static char *version = "0.5";

        return version;
}
/* File sizes are meaningless for the DUMMY backend; always report zero. */
static IOR_offset_t DUMMY_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName)
{
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY getFileSize: %s\n", testFileName);
        return 0;
}
/* Pretend the whole transfer completed, optionally sleeping to model latency. */
static IOR_offset_t DUMMY_Xfer(int access, void *file, IOR_size_t * buffer, IOR_offset_t length, IOR_param_t * param){
        if (verbose > 4)
                fprintf(out_logfile, "DUMMY xfer: %p\n", file);
        /* !A || (A && B)  ==  !A || B */
        if (o.delay_xfer && (rank == 0 || ! o.delay_rank_0_only))
                usleep(o.delay_xfer);
        return length;
}
/* Report a tiny but internally consistent fake filesystem (all counts 1). */
static int DUMMY_statfs (const char * path, ior_aiori_statfs_t * stat, IOR_param_t * param){
        stat->f_bsize  = 1;
        stat->f_blocks = stat->f_bfree = stat->f_bavail = 1;
        stat->f_files  = stat->f_ffree = 1;
        return 0;
}
/* Directory creation is a no-op for DUMMY; always succeeds. */
static int DUMMY_mkdir (const char *path, mode_t mode, IOR_param_t * param){
        return 0;
}
/* Directory removal is a no-op for DUMMY; always succeeds. */
static int DUMMY_rmdir (const char *path, IOR_param_t * param){
        return 0;
}
/* Access checks always succeed: every path "exists" for DUMMY. */
static int DUMMY_access (const char *path, int mode, IOR_param_t * param){
        return 0;
}
/* stat() stub: reports success but leaves *buf untouched. */
static int DUMMY_stat (const char *path, struct stat *buf, IOR_param_t * param){
        return 0;
}
ior_aiori_t dummy_aiori = {
"DUMMY",
DUMMY_Create,
DUMMY_Open,
DUMMY_Xfer,
DUMMY_Close,
DUMMY_Delete,
DUMMY_getVersion,
DUMMY_Fsync,
DUMMY_GetFileSize,
DUMMY_statfs,
DUMMY_mkdir,
DUMMY_rmdir,
DUMMY_access,
DUMMY_stat,
NULL,
NULL,
DUMMY_options
};

View File

@ -89,9 +89,10 @@ static IOR_offset_t HDF5_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void HDF5_Close(void *, IOR_param_t *);
static void HDF5_Delete(char *, IOR_param_t *);
static void HDF5_SetVersion(IOR_param_t *);
static char* HDF5_GetVersion();
static void HDF5_Fsync(void *, IOR_param_t *);
static IOR_offset_t HDF5_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int HDF5_Access(const char *, int, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
@ -102,9 +103,14 @@ ior_aiori_t hdf5_aiori = {
.xfer = HDF5_Xfer,
.close = HDF5_Close,
.delete = HDF5_Delete,
.set_version = HDF5_SetVersion,
.get_version = HDF5_GetVersion,
.fsync = HDF5_Fsync,
.get_file_size = HDF5_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = HDF5_Access,
.stat = aiori_posix_stat,
};
static hid_t xferPropList; /* xfer property list */
@ -435,27 +441,29 @@ static void HDF5_Close(void *fd, IOR_param_t * param)
*/
static void HDF5_Delete(char *testFileName, IOR_param_t * param)
{
if (unlink(testFileName) != 0)
WARN("cannot delete file");
return(MPIIO_Delete(testFileName, param));
}
/*
* Determine api version.
*/
static void HDF5_SetVersion(IOR_param_t * test)
static char * HDF5_GetVersion()
{
static char version[1024] = {0};
if(version[0]) return version;
unsigned major, minor, release;
if (H5get_libversion(&major, &minor, &release) < 0) {
WARN("cannot get HDF5 library version");
} else {
sprintf(test->apiVersion, "%s-%u.%u.%u",
test->api, major, minor, release);
sprintf(version, "%u.%u.%u", major, minor, release);
}
#ifndef H5_HAVE_PARALLEL
strcat(test->apiVersion, " (Serial)");
strcat(version, " (Serial)");
#else /* H5_HAVE_PARALLEL */
strcat(test->apiVersion, " (Parallel)");
strcat(version, " (Parallel)");
#endif /* not H5_HAVE_PARALLEL */
return version;
}
/*
@ -565,5 +573,13 @@ static void SetupDataSet(void *fd, IOR_param_t * param)
static IOR_offset_t
HDF5_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName)
{
return (MPIIO_GetFileSize(test, testComm, testFileName));
return(MPIIO_GetFileSize(test, testComm, testFileName));
}
/*
* Use MPIIO call to check for access.
*/
static int HDF5_Access(const char *path, int mode, IOR_param_t *param)
{
return(MPIIO_Access(path, mode, param));
}

357
src/aiori-IME.c Executable file
View File

@ -0,0 +1,357 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
* *
* Copyright (c) 2003, The Regents of the University of California. *
* Copyright (c) 2018, DataDirect Networks. *
* See the file COPYRIGHT for a complete copyright notice and license. *
* *
********************************************************************************
*
* Implement abstract I/O interface for DDN Infinite Memory Engine (IME).
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <errno.h> /* sys_errlist */
#include <fcntl.h> /* IO operations */
#include "ior.h"
#include "iordef.h"
#include "aiori.h"
#include "utilities.h"
#include "ime_native.h"
#ifndef O_BINARY /* Required on Windows */
# define O_BINARY 0
#endif
/**************************** P R O T O T Y P E S *****************************/
static void *IME_Create(char *, IOR_param_t *);
static void *IME_Open(char *, IOR_param_t *);
static void IME_Close(void *, IOR_param_t *);
static void IME_Delete(char *, IOR_param_t *);
static char *IME_GetVersion();
static void IME_Fsync(void *, IOR_param_t *);
static int IME_Access(const char *, int, IOR_param_t *);
static IOR_offset_t IME_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static IOR_offset_t IME_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static int IME_StatFS(const char *, ior_aiori_statfs_t *,
IOR_param_t *);
static int IME_RmDir(const char *, IOR_param_t *);
static int IME_MkDir(const char *, mode_t, IOR_param_t *);
static int IME_Stat(const char *, struct stat *, IOR_param_t *);
static void IME_Initialize();
static void IME_Finalize();
/************************** D E C L A R A T I O N S ***************************/
extern int rank;
extern int rankOffset;
extern int verbose;
extern MPI_Comm testComm;
ior_aiori_t ime_aiori = {
.name = "IME",
.create = IME_Create,
.open = IME_Open,
.xfer = IME_Xfer,
.close = IME_Close,
.delete = IME_Delete,
.get_version = IME_GetVersion,
.fsync = IME_Fsync,
.get_file_size = IME_GetFileSize,
.access = IME_Access,
.statfs = IME_StatFS,
.rmdir = IME_RmDir,
.mkdir = IME_MkDir,
.stat = IME_Stat,
.initialize = IME_Initialize,
.finalize = IME_Finalize,
};
/***************************** F U N C T I O N S ******************************/
/*
* Initialize IME (before MPI is started).
*/
/* Backend hook: bring up the IME native client library.
 * Registered as .initialize in ime_aiori; per the comment above, it runs
 * before MPI is started, so it must not rely on MPI being available. */
static void IME_Initialize()
{
ime_native_init();
}
/*
* Finalize IME (after MPI is shut down).
*/
/* Backend hook: shut down the IME native client library.
 * The return value of ime_native_finalize() is deliberately discarded —
 * at teardown there is nothing useful to do on failure. */
static void IME_Finalize()
{
(void)ime_native_finalize();
}
/*
* Try to access a file through the IME interface.
*/
/*
 * Check accessibility of a path via the IME native interface.
 * Returns ime_native_access()'s result directly (0 on success).
 */
static int IME_Access(const char *path, int mode, IOR_param_t *param)
{
        int rc;

        (void)param;            /* unused by this backend */
        rc = ime_native_access(path, mode);
        return rc;
}
/*
* Creat and open a file through the IME interface.
*/
/* Create a file via the IME interface by delegating to IME_Open().
 * NOTE(review): creation relies on the caller having set IOR_CREAT in
 * param->openFlags (IME_Open maps it to O_CREAT) — confirm callers do. */
static void *IME_Create(char *testFileName, IOR_param_t *param)
{
return IME_Open(testFileName, param);
}
/*
* Open a file through the IME interface.
*/
/*
 * Open a file through the IME native interface.
 * Translates IOR's portable open flags into POSIX-style flags, honors
 * O_DIRECT when requested, and returns a heap-allocated fd handle.
 * Aborts via ERR() on allocation or open failure.
 */
static void *IME_Open(char *testFileName, IOR_param_t *param)
{
        /* IOR flag -> POSIX flag translation table. */
        static const struct {
                int ior_flag;
                int posix_flag;
        } flag_map[] = {
                { IOR_RDONLY, O_RDONLY },
                { IOR_WRONLY, O_WRONLY },
                { IOR_RDWR,   O_RDWR   },
                { IOR_APPEND, O_APPEND },
                { IOR_CREAT,  O_CREAT  },
                { IOR_EXCL,   O_EXCL   },
                { IOR_TRUNC,  O_TRUNC  },
        };
        int oflags = O_BINARY;
        size_t i;
        int *fd = (int *)malloc(sizeof(int));

        if (fd == NULL)
                ERR("Unable to malloc file descriptor");

        if (param->useO_DIRECT)
                set_o_direct_flag(&oflags);

        for (i = 0; i < sizeof(flag_map) / sizeof(flag_map[0]); i++) {
                if (param->openFlags & flag_map[i].ior_flag)
                        oflags |= flag_map[i].posix_flag;
        }

        *fd = ime_native_open(testFileName, oflags, 0664);
        if (*fd < 0) {
                free(fd);       /* don't leak the handle on failure */
                ERR("cannot open file");
        }

        return (void *)fd;
}
/*
* Write or read access to file using the IME interface.
*/
static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer,
                             IOR_offset_t length, IOR_param_t *param)
{
        int xferRetries = 0;
        long long remaining = (long long)length;
        char *ptr = (char *)buffer;
        int fd = *(int *)file;
        long long rc;

        /* Move `length` bytes starting at param->offset, retrying on
         * partial transfers up to MAX_RETRY times. */
        while (remaining > 0) {
                /* BUGFIX: the offset must advance with ptr across partial
                 * transfers.  The original passed the fixed param->offset to
                 * every pwrite/pread, so after a short transfer the next
                 * attempt re-targeted the start of the region while the data
                 * pointer had already advanced, corrupting the file/buffer.
                 * (The verbose prints below already computed this value.) */
                IOR_offset_t cur_offset = param->offset + length - remaining;

                if (access == WRITE) { /* WRITE */
                        if (verbose >= VERBOSE_4) {
                                fprintf(stdout, "task %d writing to offset %lld\n",
                                        rank, cur_offset);
                        }
                        rc = ime_native_pwrite(fd, ptr, remaining, cur_offset);
                        /* BUGFIX: the original never checked write errors;
                         * a failed write (rc < 0) only tripped the assert. */
                        if (rc < 0)
                                ERR("write failed");
                        if (param->fsyncPerWrite)
                                IME_Fsync(&fd, param);
                } else { /* READ or CHECK */
                        if (verbose >= VERBOSE_4) {
                                fprintf(stdout, "task %d reading from offset %lld\n",
                                        rank, cur_offset);
                        }
                        rc = ime_native_pread(fd, ptr, remaining, cur_offset);
                        if (rc == 0)
                                ERR("hit EOF prematurely");
                        else if (rc < 0)
                                ERR("read failed");
                }

                if (rc < remaining) {
                        fprintf(stdout, "WARNING: Task %d, partial %s, %lld of "
                                "%lld bytes at offset %lld\n",
                                rank, access == WRITE ? "write" : "read", rc,
                                remaining, cur_offset);
                        if (param->singleXferAttempt) {
                                MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
                                          "barrier error");
                        }
                        if (xferRetries > MAX_RETRY) {
                                ERR( "too many retries -- aborting" );
                        }
                } else if (rc > remaining) /* this should never happen */
                        ERR("too many bytes transferred!?!");

                assert(rc >= 0);
                assert(rc <= remaining);
                remaining -= rc;
                ptr += rc;
                xferRetries++;
        }

        return(length);
}
/*
* Perform fsync().
*/
/*
 * Flush a file to stable storage via ime_native_fsync().
 * Failure is non-fatal: a warning is printed and the run continues.
 */
static void IME_Fsync(void *fd, IOR_param_t *param)
{
        int rc;

        (void)param;            /* unused by this backend */
        rc = ime_native_fsync(*(int *)fd);
        if (rc != 0)
                WARN("cannot perform fsync on file");
}
/*
* Close a file through the IME interface.
*/
/*
 * Close a file through the IME interface and release the heap-allocated
 * fd handle created by IME_Open().  (The original freed the handle on
 * both the success and failure paths; this keeps that behavior with a
 * single free.)
 */
static void IME_Close(void *fd, IOR_param_t *param)
{
        int native_fd = *(int *)fd;

        (void)param;            /* unused by this backend */
        free(fd);
        if (ime_native_close(native_fd) != 0)
                ERR("cannot close file");
}
/*
* Delete a file through the IME interface.
*/
/*
 * Delete a file through the IME interface.
 * Failure is non-fatal: a warning naming the rank and file is printed.
 */
static void IME_Delete(char *testFileName, IOR_param_t *param)
{
        if (ime_native_unlink(testFileName) != 0) {
                char errmsg[256];

                /* BUGFIX: use snprintf — testFileName is caller-controlled
                 * and the original unbounded sprintf could overflow errmsg.
                 * Also build the message only when it is actually needed. */
                snprintf(errmsg, sizeof(errmsg),
                         "[RANK %03d]:cannot delete file %s\n",
                         rank, testFileName);
                WARN(errmsg);
        }
}
/*
* Determine API version.
*/
/*
 * Report the IME native API version string.
 * Returns a pointer to an internal static buffer (do not free).
 * On APIs older than 1.2.0 the library exposes no version call, so a
 * placeholder string is returned instead.
 */
static char *IME_GetVersion()
{
        /* FIX: "= {}" (empty brace initializer) is a GNU/C23 extension;
         * "{0}" is portable C.  Also bound the copy: the library-provided
         * version string length is not under our control. */
        static char ver[1024] = {0};
#if (IME_NATIVE_API_VERSION >= 120)
        strncpy(ver, ime_native_version(), sizeof(ver) - 1);
#else
        strcpy(ver, "not supported");
#endif
        return ver;
}
/*
* XXX: statfs call is currently not exposed by IME native interface.
*/
/* Unsupported operation stub: IME native exposes no statfs; warn and
 * return -1 so callers can detect the missing capability. */
static int IME_StatFS(const char *oid, ior_aiori_statfs_t *stat_buf,
IOR_param_t *param)
{
(void)oid;
(void)stat_buf;
(void)param;
WARN("statfs is currently not supported in IME backend!");
return -1;
}
/*
* XXX: mkdir call is currently not exposed by IME native interface.
*/
/* Unsupported operation stub: IME native exposes no mkdir; warn and
 * return -1 so callers can detect the missing capability. */
static int IME_MkDir(const char *oid, mode_t mode, IOR_param_t *param)
{
(void)oid;
(void)mode;
(void)param;
WARN("mkdir is currently not supported in IME backend!");
return -1;
}
/*
* XXX: rmdir call is currently not exposed by IME native interface.
*/
/* Unsupported operation stub: IME native exposes no rmdir; warn and
 * return -1 so callers can detect the missing capability. */
static int IME_RmDir(const char *oid, IOR_param_t *param)
{
(void)oid;
(void)param;
WARN("rmdir is currently not supported in IME backend!");
return -1;
}
/*
* Perform stat() through the IME interface.
*/
/* stat() a path through the IME native interface; fills *buf and returns
 * ime_native_stat()'s result directly (0 on success). */
static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param)
{
(void)param;
return ime_native_stat(path, buf);
}
/*
* Use IME stat() to return aggregate file size.
*/
/* Return the aggregate size of the written data across all tasks.
 * file-per-process mode: sum of each task's file size (MPI_SUM reduce).
 * shared-file mode: all tasks should see the same size; if the min and
 * max across tasks differ, warn (rank 0 only) and use the minimum so the
 * result is at least consistent.  Aborts via ERR() if stat fails.
 * NOTE: collective — every task in testComm must call this. */
static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
char *testFileName)
{
struct stat stat_buf;
IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
if (ime_native_stat(testFileName, &stat_buf) != 0) {
ERR("cannot get status of written file");
}
aggFileSizeFromStat = stat_buf.st_size;
if (test->filePerProc) {
/* N files: total moved = sum of per-task sizes */
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
aggFileSizeFromStat = tmpSum;
} else {
/* 1 shared file: verify every task observed the same size */
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
MPI_LONG_LONG_INT, MPI_MIN, testComm),
"cannot total data moved");
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
MPI_LONG_LONG_INT, MPI_MAX, testComm),
"cannot total data moved");
if (tmpMin != tmpMax) {
if (rank == 0) {
WARN("inconsistent file size by different tasks");
}
/* incorrect, but now consistent across tasks */
aggFileSizeFromStat = tmpMin;
}
}
return(aggFileSizeFromStat);
}

View File

@ -42,7 +42,7 @@ ior_aiori_t mmap_aiori = {
.xfer = MMAP_Xfer,
.close = MMAP_Close,
.delete = POSIX_Delete,
.set_version = POSIX_SetVersion,
.get_version = aiori_get_version,
.fsync = MMAP_Fsync,
.get_file_size = POSIX_GetFileSize,
};

View File

@ -38,10 +38,9 @@ static void *MPIIO_Open(char *, IOR_param_t *);
static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void MPIIO_Close(void *, IOR_param_t *);
static void MPIIO_Delete(char *, IOR_param_t *);
static void MPIIO_SetVersion(IOR_param_t *);
static char* MPIIO_GetVersion();
static void MPIIO_Fsync(void *, IOR_param_t *);
static int MPIIO_Access(const char *, int, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
@ -52,10 +51,14 @@ ior_aiori_t mpiio_aiori = {
.xfer = MPIIO_Xfer,
.close = MPIIO_Close,
.delete = MPIIO_Delete,
.set_version = MPIIO_SetVersion,
.get_version = MPIIO_GetVersion,
.fsync = MPIIO_Fsync,
.get_file_size = MPIIO_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = MPIIO_Access,
.stat = aiori_posix_stat,
};
/***************************** F U N C T I O N S ******************************/
@ -63,7 +66,7 @@ ior_aiori_t mpiio_aiori = {
/*
* Try to access a file through the MPIIO interface.
*/
static int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
int MPIIO_Access(const char *path, int mode, IOR_param_t *param)
{
MPI_File fd;
int mpi_mode = MPI_MODE_UNIQUE_OPEN;
@ -268,10 +271,14 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer,
/* point functions to appropriate MPIIO calls */
if (access == WRITE) { /* WRITE */
Access = MPI_File_write;
Access_at = MPI_File_write_at;
Access_all = MPI_File_write_all;
Access_at_all = MPI_File_write_at_all;
Access = (int (MPIAPI *)(MPI_File, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write;
Access_at = (int (MPIAPI *)(MPI_File, MPI_Offset, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write_at;
Access_all = (int (MPIAPI *) (MPI_File, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write_all;
Access_at_all = (int (MPIAPI *) (MPI_File, MPI_Offset, void *, int,
MPI_Datatype, MPI_Status *)) MPI_File_write_at_all;
/*
* this needs to be properly implemented:
*
@ -364,15 +371,18 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer,
}
}
}
if((access == WRITE) && (param->fsyncPerWrite == TRUE))
MPIIO_Fsync(fd, param);
return (length);
}
/*
* Perform fsync().
*/
static void MPIIO_Fsync(void *fd, IOR_param_t * param)
static void MPIIO_Fsync(void *fdp, IOR_param_t * param)
{
;
if (MPI_File_sync(*(MPI_File *)fdp) != MPI_SUCCESS)
EWARN("fsync() failed");
}
/*
@ -396,7 +406,7 @@ static void MPIIO_Close(void *fd, IOR_param_t * param)
/*
* Delete a file through the MPIIO interface.
*/
static void MPIIO_Delete(char *testFileName, IOR_param_t * param)
void MPIIO_Delete(char *testFileName, IOR_param_t * param)
{
MPI_CHECK(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL),
"cannot delete file");
@ -405,13 +415,13 @@ static void MPIIO_Delete(char *testFileName, IOR_param_t * param)
/*
* Determine api version.
*/
static void MPIIO_SetVersion(IOR_param_t * test)
static char* MPIIO_GetVersion()
{
int version, subversion;
MPI_CHECK(MPI_Get_version(&version, &subversion),
"cannot get MPI version");
sprintf(test->apiVersion, "%s (version=%d, subversion=%d)",
test->api, version, subversion);
static char ver[1024] = {};
int version, subversion;
MPI_CHECK(MPI_Get_version(&version, &subversion), "cannot get MPI version");
sprintf(ver, "(%d.%d)", version, subversion);
return ver;
}
/*
@ -438,7 +448,7 @@ static IOR_offset_t SeekOffset(MPI_File fd, IOR_offset_t offset,
if (param->filePerProc) {
tempOffset = tempOffset / param->transferSize;
} else {
/*
/*
* this formula finds a file view offset for a task
* from an absolute offset
*/

View File

@ -56,6 +56,7 @@ static void NCMPI_Delete(char *, IOR_param_t *);
static void NCMPI_SetVersion(IOR_param_t *);
static void NCMPI_Fsync(void *, IOR_param_t *);
static IOR_offset_t NCMPI_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int NCMPI_Access(const char *, int, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
@ -66,9 +67,14 @@ ior_aiori_t ncmpi_aiori = {
.xfer = NCMPI_Xfer,
.close = NCMPI_Close,
.delete = NCMPI_Delete,
.set_version = NCMPI_SetVersion,
.get_version = NCMPI_GetVersion,
.fsync = NCMPI_Fsync,
.get_file_size = NCMPI_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = NCMPI_Access,
.stat = aiori_posix_stat,
};
/***************************** F U N C T I O N S ******************************/
@ -329,16 +335,15 @@ static void NCMPI_Close(void *fd, IOR_param_t * param)
*/
static void NCMPI_Delete(char *testFileName, IOR_param_t * param)
{
if (unlink(testFileName) != 0)
WARN("unlink() failed");
return(MPIIO_Delete(testFileName, param));
}
/*
* Determine api version.
*/
static void NCMPI_SetVersion(IOR_param_t * test)
static char* NCMPI_GetVersion()
{
sprintf(test->apiVersion, "%s (%s)", test->api, ncmpi_inq_libvers());
return ncmpi_inq_libvers();
}
/*
@ -387,5 +392,13 @@ static int GetFileMode(IOR_param_t * param)
static IOR_offset_t NCMPI_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName)
{
return (MPIIO_GetFileSize(test, testComm, testFileName));
return(MPIIO_GetFileSize(test, testComm, testFileName));
}
/*
* Use MPIIO call to check for access.
*/
static int NCMPI_Access(const char *path, int mode, IOR_param_t *param)
{
return(MPIIO_Access(path, mode, param));
}

View File

@ -78,9 +78,14 @@ ior_aiori_t posix_aiori = {
.xfer = POSIX_Xfer,
.close = POSIX_Close,
.delete = POSIX_Delete,
.set_version = POSIX_SetVersion,
.get_version = aiori_get_version,
.fsync = POSIX_Fsync,
.get_file_size = POSIX_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = aiori_posix_access,
.stat = aiori_posix_stat,
};
/***************************** F U N C T I O N S ******************************/
@ -507,14 +512,6 @@ void POSIX_Delete(char *testFileName, IOR_param_t * param)
EWARN(errmsg);
}
/*
* Determine api version.
*/
void POSIX_SetVersion(IOR_param_t * test)
{
strcpy(test->apiVersion, test->api);
}
/*
* Use POSIX stat() to return aggregate file size.
*/

361
src/aiori-RADOS.c Executable file
View File

@ -0,0 +1,361 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
* *
* (C) 2015 The University of Chicago *
* *
* See COPYRIGHT in top-level directory. *
* *
********************************************************************************
*
* Implement abstract I/O interface for RADOS.
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <rados/librados.h>
#include "ior.h"
#include "iordef.h"
#include "aiori.h"
#include "utilities.h"
/************************** O P T I O N S *****************************/
struct rados_options{
char * user;
char * conf;
char * pool;
};
static struct rados_options o = {
.user = NULL,
.conf = NULL,
.pool = NULL,
};
static option_help options [] = {
{'u', "user", "Username for the RADOS cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.user},
{'c', "conf", "Config file for the RADOS cluster", OPTION_REQUIRED_ARGUMENT, 's', & o.conf},
{'p', "pool", "RADOS pool to use for I/O", OPTION_REQUIRED_ARGUMENT, 's', & o.pool},
LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static void *RADOS_Create(char *, IOR_param_t *);
static void *RADOS_Open(char *, IOR_param_t *);
static IOR_offset_t RADOS_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void RADOS_Close(void *, IOR_param_t *);
static void RADOS_Delete(char *, IOR_param_t *);
static void RADOS_Fsync(void *, IOR_param_t *);
static IOR_offset_t RADOS_GetFileSize(IOR_param_t *, MPI_Comm, char *);
static int RADOS_StatFS(const char *, ior_aiori_statfs_t *, IOR_param_t *);
static int RADOS_MkDir(const char *, mode_t, IOR_param_t *);
static int RADOS_RmDir(const char *, IOR_param_t *);
static int RADOS_Access(const char *, int, IOR_param_t *);
static int RADOS_Stat(const char *, struct stat *, IOR_param_t *);
static option_help * RADOS_options();
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t rados_aiori = {
.name = "RADOS",
.create = RADOS_Create,
.open = RADOS_Open,
.xfer = RADOS_Xfer,
.close = RADOS_Close,
.delete = RADOS_Delete,
.get_version = aiori_get_version,
.fsync = RADOS_Fsync,
.get_file_size = RADOS_GetFileSize,
.statfs = RADOS_StatFS,
.mkdir = RADOS_MkDir,
.rmdir = RADOS_RmDir,
.access = RADOS_Access,
.stat = RADOS_Stat,
.get_options = RADOS_options,
};
#define RADOS_ERR(__err_str, __ret) do { \
errno = -__ret; \
ERR(__err_str); \
} while(0)
/***************************** F U N C T I O N S ******************************/
/* Backend hook: expose the RADOS-specific command-line options
 * (user, conf, pool) declared in the file-scope `options` table. */
static option_help * RADOS_options(){
  return options;
}
/*
 * Establish a connection to the RADOS cluster and create an I/O context
 * for the configured pool, storing both handles in *param.
 * Any failure aborts the run via RADOS_ERR() (which sets errno from the
 * negative librados return code).
 */
static void RADOS_Cluster_Init(IOR_param_t * param)
{
        int rc;

        /* cluster handle for the configured user */
        rc = rados_create(&param->rados_cluster, o.user);
        if (rc)
                RADOS_ERR("unable to create RADOS cluster handle", rc);

        /* apply settings from the Ceph config file */
        rc = rados_conf_read_file(param->rados_cluster, o.conf);
        if (rc)
                RADOS_ERR("unable to read RADOS config file", rc);

        rc = rados_connect(param->rados_cluster);
        if (rc)
                RADOS_ERR("unable to connect to the RADOS cluster", rc);

        /* I/O context scoped to the pool all objects live in */
        rc = rados_ioctx_create(param->rados_cluster, o.pool, &param->rados_ioctx);
        if (rc)
                RADOS_ERR("unable to create an I/O context for the RADOS cluster", rc);
}
/* Tear down the connection made by RADOS_Cluster_Init(): destroy the
 * pool I/O context first, then shut the cluster handle down. */
static void RADOS_Cluster_Finalize(IOR_param_t * param)
{
/* ioctx destroy */
rados_ioctx_destroy(param->rados_ioctx);
/* shutdown */
rados_shutdown(param->rados_cluster);
}
/* Common path for RADOS_Create()/RADOS_Open().
 * Connects to the cluster, then (if create_flag) issues a write-op that
 * creates the object — exclusively when IOR_EXCL is set, idempotently
 * otherwise.  The returned "file handle" is simply a strdup'd object id;
 * RADOS_Close() frees it.  Aborts via ERR()/RADOS_ERR() on failure. */
static void *RADOS_Create_Or_Open(char *testFileName, IOR_param_t * param, int create_flag)
{
int ret;
char *oid;
RADOS_Cluster_Init(param);
if (param->useO_DIRECT == TRUE)
WARN("direct I/O mode is not implemented in RADOS\n");
/* the oid string doubles as the aiori file handle */
oid = strdup(testFileName);
if (!oid)
ERR("unable to allocate RADOS oid");
if (create_flag)
{
rados_write_op_t create_op;
int rados_create_flag;
if (param->openFlags & IOR_EXCL)
rados_create_flag = LIBRADOS_CREATE_EXCLUSIVE;
else
rados_create_flag = LIBRADOS_CREATE_IDEMPOTENT;
/* create a RADOS "write op" for creating the object */
create_op = rados_create_write_op();
rados_write_op_create(create_op, rados_create_flag, NULL);
ret = rados_write_op_operate(create_op, param->rados_ioctx, oid,
NULL, 0);
rados_release_write_op(create_op);
if (ret)
RADOS_ERR("unable to create RADOS object", ret);
}
else
{
/* XXX actually, we should probably assert oid existence here? */
}
return (void *)oid;
}
/* Create (and open) a RADOS object; delegates with create_flag = TRUE. */
static void *RADOS_Create(char *testFileName, IOR_param_t * param)
{
return RADOS_Create_Or_Open(testFileName, param, TRUE);
}
/*
 * Open a RADOS object; honors IOR_CREAT by routing through the create
 * path, otherwise opens the (assumed existing) object.
 */
static void *RADOS_Open(char *testFileName, IOR_param_t * param)
{
        int create_flag = (param->openFlags & IOR_CREAT) ? TRUE : FALSE;

        return RADOS_Create_Or_Open(testFileName, param, create_flag);
}
/* Transfer `length` bytes to/from the object at param->offset using a
 * single librados write-op or read-op.  The whole request is issued in
 * one operation (no retry loop); a short read or any librados error
 * aborts via RADOS_ERR().  Returns the full length on success. */
static IOR_offset_t RADOS_Xfer(int access, void *fd, IOR_size_t * buffer,
IOR_offset_t length, IOR_param_t * param)
{
int ret;
char *oid = (char *)fd;
if (access == WRITE)
{
rados_write_op_t write_op;
write_op = rados_create_write_op();
rados_write_op_write(write_op, (const char *)buffer,
length, param->offset);
ret = rados_write_op_operate(write_op, param->rados_ioctx,
oid, NULL, 0);
rados_release_write_op(write_op);
if (ret)
RADOS_ERR("unable to write RADOS object", ret);
}
else /* READ */
{
int read_ret;
size_t bytes_read;
rados_read_op_t read_op;
read_op = rados_create_read_op();
rados_read_op_read(read_op, param->offset, length, (char *)buffer,
&bytes_read, &read_ret);
ret = rados_read_op_operate(read_op, param->rados_ioctx, oid, 0);
rados_release_read_op(read_op);
/* treat a short read the same as a failure */
if (ret || read_ret || ((IOR_offset_t)bytes_read != length))
RADOS_ERR("unable to read RADOS object", ret);
}
return length;
}
/*
 * No-op: a completed librados write op is already durable, so there is
 * nothing to flush.
 */
static void RADOS_Fsync(void *fd, IOR_param_t * param)
{
        (void)fd;
        (void)param;
}
/*
 * "Close" a RADOS object: objects need no close, so this just tears down
 * the cluster connection and frees the strdup'd oid handed out by
 * RADOS_Create_Or_Open().
 */
static void RADOS_Close(void *fd, IOR_param_t * param)
{
        RADOS_Cluster_Finalize(param);
        free(fd);
}
/* Remove a RADOS object by name.  Because delete is called outside an
 * open/close pair, the cluster connection is re-established for the
 * duration of the call.  Aborts via RADOS_ERR() on failure. */
static void RADOS_Delete(char *testFileName, IOR_param_t * param)
{
int ret;
char *oid = testFileName;
rados_write_op_t remove_op;
/* we have to reestablish cluster connection here... */
RADOS_Cluster_Init(param);
/* remove the object */
remove_op = rados_create_write_op();
rados_write_op_remove(remove_op);
ret = rados_write_op_operate(remove_op, param->rados_ioctx,
oid, NULL, 0);
rados_release_write_op(remove_op);
if (ret)
RADOS_ERR("unable to remove RADOS object", ret);
RADOS_Cluster_Finalize(param);
return;
}
/* Aggregate object size across tasks, mirroring the other backends:
 * file-per-process sums per-task sizes; shared-object mode cross-checks
 * min vs max and falls back to the minimum on disagreement.
 * Re-establishes the cluster connection for the duration of the call.
 * NOTE: collective — every task in testComm must call this. */
static IOR_offset_t RADOS_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName)
{
int ret;
char *oid = testFileName;
rados_read_op_t stat_op;
uint64_t oid_size;
int stat_ret;
IOR_offset_t aggSizeFromStat, tmpMin, tmpMax, tmpSum;
/* we have to reestablish cluster connection here... */
RADOS_Cluster_Init(test);
/* stat the object */
stat_op = rados_create_read_op();
rados_read_op_stat(stat_op, &oid_size, NULL, &stat_ret);
ret = rados_read_op_operate(stat_op, test->rados_ioctx, oid, 0);
rados_release_read_op(stat_op);
if (ret || stat_ret)
RADOS_ERR("unable to stat RADOS object", stat_ret);
aggSizeFromStat = oid_size;
if (test->filePerProc == TRUE)
{
/* N objects: total moved = sum of per-task sizes */
MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpSum, 1,
MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
aggSizeFromStat = tmpSum;
}
else
{
/* 1 shared object: verify every task observed the same size */
MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpMin, 1,
MPI_LONG_LONG_INT, MPI_MIN, testComm),
"cannot total data moved");
MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpMax, 1,
MPI_LONG_LONG_INT, MPI_MAX, testComm),
"cannot total data moved");
if (tmpMin != tmpMax)
{
if (rank == 0)
WARN("inconsistent file size by different tasks");
/* incorrect, but now consistent across tasks */
aggSizeFromStat = tmpMin;
}
}
RADOS_Cluster_Finalize(test);
return aggSizeFromStat;
}
/* Unsupported: RADOS has no filesystem-level statfs; warn and fail. */
static int RADOS_StatFS(const char *oid, ior_aiori_statfs_t *stat_buf,
                        IOR_param_t *param)
{
        (void)oid;
        (void)stat_buf;
        (void)param;
        WARN("statfs not supported in RADOS backend!");
        return -1;
}
/* Unsupported: RADOS objects live in a flat pool — no directories. */
static int RADOS_MkDir(const char *oid, mode_t mode, IOR_param_t *param)
{
        (void)oid;
        (void)mode;
        (void)param;
        WARN("mkdir not supported in RADOS backend!");
        return -1;
}
/* Unsupported: RADOS objects live in a flat pool — no directories. */
static int RADOS_RmDir(const char *oid, IOR_param_t *param)
{
        (void)oid;
        (void)param;
        WARN("rmdir not supported in RADOS backend!");
        return -1;
}
/*
 * Check object existence via a librados stat read-op.
 * Returns 0 if the object exists, -1 otherwise.  The `mode` argument is
 * ignored: RADOS objects carry no permission bits, so existence is the
 * only meaningful test.  Re-establishes the cluster connection for the
 * duration of the call.
 */
static int RADOS_Access(const char *oid, int mode, IOR_param_t *param)
{
        rados_read_op_t read_op;
        int ret;
        int prval;
        uint64_t oid_size;

        (void)mode;

        /* we have to reestablish cluster connection here... */
        RADOS_Cluster_Init(param);

        /* use read_op stat to check for oid existence */
        read_op = rados_create_read_op();
        rados_read_op_stat(read_op, &oid_size, NULL, &prval);
        ret = rados_read_op_operate(read_op, param->rados_ioctx, oid, 0);
        rados_release_read_op(read_op);

        RADOS_Cluster_Finalize(param);

        /* FIX: logical OR — the original used bitwise '|', which happened
         * to give the right answer but obscured the intent (fail if either
         * the op or the per-op stat reported an error). */
        if (ret || prval)
                return -1;
        else
                return 0;
}
/* Unsupported: no POSIX-style stat mapping for RADOS objects here
 * (size queries go through RADOS_GetFileSize instead). */
static int RADOS_Stat(const char *oid, struct stat *buf, IOR_param_t *param)
{
        (void)oid;
        (void)buf;
        (void)param;
        WARN("stat not supported in RADOS backend!");
        return -1;
}

View File

@ -110,6 +110,39 @@
#include "aws4c_extra.h" // utilities, e.g. for parsing XML in responses
/* buffer is used to generate URLs, err_msgs, etc */
#define BUFF_SIZE 1024
static char buff[BUFF_SIZE];
const int ETAG_SIZE = 32;
CURLcode rc;
/* Any objects we create or delete will be under this bucket */
const char* bucket_name = "ior";
/* TODO: The following stuff goes into options! */
/* REST/S3 variables */
// CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */
# define IOR_CURL_INIT 0x01 /* curl top-level inits were perfomed once? */
# define IOR_CURL_NOCONTINUE 0x02
# define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */
#ifdef USE_S3_AIORI
# include <curl/curl.h>
# include "aws4c.h"
#else
typedef void CURL; /* unused, but needs a type */
typedef void IOBuf; /* unused, but needs a type */
#endif
IOBuf* io_buf; /* aws4c places parsed header values here */
IOBuf* etags; /* accumulate ETags for N:1 parts */
///////////////////////////////////////////////
/**************************** P R O T O T Y P E S *****************************/
static void* S3_Create(char*, IOR_param_t*);
static void* S3_Open(char*, IOR_param_t*);
@ -122,9 +155,10 @@ static IOR_offset_t EMC_Xfer(int, void*, IOR_size_t*, IOR_offset_t, IOR_param_t*
static void EMC_Close(void*, IOR_param_t*);
static void S3_Delete(char*, IOR_param_t*);
static void S3_SetVersion(IOR_param_t*);
static void S3_Fsync(void*, IOR_param_t*);
static IOR_offset_t S3_GetFileSize(IOR_param_t*, MPI_Comm, char*);
static void S3_init();
static void S3_finalize();
/************************** D E C L A R A T I O N S ***************************/
@ -138,9 +172,11 @@ ior_aiori_t s3_aiori = {
.xfer = S3_Xfer,
.close = S3_Close,
.delete = S3_Delete,
.set_version = S3_SetVersion,
.get_version = aiori_get_version,
.fsync = S3_Fsync,
.get_file_size = S3_GetFileSize,
.initialize = S3_init,
.finalize = S3_finalize
};
// "S3", plus EMC-extensions enabled
@ -156,6 +192,8 @@ ior_aiori_t s3_plus_aiori = {
.set_version = S3_SetVersion,
.fsync = S3_Fsync,
.get_file_size = S3_GetFileSize,
.initialize = S3_init,
.finalize = S3_finalize
};
// Use EMC-extensions for N:1 write, as well
@ -171,9 +209,24 @@ ior_aiori_t s3_emc_aiori = {
.set_version = S3_SetVersion,
.fsync = S3_Fsync,
.get_file_size = S3_GetFileSize,
.initialize = S3_init,
.finalize = S3_finalize
};
static void S3_init(){
/* This is supposed to be done before *any* threads are created.
* Could MPI_Init() create threads (or call multi-threaded
* libraries)? We'll assume so. */
AWS4C_CHECK( aws_init() );
}
static void S3_finalize(){
/* done once per program, after exiting all threads.
* NOTE: This fn doesn't return a value that can be checked for success. */
aws_cleanup();
}
/* modelled on similar macros in iordef.h */
#define CURL_ERR(MSG, CURL_ERRNO, PARAM) \
do { \
@ -183,7 +236,7 @@ ior_aiori_t s3_emc_aiori = {
fflush(stdout); \
MPI_Abort((PARAM)->testComm, -1); \
} while (0)
#define CURL_WARN(MSG, CURL_ERRNO) \
do { \
@ -192,20 +245,6 @@ ior_aiori_t s3_emc_aiori = {
__FILE__, __LINE__); \
fflush(stdout); \
} while (0)
/* buffer is used to generate URLs, err_msgs, etc */
#define BUFF_SIZE 1024
static char buff[BUFF_SIZE];
const int ETAG_SIZE = 32;
CURLcode rc;
/* Any objects we create or delete will be under this bucket */
const char* bucket_name = "ior";
//const char* bucket_name = "brettk";
/***************************** F U N C T I O N S ******************************/
@ -232,9 +271,8 @@ const char* bucket_name = "ior";
* ---------------------------------------------------------------------------
*/
static
void
s3_connect( IOR_param_t* param ) {
static void s3_connect( IOR_param_t* param ) {
if (param->verbose >= VERBOSE_2) {
printf("-> s3_connect\n"); /* DEBUGGING */
}
@ -446,7 +484,7 @@ S3_Create_Or_Open_internal(char* testFileName,
if ( n_to_n || (rank == 0) ) {
// rank0 handles truncate
if ( needs_reset) {
if ( needs_reset) {
aws_iobuf_reset(param->io_buf);
AWS4C_CHECK( s3_put(param->io_buf, testFileName) ); /* 0-length write */
AWS4C_CHECK_OK( param->io_buf );
@ -510,7 +548,7 @@ S3_Create_Or_Open_internal(char* testFileName,
fprintf( stdout, "rank %d resetting\n",
rank);
}
aws_iobuf_reset(param->io_buf);
AWS4C_CHECK( s3_put(param->io_buf, testFileName) );
AWS4C_CHECK_OK( param->io_buf );
@ -641,7 +679,7 @@ EMC_Open( char *testFileName, IOR_param_t * param ) {
/* In the EMC case, instead of Multi-Part Upload we can use HTTP
* "byte-range" headers to write parts of a single object. This appears to
* have several advantages over the S3 MPU spec:
*
*
* (a) no need for a special "open" operation, to capture an "UploadID".
* Instead we simply write byte-ranges, and the server-side resolves
* any races, producing a single winner. In the IOR case, there should
@ -808,7 +846,7 @@ S3_Xfer_internal(int access,
printf("rank %d: part %d = ETag %s\n", rank, part_number, param->io_buf->eTag);
}
// drop ptrs to <data_ptr>, in param->io_buf
// drop ptrs to <data_ptr>, in param->io_buf
aws_iobuf_reset(param->io_buf);
}
else { // use EMC's byte-range write-support, instead of MPU
@ -830,7 +868,7 @@ S3_Xfer_internal(int access,
AWS4C_CHECK ( s3_put(param->io_buf, file) );
AWS4C_CHECK_OK( param->io_buf );
// drop ptrs to <data_ptr>, in param->io_buf
// drop ptrs to <data_ptr>, in param->io_buf
aws_iobuf_reset(param->io_buf);
}
@ -867,7 +905,7 @@ S3_Xfer_internal(int access,
ERR_SIMPLE(buff);
}
// drop refs to <data_ptr>, in param->io_buf
// drop refs to <data_ptr>, in param->io_buf
aws_iobuf_reset(param->io_buf);
}
@ -1126,7 +1164,7 @@ S3_Close_internal( void* fd,
start_multiplier = ETAG_SIZE; /* one ETag */
stride = etag_data_size; /* one rank's-worth of Etag data */
}
xml = aws_iobuf_new();
aws_iobuf_growth_size(xml, 1024 * 8);
@ -1305,7 +1343,7 @@ S3_Delete( char *testFileName, IOR_param_t * param ) {
#if 0
// EMC BUG: If file was written with appends, and is deleted,
// Then any future recreation will result in an object that can't be read.
// this
// this
AWS4C_CHECK( s3_delete(param->io_buf, testFileName) );
#else
// just replace with a zero-length object for now
@ -1334,7 +1372,7 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) {
#if 0
// EMC BUG: If file was written with appends, and is deleted,
// Then any future recreation will result in an object that can't be read.
// this
// this
AWS4C_CHECK( s3_delete(param->io_buf, testFileName) );
#else
// just replace with a zero-length object for now
@ -1353,25 +1391,6 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) {
/*
* Determine API version.
*/
static
void
S3_SetVersion( IOR_param_t * param ) {
if (param->verbose >= VERBOSE_2) {
printf("-> S3_SetVersion\n");
}
strcpy( param->apiVersion, param->api );
if (param->verbose >= VERBOSE_2) {
printf("<- S3_SetVersion\n");
}
}
/*
* HTTP HEAD returns meta-data for a "file".
*

View File

@ -28,22 +28,25 @@
*/
ior_aiori_t *available_aiori[] = {
#ifdef USE_POSIX_AIORI
&posix_aiori,
#endif
& dummy_aiori,
#ifdef USE_HDF5_AIORI
&hdf5_aiori,
#endif
#ifdef USE_HDFS_AIORI
&hdfs_aiori,
#endif
#ifdef USE_IME_AIORI
&ime_aiori,
#endif
#ifdef USE_MPIIO_AIORI
&mpiio_aiori,
#endif
#ifdef USE_NCMPI_AIORI
&ncmpi_aiori,
#endif
#ifdef USE_POSIX_AIORI
&posix_aiori,
#endif
#ifdef USE_MMAP_AIORI
&mmap_aiori,
#endif
@ -52,12 +55,27 @@ ior_aiori_t *available_aiori[] = {
&s3_plus_aiori,
&s3_emc_aiori,
#endif
#ifdef USE_DFS_AIORI
#ifdef USE_RADOS_AIORI
&rados_aiori,
#endif
#ifdef USE_DAOS_AIORI
&daos_aiori,
&dfs_aiori,
#endif
NULL
};
/*
 * Build a '|'-separated list of all compiled-in backend names into the
 * caller-supplied buffer.
 *
 * APIs - destination buffer; caller must size it to hold every backend
 *        name plus separators (no bounds checking is done here).
 *
 * Fix: the buffer is now always NUL-terminated, even when no backend is
 * available; previously it was left uninitialized in that case and the
 * caller could read garbage.
 */
void aiori_supported_apis(char * APIs){
        ior_aiori_t **tmp = available_aiori;

        /* guarantee a valid (empty) string even if the list is empty */
        APIs[0] = '\0';

        if (*tmp != NULL) {
                APIs += sprintf(APIs, "%s", (*tmp)->name);
                tmp++;
                for (; *tmp != NULL; ++tmp) {
                        APIs += sprintf(APIs, "|%s", (*tmp)->name);
                }
        }
}
/**
* Default statfs implementation.
*
@ -67,7 +85,7 @@ ior_aiori_t *available_aiori[] = {
* This function provides a AIORI statfs for POSIX-compliant filesystems. It
* uses statvfs is available and falls back on statfs.
*/
static int aiori_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param)
int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param)
{
int ret;
#if defined(HAVE_STATVFS)
@ -92,44 +110,95 @@ static int aiori_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_par
return 0;
}
static int aiori_mkdir (const char *path, mode_t mode, IOR_param_t * param)
int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param)
{
return mkdir (path, mode);
}
static int aiori_rmdir (const char *path, IOR_param_t * param)
int aiori_posix_rmdir (const char *path, IOR_param_t * param)
{
return rmdir (path);
}
static int aiori_access (const char *path, int mode, IOR_param_t * param)
int aiori_posix_access (const char *path, int mode, IOR_param_t * param)
{
return access (path, mode);
}
static int aiori_stat (const char *path, struct stat *buf, IOR_param_t * param)
int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param)
{
return stat (path, buf);
}
/* Default backend version string: empty when a backend reports none. */
char* aiori_get_version()
{
        static char version_string[] = "";
        return version_string;
}
/* Guard so the backend initialize/finalize hooks run at most once. */
static int is_initialized = FALSE;

/*
 * Run the initialize() hook of every compiled-in backend.
 * Per the header comment in aiori.h this is called once per program,
 * before MPI is started.  tests_head may be NULL; hooks then receive
 * NULL instead of a params pointer.
 */
void aiori_initialize(IOR_test_t *tests_head){
if (is_initialized) return;
is_initialized = TRUE;

/* Sanity check, we were compiled with SOME backend, right? */
if (0 == aiori_count ()) {
ERR("No IO backends compiled into aiori. "
"Run 'configure --with-<backend>', and recompile.");
}

/* invoke each backend's optional initialize hook */
for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) {
if((*tmp)->initialize){
(*tmp)->initialize(tests_head ? &tests_head->params : NULL);
}
}
}
/*
 * Run the finalize() hook of every compiled-in backend; the mirror of
 * aiori_initialize().  Does nothing unless aiori_initialize() ran first.
 * tests_head may be NULL; hooks then receive NULL instead of params.
 */
void aiori_finalize(IOR_test_t *tests_head){
if (! is_initialized) return;
is_initialized = FALSE;

for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) {
if((*tmp)->finalize){
(*tmp)->finalize(tests_head ? &tests_head->params : NULL);
}
}
}
const ior_aiori_t *aiori_select (const char *api)
{
char warn_str[256] = {0};
for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) {
if (NULL == api || strcasecmp(api, (*tmp)->name) == 0) {
if (NULL == (*tmp)->statfs) {
(*tmp)->statfs = aiori_statfs;
(*tmp)->statfs = aiori_posix_statfs;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s statfs call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->mkdir) {
(*tmp)->mkdir = aiori_mkdir;
(*tmp)->mkdir = aiori_posix_mkdir;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s mkdir call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->rmdir) {
(*tmp)->rmdir = aiori_rmdir;
(*tmp)->rmdir = aiori_posix_rmdir;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s rmdir call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->access) {
(*tmp)->access = aiori_access;
(*tmp)->access = aiori_posix_access;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s access call", api);
WARN(warn_str);
}
if (NULL == (*tmp)->stat) {
(*tmp)->stat = aiori_stat;
(*tmp)->stat = aiori_posix_stat;
snprintf(warn_str, 256, "assuming POSIX-based backend for"
" %s stat call", api);
WARN(warn_str);
}
return *tmp;
}

View File

@ -25,6 +25,7 @@
#include "ior.h"
#include "iordef.h" /* IOR Definitions */
#include "option.h"
/*************************** D E F I N I T I O N S ****************************/
@ -61,6 +62,7 @@ typedef struct ior_aiori_statfs {
uint64_t f_ffree;
} ior_aiori_statfs_t;
typedef struct ior_aiori {
char *name;
void *(*create)(char *, IOR_param_t *);
@ -69,7 +71,7 @@ typedef struct ior_aiori {
IOR_offset_t, IOR_param_t *);
void (*close)(void *, IOR_param_t *);
void (*delete)(char *, IOR_param_t *);
void (*set_version)(IOR_param_t *);
char* (*get_version)();
void (*fsync)(void *, IOR_param_t *);
IOR_offset_t (*get_file_size)(IOR_param_t *, MPI_Comm, char *);
int (*statfs) (const char *, ior_aiori_statfs_t *, IOR_param_t * param);
@ -77,12 +79,15 @@ typedef struct ior_aiori {
int (*rmdir) (const char *path, IOR_param_t * param);
int (*access) (const char *path, int mode, IOR_param_t * param);
int (*stat) (const char *path, struct stat *buf, IOR_param_t * param);
int (*init)(IOR_param_t *);
int (*finalize)(IOR_param_t *);
void (*initialize)(IOR_param_t *); /* called once per program before MPI is started */
void (*finalize)(IOR_param_t *); /* called once per program after MPI is shutdown */
option_help * (*get_options)();
} ior_aiori_t;
extern ior_aiori_t dummy_aiori;
extern ior_aiori_t hdf5_aiori;
extern ior_aiori_t hdfs_aiori;
extern ior_aiori_t ime_aiori;
extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori;
@ -90,21 +95,35 @@ extern ior_aiori_t mmap_aiori;
extern ior_aiori_t s3_aiori;
extern ior_aiori_t s3_plus_aiori;
extern ior_aiori_t s3_emc_aiori;
extern ior_aiori_t rados_aiori;
extern ior_aiori_t daos_aiori;
extern ior_aiori_t dfs_aiori;
void aiori_initialize(IOR_test_t *th);
void aiori_finalize(IOR_test_t *th);
const ior_aiori_t *aiori_select (const char *api);
int aiori_count (void);
void aiori_supported_apis(char * APIs);
const char *aiori_default (void);
/* some generic POSIX-based backend calls */
char * aiori_get_version();
int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param);
int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param);
int aiori_posix_rmdir (const char *path, IOR_param_t * param);
int aiori_posix_access (const char *path, int mode, IOR_param_t * param);
int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param);
void *POSIX_Create(char *testFileName, IOR_param_t * param);
void *POSIX_Open(char *testFileName, IOR_param_t * param);
IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName);
void POSIX_Delete(char *testFileName, IOR_param_t * param);
void POSIX_Close(void *fd, IOR_param_t * param);
/* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */
void MPIIO_Delete(char *testFileName, IOR_param_t * param);
IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName);
void *POSIX_Create(char *testFileName, IOR_param_t *test);
void *POSIX_Open(char *testFileName, IOR_param_t *test);
void POSIX_Close(void *fd, IOR_param_t *test);
void POSIX_Delete(char *testFileName, IOR_param_t *test);
void POSIX_SetVersion(IOR_param_t *test);
IOR_offset_t POSIX_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
char *testFileName);
int MPIIO_Access(const char *, int, IOR_param_t *);
#endif /* not _AIORI_H */

40
src/ior-internal.h Normal file
View File

@ -0,0 +1,40 @@
/*
* This file contains header information for support code that is only used within IOR.
* For code shared across benchmarks, see utilities.h
*/
#ifndef _IOR_INTERNAL_H
#define _IOR_INTERNAL_H
/* Part of ior-output.c — internal output/reporting helpers shared only
 * within the ior executable (see utilities.h for cross-benchmark code). */
void PrintEarlyHeader();
void PrintHeader(int argc, char **argv);
void ShowTestStart(IOR_param_t *params);
void ShowTestEnd(IOR_test_t *tptr);
void ShowSetup(IOR_param_t *params);
void PrintRepeatEnd();
void PrintRepeatStart();
void PrintShortSummary(IOR_test_t * test);
void PrintLongSummaryAllTests(IOR_test_t *tests_head);
void PrintLongSummaryHeader();
void PrintLongSummaryOneTest(IOR_test_t *test);
void DisplayFreespace(IOR_param_t * test);
void GetTestFileName(char *, IOR_param_t *);
void PrintRemoveTiming(double start, double finish, int rep);
void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep);
void PrintTestEnds();
void PrintTableHeader();
/* End of ior-output */
/* Aggregate statistics over per-repetition measurements (see
 * bw_values()/ops_values() in ior-output.c, which allocate the per-rep
 * array in the same malloc block and point val at it). */
struct results {
double min;     /* smallest per-repetition value */
double max;     /* largest per-repetition value */
double mean;    /* arithmetic mean over repetitions */
double var;     /* population variance */
double sd;      /* standard deviation (sqrt of var) */
double sum;     /* running sum used to compute mean */
double *val;    /* per-repetition values; storage follows the struct */
};
#endif

6
src/ior-main.c Normal file
View File

@ -0,0 +1,6 @@
#include "ior.h"
/* Thin executable entry point: all program logic lives in ior_main(). */
int main(int argc, char **argv)
{
return ior_main(argc, argv);
}

823
src/ior-output.c Normal file
View File

@ -0,0 +1,823 @@
#ifndef _WIN32
# include <sys/utsname.h> /* uname() */
#endif
#include <math.h>
#include <stddef.h> /* needed for offsetof on some compilers */
#include "ior.h"
#include "ior-internal.h"
#include "utilities.h"
extern char **environ;
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals);
static struct results *ops_values(int reps, IOR_results_t * measured, int offset, IOR_offset_t transfer_size, double *vals);
static double mean_of_array_of_doubles(double *values, int len);
static void PPDouble(int leftjustify, double number, char *append);
static void PrintNextToken();
/* Emit the column header for per-repetition results; only the
 * human-readable (default) output format has a table header. */
void PrintTableHeader(){
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n");
fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n");
fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n");
}
}
/* Current JSON nesting depth (one two-space level per section). */
static int indent = 0;
/* Set after a value is emitted; tells the next emitter that a JSON
 * separator (", \n") is needed before it prints. */
static int needNextToken = 0;

/* Emit leading whitespace for the current nesting depth (JSON only). */
static void PrintIndent(){
if(outputFormat != OUTPUT_JSON){
return;
}
for(int i=0; i < indent; i++){
fprintf(out_resultfile, " ");
}
}
/* Begin a key/value pair whose value the caller will print directly to
 * out_resultfile; must be closed with PrintKeyValEnd().
 * CSV format intentionally emits nothing here. */
static void PrintKeyValStart(char * key){
PrintNextToken();
if (outputFormat == OUTPUT_DEFAULT){
PrintIndent();
fprintf(out_resultfile, "%-20s: ", key);
return;
}
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": \"", key);
}else if(outputFormat == OUTPUT_CSV){
}
}
/* Emit the pending JSON separator (comma + newline) if the previous
 * emitter flagged one, then indent to the current nesting depth. */
static void PrintNextToken(){
if(needNextToken){
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, ", \n");
}
}
PrintIndent();
}
/* Close a pair opened with PrintKeyValStart(): closing quote for JSON,
 * newline for default format; flags that a separator is now pending. */
static void PrintKeyValEnd(){
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"");
}
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n");
}
needNextToken = 1;
}
/* Emit a complete string key/value pair in the active output format.
 * NOTE: mutates the caller's buffer — a single trailing '\n' in value
 * is stripped in place so it cannot break the one-line formats. */
static void PrintKeyVal(char * key, char * value){
if(value != NULL && value[0] != 0 && value[strlen(value) -1 ] == '\n'){
// remove \n
value[strlen(value) -1 ] = 0;
}
PrintNextToken();
needNextToken = 1;
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-20s: %s\n", key, value);
return;
}
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": \"%s\"", key, value);
}else if(outputFormat == OUTPUT_CSV){
fprintf(out_resultfile, "%s", value);
}
}
/* Emit a key with a floating-point value (4 decimal places) in the
 * active output format; unknown formats emit nothing. */
static void PrintKeyValDouble(char * key, double value){
        PrintNextToken();
        needNextToken = 1;

        switch (outputFormat) {
        case OUTPUT_DEFAULT:
                fprintf(out_resultfile, "%-20s: %.4f\n", key, value);
                break;
        case OUTPUT_JSON:
                fprintf(out_resultfile, "\"%s\": %.4f", key, value);
                break;
        case OUTPUT_CSV:
                fprintf(out_resultfile, "%.4f", value);
                break;
        default:
                break;
        }
}
/* Emit a key with a 64-bit integer value in the active output format;
 * cast through long long so the %lld specifier is always valid. */
static void PrintKeyValInt(char * key, int64_t value){
PrintNextToken();
needNextToken = 1;
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-20s: %lld\n", key, (long long) value);
return;
}
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": %lld", key, (long long) value);
}else if(outputFormat == OUTPUT_CSV){
fprintf(out_resultfile, "%lld", (long long) value);
}
}
/* Open an anonymous section ("{" in JSON) and deepen the indent level;
 * pair with PrintEndSection(). */
static void PrintStartSection(){
PrintNextToken();
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
PrintIndent();
fprintf(out_resultfile, "{\n");
}
indent++;
}
/* Open a named section: a JSON object keyed by 'key', or a plain
 * heading line in the default format; pair with PrintEndSection(). */
static void PrintNamedSectionStart(char * key){
PrintNextToken();
needNextToken = 0;
indent++;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": {\n", key);
}else if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n%s: \n", key);
}
}
/* Open a named array: a JSON array keyed by 'key', or a plain heading
 * line in the default format; pair with PrintArrayEnd(). */
static void PrintNamedArrayStart(char * key){
PrintNextToken();
needNextToken = 0;
indent++;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": [\n", key);
}else if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n%s: \n", key);
}
}
/* Close the innermost section ("}" in JSON), pop one indent level and
 * flag that a separator is pending before the next sibling. */
static void PrintEndSection(){
indent--;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\n");
PrintIndent();
fprintf(out_resultfile, "}\n");
}
needNextToken = 1;
}
/* Open an anonymous JSON array; no-op for the other formats.
 * NOTE(review): unlike PrintNamedArrayStart(), this does not bump
 * 'indent' even though PrintArrayEnd() decrements it — verify the
 * pairing used by callers keeps the indent counter balanced. */
static void PrintArrayStart(){
PrintNextToken();
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "[ ");
}
}
/* Open a named JSON array without emitting a default-format heading
 * (contrast with the non-static PrintNamedArrayStart() above).
 * NOTE(review): does not increment 'indent'; see PrintArrayStart(). */
static void PrintArrayNamedStart(char * key){
PrintNextToken();
needNextToken = 0;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "\"%s\": [\n", key);
}
}
/* Close the innermost array ("]" in JSON), pop one indent level and
 * flag that a separator is pending. */
static void PrintArrayEnd(){
indent--;
if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, "]\n");
}
needNextToken = 1;
}
/* Close the per-repetition results array (see PrintRepeatStart()). */
void PrintRepeatEnd(){
PrintArrayEnd();
}
/* Open the per-repetition results array; the default (tabular) format
 * needs no array delimiters, so it returns early. */
void PrintRepeatStart(){
if( outputFormat == OUTPUT_DEFAULT){
return;
}
PrintArrayStart();
}
/* Close the current test's output section; only rank 0 (at sufficient
 * verbosity) also records the finish timestamp before closing. */
void PrintTestEnds(){
if (rank != 0 || verbose < VERBOSE_0) {
PrintEndSection();
return;
}

PrintKeyVal("Finished", CurrentTimeString());
PrintEndSection();
}
/* Print one reduced (across-tasks) result row for a single repetition.
 * access       - WRITE or read phase indicator
 * bw           - aggregate bandwidth in bytes/sec
 * diff_subset  - [0]=open, [1]=write/read, [2]=close elapsed seconds
 * totalTime    - total elapsed seconds for the phase
 * rep          - repetition number */
void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep){
if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-10s", access == WRITE ? "write" : "read");
PPDouble(1, bw / MEBIBYTE, " ");
PPDouble(1, (double)test->params.blockSize / KIBIBYTE, " ");
PPDouble(1, (double)test->params.transferSize / KIBIBYTE, " ");
PPDouble(1, diff_subset[0], " ");
PPDouble(1, diff_subset[1], " ");
PPDouble(1, diff_subset[2], " ");
PPDouble(1, totalTime, " ");
fprintf(out_resultfile, "%-4d\n", rep);
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("access", access == WRITE ? "write" : "read");
PrintKeyValDouble("bwMiB", bw / MEBIBYTE);
PrintKeyValDouble("blockKiB", (double)test->params.blockSize / KIBIBYTE);
PrintKeyValDouble("xferKiB", (double)test->params.transferSize / KIBIBYTE);
PrintKeyValDouble("openTime", diff_subset[0]);
PrintKeyValDouble("wrRdTime", diff_subset[1]);
PrintKeyValDouble("closeTime", diff_subset[2]);
PrintKeyValDouble("totalTime", totalTime);
PrintEndSection();
}
fflush(out_resultfile);
}
/*
* Message to print immediately after MPI_Init so we know that
* ior has started.
*/
/* Print the version banner immediately after MPI_Init so a hang later
 * on is still preceded by visible output; rank 0 only. */
void PrintEarlyHeader()
{
if (rank != 0)
return;

fprintf(out_resultfile, "IOR-" META_VERSION ": MPI Coordinated Test of Parallel I/O\n");
fflush(out_resultfile);
}
/* Print the run header (command line, machine, timer info, optional
 * environment dump) and open the "tests" output array; rank 0 only. */
void PrintHeader(int argc, char **argv)
{
struct utsname unamebuf;
int i;

if (rank != 0)
return;

PrintStartSection();
PrintKeyVal("Began", CurrentTimeString());
/* reproduce the full command line verbatim */
PrintKeyValStart("Command line");
fprintf(out_resultfile, "%s", argv[0]);
for (i = 1; i < argc; i++) {
fprintf(out_resultfile, " %s", argv[i]);
}
PrintKeyValEnd();
if (uname(&unamebuf) != 0) {
EWARN("uname failed");
PrintKeyVal("Machine", "Unknown");
} else {
PrintKeyValStart("Machine");
fprintf(out_resultfile, "%s %s", unamebuf.sysname,
unamebuf.nodename);
if (verbose >= VERBOSE_2) {
fprintf(out_resultfile, " %s %s %s", unamebuf.release,
unamebuf.version, unamebuf.machine);
}
PrintKeyValEnd();
}

/* report which clock is used; skew matters when comparing ranks */
#ifdef _NO_MPI_TIMER
if (verbose >= VERBOSE_2)
fprintf(out_logfile, "Using unsynchronized POSIX timer\n");
#else /* not _NO_MPI_TIMER */
if (MPI_WTIME_IS_GLOBAL) {
if (verbose >= VERBOSE_2)
fprintf(out_logfile, "Using synchronized MPI timer\n");
} else {
if (verbose >= VERBOSE_2)
fprintf(out_logfile, "Using unsynchronized MPI timer\n");
}
#endif /* _NO_MPI_TIMER */
if (verbose >= VERBOSE_1) {
fprintf(out_logfile, "Start time skew across all tasks: %.02f sec\n",
wall_clock_deviation);
}
if (verbose >= VERBOSE_3) { /* show env */
fprintf(out_logfile, "STARTING ENVIRON LOOP\n");
for (i = 0; environ[i] != NULL; i++) {
fprintf(out_logfile, "%s\n", environ[i]);
}
fprintf(out_logfile, "ENDING ENVIRON LOOP\n");
}

PrintArrayNamedStart("tests");
fflush(out_resultfile);
fflush(out_logfile);
}
/*
* Print header information for test output.
*/
/* Print header information for one test: ID, start time, free space on
 * the target filesystem, and (verbose >= 3 or JSON) a full dump of the
 * test parameters. */
void ShowTestStart(IOR_param_t *test)
{
PrintStartSection();
PrintKeyValInt("TestID", test->id);
PrintKeyVal("StartTime", CurrentTimeString());
/* if pvfs2:, then skip */
if (strcasecmp(test->api, "DFS") &&
Regex(test->testFileName, "^[a-z][a-z].*:") == 0) {
DisplayFreespace(test);
}

if (verbose >= VERBOSE_3 || outputFormat == OUTPUT_JSON) {
/* one-letter codes for dataPacketType: generic/timestamp/offset/incompressible */
char* data_packets[] = {"g","t","o","i"};

PrintNamedSectionStart("Parameters");
PrintKeyValInt("testID", test->id);
PrintKeyValInt("refnum", test->referenceNumber);
PrintKeyVal("api", test->api);
PrintKeyVal("platform", test->platform);
PrintKeyVal("testFileName", test->testFileName);
PrintKeyVal("hintsFileName", test->hintsFileName);
PrintKeyValInt("deadlineForStonewall", test->deadlineForStonewalling);
PrintKeyValInt("stoneWallingWearOut", test->stoneWallingWearOut);
PrintKeyValInt("maxTimeDuration", test->maxTimeDuration);
PrintKeyValInt("outlierThreshold", test->outlierThreshold);
PrintKeyVal("options", test->options);
PrintKeyValInt("nodes", test->nodes);
PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask);
PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode);
PrintKeyValInt("tasksPerNode", tasksPerNode);
PrintKeyValInt("repetitions", test->repetitions);
PrintKeyValInt("multiFile", test->multiFile);
PrintKeyValInt("interTestDelay", test->interTestDelay);
PrintKeyValInt("fsync", test->fsync);
PrintKeyValInt("fsyncperwrite", test->fsyncPerWrite);
PrintKeyValInt("useExistingTestFile", test->useExistingTestFile);
PrintKeyValInt("showHints", test->showHints);
PrintKeyValInt("uniqueDir", test->uniqueDir);
PrintKeyValInt("individualDataSets", test->individualDataSets);
PrintKeyValInt("singleXferAttempt", test->singleXferAttempt);
PrintKeyValInt("readFile", test->readFile);
PrintKeyValInt("writeFile", test->writeFile);
PrintKeyValInt("filePerProc", test->filePerProc);
PrintKeyValInt("reorderTasks", test->reorderTasks);
PrintKeyValInt("reorderTasksRandom", test->reorderTasksRandom);
PrintKeyValInt("reorderTasksRandomSeed", test->reorderTasksRandomSeed);
PrintKeyValInt("randomOffset", test->randomOffset);
PrintKeyValInt("checkWrite", test->checkWrite);
PrintKeyValInt("checkRead", test->checkRead);
PrintKeyValInt("preallocate", test->preallocate);
PrintKeyValInt("useFileView", test->useFileView);
PrintKeyValInt("setAlignment", test->setAlignment);
PrintKeyValInt("storeFileOffset", test->storeFileOffset);
PrintKeyValInt("useSharedFilePointer", test->useSharedFilePointer);
PrintKeyValInt("useO_DIRECT", test->useO_DIRECT);
PrintKeyValInt("useStridedDatatype", test->useStridedDatatype);
PrintKeyValInt("keepFile", test->keepFile);
PrintKeyValInt("keepFileWithError", test->keepFileWithError);
PrintKeyValInt("quitOnError", test->quitOnError);
PrintKeyValInt("verbose", verbose);
PrintKeyVal("data packet type", data_packets[test->dataPacketType]);
PrintKeyValInt("setTimeStampSignature/incompressibleSeed", test->setTimeStampSignature); /* Seed value was copied into setTimeStampSignature as well */
PrintKeyValInt("collective", test->collective);
PrintKeyValInt("segmentCount", test->segmentCount);
#ifdef HAVE_GPFS_FCNTL_H
PrintKeyValInt("gpfsHintAccess", test->gpfs_hint_access);
PrintKeyValInt("gpfsReleaseToken", test->gpfs_release_token);
#endif
PrintKeyValInt("transferSize", test->transferSize);
PrintKeyValInt("blockSize", test->blockSize);
PrintEndSection();
}
fflush(out_resultfile);
}
/* Report stonewalling wear-out state at test end: rank 0 either stores
 * the accessed-pair count to the status file or logs it. */
void ShowTestEnd(IOR_test_t *tptr){
if(rank == 0 && tptr->params.stoneWallingWearOut){
if (tptr->params.stoneWallingStatusFile){
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
}else{
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
}
}
PrintEndSection();
}
/*
* Show simple test output with max results for iterations.
*/
/* Print the "Options" section describing the configured test setup,
 * then open the "Results" output array. */
void ShowSetup(IOR_param_t *params)
{
if (params->debug) {
fprintf(out_logfile, "\n*** DEBUG MODE ***\n");
fprintf(out_logfile, "*** %s ***\n\n", params->debug);
}
PrintNamedSectionStart("Options");
PrintKeyVal("api", params->api);
PrintKeyVal("apiVersion", params->apiVersion);
PrintKeyVal("test filename", params->testFileName);
PrintKeyVal("access", params->filePerProc ? "file-per-process" : "single-shared-file");
PrintKeyVal("type", params->collective ? "collective" : "independent");
PrintKeyValInt("segments", params->segmentCount);
PrintKeyVal("ordering in a file", params->randomOffset ? "random" : "sequential");
/* exactly one of the three inter-file ordering modes is reported */
if (params->reorderTasks == FALSE && params->reorderTasksRandom == FALSE) {
PrintKeyVal("ordering inter file", "no tasks offsets");
}
if (params->reorderTasks == TRUE) {
PrintKeyVal("ordering inter file", "constant task offset");
PrintKeyValInt("task offset", params->taskPerNodeOffset);
}
if (params->reorderTasksRandom == TRUE) {
PrintKeyVal("ordering inter file", "random task offset");
PrintKeyValInt("task offset", params->taskPerNodeOffset);
PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed);
}
PrintKeyValInt("tasks", params->numTasks);
PrintKeyValInt("clients per node", params->tasksPerNode);
if (params->memoryPerTask != 0){
PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO));
}
if (params->memoryPerNode != 0){
PrintKeyVal("memoryPerNode", HumanReadable(params->memoryPerNode, BASE_TWO));
}
PrintKeyValInt("repetitions", params->repetitions);
PrintKeyVal("xfersize", HumanReadable(params->transferSize, BASE_TWO));
PrintKeyVal("blocksize", HumanReadable(params->blockSize, BASE_TWO));
PrintKeyVal("aggregate filesize", HumanReadable(params->expectedAggFileSize, BASE_TWO));
#ifdef HAVE_LUSTRE_LUSTRE_USER_H
if (params->lustre_set_striping) {
PrintKeyVal("Lustre stripe size", ((params->lustre_stripe_size == 0) ? "Use default" :
HumanReadable(params->lustre_stripe_size, BASE_TWO)));
PrintKeyVal("stripe count", (params->lustre_stripe_count == 0 ? "Use default" : HumanReadable(params->lustre_stripe_count, BASE_TWO)));
}
#endif /* HAVE_LUSTRE_LUSTRE_USER_H */
if (params->deadlineForStonewalling > 0) {
PrintKeyValInt("stonewallingTime", params->deadlineForStonewalling);
PrintKeyValInt("stoneWallingWearOut", params->stoneWallingWearOut );
}
PrintEndSection();

PrintNamedArrayStart("Results");
fflush(out_resultfile);
}
/*
* Summarize results
*
* operation is typically "write" or "read"
*/
/*
 * Summarize results
 *
 * operation is typically "write" or "read".
 * times_offset is a byte offset into IOR_results_t (from offsetof) that
 * selects which elapsed-time field (writeTime/readTime) to aggregate.
 * Rank 0 only; output format follows outputFormat.
 */
static void PrintLongSummaryOneOperation(IOR_test_t *test, int times_offset, char *operation)
{
IOR_param_t *params = &test->params;
IOR_results_t *results = test->results;
struct results *bw;
struct results *ops;
int reps;

if (rank != 0 || verbose < VERBOSE_0)
return;

reps = params->repetitions;

/* gather the selected per-repetition time via the byte offset */
/* NOTE(review): malloc return is not checked here */
double * times = malloc(sizeof(double)* reps);
for(int i=0; i < reps; i++){
times[i] = *(double*)((char*) & results[i] + times_offset);
}

bw = bw_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), times);
ops = ops_values(reps, results, offsetof(IOR_results_t, aggFileSizeForBW), params->transferSize, times);

if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-9s ", operation);
fprintf(out_resultfile, "%10.2f ", bw->max / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->min / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->mean / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", bw->sd / MEBIBYTE);
fprintf(out_resultfile, "%10.2f ", ops->max);
fprintf(out_resultfile, "%10.2f ", ops->min);
fprintf(out_resultfile, "%10.2f ", ops->mean);
fprintf(out_resultfile, "%10.2f ", ops->sd);
fprintf(out_resultfile, "%10.5f ", mean_of_array_of_doubles(times, reps));
fprintf(out_resultfile, "%5d ", params->id);
fprintf(out_resultfile, "%6d ", params->numTasks);
fprintf(out_resultfile, "%3d ", params->tasksPerNode);
fprintf(out_resultfile, "%4d ", params->repetitions);
fprintf(out_resultfile, "%3d ", params->filePerProc);
fprintf(out_resultfile, "%5d ", params->reorderTasks);
fprintf(out_resultfile, "%8d ", params->taskPerNodeOffset);
fprintf(out_resultfile, "%9d ", params->reorderTasksRandom);
fprintf(out_resultfile, "%4d ", params->reorderTasksRandomSeed);
fprintf(out_resultfile, "%6lld ", params->segmentCount);
fprintf(out_resultfile, "%8lld ", params->blockSize);
fprintf(out_resultfile, "%8lld ", params->transferSize);
fprintf(out_resultfile, "%9.1f ", (float)results[0].aggFileSizeForBW / MEBIBYTE);
fprintf(out_resultfile, "%3s ", params->api);
fprintf(out_resultfile, "%6d", params->referenceNumber);
fprintf(out_resultfile, "\n");
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("operation", operation);
PrintKeyVal("API", params->api);
PrintKeyValInt("TestID", params->id);
PrintKeyValInt("ReferenceNumber", params->referenceNumber);
PrintKeyValInt("segmentCount", params->segmentCount);
PrintKeyValInt("blockSize", params->blockSize);
PrintKeyValInt("transferSize", params->transferSize);
PrintKeyValInt("numTasks", params->numTasks);
PrintKeyValInt("tasksPerNode", params->tasksPerNode);
PrintKeyValInt("repetitions", params->repetitions);
PrintKeyValInt("filePerProc", params->filePerProc);
PrintKeyValInt("reorderTasks", params->reorderTasks);
PrintKeyValInt("taskPerNodeOffset", params->taskPerNodeOffset);
PrintKeyValInt("reorderTasksRandom", params->reorderTasksRandom);
PrintKeyValInt("reorderTasksRandomSeed", params->reorderTasksRandomSeed);
PrintKeyValInt("segmentCount", params->segmentCount);
PrintKeyValInt("blockSize", params->blockSize);
PrintKeyValInt("transferSize", params->transferSize);
PrintKeyValDouble("bwMaxMIB", bw->max / MEBIBYTE);
PrintKeyValDouble("bwMinMIB", bw->min / MEBIBYTE);
PrintKeyValDouble("bwMeanMIB", bw->mean / MEBIBYTE);
PrintKeyValDouble("bwStdMIB", bw->sd / MEBIBYTE);
PrintKeyValDouble("OPsMax", ops->max);
PrintKeyValDouble("OPsMin", ops->min);
PrintKeyValDouble("OPsMean", ops->mean);
PrintKeyValDouble("OPsSD", ops->sd);
PrintKeyValDouble("MeanTime", mean_of_array_of_doubles(times, reps));
PrintKeyValDouble("xsizeMiB", (double) results[0].aggFileSizeForBW / MEBIBYTE);
PrintEndSection();
}else if (outputFormat == OUTPUT_CSV){

}

fflush(out_resultfile);

free(bw);
free(ops);
free(times);
}
/* Print the long summary rows for one test: a write row and/or a read
 * row depending on which phases were enabled. */
void PrintLongSummaryOneTest(IOR_test_t *test)
{
IOR_param_t *params = &test->params;

if (params->writeFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, writeTime), "write");
if (params->readFile)
PrintLongSummaryOneOperation(test, offsetof(IOR_results_t, readTime), "read");
}
/* Print the column header for the long summary table (default format
 * only; JSON/CSV need no header). Rank 0 only. */
void PrintLongSummaryHeader()
{
if (rank != 0 || verbose < VERBOSE_0)
return;
if(outputFormat != OUTPUT_DEFAULT){
return;
}

fprintf(out_resultfile, "\n");
fprintf(out_resultfile, "%-9s %10s %10s %10s %10s %10s %10s %10s %10s %10s",
"Operation", "Max(MiB)", "Min(MiB)", "Mean(MiB)", "StdDev",
"Max(OPs)", "Min(OPs)", "Mean(OPs)", "StdDev",
"Mean(s)");
fprintf(out_resultfile, " Test# #Tasks tPN reps fPP reord reordoff reordrand seed"
" segcnt ");
fprintf(out_resultfile, "%8s %8s %9s %5s", " blksiz", "xsize","aggs(MiB)", "API");
fprintf(out_resultfile, " RefNum\n");
}
/* Walk the linked list of tests and print the long summary table for
 * all of them; rank 0 only. */
void PrintLongSummaryAllTests(IOR_test_t *tests_head)
{
IOR_test_t *tptr;
if (rank != 0 || verbose < VERBOSE_0)
return;

PrintArrayEnd();

if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "\n");
fprintf(out_resultfile, "Summary of all tests:");
}else if (outputFormat == OUTPUT_JSON){
PrintNamedArrayStart("summary");
}else if (outputFormat == OUTPUT_CSV){

}

PrintLongSummaryHeader();
for (tptr = tests_head; tptr != NULL; tptr = tptr->next) {
PrintLongSummaryOneTest(tptr);
}

PrintArrayEnd();
}
/*
 * Print the short "Max Write / Max Read" bandwidth summary for a test.
 * Rank 0 only; bandwidth is aggFileSizeForBW / elapsed time per rep.
 *
 * Fix: max_write/max_read were re-seeded with results[0].writeTime /
 * readTime — elapsed times (seconds), not bandwidths (bytes/sec) — so
 * a mismatched unit could inflate the reported maximum.  They now stay
 * at 0.0 so only computed bandwidths are considered; the loop already
 * covers repetition 0.
 */
void PrintShortSummary(IOR_test_t * test)
{
        IOR_param_t *params = &test->params;
        IOR_results_t *results = test->results;
        double max_write = 0.0;   /* best write bandwidth seen, bytes/sec */
        double max_read = 0.0;    /* best read bandwidth seen, bytes/sec */
        double bw;
        int reps;
        int i;

        if (rank != 0 || verbose < VERBOSE_0)
                return;

        PrintArrayEnd();

        reps = params->repetitions;
        for (i = 0; i < reps; i++) {
                bw = (double)results[i].aggFileSizeForBW / results[i].writeTime;
                max_write = MAX(bw, max_write);
                bw = (double)results[i].aggFileSizeForBW / results[i].readTime;
                max_read = MAX(bw, max_read);
        }

        if(outputFormat == OUTPUT_DEFAULT){
                if (params->writeFile) {
                        fprintf(out_resultfile, "Max Write: %.2f MiB/sec (%.2f MB/sec)\n",
                                max_write/MEBIBYTE, max_write/MEGABYTE);
                }
                if (params->readFile) {
                        fprintf(out_resultfile, "Max Read: %.2f MiB/sec (%.2f MB/sec)\n",
                                max_read/MEBIBYTE, max_read/MEGABYTE);
                }
        }else if (outputFormat == OUTPUT_JSON){
                PrintNamedSectionStart("max");
                if (params->writeFile) {
                        PrintKeyValDouble("writeMiB", max_write/MEBIBYTE);
                        PrintKeyValDouble("writeMB", max_write/MEGABYTE);
                }
                if (params->readFile) {
                        PrintKeyValDouble("readMiB", max_read/MEBIBYTE);
                        PrintKeyValDouble("readMB", max_read/MEGABYTE);
                }
                PrintEndSection();
        }
}
/*
* Display freespace (df).
*/
/*
 * Display freespace (df) for the directory that will hold the test
 * file: strips the last path component of the generated file name and
 * falls back to "." when the name has no directory part.
 */
void DisplayFreespace(IOR_param_t * test)
{
char fileName[MAX_STR] = { 0 };
int i;
int directoryFound = FALSE;

/* get outfile name */
GetTestFileName(fileName, test);

/* get directory for outfile */
i = strlen(fileName);
while (i-- > 0) {
if (fileName[i] == '/') {
fileName[i] = '\0';
directoryFound = TRUE;
break;
}
}

/* if no directory/, use '.' */
if (directoryFound == FALSE) {
strcpy(fileName, ".");
}

ShowFileSystemSize(fileName);
}
/* Print the timing row for the file-removal phase of repetition 'rep';
 * rank 0 only.  Default format pads the bandwidth/size columns with
 * dashes since removal has no such metrics. */
void PrintRemoveTiming(double start, double finish, int rep)
{
if (rank != 0 || verbose < VERBOSE_0)
return;

if (outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "remove - - - - - - ");
PPDouble(1, finish-start, " ");
fprintf(out_resultfile, "%-4d\n", rep);
}else if (outputFormat == OUTPUT_JSON){
PrintStartSection();
PrintKeyVal("access", "remove");
PrintKeyValDouble("totalTime", finish - start);
PrintEndSection();
}
}
/*
* Pretty Print a Double. The First parameter is a flag determining if left
* justification should be used. The third parameter a null-terminated string
* that should be appended to the number field.
*/
/*
 * Pretty Print a Double. The First parameter is a flag determining if left
 * justification should be used. The third parameter a null-terminated string
 * that should be appended to the number field.
 * Precision adapts to magnitude (6 digits below 1, 2 up to an hour,
 * none beyond); negative values print as a dash placeholder.
 */
static void PPDouble(int leftjustify, double number, char *append)
{
char format[16];
int width = 10;
int precision;

if (number < 0) {
fprintf(out_resultfile, " - %s", append);
return;
}

if (number < 1)
precision = 6;
else if (number < 3600)
precision = 2;
else
precision = 0;

/* build e.g. "%-10.2f%s" at runtime from width/precision */
sprintf(format, "%%%s%d.%df%%s",
leftjustify ? "-" : "",
width, precision);

fprintf(out_resultfile, format, number, append);
}
/*
 * Compute per-repetition bandwidths (bytes transferred / elapsed time)
 * and their min/max/mean/variance/sd.
 *
 * offset - byte offset (from offsetof) of the IOR_offset_t size field
 *          inside IOR_results_t to read from each repetition
 * vals   - per-repetition elapsed times (seconds)
 *
 * Returns a malloc'd struct results; the val array lives in the same
 * allocation, so a single free() releases everything.
 */
static struct results *bw_values(int reps, IOR_results_t * measured, int offset, double *vals)
{
struct results *r;
int i;

/* struct and per-rep array share one allocation */
r = (struct results *) malloc(sizeof(struct results) + (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];

for (i = 0; i < reps; i++, measured++) {
/* read the size field selected by 'offset' from this repetition */
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset)) / vals[i];

if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;   /* population variance */
r->sd = sqrt(r->var);

return r;
}
/*
 * Compute per-repetition operation rates (bytes / transfer_size /
 * elapsed time) and their min/max/mean/variance/sd; the ops-rate twin
 * of bw_values() above.
 *
 * Returns a malloc'd struct results; the val array lives in the same
 * allocation, so a single free() releases everything.
 */
static struct results *ops_values(int reps, IOR_results_t * measured, int offset,
IOR_offset_t transfer_size,
double *vals)
{
struct results *r;
int i;

/* struct and per-rep array share one allocation */
r = (struct results *)malloc(sizeof(struct results)
+ (reps * sizeof(double)));
if (r == NULL)
ERR("malloc failed");
r->val = (double *)&r[1];

for (i = 0; i < reps; i++, measured++) {
/* ops/sec = (bytes / bytes-per-op) / seconds */
r->val[i] = (double) *((IOR_offset_t*) ((char*)measured + offset))
/ transfer_size / vals[i];

if (i == 0) {
r->min = r->val[i];
r->max = r->val[i];
r->sum = 0.0;
}
r->min = MIN(r->min, r->val[i]);
r->max = MAX(r->max, r->val[i]);
r->sum += r->val[i];
}
r->mean = r->sum / reps;
r->var = 0.0;
for (i = 0; i < reps; i++) {
r->var += pow((r->mean - r->val[i]), 2);
}
r->var = r->var / reps;   /* population variance */
r->sd = sqrt(r->var);

return r;
}
/*
 * Arithmetic mean of the first 'len' entries of 'values'.
 *
 * Fix: returns 0.0 for len <= 0 instead of dividing by zero (which
 * previously yielded NaN/inf and poisoned downstream summary output).
 */
static double mean_of_array_of_doubles(double *values, int len)
{
        double tot = 0.0;
        int i;

        if (len <= 0)
                return 0.0;

        for (i = 0; i < len; i++) {
                tot += values[i];
        }
        return tot / len;
}

1491
src/ior.c

File diff suppressed because it is too large Load Diff

View File

@ -29,26 +29,14 @@
typedef void* hdfsFS; /* unused, but needs a type */
#endif
#ifdef USE_S3_AIORI
# include <curl/curl.h>
# include "aws4c.h"
#ifdef USE_RADOS_AIORI
# include <rados/librados.h>
#else
typedef void CURL; /* unused, but needs a type */
typedef void IOBuf; /* unused, but needs a type */
typedef void *rados_t;
typedef void *rados_ioctx_t;
#endif
#include "iordef.h"
extern int numTasksWorld;
extern int rank;
extern int rankOffset;
extern int tasksPerNode;
extern int verbose;
extern MPI_Comm testComm;
/******************** DATA Packet Type ***************************************/
/* Holds the types of data packets: generic, offset, timestamp, incompressible */
@ -91,17 +79,18 @@ typedef struct IO_BUFFERS
typedef struct
{
char debug[MAX_STR]; /* debug info string */
const void * backend;
char * debug; /* debug info string */
unsigned int mode; /* file permissions */
unsigned int openFlags; /* open flags (see also <open>) */
int referenceNumber; /* user supplied reference number */
char api[MAX_STR]; /* API for I/O */
char apiVersion[MAX_STR]; /* API version */
char platform[MAX_STR]; /* platform type */
char testFileName[MAXPATHLEN]; /* full name for test */
char testFileName_fppReadCheck[MAXPATHLEN];/* filename for fpp read check */
char hintsFileName[MAXPATHLEN]; /* full name for hints file */
char options[MAXPATHLEN]; /* options string */
char * api; /* API for I/O */
char * apiVersion; /* API version */
char * platform; /* platform type */
char * testFileName; /* full name for test */
char * testFileName_fppReadCheck;/* filename for fpp read check */
char * hintsFileName; /* full name for hints file */
char * options; /* options string */
int numTasks; /* number of tasks for test */
int nodes; /* number of nodes for test */
int tasksPerNode; /* number of tasks per node */
@ -135,14 +124,15 @@ typedef struct
int useStridedDatatype; /* put strided access into datatype */
int useO_DIRECT; /* use O_DIRECT, bypassing I/O buffers */
int showHints; /* show hints */
int showHelp; /* show options and help */
int summary_every_test; /* flag to print summary every test, not just at end */
int uniqueDir; /* use unique directory for each fpp */
int useExistingTestFile; /* do not delete test file before access */
int storeFileOffset; /* use file offset as stored signature */
int deadlineForStonewalling; /* max time in seconds to run any test phase */
int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */
int stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
char * stoneWallingStatusFile;
int maxTimeDuration; /* max time in minutes to run each test */
int outlierThreshold; /* warn on outlier N seconds from mean */
int verbose; /* verbosity */
@ -150,7 +140,7 @@ typedef struct
unsigned int timeStampSignatureValue; /* value for time stamp signature */
void * fd_fppReadCheck; /* additional fd for fpp read check */
int randomSeed; /* random seed for write/read check */
int incompressibleSeed; /* random seed for incompressible file creation */
unsigned int incompressibleSeed; /* random seed for incompressible file creation */
int randomOffset; /* access is to random offsets */
size_t memoryPerTask; /* additional memory used per task */
size_t memoryPerNode; /* additional memory used per node */
@ -175,25 +165,20 @@ typedef struct
IOR_offset_t setAlignment; /* alignment in bytes */
/* HDFS variables */
char hdfs_user[MAX_STR]; /* copied from ENV, for now */
char * hdfs_user; /* copied from ENV, for now */
const char* hdfs_name_node;
tPort hdfs_name_node_port; /* (uint16_t) */
hdfsFS hdfs_fs; /* file-system handle */
int hdfs_replicas; /* n block replicas. (0 gets default) */
int hdfs_block_size; /* internal blk-size. (0 gets default) */
/* REST/S3 variables */
// CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */
# define IOR_CURL_INIT 0x01 /* curl top-level inits were perfomed once? */
# define IOR_CURL_NOCONTINUE 0x02
# define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */
char curl_flags;
char* URI; /* "path" to target object */
IOBuf* io_buf; /* aws4c places parsed header values here */
IOBuf* etags; /* accumulate ETags for N:1 parts */
size_t part_number; /* multi-part upload increment (PER-RANK!) */
# define MAX_UPLOAD_ID_SIZE 256 /* seems to be 32, actually */
char UploadId[MAX_UPLOAD_ID_SIZE +1]; /* key for multi-part-uploads */
char* UploadId; /* key for multi-part-uploads */
/* RADOS variables */
rados_t rados_cluster; /* RADOS cluster handle */
rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */
/* NCMPI variables */
int var_id; /* variable id handle for data set */
@ -213,12 +198,6 @@ typedef struct
int beegfs_numTargets; /* number storage targets to use */
int beegfs_chunkSize; /* stripe pattern for new files */
/* daos variables */
char daosGroup[MAX_STR]; /* group name */
char daosPool[37]; /* pool UUID */
char daosPoolSvc[MAX_STR]; /* pool service ranks */
char daosCont[37]; /* Container UUID */
int id; /* test's unique ID */
int intraTestBarriers; /* barriers between open/op and op/close */
} IOR_param_t;
@ -226,25 +205,38 @@ typedef struct
/* each pointer is to an array, each of length equal to the number of
repetitions in the test */
typedef struct {
double *writeTime;
double *readTime;
double writeTime;
double readTime;
int errors;
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
IOR_offset_t *aggFileSizeFromStat;
IOR_offset_t *aggFileSizeFromXfer;
IOR_offset_t *aggFileSizeForBW;
double stonewall_time;
long long stonewall_min_data_accessed;
long long stonewall_avg_data_accessed;
IOR_offset_t aggFileSizeFromStat;
IOR_offset_t aggFileSizeFromXfer;
IOR_offset_t aggFileSizeForBW;
} IOR_results_t;
/* define the queuing structure for the test parameters */
typedef struct IOR_test_t {
IOR_param_t params;
IOR_results_t *results;
IOR_results_t *results; /* This is an array of reps times IOR_results_t */
struct IOR_test_t *next;
} IOR_test_t;
IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
void AllocResults(IOR_test_t *test);
void GetPlatformName(char *);
char * GetPlatformName();
void init_IOR_Param_t(IOR_param_t *p);
/*
* This function runs IOR given by command line, useful for testing
*/
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
/* Actual IOR Main function, renamed to allow library usage */
int ior_main(int argc, char **argv);
#endif /* !_IOR_H */

View File

@ -31,9 +31,7 @@
# include <Windows.h>
# include <io.h>
# include <direct.h>
# include "win/getopt.h"
# define MAXPATHLEN 1024
# define F_OK 00
# define W_OK 02
# define R_OK 04
@ -41,7 +39,7 @@
# define lseek _lseeki64
# define fsync _commit
# define mkdir(dir, mode) _mkdir(dir)
# define mkdir(dir, mode) _mkdir(dir)
# define strcasecmp _stricmp
# define strncasecmp _strnicmp
# define srandom srand
@ -63,6 +61,12 @@ extern int verbose; /* verbose output */
/*************************** D E F I N I T I O N S ****************************/
enum OutputFormat_t{
OUTPUT_DEFAULT,
OUTPUT_CSV,
OUTPUT_JSON
};
#ifndef FALSE
# define FALSE 0
#endif /* not FALSE */
@ -102,8 +106,8 @@ extern int verbose; /* verbose output */
#define VERBOSE_4 4
#define VERBOSE_5 5
#define MAX_STR 1024 /* max string length */
#define MAX_HINTS 16 /* max number of hints */
#define MAX_STR 1024 /* max string length */
#define MAX_HINTS 16 /* max number of hints */
#define MAX_RETRY 10000 /* max retries for POSIX xfer */
#ifndef PATH_MAX
#define PATH_MAX 4096
@ -211,7 +215,7 @@ struct utsname {
char nodename[257];
char release [16];
char version [16];
char machine [16];
char machine [16];
};
extern int uname(struct utsname *name);

556
src/list.h Normal file
View File

@ -0,0 +1,556 @@
/**
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 only,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is included
* in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* GPL HEADER END
*/
#ifndef __DAOS_LIST_H__
#define __DAOS_LIST_H__
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/
#define prefetch(a) ((void)a)
struct cfs_list_head {
struct cfs_list_head *next, *prev;
};
typedef struct cfs_list_head cfs_list_t;
#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) }
#define CFS_LIST_HEAD(name) \
cfs_list_t name = CFS_LIST_HEAD_INIT(name)
#define CFS_INIT_LIST_HEAD(ptr) do { \
(ptr)->next = (ptr); (ptr)->prev = (ptr); \
} while (0)
/**
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
/**
 * Link @new in between @prev and @next.
 *
 * Internal helper: the caller must already know the two neighbouring
 * entries.  All four pointer stores are independent, so their order is
 * irrelevant.
 */
static inline void __cfs_list_add(cfs_list_t * new,
                                  cfs_list_t * prev,
                                  cfs_list_t * next)
{
        new->prev = prev;
        new->next = next;
        prev->next = new;
        next->prev = new;
}
/**
* Insert an entry at the start of a list.
* \param new new entry to be inserted
* \param head list to add it to
*
* Insert a new entry after the specified head.
* This is good for implementing stacks.
*/
static inline void cfs_list_add(cfs_list_t *new,
cfs_list_t *head)
{
__cfs_list_add(new, head, head->next);
}
/**
* Insert an entry at the end of a list.
* \param new new entry to be inserted
* \param head list to add it to
*
* Insert a new entry before the specified head.
* This is useful for implementing queues.
*/
static inline void cfs_list_add_tail(cfs_list_t *new,
cfs_list_t *head)
{
__cfs_list_add(new, head->prev, head);
}
/*
* Delete a list entry by making the prev/next entries
* point to each other.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
/*
 * Unlink the entry sitting between @prev and @next by wiring the two
 * neighbours directly to each other.  Internal helper: the caller must
 * already know both neighbours.
 */
static inline void __cfs_list_del(cfs_list_t *prev,
                                  cfs_list_t *next)
{
        prev->next = next;
        next->prev = prev;
}
/**
* Remove an entry from the list it is currently in.
* \param entry the entry to remove
* Note: list_empty(entry) does not return true after this, the entry is in an
* undefined state.
*/
static inline void cfs_list_del(cfs_list_t *entry)
{
__cfs_list_del(entry->prev, entry->next);
}
/**
* Remove an entry from the list it is currently in and reinitialize it.
* \param entry the entry to remove.
*/
static inline void cfs_list_del_init(cfs_list_t *entry)
{
__cfs_list_del(entry->prev, entry->next);
CFS_INIT_LIST_HEAD(entry);
}
/**
* Remove an entry from the list it is currently in and insert it at the start
* of another list.
* \param list the entry to move
* \param head the list to move it to
*/
static inline void cfs_list_move(cfs_list_t *list,
cfs_list_t *head)
{
__cfs_list_del(list->prev, list->next);
cfs_list_add(list, head);
}
/**
* Remove an entry from the list it is currently in and insert it at the end of
* another list.
* \param list the entry to move
* \param head the list to move it to
*/
static inline void cfs_list_move_tail(cfs_list_t *list,
cfs_list_t *head)
{
__cfs_list_del(list->prev, list->next);
cfs_list_add_tail(list, head);
}
/**
* Test whether a list is empty
* \param head the list to test.
*/
static inline int cfs_list_empty(cfs_list_t *head)
{
return head->next == head;
}
/**
* Test whether a list is empty and not being modified
* \param head the list to test
*
* Tests whether a list is empty _and_ checks that no other CPU might be
* in the process of modifying either member (next or prev)
*
* NOTE: using cfs_list_empty_careful() without synchronization
* can only be safe if the only activity that can happen
* to the list entry is cfs_list_del_init(). Eg. it cannot be used
* if another CPU could re-list_add() it.
*/
static inline int cfs_list_empty_careful(const cfs_list_t *head)
{
cfs_list_t *next = head->next;
return (next == head) && (next == head->prev);
}
/*
 * Internal helper for cfs_list_splice(): insert the whole chain of
 * @list (which must be non-empty) immediately after @head.  @list's own
 * head node is left dangling; callers reinitialise it if needed.
 */
static inline void __cfs_list_splice(cfs_list_t *list,
                                     cfs_list_t *head)
{
        cfs_list_t *first = list->next;   /* first real element of the donor chain */
        cfs_list_t *last  = list->prev;   /* last real element of the donor chain  */
        cfs_list_t *at    = head->next;   /* element the chain is spliced before   */

        head->next  = first;
        first->prev = head;
        last->next  = at;
        at->prev    = last;
}
/**
* Join two lists
* \param list the new list to add.
* \param head the place to add it in the first list.
*
* The contents of \a list are added at the start of \a head. \a list is in an
* undefined state on return.
*/
static inline void cfs_list_splice(cfs_list_t *list,
cfs_list_t *head)
{
if (!cfs_list_empty(list))
__cfs_list_splice(list, head);
}
/**
* Join two lists and reinitialise the emptied list.
* \param list the new list to add.
* \param head the place to add it in the first list.
*
* The contents of \a list are added at the start of \a head. \a list is empty
* on return.
*/
static inline void cfs_list_splice_init(cfs_list_t *list,
cfs_list_t *head)
{
if (!cfs_list_empty(list)) {
__cfs_list_splice(list, head);
CFS_INIT_LIST_HEAD(list);
}
}
/**
* Get the container of a list
* \param ptr the embedded list.
* \param type the type of the struct this is embedded in.
* \param member the member name of the list within the struct.
*/
#define cfs_list_entry(ptr, type, member) \
((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
/**
* Iterate over a list
* \param pos the iterator
* \param head the list to iterate over
*
* Behaviour is undefined if \a pos is removed from the list in the body of the
* loop.
*/
#define cfs_list_for_each(pos, head) \
for (pos = (head)->next, prefetch(pos->next); pos != (head); \
pos = pos->next, prefetch(pos->next))
/**
* Iterate over a list safely
* \param pos the iterator
* \param n temporary storage
* \param head the list to iterate over
*
* This is safe to use if \a pos could be removed from the list in the body of
* the loop.
*/
#define cfs_list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)
/**
* Iterate over a list continuing after existing point
* \param pos the type * to use as a loop counter
* \param head the list head
* \param member the name of the list_struct within the struct
*/
#define cfs_list_for_each_entry_continue(pos, head, member) \
for (pos = cfs_list_entry(pos->member.next, typeof(*pos), member); \
prefetch(pos->member.next), &pos->member != (head); \
pos = cfs_list_entry(pos->member.next, typeof(*pos), member))
/**
* \defgroup hlist Hash List
* Double linked lists with a single pointer list head.
* Mostly useful for hash tables where the two pointer list head is too
* wasteful. You lose the ability to access the tail in O(1).
* @{
*/
typedef struct cfs_hlist_node {
struct cfs_hlist_node *next, **pprev;
} cfs_hlist_node_t;
typedef struct cfs_hlist_head {
cfs_hlist_node_t *first;
} cfs_hlist_head_t;
/* @} */
/*
* "NULL" might not be defined at this point
*/
#ifdef NULL
#define NULL_P NULL
#else
#define NULL_P ((void *)0)
#endif
/**
* \addtogroup hlist
* @{
*/
#define CFS_HLIST_HEAD_INIT { NULL_P }
#define CFS_HLIST_HEAD(name) cfs_hlist_head_t name = { NULL_P }
#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P)
#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P)
static inline int cfs_hlist_unhashed(const cfs_hlist_node_t *h)
{
return !h->pprev;
}
static inline int cfs_hlist_empty(const cfs_hlist_head_t *h)
{
return !h->first;
}
/*
 * Internal helper: unlink @n from its hash list.  The pprev back-pointer
 * lets this work for the first element without knowing the list head.
 * @n itself is left in an undefined state.
 */
static inline void __cfs_hlist_del(cfs_hlist_node_t *n)
{
        cfs_hlist_node_t **back = n->pprev;
        cfs_hlist_node_t *after = n->next;

        *back = after;
        if (after)
                after->pprev = back;
}
static inline void cfs_hlist_del(cfs_hlist_node_t *n)
{
__cfs_hlist_del(n);
}
static inline void cfs_hlist_del_init(cfs_hlist_node_t *n)
{
if (n->pprev) {
__cfs_hlist_del(n);
CFS_INIT_HLIST_NODE(n);
}
}
/*
 * Push @n onto the front of hash list @h.  The previous first element
 * (if any) gets its pprev repointed at @n's next field.
 */
static inline void cfs_hlist_add_head(cfs_hlist_node_t *n,
                                      cfs_hlist_head_t *h)
{
        cfs_hlist_node_t *old_first = h->first;

        n->next  = old_first;
        n->pprev = &h->first;
        if (old_first)
                old_first->pprev = &n->next;
        h->first = n;
}
/* next must be != NULL */
static inline void cfs_hlist_add_before(cfs_hlist_node_t *n,
cfs_hlist_node_t *next)
{
n->pprev = next->pprev;
n->next = next;
next->pprev = &n->next;
*(n->pprev) = n;
}
static inline void cfs_hlist_add_after(cfs_hlist_node_t *n,
cfs_hlist_node_t *next)
{
next->next = n->next;
n->next = next;
next->pprev = &n->next;
if(next->next)
next->next->pprev = &next->next;
}
#define cfs_hlist_entry(ptr, type, member) container_of(ptr,type,member)
#define cfs_hlist_for_each(pos, head) \
for (pos = (head)->first; pos && (prefetch(pos->next), 1); \
pos = pos->next)
#define cfs_hlist_for_each_safe(pos, n, head) \
for (pos = (head)->first; pos && (n = pos->next, 1); \
pos = n)
/**
* Iterate over an hlist of given type
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param head the head for your list.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry(tpos, pos, head, member) \
for (pos = (head)->first; \
pos && ({ prefetch(pos->next); 1;}) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* Iterate over an hlist continuing after existing point
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry_continue(tpos, pos, member) \
for (pos = (pos)->next; \
pos && ({ prefetch(pos->next); 1;}) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* Iterate over an hlist continuing from an existing point
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry_from(tpos, pos, member) \
for (; pos && ({ prefetch(pos->next); 1;}) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = pos->next)
/**
* Iterate over an hlist of given type safe against removal of list entry
* \param tpos the type * to use as a loop counter.
* \param pos the &struct hlist_node to use as a loop counter.
* \param n another &struct hlist_node to use as temporary storage
* \param head the head for your list.
* \param member the name of the hlist_node within the struct.
*/
#define cfs_hlist_for_each_entry_safe(tpos, pos, n, head, member) \
for (pos = (head)->first; \
pos && ({ n = pos->next; 1; }) && \
({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \
pos = n)
/* @} */
#ifndef cfs_list_for_each_prev
/**
* Iterate over a list in reverse order
* \param pos the &struct list_head to use as a loop counter.
* \param head the head for your list.
*/
#define cfs_list_for_each_prev(pos, head) \
for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
pos = pos->prev, prefetch(pos->prev))
#endif /* cfs_list_for_each_prev */
#ifndef cfs_list_for_each_entry
/**
* Iterate over a list of given type
* \param pos the type * to use as a loop counter.
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*/
#define cfs_list_for_each_entry(pos, head, member) \
for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \
prefetch(pos->member.next); \
&pos->member != (head); \
pos = cfs_list_entry(pos->member.next, typeof(*pos), member), \
prefetch(pos->member.next))
#endif /* cfs_list_for_each_entry */
/* Userspace fallback: RCU iteration degrades to plain list iteration.
 * (An identical, redundant copy of this #ifndef block was removed.) */
#ifndef cfs_list_for_each_entry_rcu
#define cfs_list_for_each_entry_rcu(pos, head, member) \
list_for_each_entry(pos, head, member)
#endif
#ifndef cfs_list_for_each_entry_reverse
/**
* Iterate backwards over a list of given type.
* \param pos the type * to use as a loop counter.
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*/
#define cfs_list_for_each_entry_reverse(pos, head, member) \
for (pos = cfs_list_entry((head)->prev, typeof(*pos), member); \
prefetch(pos->member.prev), &pos->member != (head); \
pos = cfs_list_entry(pos->member.prev, typeof(*pos), member))
#endif /* cfs_list_for_each_entry_reverse */
#ifndef cfs_list_for_each_entry_safe
/**
* Iterate over a list of given type safe against removal of list entry
* \param pos the type * to use as a loop counter.
* \param n another type * to use as temporary storage
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*/
#define cfs_list_for_each_entry_safe(pos, n, head, member) \
for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \
n = cfs_list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member))
#endif /* cfs_list_for_each_entry_safe */
#ifndef cfs_list_for_each_entry_safe_from
/**
* Iterate over a list continuing from an existing point
* \param pos the type * to use as a loop cursor.
* \param n another type * to use as temporary storage
* \param head the head for your list.
* \param member the name of the list_struct within the struct.
*
* Iterate over list of given type from current point, safe against
* removal of list entry.
*/
#define cfs_list_for_each_entry_safe_from(pos, n, head, member) \
for (n = cfs_list_entry(pos->member.next, typeof(*pos), member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member))
#endif /* cfs_list_for_each_entry_safe_from */
#define cfs_list_for_each_entry_typed(pos, head, type, member) \
for (pos = cfs_list_entry((head)->next, type, member), \
prefetch(pos->member.next); \
&pos->member != (head); \
pos = cfs_list_entry(pos->member.next, type, member), \
prefetch(pos->member.next))
#define cfs_list_for_each_entry_reverse_typed(pos, head, type, member) \
for (pos = cfs_list_entry((head)->prev, type, member); \
prefetch(pos->member.prev), &pos->member != (head); \
pos = cfs_list_entry(pos->member.prev, type, member))
#define cfs_list_for_each_entry_safe_typed(pos, n, head, type, member) \
for (pos = cfs_list_entry((head)->next, type, member), \
n = cfs_list_entry(pos->member.next, type, member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, type, member))
#define cfs_list_for_each_entry_safe_from_typed(pos, n, head, type, member) \
for (n = cfs_list_entry(pos->member.next, type, member); \
&pos->member != (head); \
pos = n, n = cfs_list_entry(n->member.next, type, member))
#define cfs_hlist_for_each_entry_typed(tpos, pos, head, type, member) \
for (pos = (head)->first; \
pos && (prefetch(pos->next), 1) && \
(tpos = cfs_hlist_entry(pos, type, member), 1); \
pos = pos->next)
#define cfs_hlist_for_each_entry_safe_typed(tpos, pos, n, head, type, member) \
for (pos = (head)->first; \
pos && (n = pos->next, 1) && \
(tpos = cfs_hlist_entry(pos, type, member), 1); \
pos = n)
#endif /* __DAOS_LIST_H__ */

11
src/mdtest-main.c Normal file
View File

@ -0,0 +1,11 @@
#include "mdtest.h"
#include "aiori.h"
/*
 * Stand-alone mdtest launcher: initialise MPI, run mdtest with the full
 * command line on MPI_COMM_WORLD (log output to stdout), then shut MPI
 * down.  The mdtest_results_t* returned by mdtest_run() is ignored here.
 */
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
mdtest_run(argc, argv, MPI_COMM_WORLD, stdout);
MPI_Finalize();
return 0;
}

File diff suppressed because it is too large Load Diff

37
src/mdtest.h Normal file
View File

@ -0,0 +1,37 @@
/*
 * Public interface of mdtest: the per-phase result slots and the
 * library entry point invoked by mdtest-main.c.
 */
#ifndef _MDTEST_H
#define _MDTEST_H
#include <mpi.h>
#include <stdio.h>
#include <stdint.h>
/* Index of each measured phase inside the result arrays below. */
typedef enum {
MDTEST_DIR_CREATE_NUM = 0,
MDTEST_DIR_STAT_NUM = 1,
MDTEST_DIR_READ_NUM = 1, /* NOTE(review): shares index 1 with DIR_STAT and index 2 is unused -- confirm this aliasing is intentional */
MDTEST_DIR_REMOVE_NUM = 3,
MDTEST_FILE_CREATE_NUM = 4,
MDTEST_FILE_STAT_NUM = 5,
MDTEST_FILE_READ_NUM = 6,
MDTEST_FILE_REMOVE_NUM = 7,
MDTEST_TREE_CREATE_NUM = 8,
MDTEST_TREE_REMOVE_NUM = 9,
MDTEST_LAST_NUM /* number of slots; sizes the arrays below */
} mdtest_test_num_t;
/* One entry per phase (indexed by mdtest_test_num_t). */
typedef struct
{
double rate[MDTEST_LAST_NUM]; /* Calculated throughput */
double time[MDTEST_LAST_NUM]; /* Time */
uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done */
/* Statistics when hitting the stonewall */
double stonewall_time[MDTEST_LAST_NUM]; /* runtime until completion / hit of the stonewall */
uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* Max number of items a process has accessed */
uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items a process has accessed */
uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed until stonewall */
} mdtest_results_t;
/* Run mdtest with the given command line on world_com, logging to out_logfile. */
mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
#endif

406
src/option.c Normal file
View File

@ -0,0 +1,406 @@
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <limits.h>
#include <option.h>
/*
* Takes a string of the form 64, 8m, 128k, 4g, etc. and converts to bytes.
*/
/*
 * Convert a size string such as "64", "8m", "128K" or "4g" into a byte
 * count.  Accepted suffixes (either case) are k, m, g, t and p; an
 * unrecognised suffix leaves the number unscaled.  Returns -1 when the
 * leading number cannot be parsed at all; an empty string yields 0.
 */
int64_t string_to_bytes(char *size_str)
{
        static const char units[] = "kKmMgGtTpP";
        int64_t size = 0;
        char suffix;
        int matched;

        matched = sscanf(size_str, " %lld %c ", (long long *)&size, &suffix);
        if (matched == 2) {
                /* each pair of letters in units[] is one power of 1024 */
                const char *hit = strchr(units, suffix);
                if (hit != NULL && suffix != '\0')
                        size <<= 10 * (1 + (hit - units) / 2);
        } else if (matched == 0) {
                size = -1;      /* no number at the front at all */
        }
        return size;
}
/*
* Initial revision by JK
*/
/*
 * Print the value of one option for the help output (e.g. "=42" or
 * "=STRING" as a placeholder).  Returns the number of characters
 * written so the caller can pad the help text into an aligned column.
 * The type codes match option_help.type ('p' callback, 'F'/'f' float,
 * 'd' int, 'H'/'s' string, 'c' char, 'l' long long, 'u' uint64_t).
 */
static int print_value(option_help * o){
int pos = 0;
if (o->arg == OPTION_OPTIONAL_ARGUMENT || o->arg == OPTION_REQUIRED_ARGUMENT){
assert(o->variable != NULL);
switch(o->type){
case('p'):{
pos += printf("=STRING");
break;
}
case('F'):{
pos += printf("=%.14f ", *(double*) o->variable);
break;
}
case('f'):{
pos += printf("=%.6f ", (double) *(float*) o->variable);
break;
}
case('d'):{
pos += printf("=%d ", *(int*) o->variable);
break;
}
case('H'):
case('s'):{
/* print the current string only when it is set and non-empty */
if ( *(char**) o->variable != NULL && ((char**) o->variable)[0][0] != 0 ){
pos += printf("=%s", *(char**) o->variable);
}else{
pos += printf("=STRING");
}
break;
}
case('c'):{
pos += printf("=%c", *(char*) o->variable);
break;
}
case('l'):{
pos += printf("=%lld", *(long long*) o->variable);
break;
}
case('u'):{
pos += printf("=%lu", *(uint64_t*) o->variable);
break;
}
}
}
/* flags: show their current count when non-zero */
if (o->arg == OPTION_FLAG && (*(int*)o->variable) != 0){
pos += printf(" (%d)", (*(int*)o->variable));
}
return pos;
}
/*
 * Print every option of the given argument class (type) under the
 * heading name.  Table rows that set only the help string (no short or
 * long name) are printed verbatim as section headers.  The heading is
 * only emitted once the first matching option is found.
 */
static void print_help_section(option_help * args, option_value_type type, char * name){
int first;
first = 1;
option_help * o;
for(o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++){
if (o->arg == type){
if( o->shortVar == 0 && o->longVar == 0 && o->help != NULL){
printf("%s\n", o->help);
continue;
}
if (first){
printf("\n%s\n", name);
first = 0;
}
printf("  ");
int pos = 0;
if(o->shortVar != 0 && o->longVar != 0){
pos += printf("-%c, --%s", o->shortVar, o->longVar);
}else if(o->shortVar != 0){
pos += printf("-%c", o->shortVar);
}else if(o->longVar != 0){
pos += printf("--%s", o->longVar);
}
pos += print_value(o);
/* pad the help text to (roughly) column 30 */
if(o->help != NULL){
for(int i = 0 ; i < (30 - pos); i++){
printf(" ");
}
printf("%s", o->help);
}
printf("\n");
}
}
}
/*
 * Print the full help: a one-line usage summary ("[-x] " for optional
 * options and flags, "-y " for required ones) followed by the detailed
 * sections for required arguments, flags and optional arguments.
 * When is_plugin is false a trailing "--" separator hint is printed.
 */
void option_print_help(option_help * args, int is_plugin){
option_help * o;
int optionalArgs = 0;
for(o = args; o->shortVar != 0 || o->longVar != 0 ; o++){
if(o->arg != OPTION_REQUIRED_ARGUMENT){
optionalArgs = 1;
}
switch(o->arg){
case (OPTION_OPTIONAL_ARGUMENT):
case (OPTION_FLAG):{
if(o->shortVar != 0){
printf("[-%c] ", o->shortVar);
}else if(o->longVar != 0){
printf("[--%s] ", o->longVar);
}
break;
}case (OPTION_REQUIRED_ARGUMENT):{
if(o->shortVar != 0){
printf("-%c ", o->shortVar);
}else if(o->longVar != 0){
printf("--%s ", o->longVar);
}
break;
}
}
}
if (optionalArgs){
//printf(" [Optional Args]");
}
if (! is_plugin){
printf(" -- <Plugin options, see below>\n");
}
print_help_section(args, OPTION_REQUIRED_ARGUMENT, "Required arguments");
print_help_section(args, OPTION_FLAG, "Flags");
print_help_section(args, OPTION_OPTIONAL_ARGUMENT, "Optional arguments");
}
/*
 * Print the current value of one option for the settings dump (used by
 * option_print_current()).  Unlike print_value(), hidden values
 * (type 'H') are masked as "=HIDDEN" and unset strings print a bare
 * "=".  Returns the number of characters written.
 */
static int print_option_value(option_help * o){
int pos = 0;
if (o->arg == OPTION_OPTIONAL_ARGUMENT || o->arg == OPTION_REQUIRED_ARGUMENT){
assert(o->variable != NULL);
switch(o->type){
case('F'):{
pos += printf("=%.14f ", *(double*) o->variable);
break;
}
case('f'):{
pos += printf("=%.6f ", (double) *(float*) o->variable);
break;
}
case('d'):{
pos += printf("=%d ", *(int*) o->variable);
break;
}
case('H'):{
pos += printf("=HIDDEN");
break;
}
case('s'):{
if ( *(char**) o->variable != NULL && ((char**) o->variable)[0][0] != 0 ){
pos += printf("=%s", *(char**) o->variable);
}else{
pos += printf("=");
}
break;
}
case('c'):{
pos += printf("=%c", *(char*) o->variable);
break;
}
case('l'):{
pos += printf("=%lld", *(long long*) o->variable);
break;
}
case('u'):{
pos += printf("=%lu", *(uint64_t*) o->variable);
break;
}
}
}else{
//printf(" ");
}
return pos;
}
/*
 * Print the current value of every option of the given argument class,
 * one tab-indented "name=value" line each.  Flags whose counter is
 * still 0 are skipped; the long name is preferred over the short one.
 */
static void print_current_option_section(option_help * args, option_value_type type){
option_help * o;
for(o = args; o->shortVar != 0 || o->longVar != 0 ; o++){
if (o->arg == type){
int pos = 0;
if (o->arg == OPTION_FLAG && (*(int*)o->variable) == 0){
continue;
}
printf("\t");
if(o->shortVar != 0 && o->longVar != 0){
pos += printf("%s", o->longVar);
}else if(o->shortVar != 0){
pos += printf("%c", o->shortVar);
}else if(o->longVar != 0){
pos += printf("%s", o->longVar);
}
pos += print_option_value(o);
printf("\n");
}
}
}
/*
 * Dump the current value of every option, grouped in a fixed order:
 * required arguments first, then optional arguments, then flags.
 */
void option_print_current(option_help * args){
        const option_value_type order[] = {
                OPTION_REQUIRED_ARGUMENT,
                OPTION_OPTIONAL_ARGUMENT,
                OPTION_FLAG,
        };
        for (int idx = 0; idx < 3; idx++)
                print_current_option_section(args, order[idx]);
}
/*
 * Parse argv[1..] against the option table args.
 *
 * Stops at a literal "--" (everything after it is left for plugin
 * parsing) and returns the index of the first unconsumed argv element.
 * Parsed values are written through option_help.variable according to
 * the option's type code.  *printhelp is set to 1 on -h/--help and to
 * -1 on an invalid argument or when required arguments are missing.
 *
 * NOTE: "opt=value" arguments are split in place (the '=' is replaced
 * by NUL and restored after the option is processed), so argv strings
 * are temporarily modified.
 */
int option_parse(int argc, char ** argv, option_help * args, int * printhelp){
int error = 0;
int requiredArgsSeen = 0;
int requiredArgsNeeded = 0;
int i;
/* count how many required arguments the table demands */
for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 ; o++ ){
if(o->arg == OPTION_REQUIRED_ARGUMENT){
requiredArgsNeeded++;
}
}
for(i=1; i < argc; i++){
char * txt = argv[i];
int foundOption = 0;
char * arg = strstr(txt, "=");
int replaced_equal = 0;
/* "opt=value": terminate the name part; remember to undo it below */
if(arg != NULL){
arg[0] = 0;
arg++;
replaced_equal = 1;
}
if(strcmp(txt, "--") == 0){
// we found plugin options
break;
}
// try to find matching option help
for(option_help * o = args; o->shortVar != 0 || o->longVar != 0 || o->help != NULL ; o++ ){
if( o->shortVar == 0 && o->longVar == 0 ){
// section
continue;
}
/* match "-x" on the short name or "--name" on the long name */
if ( (txt[0] == '-' && o->shortVar == txt[1]) || (strlen(txt) > 2 && txt[0] == '-' && txt[1] == '-' && o->longVar != NULL && strcmp(txt + 2, o->longVar) == 0)){
foundOption = 1;
// now process the option.
switch(o->arg){
case (OPTION_FLAG):{
assert(o->type == 'd');
(*(int*) o->variable)++;
break;
}
case (OPTION_OPTIONAL_ARGUMENT):
case (OPTION_REQUIRED_ARGUMENT):{
// check if next is an argument
if(arg == NULL){
/* "-xVALUE" form, otherwise consume the following argv word */
if(o->shortVar == txt[1] && txt[2] != 0){
arg = & txt[2];
}else{
// simply take the next value as argument
i++;
arg = argv[i];
}
}
if(arg == NULL){
const char str[] = {o->shortVar, 0};
printf("Error, argument missing for option %s\n", (o->longVar != NULL) ? o->longVar : str);
exit(1);
}
switch(o->type){
case('p'):{
// call the function in the variable
void(*fp)() = o->variable;
fp(arg);
break;
}
case('F'):{
*(double*) o->variable = atof(arg);
break;
}
case('f'):{
*(float*) o->variable = atof(arg);
break;
}
case('d'):{
int64_t val = string_to_bytes(arg);
if (val > INT_MAX || val < INT_MIN){
printf("WARNING: parsing the number %s to integer, this produced an overflow!\n", arg);
}
*(int*) o->variable = val;
break;
}
case('H'):
case('s'):{
(*(char **) o->variable) = strdup(arg);
break;
}
case('c'):{
(*(char *)o->variable) = arg[0];
if(strlen(arg) > 1){
printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar);
}
break;
}
case('l'):{
*(long long*) o->variable = string_to_bytes(arg);
break;
}
case('u'):{
*(uint64_t*) o->variable = string_to_bytes(arg);
break;
}
default:
printf("ERROR: Unknown option type %c\n", o->type);
}
}
}
/* restore the '=' we overwrote when splitting "opt=value" */
if(replaced_equal){
arg[-1] = '=';
}
if(o->arg == OPTION_REQUIRED_ARGUMENT){
requiredArgsSeen++;
}
break;
}
}
if (! foundOption){
if(strcmp(txt, "-h") == 0 || strcmp(txt, "--help") == 0){
*printhelp=1;
}else{
printf("Error invalid argument: %s\n", txt);
error = 1;
}
}
}
if( requiredArgsSeen != requiredArgsNeeded ){
printf("Error: Missing some required arguments\n\n");
*printhelp = -1;
}
if(error != 0){
printf("Invalid options\n");
*printhelp = -1;
}
return i;
}

35
src/option.h Normal file
View File

@ -0,0 +1,35 @@
#ifndef _IOR_OPTION_H
#define _IOR_OPTION_H
#include <stdint.h>
/*
* Initial revision by JK
*/
typedef enum{
OPTION_FLAG,
OPTION_OPTIONAL_ARGUMENT,
OPTION_REQUIRED_ARGUMENT
} option_value_type;
typedef struct{
char shortVar;
char * longVar;
char * help;
option_value_type arg;
char type; // data type, H = hidden string
void * variable;
} option_help;
#define LAST_OPTION {0, 0, 0, (option_value_type) 0, 0, NULL}
int64_t string_to_bytes(char *size_str);
void option_print_help(option_help * args, int is_plugin);
void option_print_current(option_help * args);
//@return the number of parsed arguments
int option_parse(int argc, char ** argv, option_help * args, int * print_help);
#endif

View File

@ -21,44 +21,18 @@
#include <ctype.h>
#include <string.h>
#include "utilities.h"
#include "ior.h"
#include "aiori.h"
#include "parse_options.h"
#include "option.h"
#include "aiori.h"
#define ISPOWEROFTWO(x) ((x != 0) && !(x & (x - 1)))
IOR_param_t initialTestParams;
/*
* Takes a string of the form 64, 8m, 128k, 4g, etc. and converts to bytes.
*/
/*
 * Convert a size string of the form 64, 8m, 128k, 4g, etc. into a byte
 * count.  A trailing k/K, m/M or g/G suffix scales the number by 2^10,
 * 2^20 or 2^30 respectively; any other suffix leaves it unscaled.
 * Returns -1 when no leading number could be parsed.
 */
static IOR_offset_t StringToBytes(char *size_str)
{
        IOR_offset_t bytes = 0;
        char suffix;
        int matched = sscanf(size_str, " %lld %c ", &bytes, &suffix);

        if (matched == 0)
                return -1;      /* a token was present but no number matched */
        if (matched == 2) {
                if (suffix == 'k' || suffix == 'K')
                        bytes <<= 10;
                else if (suffix == 'm' || suffix == 'M')
                        bytes <<= 20;
                else if (suffix == 'g' || suffix == 'G')
                        bytes <<= 30;
        }
        /* matched == 1 (plain number) or EOF (empty input, bytes == 0) */
        return bytes;
}
static size_t NodeMemoryStringToBytes(char *size_str)
{
@ -70,7 +44,7 @@ static size_t NodeMemoryStringToBytes(char *size_str)
rc = sscanf(size_str, " %d %% ", &percent);
if (rc == 0)
return (size_t)StringToBytes(size_str);
return (size_t) string_to_bytes(size_str);
if (percent > 100 || percent < 0)
ERR("percentage must be between 0 and 100");
@ -87,11 +61,6 @@ static size_t NodeMemoryStringToBytes(char *size_str)
return mem / 100 * percent;
}
static void RecalculateExpectedFileSize(IOR_param_t *params)
{
params->expectedAggFileSize =
params->blockSize * params->segmentCount * params->numTasks;
}
/*
* Check and correct all settings of each test in queue for correctness.
@ -100,7 +69,6 @@ static void CheckRunSettings(IOR_test_t *tests)
{
IOR_test_t *ptr;
IOR_param_t *params;
int needRead, needWrite;
for (ptr = tests; ptr != NULL; ptr = ptr->next) {
params = &ptr->params;
@ -119,16 +87,13 @@ static void CheckRunSettings(IOR_test_t *tests)
* of HDFS, which doesn't support opening RDWR.
* (We assume int-valued params are exclusively 0 or 1.)
*/
needRead = params->readFile |
params->checkRead |
params->checkWrite; /* checkWrite reads the file */
needWrite = params->writeFile;
if ((params->openFlags & IOR_RDWR)
&& (needRead ^ needWrite))
{
/* need to either read or write, but not both */
&& ((params->readFile | params->checkRead)
^ (params->writeFile | params->checkWrite))
&& (params->openFlags & IOR_RDWR)) {
params->openFlags &= ~(IOR_RDWR);
if (needRead) {
if (params->readFile | params->checkRead) {
params->openFlags |= IOR_RDONLY;
params->openFlags &= ~(IOR_CREAT|IOR_EXCL);
}
@ -136,13 +101,6 @@ static void CheckRunSettings(IOR_test_t *tests)
params->openFlags |= IOR_WRONLY;
}
/* If numTasks set to 0, use all tasks */
if (params->numTasks == 0) {
MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD,
&params->numTasks),
"MPI_Comm_size() error");
RecalculateExpectedFileSize(params);
}
}
}
@ -157,28 +115,48 @@ void DecodeDirective(char *line, IOR_param_t *params)
rc = sscanf(line, " %[^=# \t\r\n] = %[^# \t\r\n] ", option, value);
if (rc != 2 && rank == 0) {
fprintf(stdout, "Syntax error in configuration options: %s\n",
fprintf(out_logfile, "Syntax error in configuration options: %s\n",
line);
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
if (strcasecmp(option, "api") == 0) {
strcpy(params->api, value);
params->api = strdup(value);
} else if (strcasecmp(option, "summaryFile") == 0) {
if (rank == 0){
out_resultfile = fopen(value, "w");
if (out_resultfile == NULL){
FAIL("Cannot open output file for writes!");
}
printf("Writing output to %s\n", value);
}
} else if (strcasecmp(option, "summaryFormat") == 0) {
if(strcasecmp(value, "default") == 0){
outputFormat = OUTPUT_DEFAULT;
}else if(strcasecmp(value, "JSON") == 0){
outputFormat = OUTPUT_JSON;
}else if(strcasecmp(value, "CSV") == 0){
outputFormat = OUTPUT_CSV;
}else{
FAIL("Unknown summaryFormat");
}
} else if (strcasecmp(option, "refnum") == 0) {
params->referenceNumber = atoi(value);
} else if (strcasecmp(option, "debug") == 0) {
strcpy(params->debug, value);
params->debug = strdup(value);
} else if (strcasecmp(option, "platform") == 0) {
strcpy(params->platform, value);
params->platform = strdup(value);
} else if (strcasecmp(option, "testfile") == 0) {
strcpy(params->testFileName, value);
params->testFileName = strdup(value);
} else if (strcasecmp(option, "hintsfilename") == 0) {
strcpy(params->hintsFileName, value);
params->hintsFileName = strdup(value);
} else if (strcasecmp(option, "deadlineforstonewalling") == 0) {
params->deadlineForStonewalling = atoi(value);
} else if (strcasecmp(option, "stoneWallingWearOut") == 0) {
params->stoneWallingWearOut = atoi(value);
} else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) {
params->stoneWallingWearOutIterations = atoi(value);
params->stoneWallingWearOutIterations = atoll(value);
} else if (strcasecmp(option, "stoneWallingStatusFile") == 0) {
params->stoneWallingStatusFile = strdup(value);
} else if (strcasecmp(option, "maxtimeduration") == 0) {
params->maxTimeDuration = atoi(value);
} else if (strcasecmp(option, "outlierthreshold") == 0) {
@ -220,15 +198,13 @@ void DecodeDirective(char *line, IOR_param_t *params)
} else if (strcasecmp(option, "quitonerror") == 0) {
params->quitOnError = atoi(value);
} else if (strcasecmp(option, "segmentcount") == 0) {
params->segmentCount = StringToBytes(value);
RecalculateExpectedFileSize(params);
params->segmentCount = string_to_bytes(value);
} else if (strcasecmp(option, "blocksize") == 0) {
params->blockSize = StringToBytes(value);
RecalculateExpectedFileSize(params);
params->blockSize = string_to_bytes(value);
} else if (strcasecmp(option, "transfersize") == 0) {
params->transferSize = StringToBytes(value);
params->transferSize = string_to_bytes(value);
} else if (strcasecmp(option, "setalignment") == 0) {
params->setAlignment = StringToBytes(value);
params->setAlignment = string_to_bytes(value);
} else if (strcasecmp(option, "singlexferattempt") == 0) {
params->singleXferAttempt = atoi(value);
} else if (strcasecmp(option, "individualdatasets") == 0) {
@ -257,8 +233,6 @@ void DecodeDirective(char *line, IOR_param_t *params)
params->useStridedDatatype = atoi(value);
} else if (strcasecmp(option, "showhints") == 0) {
params->showHints = atoi(value);
} else if (strcasecmp(option, "showhelp") == 0) {
params->showHelp = atoi(value);
} else if (strcasecmp(option, "uniqueDir") == 0) {
params->uniqueDir = atoi(value);
} else if (strcasecmp(option, "useexistingtestfile") == 0) {
@ -270,7 +244,7 @@ void DecodeDirective(char *line, IOR_param_t *params)
} else if (strcasecmp(option, "randomoffset") == 0) {
params->randomOffset = atoi(value);
} else if (strcasecmp(option, "memoryPerTask") == 0) {
params->memoryPerTask = StringToBytes(value);
params->memoryPerTask = string_to_bytes(value);
params->memoryPerNode = 0;
} else if (strcasecmp(option, "memoryPerNode") == 0) {
params->memoryPerNode = NodeMemoryStringToBytes(value);
@ -285,7 +259,7 @@ void DecodeDirective(char *line, IOR_param_t *params)
#ifndef HAVE_LUSTRE_LUSTRE_USER_H
ERR("ior was not compiled with Lustre support");
#endif
params->lustre_stripe_size = StringToBytes(value);
params->lustre_stripe_size = string_to_bytes(value);
params->lustre_set_striping = 1;
} else if (strcasecmp(option, "lustrestartost") == 0) {
#ifndef HAVE_LUSTRE_LUSTRE_USER_H
@ -317,26 +291,18 @@ void DecodeDirective(char *line, IOR_param_t *params)
ERR("beegfsNumTargets must be >= 1");
} else if (strcasecmp(option, "beegfsChunkSize") == 0) {
#ifndef HAVE_BEEGFS_BEEGFS_H
ERR("ior was not compiled with BeeGFS support");
ERR("ior was not compiled with BeeGFS support");
#endif
params->beegfs_chunkSize = StringToBytes(value);
params->beegfs_chunkSize = string_to_bytes(value);
if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16))
ERR("beegfsChunkSize must be a power of two and >64k");
} else if (strcasecmp(option, "numtasks") == 0) {
params->numTasks = atoi(value);
RecalculateExpectedFileSize(params);
} else if (strcasecmp(option, "summaryalways") == 0) {
params->summary_every_test = atoi(value);
} else if (strcasecmp(option, "daospool") == 0) {
strcpy(params->daosPool, value);
} else if (strcasecmp(option, "daospoolsvc") == 0) {
strcpy(params->daosPoolSvc, value);
} else if (strcasecmp(option, "daosgroup") == 0) {
strcpy(params->daosGroup, value);
}
else {
} else {
if (rank == 0)
fprintf(stdout, "Unrecognized parameter \"%s\"\n",
fprintf(out_logfile, "Unrecognized parameter \"%s\"\n",
option);
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
@ -345,11 +311,13 @@ void DecodeDirective(char *line, IOR_param_t *params)
/*
* Parse a single line, which may contain multiple comma-separated directives
*/
void ParseLine(char *line, IOR_param_t * test)
void ParseLine(const char *line, IOR_param_t * test)
{
char *start, *end;
start = line;
start = strdup(line);
if (start == NULL)
ERR("failed to duplicate line");
do {
end = strchr(start, ',');
if (end != NULL)
@ -357,7 +325,6 @@ void ParseLine(char *line, IOR_param_t * test)
DecodeDirective(start, test);
start = end + 1;
} while (end != NULL);
}
/*
@ -425,7 +392,6 @@ IOR_test_t *ReadConfigScript(char *scriptName)
if (sscanf(linebuf, " #%s", empty) == 1)
continue;
if (contains_only(linebuf, "ior stop")) {
AllocResults(tail);
break;
} else if (contains_only(linebuf, "run")) {
if (runflag) {
@ -434,7 +400,6 @@ IOR_test_t *ReadConfigScript(char *scriptName)
tail->next = CreateTest(&tail->params, test_num++);
tail = tail->next;
}
AllocResults(tail);
runflag = 1;
} else if (runflag) {
/* If this directive was preceded by a "run" line, then
@ -455,215 +420,153 @@ IOR_test_t *ReadConfigScript(char *scriptName)
return head;
}
static IOR_param_t * parameters;
static void decodeDirectiveWrapper(char *line){
ParseLine(line, parameters);
}
/*
* Parse Commandline.
*/
IOR_test_t *ParseCommandLine(int argc, char **argv)
{
static const char *opts =
"a:A:b:BcCd:D:eEf:FgG:hHi:Ij:J:kKl:mM:nN:o:O:pPqQ:rRs:St:T:uU:vVwWxX:YzZ";
int c, i;
static IOR_test_t *tests = NULL;
char * testscripts = NULL;
int toggleG = FALSE;
char * buffer_type = "";
char * memoryPerNode = NULL;
init_IOR_Param_t(& initialTestParams);
parameters = & initialTestParams;
/* suppress getopt() error message when a character is unrecognized */
opterr = 0;
char APIs[1024];
aiori_supported_apis(APIs);
char apiStr[1024];
sprintf(apiStr, "API for I/O [%s]", APIs);
option_help options [] = {
{'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.api},
{'A', NULL, "refNum -- user supplied reference number to include in the summary", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.referenceNumber},
{'b', NULL, "blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & initialTestParams.blockSize},
{'B', NULL, "useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers", OPTION_FLAG, 'd', & initialTestParams.useO_DIRECT},
{'c', NULL, "collective -- collective I/O", OPTION_FLAG, 'd', & initialTestParams.collective},
{'C', NULL, "reorderTasks -- changes task ordering to n+1 ordering for readback", OPTION_FLAG, 'd', & initialTestParams.reorderTasks},
{'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.interTestDelay},
{'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.deadlineForStonewalling},
{.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT},
{.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT},
{.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT},
{'e', NULL, "fsync -- perform sync operation after each block write", OPTION_FLAG, 'd', & initialTestParams.fsync},
{'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & initialTestParams.useExistingTestFile},
{'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & testscripts},
{'F', NULL, "filePerProc -- file-per-process", OPTION_FLAG, 'd', & initialTestParams.filePerProc},
{'g', NULL, "intraTestBarriers -- use barriers between open, write/read, and close", OPTION_FLAG, 'd', & initialTestParams.intraTestBarriers},
/* This option toggles between Incompressible Seed and Time stamp sig based on -l,
* so we'll toss the value in both for now, and sort it out in initialization
* after all the arguments are in and we know which to keep.
*/
{'G', NULL, "setTimeStampSignature -- set value for time stamp signature/random seed", OPTION_OPTIONAL_ARGUMENT, 'd', & toggleG},
{'H', NULL, "showHints -- show hints", OPTION_FLAG, 'd', & initialTestParams.showHints},
{'i', NULL, "repetitions -- number of repetitions of test", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.repetitions},
{'I', NULL, "individualDataSets -- datasets not shared by all procs [not working]", OPTION_FLAG, 'd', & initialTestParams.individualDataSets},
{'j', NULL, "outlierThreshold -- warn on outlier N seconds from mean", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.outlierThreshold},
{'J', NULL, "setAlignment -- HDF5 alignment in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.setAlignment},
{'k', NULL, "keepFile -- don't remove the test file(s) on program exit", OPTION_FLAG, 'd', & initialTestParams.keepFile},
{'K', NULL, "keepFileWithError -- keep error-filled file(s) after data-checking", OPTION_FLAG, 'd', & initialTestParams.keepFileWithError},
{'l', NULL, "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & buffer_type},
{'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & initialTestParams.multiFile},
{'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & memoryPerNode},
{'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & initialTestParams.noFill},
{'N', NULL, "numTasks -- number of tasks that should participate in the test", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.numTasks},
{'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.testFileName},
{'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper},
{'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & initialTestParams.preallocate},
{'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & initialTestParams.useSharedFilePointer},
{'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & initialTestParams.quitOnError},
{'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.taskPerNodeOffset},
{'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & initialTestParams.readFile},
{'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & initialTestParams.checkRead},
{'s', NULL, "segmentCount -- number of segments", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.segmentCount},
{'S', NULL, "useStridedDatatype -- put strided access into datatype [not working]", OPTION_FLAG, 'd', & initialTestParams.useStridedDatatype},
{'t', NULL, "transferSize -- size of transfer in bytes (e.g.: 8, 4k, 2m, 1g)", OPTION_OPTIONAL_ARGUMENT, 'l', & initialTestParams.transferSize},
{'T', NULL, "maxTimeDuration -- max time in minutes executing repeated test; it aborts only between iterations and not within a test!", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.maxTimeDuration},
{'u', NULL, "uniqueDir -- use unique directory name for each file-per-process", OPTION_FLAG, 'd', & initialTestParams.uniqueDir},
{'U', NULL, "hintsFileName -- full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & initialTestParams.hintsFileName},
{'v', NULL, "verbose -- output information (repeating flag increases level)", OPTION_FLAG, 'd', & initialTestParams.verbose},
{'V', NULL, "useFileView -- use MPI_File_set_view", OPTION_FLAG, 'd', & initialTestParams.useFileView},
{'w', NULL, "writeFile -- write file", OPTION_FLAG, 'd', & initialTestParams.writeFile},
{'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & initialTestParams.checkWrite},
{'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & initialTestParams.singleXferAttempt},
{'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & initialTestParams.reorderTasksRandomSeed},
{'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & initialTestParams.fsyncPerWrite},
{'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & initialTestParams.randomOffset},
{'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & initialTestParams.reorderTasksRandom},
{.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT},
{.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT},
LAST_OPTION,
};
IOR_test_t *tests = NULL;
init_IOR_Param_t(&initialTestParams);
GetPlatformName(initialTestParams.platform);
initialTestParams.writeFile = initialTestParams.readFile = FALSE;
initialTestParams.checkWrite = initialTestParams.checkRead = FALSE;
int printhelp = 0;
int parsed_options = option_parse(argc, argv, options, & printhelp);
while ((c = getopt(argc, argv, opts)) != -1) {
switch (c) {
case 'a':
strcpy(initialTestParams.api, optarg);
break;
case 'A':
initialTestParams.referenceNumber = atoi(optarg);
break;
case 'b':
initialTestParams.blockSize = StringToBytes(optarg);
RecalculateExpectedFileSize(&initialTestParams);
break;
case 'B':
initialTestParams.useO_DIRECT = TRUE;
break;
case 'c':
initialTestParams.collective = TRUE;
break;
case 'C':
initialTestParams.reorderTasks = TRUE;
break;
case 'd':
initialTestParams.interTestDelay = atoi(optarg);
break;
case 'D':
initialTestParams.deadlineForStonewalling =
atoi(optarg);
break;
case 'e':
initialTestParams.fsync = TRUE;
break;
case 'E':
initialTestParams.useExistingTestFile = TRUE;
break;
case 'f':
tests = ReadConfigScript(optarg);
break;
case 'F':
initialTestParams.filePerProc = TRUE;
break;
case 'g':
initialTestParams.intraTestBarriers = TRUE;
break;
case 'G':
/* This option toggles between Incompressible Seed and Time stamp sig based on -l,
* so we'll toss the value in both for now, and sort it out in initialization
* after all the arguments are in and we know which it keep.
*/
initialTestParams.setTimeStampSignature = atoi(optarg);
initialTestParams.incompressibleSeed = atoi(optarg);
break;
case 'h':
initialTestParams.showHelp = TRUE;
break;
case 'H':
initialTestParams.showHints = TRUE;
break;
case 'i':
initialTestParams.repetitions = atoi(optarg);
break;
case 'I':
initialTestParams.individualDataSets = TRUE;
break;
case 'j':
initialTestParams.outlierThreshold = atoi(optarg);
break;
case 'J':
initialTestParams.setAlignment = StringToBytes(optarg);
break;
case 'k':
initialTestParams.keepFile = TRUE;
break;
case 'K':
initialTestParams.keepFileWithError = TRUE;
break;
case 'l':
switch(*optarg) {
case 'i': /* Incompressible */
initialTestParams.dataPacketType = incompressible;
break;
case 't': /* timestamp */
initialTestParams.dataPacketType = timestamp;
break;
case 'o': /* offset packet */
initialTestParams.storeFileOffset = TRUE;
initialTestParams.dataPacketType = offset;
break;
default:
fprintf(stdout,
"Unknown arguement for -l %s generic assumed\n", optarg);
break;
}
break;
case 'm':
initialTestParams.multiFile = TRUE;
break;
case 'M':
initialTestParams.memoryPerNode =
NodeMemoryStringToBytes(optarg);
break;
case 'n':
initialTestParams.noFill = TRUE;
break;
case 'N':
initialTestParams.numTasks = atoi(optarg);
RecalculateExpectedFileSize(&initialTestParams);
break;
case 'o':
strcpy(initialTestParams.testFileName, optarg);
break;
case 'O':
ParseLine(optarg, &initialTestParams);
break;
case 'p':
initialTestParams.preallocate = TRUE;
break;
case 'P':
initialTestParams.useSharedFilePointer = TRUE;
break;
case 'q':
initialTestParams.quitOnError = TRUE;
break;
case 'Q':
initialTestParams.taskPerNodeOffset = atoi(optarg);
break;
case 'r':
initialTestParams.readFile = TRUE;
break;
case 'R':
initialTestParams.checkRead = TRUE;
break;
case 's':
initialTestParams.segmentCount = atoi(optarg);
RecalculateExpectedFileSize(&initialTestParams);
break;
case 'S':
initialTestParams.useStridedDatatype = TRUE;
break;
case 't':
initialTestParams.transferSize = StringToBytes(optarg);
break;
case 'T':
initialTestParams.maxTimeDuration = atoi(optarg);
break;
case 'u':
initialTestParams.uniqueDir = TRUE;
break;
case 'U':
strcpy(initialTestParams.hintsFileName, optarg);
break;
case 'v':
initialTestParams.verbose++;
break;
case 'V':
initialTestParams.useFileView = TRUE;
break;
case 'w':
initialTestParams.writeFile = TRUE;
break;
case 'W':
initialTestParams.checkWrite = TRUE;
break;
case 'x':
initialTestParams.singleXferAttempt = TRUE;
break;
case 'X':
initialTestParams.reorderTasksRandomSeed = atoi(optarg);
break;
case 'Y':
initialTestParams.fsyncPerWrite = TRUE;
break;
case 'z':
initialTestParams.randomOffset = TRUE;
break;
case 'Z':
initialTestParams.reorderTasksRandom = TRUE;
break;
default:
fprintf(stdout,
"ParseCommandLine: unknown option `-%c'.\n",
optopt);
}
if (toggleG){
initialTestParams.setTimeStampSignature = toggleG;
initialTestParams.incompressibleSeed = toggleG;
}
for (i = optind; i < argc; i++)
fprintf(stdout, "non-option argument: %s\n", argv[i]);
if (buffer_type[0] != 0){
switch(buffer_type[0]) {
case 'i': /* Incompressible */
initialTestParams.dataPacketType = incompressible;
break;
case 't': /* timestamp */
initialTestParams.dataPacketType = timestamp;
break;
case 'o': /* offset packet */
initialTestParams.storeFileOffset = TRUE;
initialTestParams.dataPacketType = offset;
break;
default:
fprintf(out_logfile,
"Unknown arguement for -l %s; generic assumed\n", buffer_type);
break;
}
}
if (memoryPerNode){
initialTestParams.memoryPerNode = NodeMemoryStringToBytes(optarg);
}
/* If an IOR script was not used, initialize test queue to the defaults */
if (tests == NULL) {
tests = CreateTest(&initialTestParams, 0);
AllocResults(tests);
const ior_aiori_t * backend = aiori_select(initialTestParams.api);
initialTestParams.backend = backend;
initialTestParams.apiVersion = backend->get_version();
if(backend->get_options != NULL){
option_parse(argc - parsed_options, argv + parsed_options, backend->get_options(), & printhelp);
}
if(printhelp != 0){
printf("Usage: %s ", argv[0]);
option_print_help(options, 0);
if(backend->get_options != NULL){
printf("\nPlugin options for backend %s (%s)\n", initialTestParams.api, backend->get_version());
option_print_help(backend->get_options(), 1);
}
if(printhelp == 1){
exit(0);
}else{
exit(1);
}
}
if (testscripts){
tests = ReadConfigScript(testscripts);
}else{
tests = CreateTest(&initialTestParams, 0);
}
CheckRunSettings(tests);

32
src/test/lib.c Normal file
View File

@ -0,0 +1,32 @@
#include "../ior.h"
#include "../mdtest.h"
/*
 * Library smoke test: drives both IOR and mdtest through their library
 * entry points with the DUMMY backend.  Only rank 0 runs the benchmarks;
 * the process exit status is non-zero if either invocation returns NULL.
 */
int main(int argc, char ** argv){
  int world_rank;
  int exit_code = 0;

  MPI_Init(& argc, & argv);
  MPI_Comm_rank(MPI_COMM_WORLD, & world_rank);

  if (world_rank == 0){
    char * ior_argv[]    = {"./ior", "-a", "DUMMY"};
    char * mdtest_argv[] = {"./mdtest", "-a", "DUMMY"};

    IOR_test_t * ior_res = ior_run(3, ior_argv, MPI_COMM_SELF, stdout);
    if (ior_res == NULL)
    {
      fprintf(stderr, "Could not run ior\n");
      exit_code = 1;
    }

    mdtest_results_t * mdtest_res = mdtest_run(3, mdtest_argv, MPI_COMM_SELF, stdout);
    if (mdtest_res == NULL)
    {
      fprintf(stderr, "Could not run mdtest\n");
      exit_code = 1;
    }
  }

  MPI_Finalize();
  return exit_code;
}

View File

@ -16,10 +16,6 @@
# include "config.h"
#endif
#ifdef __linux__
# define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */
#endif /* __linux__ */
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
@ -51,9 +47,18 @@
extern int errno;
extern int numTasks;
extern int rank;
extern int rankOffset;
extern int verbose;
/* globals used by other files, also defined "extern" in ior.h */
int numTasksWorld = 0;
int rank = 0;
int rankOffset = 0;
int tasksPerNode = 0; /* tasks per node */
int verbose = VERBOSE_0; /* verbose output */
MPI_Comm testComm;
MPI_Comm mpi_comm_world;
FILE * out_logfile;
FILE * out_resultfile;
enum OutputFormat_t outputFormat;
/***************************** F U N C T I O N S ******************************/
@ -120,84 +125,86 @@ void DumpBuffer(void *buffer,
to assume that it must always be */
for (i = 0; i < ((size / sizeof(IOR_size_t)) / 4); i++) {
for (j = 0; j < 4; j++) {
fprintf(stdout, IOR_format" ", dumpBuf[4 * i + j]);
fprintf(out_logfile, IOR_format" ", dumpBuf[4 * i + j]);
}
fprintf(stdout, "\n");
fprintf(out_logfile, "\n");
}
return;
} /* DumpBuffer() */
/*
* Sends all strings to root nodes and displays.
*/
void OutputToRoot(int numTasks, MPI_Comm comm, char *stringToDisplay)
{
int i;
int swapNeeded = TRUE;
int pairsToSwap;
char **stringArray;
char tmpString[MAX_STR];
MPI_Status status;
#if MPI_VERSION >= 3
int CountTasksPerNode(MPI_Comm comm) {
/* modern MPI provides a simple way to get the local process count */
MPI_Comm shared_comm;
int count;
/* malloc string array */
stringArray = (char **)malloc(sizeof(char *) * numTasks);
if (stringArray == NULL)
ERR("out of memory");
for (i = 0; i < numTasks; i++) {
stringArray[i] = (char *)malloc(sizeof(char) * MAX_STR);
if (stringArray[i] == NULL)
ERR("out of memory");
}
MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm);
MPI_Comm_size (shared_comm, &count);
MPI_Comm_free (&shared_comm);
strcpy(stringArray[rank], stringToDisplay);
if (rank == 0) {
/* MPI_receive all strings */
for (i = 1; i < numTasks; i++) {
MPI_CHECK(MPI_Recv(stringArray[i], MAX_STR, MPI_CHAR,
MPI_ANY_SOURCE, MPI_ANY_TAG, comm,
&status), "MPI_Recv() error");
}
} else {
/* MPI_send string to root node */
MPI_CHECK(MPI_Send
(stringArray[rank], MAX_STR, MPI_CHAR, 0, 0, comm),
"MPI_Send() error");
}
MPI_CHECK(MPI_Barrier(comm), "barrier error");
/* sort strings using bubblesort */
if (rank == 0) {
pairsToSwap = numTasks - 1;
while (swapNeeded) {
swapNeeded = FALSE;
for (i = 0; i < pairsToSwap; i++) {
if (strcmp(stringArray[i], stringArray[i + 1]) >
0) {
strcpy(tmpString, stringArray[i]);
strcpy(stringArray[i],
stringArray[i + 1]);
strcpy(stringArray[i + 1], tmpString);
swapNeeded = TRUE;
}
}
pairsToSwap--;
}
}
/* display strings */
if (rank == 0) {
for (i = 0; i < numTasks; i++) {
fprintf(stdout, "%s\n", stringArray[i]);
}
}
/* free strings */
for (i = 0; i < numTasks; i++) {
free(stringArray[i]);
}
free(stringArray);
return count;
}
#else
/*
* Count the number of tasks that share a host.
*
* This function employs the gethostname() call, rather than using
* MPI_Get_processor_name(). We are interested in knowing the number
* of tasks that share a file system client (I/O node, compute node,
* whatever that may be). However on machines like BlueGene/Q,
* MPI_Get_processor_name() uniquely identifies a cpu in a compute node,
* not the node where the I/O is function shipped to. gethostname()
* is assumed to identify the shared filesystem client in more situations.
*
* NOTE: This also assumes that the task count on all nodes is equal
* to the task count on the host running MPI task 0.
*/
/* Pre-MPI-3 fallback: count the ranks whose hostname equals rank 0's
 * hostname, then broadcast that count so every rank returns the same
 * value.  Assumes all nodes run the same number of tasks as rank 0's
 * node (see the comment block above). */
int CountTasksPerNode(MPI_Comm comm) {
        int size;
        MPI_Comm_size(comm, & size);
        /* for debugging and testing: override via environment variable */
        if (getenv("IOR_FAKE_TASK_PER_NODES")){
                int tasksPerNode = atoi(getenv("IOR_FAKE_TASK_PER_NODES"));
                int rank;
                MPI_Comm_rank(comm, & rank);
                if(rank == 0){
                        printf("Fake tasks per node: using %d\n", tasksPerNode);
                }
                return tasksPerNode;
        }
        char localhost[MAX_PATHLEN],
             hostname[MAX_PATHLEN];
        int count = 1,          /* rank 0 counts itself */
            i;
        MPI_Status status;

        /* NOTE(review): uses the file-scope globals rank/verbose/out_logfile,
         * not a rank derived from `comm` — confirm comm rank 0 is global rank 0
         * for all callers */
        if (( rank == 0 ) && ( verbose >= 1 )) {
                fprintf( out_logfile, "V-1: Entering count_tasks_per_node...\n" );
                fflush( out_logfile );
        }

        if (gethostname(localhost, MAX_PATHLEN) != 0) {
                FAIL("gethostname()");
        }
        if (rank == 0) {
                /* MPI_receive all hostnames, and compares them to the local hostname */
                for (i = 0; i < size-1; i++) {
                        MPI_Recv(hostname, MAX_PATHLEN, MPI_CHAR, MPI_ANY_SOURCE,
                                 MPI_ANY_TAG, comm, &status);
                        if (strcmp(hostname, localhost) == 0) {
                                count++;
                        }
                }
        } else {
                /* MPI_send hostname to root node */
                MPI_Send(localhost, MAX_PATHLEN, MPI_CHAR, 0, 0, comm);
        }
        /* distribute rank 0's count to all ranks */
        MPI_Bcast(&count, 1, MPI_INT, 0, comm);
        return(count);
}
#endif
/*
* Extract key/value pair from hint string.
@ -217,7 +224,7 @@ void ExtractHint(char *settingVal, char *valueVal, char *hintString)
tmpPtr2 = (char *)strstr(settingPtr, "IOR_HINT__GPFS__");
if (tmpPtr1 == tmpPtr2) {
settingPtr += strlen("IOR_HINT__GPFS__");
fprintf(stdout,
fprintf(out_logfile,
"WARNING: Unable to set GPFS hints (not implemented.)\n");
}
}
@ -259,7 +266,7 @@ void SetHints(MPI_Info * mpiHints, char *hintsFileName)
}
/* get hints from hints file */
if (strcmp(hintsFileName, "") != 0) {
if (hintsFileName != NULL && strcmp(hintsFileName, "") != 0) {
/* open the hint file */
fd = fopen(hintsFileName, "r");
@ -304,7 +311,7 @@ void ShowHints(MPI_Info * mpiHints)
MPI_CHECK(MPI_Info_get(*mpiHints, key, MPI_MAX_INFO_VAL - 1,
value, &flag),
"cannot get info object value");
fprintf(stdout, "\t%s = %s\n", key, value);
fprintf(out_logfile, "\t%s = %s\n", key, value);
}
}
@ -399,14 +406,28 @@ void ShowFileSystemSize(char *fileSystem)
if (realpath(fileSystem, realPath) == NULL) {
ERR("unable to use realpath()");
}
fprintf(stdout, "Path: %s\n", realPath);
fprintf(stdout, "FS: %.1f %s Used FS: %2.1f%% ",
totalFileSystemSizeHR, fileSystemUnitStr,
usedFileSystemPercentage);
fprintf(stdout, "Inodes: %.1f Mi Used Inodes: %2.1f%%\n",
(double)totalInodes / (double)(1<<20),
usedInodePercentage);
fflush(stdout);
if(outputFormat == OUTPUT_DEFAULT){
fprintf(out_resultfile, "%-20s: %s\n", "Path", realPath);
fprintf(out_resultfile, "%-20s: %.1f %s Used FS: %2.1f%% ",
"FS", totalFileSystemSizeHR, fileSystemUnitStr,
usedFileSystemPercentage);
fprintf(out_resultfile, "Inodes: %.1f Mi Used Inodes: %2.1f%%\n",
(double)totalInodes / (double)(1<<20),
usedInodePercentage);
fflush(out_logfile);
}else if(outputFormat == OUTPUT_JSON){
fprintf(out_resultfile, " , \"Path\": \"%s\",", realPath);
fprintf(out_resultfile, "\"Capacity\": \"%.1f %s\", \"Used Capacity\": \"%2.1f%%\",",
totalFileSystemSizeHR, fileSystemUnitStr,
usedFileSystemPercentage);
fprintf(out_resultfile, "\"Inodes\": \"%.1f Mi\", \"Used Inodes\" : \"%2.1f%%\"\n",
(double)totalInodes / (double)(1<<20),
usedInodePercentage);
}else if(outputFormat == OUTPUT_CSV){
}
#endif /* !_WIN32 */
return;
@ -474,3 +495,181 @@ int uname(struct utsname *name)
return 0;
}
#endif /* _WIN32 */
/* spread (max-min) between the ranks' clocks, measured by init_clock() */
double wall_clock_deviation;
/* offset of this rank's clock from rank 0's clock; 0 until measured */
double wall_clock_delta = 0;
/*
 * Get time stamp.  Use MPI_Timer() unless _NO_MPI_TIMER is defined,
 * in which case use gettimeofday().  The result is shifted by
 * wall_clock_delta so that all ranks report times relative to the
 * root rank's clock (wall_clock_delta is set by TimeDeviation()).
 */
double GetTimeStamp(void)
{
        double timeVal;
#ifdef _NO_MPI_TIMER
        /* fallback path: seconds plus microseconds from gettimeofday() */
        struct timeval timer;
        if (gettimeofday(&timer, (struct timezone *)NULL) != 0)
                ERR("cannot use gettimeofday()");
        timeVal = (double)timer.tv_sec + ((double)timer.tv_usec / 1000000);
#else /* not _NO_MPI_TIMER */
        timeVal = MPI_Wtime(); /* no MPI_CHECK(), just check return value */
        if (timeVal < 0)
                ERR("cannot use MPI_Wtime()");
#endif /* _NO_MPI_TIMER */
        /* wall_clock_delta is difference from root node's time */
        timeVal -= wall_clock_delta;
        return (timeVal);
}
/*
 * Determine any spread (range) between node times.  Must be called
 * collectively on mpi_comm_world.  As a side effect it sets
 * wall_clock_delta on every rank to the offset of the local clock from
 * the root rank's clock, which GetTimeStamp() later subtracts out.
 */
static double TimeDeviation(void)
{
        double timestamp;
        double min = 0;
        double max = 0;
        double roottimestamp;
        /* align all ranks before sampling so the samples are comparable */
        MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error");
        timestamp = GetTimeStamp();
        MPI_CHECK(MPI_Reduce(&timestamp, &min, 1, MPI_DOUBLE,
                             MPI_MIN, 0, mpi_comm_world),
                  "cannot reduce tasks' times");
        MPI_CHECK(MPI_Reduce(&timestamp, &max, 1, MPI_DOUBLE,
                             MPI_MAX, 0, mpi_comm_world),
                  "cannot reduce tasks' times");
        /* delta between individual nodes' time and root node's time */
        roottimestamp = timestamp;
        MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, mpi_comm_world),
                  "cannot broadcast root's time");
        wall_clock_delta = timestamp - roottimestamp;
        /* NOTE(review): min/max are reduced onto rank 0 only, so the
         * returned spread is meaningful on rank 0; other ranks return
         * their initialized 0 values. */
        return max - min;
}
/*
 * Measure the clock skew between tasks and remember it in
 * wall_clock_deviation; TimeDeviation() also initializes
 * wall_clock_delta as a side effect.  Collective call: all ranks of
 * mpi_comm_world must enter it.
 */
void init_clock(){
        /* check for skew between tasks' start times */
        wall_clock_deviation = TimeDeviation();
}
/*
 * Return the current local time formatted as "MM/DD/YYYY HH:MM:SS".
 * Uses a statically-allocated buffer: not re-entrant, and every call
 * overwrites the previous result.
 */
char * PrintTimestamp() {
        static char datestring[80];
        time_t cur_timestamp;

        if (( rank == 0 ) && ( verbose >= 1 )) {
                fprintf( out_logfile, "V-1: Entering PrintTimestamp...\n" );
                /* flush only when something was printed, matching the
                 * guard style used elsewhere in this file */
                fflush(out_logfile);
        }
        cur_timestamp = time(NULL);
        /* derive the buffer size instead of repeating the magic 80 */
        strftime(datestring, sizeof(datestring), "%m/%d/%Y %T",
                 localtime(&cur_timestamp));
        return datestring;
}
/*
 * Read the stonewalling iteration count from <filename> on rank 0 and
 * broadcast it to all ranks of mpi_comm_world.
 * Returns -1 (on all ranks) if the file cannot be opened or parsed.
 * Collective call: every rank must enter it, since every rank
 * participates in the broadcast.
 */
int64_t ReadStoneWallingIterations(char * const filename){
        long long data;
        if(rank != 0){
                /* non-root ranks just receive the value rank 0 determined */
                MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
                return data;
        }else{
                FILE * out = fopen(filename, "r");
                if (out == NULL){
                        data = -1;
                }else{
                        int ret = fscanf(out, "%lld", & data);
                        if (ret != 1){
                                /* unparsable content: report failure instead
                                 * of returning an uninitialized value */
                                data = -1;
                        }
                        /* close in every case: the old code leaked the
                         * handle on a parse failure */
                        fclose(out);
                }
                /* always broadcast; returning early here would leave the
                 * other ranks deadlocked in their MPI_Bcast above */
                MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
                return data;
        }
}
/*
 * Persist the stonewalling iteration count to <filename>.
 * Only rank 0 writes the file; every other rank is a no-op.
 */
void StoreStoneWallingIterations(char * const filename, int64_t count){
        if (rank != 0)
                return;
        FILE *fh = fopen(filename, "w");
        if (fh == NULL)
                FAIL("Cannot write to the stonewalling file!");
        fprintf(fh, "%lld", (long long) count);
        fclose(fh);
}
/*
 * Sleep for 'delay' seconds.  Only rank 0 actually delays, and only for
 * a positive delay value.
 */
void DelaySecs(int delay){
        if (rank != 0 || delay <= 0)
                return;
        if (verbose >= VERBOSE_1)
                fprintf(out_logfile, "delaying %d seconds . . .\n", delay);
        sleep(delay);
}
/*
 * Convert IOR_offset_t value to human readable string.  This routine uses a
 * statically-allocated buffer internally and so is not re-entrant.
 * 'base' selects BASE_TWO (MiB/GiB/TiB) or BASE_TEN (MB/GB/TB) units;
 * any other base falls back to plain bytes.
 */
char *HumanReadable(IOR_offset_t value, int base)
{
        static char valueStr[MAX_STR];
        IOR_offset_t m = 0, g = 0, t = 0;
        char m_str[8], g_str[8], t_str[8];

        if (base == BASE_TWO) {
                m = MEBIBYTE;
                g = GIBIBYTE;
                t = GIBIBYTE * 1024llu;
                strcpy(m_str, "MiB");
                strcpy(g_str, "GiB");
                strcpy(t_str, "TiB");
        } else if (base == BASE_TEN) {
                m = MEGABYTE;
                g = GIGABYTE;
                t = GIGABYTE * 1000llu;
                strcpy(m_str, "MB");
                strcpy(g_str, "GB");
                strcpy(t_str, "TB");
        } else {
                /* Unknown base: the unit divisors would stay 0 and the
                 * "value % t" below would divide by zero (undefined
                 * behavior), so report raw bytes instead. */
                if (value >= 0)
                        snprintf(valueStr, MAX_STR-1, "%d bytes", (int)value);
                else
                        snprintf(valueStr, MAX_STR-1, "-");
                return valueStr;
        }

        if (value >= t) {
                if (value % t) {
                        /* fractional amount: two decimal places */
                        snprintf(valueStr, MAX_STR-1, "%.2f %s",
                                 (double)((double)value / t), t_str);
                } else {
                        snprintf(valueStr, MAX_STR-1, "%d %s", (int)(value / t), t_str);
                }
        } else if (value >= g) {
                if (value % g) {
                        snprintf(valueStr, MAX_STR-1, "%.2f %s",
                                 (double)((double)value / g), g_str);
                } else {
                        snprintf(valueStr, MAX_STR-1, "%d %s", (int)(value / g), g_str);
                }
        } else if (value >= m) {
                if (value % m) {
                        snprintf(valueStr, MAX_STR-1, "%.2f %s",
                                 (double)((double)value / m), m_str);
                } else {
                        snprintf(valueStr, MAX_STR-1, "%d %s", (int)(value / m), m_str);
                }
        } else if (value >= 0) {
                snprintf(valueStr, MAX_STR-1, "%d bytes", (int)value);
        } else {
                snprintf(valueStr, MAX_STR-1, "-");
        }
        return valueStr;
}

View File

@ -18,15 +18,62 @@
#include <mpi.h>
#include "ior.h"
extern int numTasksWorld;
extern int rank;
extern int rankOffset;
extern int tasksPerNode;
extern int verbose;
extern MPI_Comm testComm;
extern MPI_Comm mpi_comm_world;
extern FILE * out_logfile;
extern FILE * out_resultfile;
extern enum OutputFormat_t outputFormat; /* format of the output */
/*
* Try using the system's PATH_MAX, which is what realpath and such use.
*/
#define MAX_PATHLEN PATH_MAX
#ifdef __linux__
#define FAIL(msg) do { \
fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \
PrintTimestamp(), rank, __func__, \
msg, strerror(errno)); \
fflush(out_logfile); \
MPI_Abort(testComm, 1); \
} while(0)
#else
#define FAIL(msg) do { \
fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \
PrintTimestamp(), rank, __LINE__, \
msg, strerror(errno)); \
fflush(out_logfile); \
MPI_Abort(testComm, 1); \
} while(0)
#endif
void set_o_direct_flag(int *fd);
char *CurrentTimeString(void);
void OutputToRoot(int, MPI_Comm, char *);
int Regex(char *, char *);
void ShowFileSystemSize(char *);
void DumpBuffer(void *, size_t);
void SeedRandGen(MPI_Comm);
void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *);
char *HumanReadable(IOR_offset_t value, int base);
int CountTasksPerNode(MPI_Comm comm);
void DelaySecs(int delay);
/* Returns -1, if cannot be read */
int64_t ReadStoneWallingIterations(char * const filename);
void StoreStoneWallingIterations(char * const filename, int64_t count);
void init_clock(void);
double GetTimeStamp(void);
char * PrintTimestamp(); // TODO remove this function
extern double wall_clock_deviation;
extern double wall_clock_delta;
#endif /* !_UTILITIES_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,192 +0,0 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/* getopt.h */
/* Declarations for getopt.
Copyright (C) 1989-1994, 1996-1999, 2001 Free Software
Foundation, Inc. This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute
it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software
Foundation; either version 2.1 of the License, or
(at your option) any later version.
The GNU C Library is distributed in the hope that it will
be useful, but WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General
Public License along with the GNU C Library; if not, write
to the Free Software Foundation, Inc., 59 Temple Place,
Suite 330, Boston, MA 02111-1307 USA. */
#ifndef _GETOPT_H
#ifndef __need_getopt
# define _GETOPT_H 1
#endif
/* If __GNU_LIBRARY__ is not already defined, either we are being used
standalone, or this is the first header included in the source file.
If we are being used with glibc, we need to include <features.h>, but
that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
not defined, include <ctype.h>, which will pull in <features.h> for us
if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
doesn't flood the namespace with stuff the way some other headers do.) */
#if !defined __GNU_LIBRARY__
# include <ctype.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns -1, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
#ifndef __need_getopt
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
struct option
{
# if (defined __STDC__ && __STDC__) || defined __cplusplus
const char *name;
# else
char *name;
# endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
int has_arg;
int *flag;
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */
# define no_argument 0
# define required_argument 1
# define optional_argument 2
#endif /* need getopt */
/* Get definitions and prototypes for functions to process the
arguments in ARGV (ARGC of them, minus the program name) for
options given in OPTS.
Return the option character from OPTS just read. Return -1 when
there are no more options. For unrecognized options, or options
missing arguments, `optopt' is set to the option letter, and '?' is
returned.
The OPTS string is a list of characters which are recognized option
letters, optionally followed by colons, specifying that that letter
takes an argument, to be placed in `optarg'.
If a letter in OPTS is followed by two colons, its argument is
optional. This behavior is specific to the GNU `getopt'.
The argument `--' causes premature termination of argument
scanning, explicitly telling `getopt' that there are no more
options.
If OPTS begins with `--', then non-option arguments are treated as
arguments to the option '\0'. This behavior is specific to the GNU
`getopt'. */
#if (defined __STDC__ && __STDC__) || defined __cplusplus
# ifdef __cplusplus // __GNU_LIBRARY__
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
# else /* not __GNU_LIBRARY__ */
extern int getopt ();
# endif /* __GNU_LIBRARY__ */
# ifndef __need_getopt
extern int getopt_long (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
extern int getopt_long_only (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int ___argc, char *const *___argv,
const char *__shortopts,
const struct option *__longopts, int *__longind,
int __long_only);
# endif
#else /* not __STDC__ */
extern int getopt ();
# ifndef __need_getopt
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
# endif
#endif /* __STDC__ */
#ifdef __cplusplus
}
#endif
/* Make sure we later can get all the definitions and declarations. */
#undef __need_getopt
#endif /* getopt.h */

View File

@ -1,46 +0,0 @@
/******************************************************************************\
* *
* Copyright (c) 2003, The Regents of the University of California *
* See the file COPYRIGHT for a complete copyright notice and license. *
* *
\******************************************************************************/
The IOR-tester runs a series of tests to check and maintain the existing
functionality of the source code as code is modified. The IOR-tester creates
a default test, then modifies it to run test scripts. It runs a large number
of tests, most which are expected to pass, but some with an expectation of
failure.
To run the code, modify the 'DefaultTest' dictionary in the source code to
reflect the test file location, the executable location, etc. Then, run
the code using './IOR-tester.py'.
The expected-pass, pattern-independent tests include:
POSIX only:
o retry transfer
MPIIO only:
o hints
o preallocation
Both POSIX and MPIIO:
o repetition count
o intertest delay
o test file removal
o verbosity
The expected-pass, pattern-dependent tests include:
POSIX:
o write-only, read-only, write/read, and write/read check
o fpp and single file
o segmented, strided
o zero-length, 4-byte, and larger file, block, and transfer sizes
MPIIO (same as POSIX, but using MPIIO access):
o noncollective
o noncollective, file view
o collective
o collective, file view
The expected-fail tests include:
Both POSIX and MPIIO:
o repetition count

File diff suppressed because it is too large Load Diff

26
testing/basic-tests.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Test script for basic IOR functionality testing various patterns
# It is kept as simple as possible and outputs the parameters used such that any test can be rerun easily.
# You can override the defaults by setting the variables before invoking the script, or simply set them here...
# Example: export IOR_EXTRA="-v -v -v"

# directory containing this script
ROOT=${0%/*}

# IOR(), MDTEST() and END() helpers plus the defaults come from the shared library
source $ROOT/test-lib.sh

# mdtest metadata runs with one and two ranks
MDTEST 1 -a POSIX
MDTEST 2 -a POSIX -W 2

# ior data runs over the POSIX and MMAP backends with varying rank counts and options
IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 1000k
IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 100k
IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k

# print summary; the exit code is the number of failed tests (see END in test-lib.sh)
END

View File

@ -0,0 +1,18 @@
# IOR script reproducing a multi-node bug; driven by
# testing/complex-tests.sh with IOR_FAKE_TASK_PER_NODES=1 set.
# NOTE(review): no api/testFile are set here, so IOR's defaults apply —
# confirm against the option parser if this script is run standalone.
IOR START
reorderTasksConstant=1
repetitions=1
segmentCount=1
verbose=0
fsync=0
checkWrite=1
blockSize=16
setTimeStampSignature=1511817315
checkRead=0
readFile=1
filePerProc=0
writeFile=1
transferSize=16
intraTestBarriers=0
RUN
IOR STOP

34
testing/complex-tests.sh Executable file
View File

@ -0,0 +1,34 @@
#!/bin/bash
# Test script for more complex IOR functionality testing various patterns
# You can override the defaults by setting the variables before invoking the script, or simply set them here...
# Example: export IOR_EXTRA="-v -v -v"

ROOT=${0%/*}
source $ROOT/test-lib.sh

#stonewalling tests
IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
MDTEST 2 -I 20 -a DUMMY -W 1 -x stonewall-md.log -C
MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -T -v
MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -D -v

#shared tests
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k
# fixed typo: "-z-k" was missing the space between the two flags
IOR 2 -a POSIX -r -z -k -e -i1 -m -t 100k -b 100k

#test mutually exclusive options
IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -Z -i1 -m -t 100k -b 100k -d 0.1

# Now set the num tasks per node to 1:
export IOR_FAKE_TASK_PER_NODES=1
IOR 2 -a POSIX -f $ROOT/bug-multi-node.conf

END

10
testing/docker/README.md Normal file
View File

@ -0,0 +1,10 @@
# Docker enabled testing
This directory contains scripts to run the IOR benchmark testing in various Docker images.
This allows for testing several distributions on a developer machine.
To set up your test systems, run:
./prepare.sh
To run all tests for all variants, use:
./run-all-tests.sh

View File

@ -0,0 +1,5 @@
FROM centos:6
WORKDIR /data
# build toolchain plus both MPI stacks (mpich and openmpi) exercised by run-test.sh
RUN yum install -y mpich openmpi git pkg-config nano gcc bzip2 patch gcc-c++ make mpich-devel openmpi-devel
# sudo is needed by run-test.sh to drop privileges to "testuser"
RUN yum install -y sudo

View File

@ -0,0 +1 @@
../centos7/run-test.sh

View File

@ -0,0 +1,5 @@
FROM centos:7
WORKDIR /data
# build toolchain plus both MPI stacks (mpich and openmpi) exercised by run-test.sh
RUN yum install -y mpich openmpi git pkg-config nano gcc bzip2 patch gcc-c++ make mpich-devel openmpi-devel
# sudo is needed by run-test.sh to drop privileges to "testuser"
RUN yum install -y sudo

View File

@ -0,0 +1,35 @@
#!/bin/bash
# Configure and build IOR inside $1 (the build root) and run the basic
# test suite once per MPI flavor, as the unprivileged "testuser".
# Arguments: $1 = build directory, $2 = uid, $3 = gid of the invoking user.
BUILD="$1"

# create a user/group matching the caller so build artifacts are owned correctly
groupadd -g $3 testuser
useradd -r -u $2 -g testuser testuser

ERROR=0

function runTest(){
        P=$PATH                 # remember the original PATH so each flavor starts clean
        FLAVOR="$1"
        MPI_DIR="$2"
        echo $FLAVOR in $BUILD/$FLAVOR
        sudo -u testuser mkdir -p $BUILD/$FLAVOR
        pushd $BUILD/$FLAVOR > /dev/null

        # put the chosen MPI flavor first on PATH for configure/make
        export PATH=$MPI_DIR/bin:$PATH
        # out-of-tree configure + build; abort the whole script on build failure
        sudo -u testuser PATH=$PATH /data/configure || exit 1
        sudo -u testuser PATH=$PATH make || exit 1

        cd /data/
        sudo -u testuser PATH=$PATH IOR_BIN_DIR=$BUILD/$FLAVOR/src IOR_OUT=$BUILD/$FLAVOR/test ./testing/basic-tests.sh
        ERROR=$(($ERROR + $?))  # accumulate failed-test counts across flavors
        popd > /dev/null
        PATH=$P
}

runTest openmpi /usr/lib64/openmpi/
runTest mpich /usr/lib64/mpich

exit $ERROR

35
testing/docker/ceph/NOTES Normal file
View File

@ -0,0 +1,35 @@
Following are basic notes on how to deploy the 'ceph/demo' docker container. The 'ceph/demo' container bootstraps a complete Ceph cluster with all necessary daemons already running, providing a convenient environment for evaluating the correctness of the RADOS backend for IOR, in our case.
##########################
# Pull 'ceph/demo' image #
##########################
Run `docker pull ceph/demo` to download the image to your system.
################################
# Deploy 'ceph/demo' container #
################################
To deploy the Ceph cluster, execute the following command:
`docker run -it --net=host -v /etc/ceph:/etc/ceph -e MON_IP=10.0.0.1 -e CEPH_PUBLIC_NETWORK=10.0.0.0/24 ceph/demo`
The only necessary modification to the above command is to provide the correct network IP address for MON_IP and to provide the corresponding CIDR notation of this IP for CEPH_PUBLIC_NETWORK, as illustrated.
NOTE: The above command starts the docker container in interactive mode. Replace '-it' with '-d' to run in the background as a daemon.
###############################
# Run IOR against 'ceph/demo' #
###############################
With a Ceph cluster now deployed, running IOR against it is straightforward:
`./ior -a RADOS -- -u admin -c /etc/ceph/ceph.conf -p cephfs_data`
All command line arguments following the '--' are required.
-u is the Ceph username (e.g., admin)
-c is the Ceph config file (typically found in /etc/ceph/ceph.conf)
-p is the Ceph pool to perform I/O to (e.g., cephfs_data)
NOTE: Permissions of the various config files, keyrings, etc. inside of /etc/ceph may need to be modified to be readable by the user running IOR (e.g., `sudo chmod 644 /etc/ceph/*`). These various files are created internally within the docker container and may not be readable by other users.

24
testing/docker/prepare.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
# Build a docker image (tagged hpc/ior:<dir>) for every subdirectory of
# testing/docker, so the full matrix can be run via run-all-tests.sh.

cd "${0%/*}"
if [[ ! -e run-all-tests.sh ]] ; then
        echo "Error, this script must run from the ./testing/docker directory"
        exit 1
fi

echo "Checking docker"
docker ps
if [ $? != 0 ] ; then
        echo "Error, cannot run docker commands"
        groups |grep docker || echo "You are not in the docker group !"
        exit 1
fi

echo "Building docker containers"
# "find ." instead of bare "find": POSIX find requires an explicit path
# operand (GNU find merely defaults to "."); output is identical.
# cut -b 3- strips the leading "./"; grep drops the entry for "." itself.
for IMAGE in $(find . -type d | cut -b 3- |grep -v "^$") ; do
        docker build -t hpc/ior:$IMAGE $IMAGE
        if [ $? != 0 ] ; then
                echo "Error building image $IMAGE"
                exit 1
        fi
done

52
testing/docker/run-all-tests.sh Executable file
View File

@ -0,0 +1,52 @@
#!/bin/bash
# This script runs the testscript for all supported docker images
# (one image per subdirectory, previously built by prepare.sh).

cd "${0%/*}"
if [[ ! -e run-all-tests.sh ]] ; then
        echo "Error, this script must run from the ./testing/docker directory"
        exit 1
fi

# per-image build/output directories are created under the repo root
TARGET=../../build-docker
mkdir -p $TARGET

ARGS="$@"
# group id of the invoking user, forwarded into the container so the
# test user there owns the build artifacts
GID=$(id -g $USER)
OPT="-it --rm -v $PWD/../../:/data/:z"
ERROR=0
VERBOSE=0

set -- `getopt -u -l "clean" -l verbose -o "" -- "$ARGS"`
test $# -lt 1 && exit 1
while test $# -gt 0
do
        case "$1" in
                --clean) echo "Cleaning build dirs!"; rm -rf $TARGET/* ;;
                --verbose) VERBOSE=1 ;;
                --) ;;
                *) echo "Unknown option $1"; exit 1;;
        esac
        shift
done

# "find ." instead of bare "find": POSIX find requires an explicit path
# operand (GNU find merely defaults to "."); output is identical.
for IMAGE in $(find . -type d | cut -b 3- |grep -v "^$") ; do
        echo "RUNNING $IMAGE"
        mkdir -p $TARGET/$IMAGE
        WHAT="docker run $OPT -h $IMAGE hpc/ior:$IMAGE /data/testing/docker/$IMAGE/run-test.sh /data/build-docker/$IMAGE $UID $GID"
        if [[ $VERBOSE == 1 ]] ; then
                echo $WHAT
        fi
        $WHAT 2>$TARGET/$IMAGE/LastTest.log 1>&2
        ERR=$?
        ERROR=$(($ERROR+$ERR))
        if [[ $ERR != 0 ]]; then
                echo $WHAT
                echo "Error, see $TARGET/$IMAGE/LastTest.log"
        fi
done

if [[ $ERROR != 0 ]] ; then
        echo "Errors occured!"
else
        echo "OK: all tests passed!"
fi

View File

@ -0,0 +1,7 @@
FROM ubuntu:14.04
WORKDIR /data
RUN apt-get update
# compilers and both MPI stacks used by run-test.sh
RUN apt-get install -y libopenmpi-dev openmpi-bin mpich git pkg-config gcc-4.7 nano make
# sudo for the privilege drop in run-test.sh; MPI-enabled HDF5 for the
# --with-hdf5 configure run in that script
RUN apt-get install -y sudo hdf5-tools libhdf5-mpi-dev

View File

@ -0,0 +1,33 @@
#!/bin/bash
# Configure and build IOR (with HDF5 support) in $1 and run the basic
# test suite once per MPI flavor, as the unprivileged "testuser".
# Arguments: $1 = build directory, $2 = uid, $3 = gid of the invoking user.
BUILD="$1"

# create a user/group matching the caller so build artifacts are owned correctly
groupadd -g $3 testuser
useradd -r -u $2 -g testuser testuser

ERROR=0

function runTest(){
        FLAVOR="$1"
        MPI_DIR="$2"
        echo $FLAVOR in $BUILD/$FLAVOR

        # select the MPI flavor system-wide via the alternatives mechanism
        update-alternatives --set mpi $MPI_DIR

        sudo -u testuser mkdir -p $BUILD/$FLAVOR
        pushd $BUILD/$FLAVOR > /dev/null

        # out-of-tree build against the distribution's parallel HDF5;
        # abort the whole script on configure/build failure
        sudo -u testuser /data/configure --with-hdf5 CFLAGS=-I/usr/lib/x86_64-linux-gnu/hdf5/openmpi/include LDFLAGS=-L/usr/lib/x86_64-linux-gnu/hdf5/openmpi/lib|| exit 1
        sudo -u testuser make V=1 || exit 1

        #define the alias
        ln -sf $(which mpiexec.$FLAVOR) /usr/bin/mpiexec

        cd /data/
        sudo -u testuser IOR_BIN_DIR=$BUILD/$FLAVOR/src IOR_OUT=$BUILD/$FLAVOR/test ./testing/basic-tests.sh
        ERROR=$(($ERROR + $?))  # accumulate failed-test counts across flavors
        popd > /dev/null
}

runTest openmpi /usr/lib/openmpi/include
runTest mpich /usr/include/mpich

exit $ERROR

View File

@ -0,0 +1,6 @@
FROM ubuntu:16.04
WORKDIR /data
RUN apt-get update
# compilers and both MPI stacks used by run-test.sh
RUN apt-get install -y libopenmpi-dev openmpi-bin mpich git pkg-config gcc-5 gcc-4.8 nano
# sudo is needed by run-test.sh to drop privileges to "testuser"
RUN apt-get install -y sudo

View File

@ -0,0 +1,35 @@
#!/bin/bash
# Configure and build IOR in $1 and run the basic test suite once per
# MPI flavor, as the unprivileged "testuser".
# Arguments: $1 = build directory, $2 = uid, $3 = gid of the invoking user.
BUILD="$1"

# create a user/group matching the caller so build artifacts are owned correctly
groupadd -g $3 testuser
useradd -r -u $2 -g testuser testuser

ERROR=0

function runTest(){
        FLAVOR="$1"
        MPI_DIR="$2"
        # launcher command consumed by testing/test-lib.sh
        export IOR_MPIRUN="$3"
        echo $FLAVOR in $BUILD/$FLAVOR

        # select the MPI flavor system-wide via the alternatives mechanism
        update-alternatives --set mpi $MPI_DIR

        sudo -u testuser mkdir -p $BUILD/$FLAVOR
        pushd $BUILD/$FLAVOR > /dev/null

        # out-of-tree configure + build; abort the whole script on failure
        sudo -u testuser /data/configure || exit 1
        sudo -u testuser make || exit 1

        #define the alias
        ln -sf $(which mpiexec.$FLAVOR) /usr/bin/mpiexec

        cd /data/
        sudo -u testuser IOR_BIN_DIR=$BUILD/$FLAVOR/src IOR_OUT=$BUILD/$FLAVOR/test ./testing/basic-tests.sh
        ERROR=$(($ERROR + $?))  # accumulate failed-test counts across flavors
        popd > /dev/null
}

export MPI_ARGS=""
runTest openmpi /usr/lib/openmpi/include "mpiexec -n"
runTest mpich /usr/include/mpich "mpiexec -n"

exit $ERROR

View File

@ -1,2 +0,0 @@
IOR_HINT__MPI__unrecognizedHint=true
IOR_HINT__MPI__IBM_largeblock_io=true

74
testing/test-lib.sh Normal file
View File

@ -0,0 +1,74 @@
# Shared library for the IOR/mdtest shell test suites (sourced, not executed).
# It is kept as simple as possible and outputs the parameters used such that any test can be rerun easily.
# You can override the defaults by setting the variables before invoking the script, or simply set them here...
# Example: export IOR_EXTRA="-v -v -v"

IOR_MPIRUN=${IOR_MPIRUN:-mpiexec -np}   # MPI launcher prefix; the rank count is appended
IOR_BIN_DIR=${IOR_BIN_DIR:-./build/src} # location of the ior/mdtest binaries
IOR_OUT=${IOR_OUT:-./build/test}        # per-test output logs go here
IOR_EXTRA=${IOR_EXTRA:-} # Add global options like verbosity
MDTEST_EXTRA=${MDTEST_EXTRA:-}

################################################################################
mkdir -p ${IOR_OUT}
mkdir -p /dev/shm/mdest

## Sanity check
if [[ ! -e ${IOR_OUT} ]]; then
        echo "Could not create output dir ${IOR_OUT}"
        exit 1
fi

if [[ ! -e ${IOR_BIN_DIR}/ior ]]; then
        echo "IOR Executable \"${IOR_BIN_DIR}/ior\" does not exist! Call me from the root directory!"
        exit 1
fi

if [[ ! -e ${IOR_BIN_DIR}/mdtest ]]; then
        echo "MDTest Executable \"${IOR_BIN_DIR}/mdtest\" does not exist! Call me from the root directory!"
        exit 1
fi

ERRORS=0 # Number of errors detected while running
I=0      # sequence number of the next test; names its log file

# Internal helper shared by IOR() and MDTEST(): run one command line,
# log its output to ${IOR_OUT}/$I, print OK/ERR followed by the exact
# command (so any test can be rerun by hand), and count failures.
function RUN_TEST_CASE(){
        WHAT="$1"
        $WHAT 1>${IOR_OUT}/$I 2>&1
        if [[ $? != 0 ]]; then
                echo -n "ERR"
                ERRORS=$(($ERRORS + 1))
        else
                echo -n "OK "
        fi
        echo " $WHAT"
        I=$((${I}+1))
}

# Run ior with $1 ranks; all remaining arguments are passed through.
function IOR(){
        RANKS=$1
        shift
        RUN_TEST_CASE "${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/ior ${@} ${IOR_EXTRA} -o /dev/shm/ior"
}

# Run mdtest with $1 ranks; all remaining arguments are passed through.
function MDTEST(){
        RANKS=$1
        shift
        RUN_TEST_CASE "${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} ${MDTEST_EXTRA} -d /dev/shm/mdest"
}

# Print the summary and exit with the number of failed tests.
function END(){
        if [[ ${ERRORS} == 0 ]] ; then
                echo "PASSED"
        else
                echo "Error, check the output files!"
        fi
        exit ${ERRORS}
}

View File

@ -1,28 +0,0 @@
# This tests the checks of read and write for correctness
# Run with multiple processes, at least 3
# You may set the environment variable IOR_FAKE_TASK_PER_NODES=3 and run it on one node
# Example: IOR_FAKE_TASK_PER_NODES=3 mpiexec -n 3 ./src/ior -f testing/timestamp.cfg
IOR START
reorderTasksConstant=1
repetitions=1
storeFileOffset=0
segmentCount=1
verbose=0
fsync=0
checkWrite=1
blockSize=16
setTimeStampSignature=1511817315
checkRead=1
readFile=1
filePerProc=0
writeFile=1
api=POSIX
transferSize=16
intraTestBarriers=0
testFile=/tmp/test
RUN
storeFileOffset=1
RUN
IOR STOP