diff --git a/.gitignore b/.gitignore index d065f5b..eb59cf7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,11 @@ +tags Makefile Makefile.in aclocal.m4 config.log config.status +COPYING +INSTALL config/compile config/config.guess config/config.sub @@ -12,11 +15,14 @@ config/missing config/test-driver configure contrib/.deps/ +contrib/cbif contrib/Makefile contrib/Makefile.in +contrib/cbif doc/Makefile doc/Makefile.in src/.deps/ +src/mdtest src/Makefile src/Makefile.in src/config.h @@ -29,7 +35,14 @@ contrib/cbif.o src/*.o src/*.i src/*.s +src/*.a src/ior +src/mdtest +src/testlib +src/test/.deps/ +src/test/.dirstamp +src/test/lib.o +build/ doc/doxygen/build doc/sphinx/_*/ diff --git a/README_DAOS b/README_DAOS new file mode 100644 index 0000000..ed98bd6 --- /dev/null +++ b/README_DAOS @@ -0,0 +1,86 @@ +Building +---------------------- + +The DAOS library must be installed on the system. + +./bootstrap +./configure --prefix=iorInstallDir --with-daos=DIR --with-cart=DIR + +One must specify "--with-daos=/path/to/daos/install and --with-cart". When that +is specified the DAOS and DFS driver will be built. + +The DAOS driver uses the DAOS API to open a container (or create it if it +doesn't exist first) then create an array object in that container (file) and +read/write to the array object using the daos Array API. The DAOS driver works +with IOR only (no mdtest support yet). The file name used by IOR (passed by -o +option) is hashed to an object ID that is used as the array oid. + +The DFS (DAOS File System) driver creates an encapsulated namespace and emulates +the POSIX driver using the DFS API directly on top of DAOS. The DFS driver works +with both IOR and mdtest. + +Running with DAOS API +--------------------- + +ior -a DAOS [ior_options] [daos_options] + +In the IOR options, the file name should be specified as a container uuid using +"-o ". If the "-E" option is given, then this UUID shall denote +an existing container created by a "matching" IOR run. 
Otherwise, IOR will +create a new container with this UUID. In the latter case, one may use +uuidgen(1) to generate the UUID of the new container. + +The DAOS options include: + +Required Options: +--daos.pool : pool uuid to connect to (has to be created beforehand) +--daos.svcl : pool svcl list (: separated) +--daos.cont : container for the IOR files/objects (can use `uuidgen`) + +Optional Options: +--daos.group : group name of servers with the pool +--daos.chunk_size : Chunk size of the array object controlling striping over DKEYs +--daos.destroy : flag to destroy the container on finalize +--daos.oclass : specific object class for array object + +Examples that should work include: + + - "ior -a DAOS -w -W -o file_name --daos.pool --daos.svcl \ + --daos.cont " + + - "ior -a DAOS -w -W -r -R -o file_name -b 1g -t 4m \ + --daos.pool --daos.svcl --daos.cont \ + --daos.chunk_size 1024 --daos.oclass R2" + +Running with DFS API +--------------------- + +ior -a DFS [ior_options] [dfs_options] +mdtest -a DFS [mdtest_options] [dfs_options] + +Required Options: +--dfs.pool : pool uuid to connect to (has to be created beforehand) +--dfs.svcl : pool svcl list (: separated) +--dfs.cont : container uuid that will hold the encapsulated namespace + +Optional Options: +--dfs.group : group name of servers with the pool +--dfs.chunk_size : Chunk size of the files +--dfs.destroy : flag to destroy the container on finalize +--dfs.oclass : specific object class for files + +In the IOR options, the file name should be specified on the root dir directly +since ior does not create directories and the DFS container representing the +encapsulated namespace is not the same as the system namespace the user is +executing from. 
+ +Examples that should work include: + - "ior -a DFS -w -W -o /test1 --dfs.pool --dfs.svcl --dfs.cont " + - "ior -a DFS -w -W -r -R -o /test2 -b 1g -t 4m -C --dfs.pool --dfs.svcl --dfs.cont " + - "ior -a DFS -w -r -o /test3 -b 8g -t 1m -C --dfs.pool --dfs.svcl --dfs.cont " + +Running mdtest, the user needs to specify a directory with -d where the test +tree will be created. Some examples: + - "mdtest -a DFS -n 100 -F -D -d /bla --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -n 1000 -F -C -d /bla --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d /bla --dfs.pool --dfs.svcl --dfs.cont " diff --git a/configure.ac b/configure.ac index af400ab..f6b958b 100755 --- a/configure.ac +++ b/configure.ac @@ -185,6 +185,41 @@ AM_COND_IF([USE_RADOS_AIORI],[ AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI]) ]) +# DAOS Backends (DAOS and DFS) IO support require DAOS and CART/GURT; each probe below is skipped unless its --with-cart / --with-daos option is supplied +AC_ARG_WITH([cart], + [AS_HELP_STRING([--with-cart], + [support IO with DAOS backends @<:@default=no@:>@])], + [], + [with_cart=no]) + +AS_IF([test "x$with_cart" != xno], + CART="yes" + LDFLAGS="$LDFLAGS -L$with_cart/lib" + CPPFLAGS="$CPPFLAGS -I$with_cart/include/" + AC_CHECK_HEADERS(gurt/common.h,, [unset CART]) + AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset CART])) + +AC_ARG_WITH([daos], + [AS_HELP_STRING([--with-daos], + [support IO with DAOS backends @<:@default=no@:>@])], + [], + [with_daos=no]) + +AS_IF([test "x$with_daos" != xno], + DAOS="yes" + LDFLAGS="$LDFLAGS -L$with_daos/lib" + CPPFLAGS="$CPPFLAGS -I$with_daos/include" + AC_CHECK_HEADERS(daos_types.h,, [unset DAOS]) + AC_CHECK_LIB([uuid], [uuid_generate],, [unset DAOS]) + AC_CHECK_LIB([daos_common], [daos_sgl_init],, [unset DAOS]) + AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) + AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS])) + +AM_CONDITIONAL([USE_DAOS_AIORI], [test x$DAOS = xyes]) +AM_COND_IF([USE_DAOS_AIORI],[ + AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backends AIORI]) +]) + # Gfarm 
support AC_MSG_CHECKING([for Gfarm file system]) AC_ARG_WITH([gfarm], diff --git a/src/Makefile.am b/src/Makefile.am index 74dcd31..0de3b4b 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -70,6 +70,10 @@ extraSOURCES += aiori-RADOS.c extraLDADD += -lrados endif +if USE_DAOS_AIORI +extraSOURCES += aiori-DAOS.c aiori-DFS.c +endif + if USE_GFARM_AIORI extraSOURCES += aiori-Gfarm.c extraLDADD += -lgfarm diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c new file mode 100644 index 0000000..21df9aa --- /dev/null +++ b/src/aiori-DAOS.c @@ -0,0 +1,548 @@ +/* + * -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/* + * Copyright (C) 2018-2019 Intel Corporation + * + * GOVERNMENT LICENSE RIGHTS-OPEN SOURCE SOFTWARE + * The Government's rights to use, modify, reproduce, release, perform, display, + * or disclose this software are subject to the terms of the Apache License as + * provided in Contract No. 8F-30005. + * Any reproduction of computer software, computer software documentation, or + * portions thereof marked with this legend must also reproduce the markings. + */ + +/* + * This file implements the abstract I/O interface for DAOS Array API. 
+ */ + +#define _BSD_SOURCE + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ior.h" +#include "aiori.h" +#include "iordef.h" + +/************************** O P T I O N S *****************************/ +struct daos_options{ + char *pool; + char *svcl; + char *group; + char *cont; + int chunk_size; + int destroy; + char *oclass; +}; + +static struct daos_options o = { + .pool = NULL, + .svcl = NULL, + .group = NULL, + .cont = NULL, + .chunk_size = 1048576, + .destroy = 0, + .oclass = NULL, +}; + +static option_help options [] = { + {0, "daos.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o.pool}, + {0, "daos.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', &o.svcl}, + {0, "daos.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.group}, + {0, "daos.cont", "container uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o.cont}, + {0, "daos.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, + {0, "daos.destroy", "Destroy Container", OPTION_FLAG, 'd', &o.destroy}, + {0, "daos.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, + LAST_OPTION +}; + +/**************************** P R O T O T Y P E S *****************************/ + +static void DAOS_Init(); +static void DAOS_Fini(); +static void *DAOS_Create(char *, IOR_param_t *); +static void *DAOS_Open(char *, IOR_param_t *); +static int DAOS_Access(const char *, int, IOR_param_t *); +static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void DAOS_Close(void *, IOR_param_t *); +static void DAOS_Delete(char *, IOR_param_t *); +static char* DAOS_GetVersion(); +static void DAOS_Fsync(void *, IOR_param_t *); +static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static option_help * DAOS_options(); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t daos_aiori = { + .name 
= "DAOS", + .create = DAOS_Create, + .open = DAOS_Open, + .access = DAOS_Access, + .xfer = DAOS_Xfer, + .close = DAOS_Close, + .delete = DAOS_Delete, + .get_version = DAOS_GetVersion, + .fsync = DAOS_Fsync, + .get_file_size = DAOS_GetFileSize, + .initialize = DAOS_Init, + .finalize = DAOS_Fini, + .get_options = DAOS_options, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .stat = aiori_posix_stat, +}; + +#define IOR_DAOS_MUR_SEED 0xDEAD10CC + +enum handleType { + POOL_HANDLE, + CONT_HANDLE, + ARRAY_HANDLE +}; + +static daos_handle_t poh; +static daos_handle_t coh; +static daos_handle_t aoh; +static daos_oclass_id_t objectClass = OC_SX; +static bool daos_initialized = false; + +/***************************** F U N C T I O N S ******************************/ + +/* For DAOS methods. */ +#define DCHECK(rc, format, ...) \ +do { \ + int _rc = (rc); \ + \ + if (_rc < 0) { \ + fprintf(stderr, "ior ERROR (%s:%d): %d: %d: " \ + format"\n", __FILE__, __LINE__, rank, _rc, \ + ##__VA_ARGS__); \ + fflush(stdout); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ + } \ +} while (0) + +#define INFO(level, format, ...) \ +do { \ + if (verbose >= level) \ + printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ +} while (0) + +/* For generic errors like invalid command line options. */ +#define GERR(format, ...) \ +do { \ + fprintf(stderr, format"\n", ##__VA_ARGS__); \ + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ +} while (0) + +/* Distribute process 0's pool or container handle to others. */ +static void +HandleDistribute(daos_handle_t *handle, enum handleType type) +{ + d_iov_t global; + int rc; + + global.iov_buf = NULL; + global.iov_buf_len = 0; + global.iov_len = 0; + + if (rank == 0) { + /* Get the global handle size. 
*/ + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else if (type == CONT_HANDLE) + rc = daos_cont_local2global(*handle, &global); + else + rc = daos_array_local2global(*handle, &global); + DCHECK(rc, "Failed to get global handle size"); + } + + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, + MPI_COMM_WORLD), + "Failed to bcast global handle buffer size"); + + global.iov_len = global.iov_buf_len; + global.iov_buf = malloc(global.iov_buf_len); + if (global.iov_buf == NULL) + ERR("Failed to allocate global handle buffer"); + + if (rank == 0) { + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else if (type == CONT_HANDLE) + rc = daos_cont_local2global(*handle, &global); + else + rc = daos_array_local2global(*handle, &global); + DCHECK(rc, "Failed to create global handle"); + } + + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, + MPI_COMM_WORLD), + "Failed to bcast global pool handle"); + + if (rank != 0) { + if (type == POOL_HANDLE) + rc = daos_pool_global2local(global, handle); + else if (type == CONT_HANDLE) + rc = daos_cont_global2local(poh, global, handle); + else + rc = daos_array_global2local(coh, global, 0, handle); + DCHECK(rc, "Failed to get local handle"); + } + + free(global.iov_buf); +} + +static option_help * +DAOS_options() +{ + return options; +} + +static void +DAOS_Init() +{ + int rc; + + if (daos_initialized) + return; + + if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) { + GERR("Invalid DAOS pool/cont\n"); + return; + } + + if (o.oclass) { + objectClass = daos_oclass_name2id(o.oclass); + if (objectClass == OC_UNKNOWN) + GERR("Invalid DAOS Object class %s\n", o.oclass); + } + + rc = daos_init(); + if (rc) + DCHECK(rc, "Failed to initialize daos"); + + if (rank == 0) { + uuid_t uuid; + d_rank_list_t *svcl = NULL; + d_rank_list_t ranks; + static daos_pool_info_t po_info; + static daos_cont_info_t co_info; + + INFO(VERBOSE_1, "Connecting to pool %s", 
o.pool); + + rc = uuid_parse(o.pool, uuid); + DCHECK(rc, "Failed to parse 'pool': %s", o.pool); + + svcl = daos_rank_list_parse(o.svcl, ":"); + if (svcl == NULL) + ERR("Failed to allocate svcl"); + + rc = daos_pool_connect(uuid, o.group, svcl, DAOS_PC_RW, + &poh, &po_info, NULL); + d_rank_list_free(svcl); + DCHECK(rc, "Failed to connect to pool %s", o.pool); + + INFO(VERBOSE_1, "Create/Open Container %s", o.cont); + + uuid_clear(uuid); + rc = uuid_parse(o.cont, uuid); + DCHECK(rc, "Failed to parse 'cont': %s", o.cont); + + rc = daos_cont_open(poh, uuid, DAOS_COO_RW, &coh, &co_info, + NULL); + /* If NOEXIST we create it */ + if (rc == -DER_NONEXIST) { + INFO(VERBOSE_2, "Creating DAOS Container...\n"); + rc = daos_cont_create(poh, uuid, NULL, NULL); + if (rc == 0) + rc = daos_cont_open(poh, uuid, DAOS_COO_RW, + &coh, &co_info, NULL); + } + DCHECK(rc, "Failed to create container"); + } + + HandleDistribute(&poh, POOL_HANDLE); + HandleDistribute(&coh, CONT_HANDLE); + aoh.cookie = 0; + + daos_initialized = true; +} + +static void +DAOS_Fini() +{ + int rc; + + if (!daos_initialized) + return; + + MPI_Barrier(MPI_COMM_WORLD); + rc = daos_cont_close(coh, NULL); + if (rc) { + DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + MPI_Barrier(MPI_COMM_WORLD); + + if (o.destroy) { + if (rank == 0) { + uuid_t uuid; + double t1, t2; + + INFO(VERBOSE_1, "Destroying DAOS Container %s", o.cont); + uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); + rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0) + INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); + } + + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rc) { + if (rank == 0) + DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + + if (rank == 0) + INFO(VERBOSE_1, "Disconnecting from DAOS POOL.."); + + rc = daos_pool_disconnect(poh, NULL); + DCHECK(rc, "Failed to disconnect from pool %s", 
o.pool); + + MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + if (rank == 0) + INFO(VERBOSE_1, "Finalizing DAOS.."); + + rc = daos_fini(); + DCHECK(rc, "Failed to finalize daos"); + + daos_initialized = false; +} + +static void +gen_oid(const char *name, daos_obj_id_t *oid) +{ + + oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); + oid->hi = 0; + + daos_array_generate_id(oid, objectClass, true, 0); +} + +static void * +DAOS_Create(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + int rc; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** Create the array */ + if (param->filePerProc || rank == 0) { + rc = daos_array_create(coh, oid, DAOS_TX_NONE, 1, o.chunk_size, + &aoh, NULL); + DCHECK(rc, "Failed to create array object\n"); + } + + /** Distribute the array handle if not FPP */ + if (!param->filePerProc) + HandleDistribute(&aoh, ARRAY_HANDLE); +} + +static int +DAOS_Access(const char *testFileName, int mode, IOR_param_t * param) +{ + daos_obj_id_t oid; + daos_size_t cell_size, chunk_size; + int rc; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + if (rc) + return rc; + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_close(aoh, NULL); + aoh.cookie = 0; + return rc; +} + +static void * +DAOS_Open(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** Open the array */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + int rc; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "Failed to create array object\n"); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + } + + /** Distribute the array handle if not FPP */ + if (!param->filePerProc) 
+ HandleDistribute(&aoh, ARRAY_HANDLE); +} + +static IOR_offset_t +DAOS_Xfer(int access, void *file, IOR_size_t *buffer, + IOR_offset_t length, IOR_param_t *param) +{ + daos_array_iod_t iod; + daos_range_t rg; + d_sg_list_t sgl; + d_iov_t iov; + int rc; + + /** set array location */ + iod.arr_nr = 1; + rg.rg_len = length; + rg.rg_idx = param->offset; + iod.arr_rgs = &rg; + + /** set memory location */ + sgl.sg_nr = 1; + d_iov_set(&iov, buffer, length); + sgl.sg_iovs = &iov; + + if (access == WRITE) { + rc = daos_array_write(aoh, DAOS_TX_NONE, &iod, &sgl, NULL, NULL); + DCHECK(rc, "daos_array_write() failed (%d).", rc); + } else { + rc = daos_array_read(aoh, DAOS_TX_NONE, &iod, &sgl, NULL, NULL); + DCHECK(rc, "daos_array_read() failed (%d).", rc); + } + + return length; +} + +static void +DAOS_Close(void *file, IOR_param_t *param) +{ + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + + aoh.cookie = 0; +} + +static void +DAOS_Delete(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + daos_size_t cell_size, chunk_size; + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** open the array to verify it exists */ + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_destroy(aoh, DAOS_TX_NONE, NULL); + DCHECK(rc, "daos_array_destroy() failed (%d).", rc); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; +} + +static char * +DAOS_GetVersion() +{ + static char ver[1024] = {}; + + sprintf(ver, "%s", "DAOS"); + return ver; +} + +static void +DAOS_Fsync(void *file, IOR_param_t *param) +{ + return; +} + +static 
IOR_offset_t +DAOS_GetFileSize(IOR_param_t *param, MPI_Comm testComm, char *testFileName) +{ + daos_obj_id_t oid; + daos_size_t size; + int rc; + + /* + * Rank 0 (or every rank in file-per-process mode) opens the array, + * queries its size and closes it again; in shared-file mode the result + * is then broadcast from rank 0. testComm is not consulted. + */ + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** open the array to verify it exists */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_get_size(aoh, DAOS_TX_NONE, &size, NULL); + DCHECK(rc, "daos_array_get_size() failed (%d).", rc); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; + } + + /* + * size is a daos_size_t (unsigned 64-bit), so broadcast it with + * MPI_UINT64_T rather than MPI_LONG, whose width depends on the platform, + * and check the MPI return code like the other broadcasts in this file. + */ + if (!param->filePerProc) + MPI_CHECK(MPI_Bcast(&size, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD), + "Failed to bcast array size"); + + return size; +} diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c new file mode 100755 index 0000000..2838440 --- /dev/null +++ b/src/aiori-DFS.c @@ -0,0 +1,860 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/* + * Copyright (C) 2018-2019 Intel Corporation + * + * GOVERNMENT LICENSE RIGHTS-OPEN SOURCE SOFTWARE + * The Government's rights to use, modify, reproduce, release, perform, display, + * or disclose this software are subject to the terms of the Apache License as + * provided in Contract No. 8F-30005. + * Any reproduction of computer software, computer software documentation, or + * portions thereof marked with this legend must also reproduce the markings. + */ + +/* + * This file implements the abstract I/O interface for DAOS FS API. 
+ */ + +#define _BSD_SOURCE + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ior.h" +#include "iordef.h" +#include "aiori.h" +#include "utilities.h" + +dfs_t *dfs; +static daos_handle_t poh, coh; +static daos_oclass_id_t objectClass = OC_SX; +static struct d_hash_table *dir_hash; + +struct aiori_dir_hdl { + d_list_t entry; + dfs_obj_t *oh; + char name[PATH_MAX]; +}; + +enum handleType { + POOL_HANDLE, + CONT_HANDLE, + ARRAY_HANDLE +}; + +/************************** O P T I O N S *****************************/ +struct dfs_options{ + char *pool; + char *svcl; + char *group; + char *cont; + int chunk_size; + char *oclass; + int destroy; +}; + +static struct dfs_options o = { + .pool = NULL, + .svcl = NULL, + .group = NULL, + .cont = NULL, + .chunk_size = 1048576, + .oclass = NULL, + .destroy = 0, +}; + +static option_help options [] = { + {0, "dfs.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', & o.pool}, + {0, "dfs.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', & o.svcl}, + {0, "dfs.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group}, + {0, "dfs.cont", "DFS container uuid", OPTION_OPTIONAL_ARGUMENT, 's', & o.cont}, + {0, "dfs.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, + {0, "dfs.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, + {0, "dfs.destroy", "Destroy DFS Container", OPTION_FLAG, 'd', &o.destroy}, + LAST_OPTION +}; + +/**************************** P R O T O T Y P E S *****************************/ +static void *DFS_Create(char *, IOR_param_t *); +static void *DFS_Open(char *, IOR_param_t *); +static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void DFS_Close(void *, IOR_param_t *); +static void DFS_Delete(char *, IOR_param_t *); +static char* DFS_GetVersion(); +static void DFS_Fsync(void *, 
IOR_param_t *); +static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *); +static int DFS_Stat (const char *, struct stat *, IOR_param_t *); +static int DFS_Mkdir (const char *, mode_t, IOR_param_t *); +static int DFS_Rmdir (const char *, IOR_param_t *); +static int DFS_Access (const char *, int, IOR_param_t *); +static void DFS_Init(); +static void DFS_Finalize(); +static option_help * DFS_options(); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t dfs_aiori = { + .name = "DFS", + .create = DFS_Create, + .open = DFS_Open, + .xfer = DFS_Xfer, + .close = DFS_Close, + .delete = DFS_Delete, + .get_version = DFS_GetVersion, + .fsync = DFS_Fsync, + .get_file_size = DFS_GetFileSize, + .statfs = DFS_Statfs, + .mkdir = DFS_Mkdir, + .rmdir = DFS_Rmdir, + .access = DFS_Access, + .stat = DFS_Stat, + .initialize = DFS_Init, + .finalize = DFS_Finalize, + .get_options = DFS_options, +}; + +/***************************** F U N C T I O N S ******************************/ + +/* For DAOS methods. */ +#define DCHECK(rc, format, ...) \ +do { \ + int _rc = (rc); \ + \ + if (_rc != 0) { \ + fprintf(stderr, "ERROR (%s:%d): %d: %d: " \ + format"\n", __FILE__, __LINE__, rank, _rc, \ + ##__VA_ARGS__); \ + fflush(stderr); \ + exit(-1); \ + } \ +} while (0) + +#define INFO(level, format, ...) \ +do { \ + if (verbose >= level) \ + printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ +} while (0) + +#define GERR(format, ...) 
\ +do { \ + fprintf(stderr, format"\n", ##__VA_ARGS__); \ + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ +} while (0) +/* Map a hash-table link back to the aiori_dir_hdl that embeds it as 'entry'. */ +static inline struct aiori_dir_hdl * +hdl_obj(d_list_t *rlink) +{ + return container_of(rlink, struct aiori_dir_hdl, entry); +} +/* Key-compare callback: true when the record's name equals key, compared as a C string (ksize is not consulted). */ +static bool +key_cmp(struct d_hash_table *htable, d_list_t *rlink, + const void *key, unsigned int ksize) +{ + struct aiori_dir_hdl *hdl = hdl_obj(rlink); + + return (strcmp(hdl->name, (const char *)key) == 0); +} +/* Record-free callback: releases the record's DFS object and frees the handle; asserts the record is already unlinked. */ +static void +rec_free(struct d_hash_table *htable, d_list_t *rlink) +{ + struct aiori_dir_hdl *hdl = hdl_obj(rlink); + + assert(d_hash_rec_unlinked(&hdl->entry)); + dfs_release(hdl->oh); + free(hdl); +} +/* Callback table wiring key_cmp and rec_free into the directory-handle hash table. */ +static d_hash_table_ops_t hdl_hash_ops = { + .hop_key_cmp = key_cmp, + .hop_rec_free = rec_free +}; + +/* Distribute process 0's pool or container handle to others. */ +static void +HandleDistribute(daos_handle_t *handle, enum handleType type) +{ + d_iov_t global; + int rc; + + global.iov_buf = NULL; + global.iov_buf_len = 0; + global.iov_len = 0; + + assert(type == POOL_HANDLE || type == CONT_HANDLE); + if (rank == 0) { + /* Get the global handle size. 
*/ + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else + rc = daos_cont_local2global(*handle, &global); + DCHECK(rc, "Failed to get global handle size"); + } + + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, + MPI_COMM_WORLD), + "Failed to bcast global handle buffer size"); + + global.iov_len = global.iov_buf_len; + global.iov_buf = malloc(global.iov_buf_len); + if (global.iov_buf == NULL) + ERR("Failed to allocate global handle buffer"); + + if (rank == 0) { + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else + rc = daos_cont_local2global(*handle, &global); + DCHECK(rc, "Failed to create global handle"); + } + + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, + MPI_COMM_WORLD), + "Failed to bcast global pool handle"); + + if (rank != 0) { + if (type == POOL_HANDLE) + rc = daos_pool_global2local(global, handle); + else + rc = daos_cont_global2local(poh, global, handle); + DCHECK(rc, "Failed to get local handle"); + } + + free(global.iov_buf); +} + +static int +parse_filename(const char *path, char **_obj_name, char **_cont_name) +{ + char *f1 = NULL; + char *f2 = NULL; + char *fname = NULL; + char *cont_name = NULL; + int rc = 0; + + if (path == NULL || _obj_name == NULL || _cont_name == NULL) + return -EINVAL; + + if (strcmp(path, "/") == 0) { + *_cont_name = strdup("/"); + if (*_cont_name == NULL) + return -ENOMEM; + *_obj_name = NULL; + return 0; + } + + f1 = strdup(path); + if (f1 == NULL) { + rc = -ENOMEM; + goto out; + } + + f2 = strdup(path); + if (f2 == NULL) { + rc = -ENOMEM; + goto out; + } + + fname = basename(f1); + cont_name = dirname(f2); + + if (cont_name[0] == '.' 
|| cont_name[0] != '/') { + char cwd[1024]; + + if (getcwd(cwd, 1024) == NULL) { + rc = -ENOMEM; + goto out; + } + + if (strcmp(cont_name, ".") == 0) { + cont_name = strdup(cwd); + if (cont_name == NULL) { + rc = -ENOMEM; + goto out; + } + } else { + /* + * Worst case is cwd + "/" + cont_name + NUL, so reserve + * two bytes beyond the two string lengths. + */ + char *new_dir = calloc(strlen(cwd) + strlen(cont_name) + + 2, sizeof(char)); + if (new_dir == NULL) { + rc = -ENOMEM; + goto out; + } + + strcpy(new_dir, cwd); + /* + * Strip only a leading "./"; a name that merely begins + * with a dot (".hidden", "../x") is appended whole after + * a separator. + */ + if (cont_name[0] == '.' && cont_name[1] == '/') { + strcat(new_dir, &cont_name[1]); + } else { + strcat(new_dir, "/"); + strcat(new_dir, cont_name); + } + cont_name = new_dir; + } + *_cont_name = cont_name; + } else { + *_cont_name = strdup(cont_name); + if (*_cont_name == NULL) { + rc = -ENOMEM; + goto out; + } + } + + *_obj_name = strdup(fname); + if (*_obj_name == NULL) { + free(*_cont_name); + *_cont_name = NULL; + rc = -ENOMEM; + goto out; + } + +out: + if (f1) + free(f1); + if (f2) + free(f2); + return rc; +} + +/* + * Return the cached DFS object for directory name. On a cache miss the + * directory is resolved with dfs_lookup() and the new handle is inserted into + * dir_hash under the same name. + */ +static dfs_obj_t * +lookup_insert_dir(const char *name) +{ + struct aiori_dir_hdl *hdl; + d_list_t *rlink; + int rc; + + rlink = d_hash_rec_find(dir_hash, name, strlen(name)); + if (rlink != NULL) { + hdl = hdl_obj(rlink); + return hdl->oh; + } + + hdl = calloc(1, sizeof(struct aiori_dir_hdl)); + if (hdl == NULL) + GERR("failed to alloc dir handle"); + + strncpy(hdl->name, name, PATH_MAX-1); + hdl->name[PATH_MAX-1] = '\0'; + + rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, NULL, NULL); + DCHECK(rc, "dfs_lookup() of %s Failed", name); + + rc = d_hash_rec_insert(dir_hash, hdl->name, strlen(hdl->name), + &hdl->entry, true); + DCHECK(rc, "Failed to insert dir handle in hashtable"); + + return hdl->oh; +} + +static option_help * DFS_options(){ + return options; +} + +static void +DFS_Init() { + int rc; + + if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) + ERR("Invalid pool or container options\n"); + + if (o.oclass) { + objectClass = daos_oclass_name2id(o.oclass); + if (objectClass == OC_UNKNOWN) + GERR("Invalid DAOS Object class %s\n", o.oclass); + } + + rc = 
daos_init(); + DCHECK(rc, "Failed to initialize daos"); + + rc = d_hash_table_create(0, 16, NULL, &hdl_hash_ops, &dir_hash); + DCHECK(rc, "Failed to initialize dir hashtable"); + + if (rank == 0) { + uuid_t pool_uuid, co_uuid; + d_rank_list_t *svcl = NULL; + daos_pool_info_t pool_info; + daos_cont_info_t co_info; + + rc = uuid_parse(o.pool, pool_uuid); + DCHECK(rc, "Failed to parse 'Pool uuid': %s", o.pool); + + rc = uuid_parse(o.cont, co_uuid); + DCHECK(rc, "Failed to parse 'Cont uuid': %s", o.cont); + + svcl = daos_rank_list_parse(o.svcl, ":"); + if (svcl == NULL) + ERR("Failed to allocate svcl"); + + INFO(VERBOSE_1, "Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl); + INFO(VERBOSE_1, "DFS Container namespace uuid = %s\n", o.cont); + + /** Connect to DAOS pool */ + rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, + &poh, &pool_info, NULL); + d_rank_list_free(svcl); + DCHECK(rc, "Failed to connect to pool"); + + rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, + NULL); + /* If NOEXIST we create it */ + if (rc == -DER_NONEXIST) { + INFO(VERBOSE_1, "Creating DFS Container ...\n"); + + rc = daos_cont_create(poh, co_uuid, NULL, NULL); + if (rc == 0) { + rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, + &coh, &co_info, NULL); + } + } + DCHECK(rc, "Failed to create container"); + } + + HandleDistribute(&poh, POOL_HANDLE); + HandleDistribute(&coh, CONT_HANDLE); + + rc = dfs_mount(poh, coh, O_RDWR, &dfs); + DCHECK(rc, "Failed to mount DFS namespace"); +} + +static void +DFS_Finalize() +{ + int rc; + + MPI_Barrier(MPI_COMM_WORLD); + d_hash_table_destroy(dir_hash, true /* force */); + + rc = dfs_umount(dfs); + DCHECK(rc, "Failed to umount DFS namespace"); + MPI_Barrier(MPI_COMM_WORLD); + + rc = daos_cont_close(coh, NULL); + DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); + MPI_Barrier(MPI_COMM_WORLD); + + if (o.destroy) { + if (rank == 0) { + uuid_t uuid; + double t1, t2; + + INFO(VERBOSE_1, "Destorying DFS Container: %s\n", o.cont); + 
uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); + rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0) + INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); + } + + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rc) { + if (rank == 0) + DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + } + + if (rank == 0) + INFO(VERBOSE_1, "Disconnecting from DAOS POOL\n"); + + rc = daos_pool_disconnect(poh, NULL); + DCHECK(rc, "Failed to disconnect from pool"); + + MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + + if (rank == 0) + INFO(VERBOSE_1, "Finalizing DAOS..\n"); + + rc = daos_fini(); + DCHECK(rc, "Failed to finalize DAOS"); +} + +/* + * Create and open a file through the DFS interface. + * + * The path is split into parent directory and entry name, and the parent is + * resolved through the directory-handle cache. In file-per-process mode + * every rank creates its own file with O_CREAT | O_RDWR | O_EXCL. Otherwise + * rank 0 creates the file and, after a barrier, the other ranks open it with + * O_RDWR only. The dfs_obj_t handle is returned as void *. + */ +static void * +DFS_Create(char *testFileName, IOR_param_t *param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *obj = NULL, *parent = NULL; + mode_t mode; + int fd_oflag = 0; + int rc; + + assert(param); + + rc = parse_filename(testFileName, &name, &dir_name); + DCHECK(rc, "Failed to parse path %s", testFileName); + assert(dir_name); + assert(name); + + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + + mode = S_IFREG | param->mode; + if (param->filePerProc || rank == 0) { + fd_oflag |= O_CREAT | O_RDWR | O_EXCL; + + rc = dfs_open(dfs, parent, name, mode, fd_oflag, + objectClass, o.chunk_size, NULL, &obj); + DCHECK(rc, "dfs_open() of %s Failed", name); + } + if (!param->filePerProc) { + MPI_Barrier(MPI_COMM_WORLD); + if (rank != 0) { + fd_oflag |= O_RDWR; + rc = dfs_open(dfs, parent, name, mode, fd_oflag, + objectClass, o.chunk_size, NULL, &obj); + DCHECK(rc, "dfs_open() of %s Failed", name); + } + } + + if (name) + free(name); + if (dir_name) + free(dir_name); + + return ((void *)obj); +} + +/* + * Open a file through the DFS interface. 
+ *
+ * testFileName is split into a parent directory and an entry name; the
+ * parent is resolved through lookup_insert_dir() and the entry is opened
+ * O_RDWR.  Returns the dfs_obj_t handle cast to void *.
+ */
+static void *
+DFS_Open(char *testFileName, IOR_param_t *param)
+{
+	char *name = NULL, *dir_name = NULL;
+	dfs_obj_t *obj = NULL, *parent = NULL;
+	mode_t mode;
+	int rc;
+	int fd_oflag = 0;
+
+	/* param is dereferenced below; DFS_Create guards it the same way */
+	assert(param);
+
+	fd_oflag |= O_RDWR;
+	mode = S_IFREG | param->mode;
+
+	rc = parse_filename(testFileName, &name, &dir_name);
+	DCHECK(rc, "Failed to parse path %s", testFileName);
+
+	assert(dir_name);
+	assert(name);
+
+	parent = lookup_insert_dir(dir_name);
+	if (parent == NULL)
+		GERR("Failed to lookup parent dir");
+
+	rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass,
+		      o.chunk_size, NULL, &obj);
+	DCHECK(rc, "dfs_open() of %s Failed", name);
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(name);
+	free(dir_name);
+
+	return ((void *)obj);
+}
+
+/*
+ * Write or read access to file using the DFS interface.
+ *
+ * Moves `length` bytes between `buffer` and the open object `file`,
+ * starting at param->offset.  Returns `length` on success and -1 when
+ * dfs_write() or dfs_read() reports an error.  A short read is retried for
+ * the bytes still missing, at the matching file offset, until MAX_RETRY is
+ * exceeded; when param->singleXferAttempt is TRUE a short read terminates
+ * the process with exit(-1) instead.
+ */
+static IOR_offset_t
+DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length,
+	 IOR_param_t *param)
+{
+	int xferRetries = 0;
+	long long remaining = (long long)length;
+	char *ptr = (char *)buffer;
+	dfs_obj_t *obj = (dfs_obj_t *)file;
+
+	while (remaining > 0) {
+		d_iov_t iov;
+		d_sg_list_t sgl;
+		/* NOTE(review): daos_size_t is taken to be unsigned - confirm in DAOS headers */
+		daos_size_t ret = 0;
+		/* file offset of the first byte not yet transferred */
+		IOR_offset_t off = param->offset + (length - remaining);
+		int rc;
+
+		/** set memory location */
+		sgl.sg_nr = 1;
+		sgl.sg_nr_out = 0;
+		d_iov_set(&iov, (void *)ptr, remaining);
+		sgl.sg_iovs = &iov;
+
+		/* write/read file */
+		if (access == WRITE) {
+			rc = dfs_write(dfs, obj, sgl, off);
+			if (rc) {
+				fprintf(stderr, "dfs_write() failed (%d)", rc);
+				return -1;
+			}
+			ret = remaining;
+		} else {
+			rc = dfs_read(dfs, obj, sgl, off, &ret);
+			if (rc) {
+				/* ret is not trustworthy after a failed read */
+				fprintf(stderr, "dfs_read() failed(%d)", rc);
+				return -1;
+			}
+			if (ret == 0)
+				fprintf(stderr, "dfs_read() failed(%d)", rc);
+		}
+
+		if (ret < (daos_size_t)remaining) {
+			if (param->singleXferAttempt == TRUE)
+				exit(-1);
+			if (xferRetries > MAX_RETRY)
+				ERR("too many retries -- aborting");
+		}
+
+		assert(ret <= (daos_size_t)remaining);
+		remaining -= ret;
+		ptr += ret;
+		xferRetries++;
+	}
+
+	return (length);
+}
+
+/*
+ * Perform fsync().
+ *
+ * Neither argument is consulted; dfs_sync() is issued on the mounted
+ * namespace and its return value is not checked.
+ */
+static void
+DFS_Fsync(void *fd, IOR_param_t * param)
+{
+	dfs_sync(dfs);
+}
+
+/*
+ * Release the DFS object handle behind an open file.
+ */
+static void
+DFS_Close(void *fd, IOR_param_t * param)
+{
+	dfs_obj_t *handle = (dfs_obj_t *)fd;
+
+	dfs_release(handle);
+}
+
+/*
+ * Remove a file: split the path into parent directory and entry name,
+ * resolve the parent through lookup_insert_dir(), then dfs_remove() the
+ * entry with the force argument set to false.
+ */
+static void
+DFS_Delete(char *testFileName, IOR_param_t * param)
+{
+	dfs_obj_t *dir_obj = NULL;
+	char *dir_name = NULL;
+	char *name = NULL;
+	int rc;
+
+	rc = parse_filename(testFileName, &name, &dir_name);
+	DCHECK(rc, "Failed to parse path %s", testFileName);
+
+	assert(dir_name);
+	assert(name);
+
+	dir_obj = lookup_insert_dir(dir_name);
+	if (!dir_obj)
+		GERR("Failed to lookup parent dir");
+
+	rc = dfs_remove(dfs, dir_obj, name, false, NULL);
+	DCHECK(rc, "dfs_remove() of %s Failed", name);
+
+	/* free() accepts NULL, so both strings are released unconditionally */
+	free(dir_name);
+	free(name);
+}
+
+/*
+ * Report the backend version string; the buffer is static, so the caller
+ * must not free it.
+ */
+static char* DFS_GetVersion()
+{
+	static char ver[1024];
+
+	snprintf(ver, sizeof(ver), "%s", "DAOS");
+	return ver;
+}
+
+/*
+ * Use DFS stat() to return aggregate file size.
+ *
+ * Each rank looks the file up with dfs_lookup(), queries its size with
+ * dfs_get_size() and releases the handle.  With test->filePerProc set the
+ * per-rank sizes are summed over `comm`; otherwise the minimum is returned
+ * when ranks disagree.  Returns -1 if the lookup or the size query fails.
+ */
+static IOR_offset_t
+DFS_GetFileSize(IOR_param_t * test, MPI_Comm comm, char *testFileName)
+{
+	dfs_obj_t *obj;
+	daos_size_t fsize, tmpMin, tmpMax, tmpSum;
+	int rc;
+
+	rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL);
+	if (rc) {
+		fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc);
+		return -1;
+	}
+
+	rc = dfs_get_size(dfs, obj, &fsize);
+	/* release before checking rc so a failed size query does not leak obj */
+	dfs_release(obj);
+	if (rc)
+		return -1;
+
+	if (test->filePerProc == TRUE) {
+		MPI_CHECK(MPI_Allreduce(&fsize, &tmpSum, 1,
+					MPI_LONG_LONG_INT, MPI_SUM, comm),
+			  "cannot total data moved");
+		fsize = tmpSum;
+	} else {
+		MPI_CHECK(MPI_Allreduce(&fsize, &tmpMin, 1,
+					MPI_LONG_LONG_INT, MPI_MIN, comm),
+			  "cannot total data moved");
+		MPI_CHECK(MPI_Allreduce(&fsize, &tmpMax, 1,
+					MPI_LONG_LONG_INT, MPI_MAX, comm),
+			  "cannot total data moved");
+		if (tmpMin != tmpMax) {
+			if (rank == 0) {
+				WARN("inconsistent file size by different tasks");
+			}
+			/* incorrect, but now consistent across tasks */
+			fsize = tmpMin;
+		}
+	}
+
+	return (fsize);
+}
+
+/*
+ * statfs stub: *sfs is left untouched and success (0) is reported.
+ */
+static int
+DFS_Statfs(const char *path, ior_aiori_statfs_t *sfs, IOR_param_t * param)
+{
+	return 0;
+}
+
+/*
+ * Create directory `path` with `mode` through dfs_mkdir().
+ * Returns 0 on success (also when the path has no entry name) and -1 when
+ * dfs_mkdir() fails.
+ */
+static int
+DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param)
+{
+	dfs_obj_t *parent = NULL;
+	char *name = NULL, *dir_name = NULL;
+	int rc;
+
+	rc = parse_filename(path, &name, &dir_name);
+	DCHECK(rc, "Failed to parse path %s", path);
+
+	assert(dir_name);
+	if (!name) {
+		/* nothing to create; still release the parsed directory name */
+		free(dir_name);
+		return 0;
+	}
+
+	parent = lookup_insert_dir(dir_name);
+	if (parent == NULL)
+		GERR("Failed to lookup parent dir");
+
+	rc = dfs_mkdir(dfs, parent, name, mode);
+	DCHECK(rc, "dfs_mkdir() of %s Failed", name);
+
+	free(name);
+	free(dir_name);
+	if (rc)
+		return -1;
+	return rc;
+}
+
+/*
+ * Remove directory `path` through dfs_remove() with the force argument set
+ * to false.  Returns 0 on success and -1 on failure.
+ */
+static int
+DFS_Rmdir(const char *path, IOR_param_t * param)
+{
+	dfs_obj_t *parent = NULL;
+	char *name = NULL, *dir_name = NULL;
+	int rc;
+
+	rc = parse_filename(path, &name, &dir_name);
+	DCHECK(rc, "Failed to parse path %s", path);
+
+	assert(dir_name);
+	assert(name);
+
+	parent = lookup_insert_dir(dir_name);
+	if (parent == NULL)
+		GERR("Failed to lookup parent dir");
+
+	rc = dfs_remove(dfs, parent, name, false, NULL);
+	DCHECK(rc, "dfs_remove() of %s Failed", name);
+
+	free(name);
+	free(dir_name);
+	if (rc)
+		return -1;
+	return rc;
+}
+
+/*
+ * Probe whether `path` exists by calling dfs_stat() on it; `mode` is not
+ * consulted.  Returns 0 when the stat succeeds and -1 otherwise.
+ */
+static int
+DFS_Access(const char *path, int mode, IOR_param_t * param)
+{
+	dfs_obj_t *parent = NULL;
+	char *name = NULL, *dir_name = NULL;
+	struct stat stbuf;
+	int rc;
+
+	rc = parse_filename(path, &name, &dir_name);
+	DCHECK(rc, "Failed to parse path %s", path);
+
+	assert(dir_name);
+
+	parent = lookup_insert_dir(dir_name);
+	if (parent == NULL)
+		GERR("Failed to lookup parent dir");
+
+	/* an entry name of "." is dropped so dfs_stat() gets a NULL name */
+	if (name && strcmp(name, ".") == 0) {
+		free(name);
+		name = NULL;
+	}
+	rc = dfs_stat(dfs, parent, name, &stbuf);
+
+	free(name);
+	free(dir_name);
+	if (rc)
+		return -1;
+	return rc;
+}
+
+/*
+ * Fill `buf` with the attributes of `path` through dfs_stat().
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param)
+{
+	dfs_obj_t *parent = NULL;
+	char *name = NULL, *dir_name = NULL;
+	int rc;
+
+	rc = parse_filename(path, &name, &dir_name);
+	DCHECK(rc, "Failed to parse path %s", path);
+
+	assert(dir_name);
+	assert(name);
+
+	parent = lookup_insert_dir(dir_name);
+	if (parent == NULL)
+		GERR("Failed to lookup parent dir");
+
+	rc = dfs_stat(dfs, parent, name, buf);
+	DCHECK(rc, "dfs_stat() of %s Failed (%d)", name, rc);
+
+	free(name);
+	free(dir_name);
+	if (rc)
+		return -1;
+	return rc;
+}
diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 707b3a7..04c10be 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -74,6 +74,7 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) } MPI_File fd; int mpi_mode = MPI_MODE_UNIQUE_OPEN; + MPI_Info mpiHints = MPI_INFO_NULL; if ((mode & W_OK) && (mode & R_OK)) mpi_mode |= MPI_MODE_RDWR; @@ -82,12 +83,15 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) else
mpi_mode |= MPI_MODE_RDONLY; - int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, - MPI_INFO_NULL, &fd); + SetHints(&mpiHints, param->hintsFileName); + + int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, mpiHints, &fd); if (!ret) MPI_File_close(&fd); + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); return ret; } @@ -497,6 +501,7 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; MPI_File fd; MPI_Comm comm; + MPI_Info mpiHints = MPI_INFO_NULL; if (test->filePerProc == TRUE) { comm = MPI_COMM_SELF; @@ -504,12 +509,15 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, comm = testComm; } - MPI_CHECKF(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY, - MPI_INFO_NULL, &fd), - "cannot open file to get file size: %s", testFileName); + SetHints(&mpiHints, test->hintsFileName); + MPI_CHECK(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY, + mpiHints, &fd), + "cannot open file to get file size"); MPI_CHECK(MPI_File_get_size(fd, (MPI_Offset *) & aggFileSizeFromStat), "cannot get file size"); MPI_CHECK(MPI_File_close(&fd), "cannot close file"); + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); if (test->filePerProc == TRUE) { MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, diff --git a/src/aiori.c b/src/aiori.c index 41773d9..a72180d 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -41,6 +41,10 @@ ior_aiori_t *available_aiori[] = { #ifdef USE_POSIX_AIORI &posix_aiori, +#endif +#ifdef USE_DAOS_AIORI + &daos_aiori, + &dfs_aiori, #endif & dummy_aiori, #ifdef USE_HDF5_AIORI diff --git a/src/aiori.h b/src/aiori.h index f4d22b0..da93a1a 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -96,6 +96,8 @@ enum bench_type { }; extern ior_aiori_t dummy_aiori; +extern ior_aiori_t daos_aiori; +extern ior_aiori_t dfs_aiori; extern ior_aiori_t hdf5_aiori; extern ior_aiori_t hdfs_aiori; 
extern ior_aiori_t ime_aiori; diff --git a/src/ior-output.c b/src/ior-output.c index 7049a97..560d995 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -318,7 +318,8 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("TestID", test->id); PrintKeyVal("StartTime", CurrentTimeString()); /* if pvfs2:, then skip */ - if (Regex(test->testFileName, "^[a-z][a-z].*:") == 0) { + if (strcasecmp(test->api, "DFS") && + Regex(test->testFileName, "^[a-z][a-z].*:") == 0) { DisplayFreespace(test); } diff --git a/src/ior.c b/src/ior.c index eb41603..2d08234 100755 --- a/src/ior.c +++ b/src/ior.c @@ -143,6 +143,7 @@ int ior_main(int argc, char **argv) sleep(5); fprintf(out_logfile, "\trank %d: awake.\n", rank); } + TestIoSys(tptr); ShowTestEnd(tptr); } @@ -155,10 +156,10 @@ int ior_main(int argc, char **argv) /* display finish time */ PrintTestEnds(); - MPI_CHECK(MPI_Finalize(), "cannot finalize MPI"); - aiori_finalize(tests_head); + MPI_CHECK(MPI_Finalize(), "cannot finalize MPI"); + DestroyTests(tests_head); return totalErrorCount; @@ -293,7 +294,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); - if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { + if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0 && + strcasecmp(params->api, "DAOS") != 0) { if (verbose >= VERBOSE_0 && rank == 0) { if ((params->expectedAggFileSize != point->aggFileSizeFromXfer) diff --git a/src/mdtest-main.c b/src/mdtest-main.c index 8f4e608..6dbc7bd 100644 --- a/src/mdtest-main.c +++ b/src/mdtest-main.c @@ -2,12 +2,9 @@ #include "aiori.h" int main(int argc, char **argv) { - aiori_initialize(NULL); MPI_Init(&argc, &argv); - mdtest_run(argc, argv, MPI_COMM_WORLD, stdout); - MPI_Finalize(); - aiori_finalize(NULL); + return 0; } diff --git a/src/mdtest.c b/src/mdtest.c index cfb7c98..17f6e5c 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ 
-367,6 +367,7 @@ static void create_file (const char *path, uint64_t itemNum) { } else { param.openFlags = IOR_CREAT | IOR_WRONLY; param.filePerProc = !shared_file; + param.mode = FILEMODE; VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." ); @@ -444,6 +445,7 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 //create files param.openFlags = IOR_WRONLY | IOR_CREAT; + param.mode = FILEMODE; aiori_fh = backend->create (curr_item, ¶m); if (NULL == aiori_fh) { FAIL("unable to create file %s", curr_item); @@ -1546,6 +1548,9 @@ void display_freespace(char *testdirpath) strcpy(dirpath, "."); } + if (param.api && strcasecmp(param.api, "DFS") == 0) + return; + VERBOSE(3,5,"Before show_file_system_size, dirpath is '%s'", dirpath ); show_file_system_size(dirpath); VERBOSE(3,5, "After show_file_system_size, dirpath is '%s'\n", dirpath ); @@ -1939,6 +1944,9 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * MPI_Comm_rank(testComm, &rank); MPI_Comm_size(testComm, &size); + if (backend->initialize) + backend->initialize(); + pid = getpid(); uid = getuid(); @@ -2224,5 +2232,9 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if (random_seed > 0) { free(rand_array); } + + if (backend->finalize) + backend->finalize(NULL); + return summary_table; } diff --git a/src/option.c b/src/option.c index b1833c1..41ed09d 100644 --- a/src/option.c +++ b/src/option.c @@ -89,6 +89,10 @@ static int print_value(option_help * o){ pos += printf("=%lld", *(long long*) o->variable); break; } + case('u'):{ + pos += printf("=%lu", *(uint64_t*) o->variable); + break; + } } } if (o->arg == OPTION_FLAG && (*(int*)o->variable) != 0){ @@ -180,6 +184,10 @@ static int print_option_value(option_help * o){ pos += printf("=%lld", *(long long*) o->variable); break; } + case('u'):{ + pos += printf("=%lu", *(uint64_t*) o->variable); + break; + } } }else{ //printf(" "); @@ -308,11 +316,14 @@ 
static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi if(strlen(arg) > 1){ printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar); } - break; - } + } case('l'):{ *(long long*) o->variable = string_to_bytes(arg); break; + } + case('u'):{ + *(uint64_t*) o->variable = string_to_bytes(arg); + break; } default: printf("ERROR: Unknown option type %c\n", o->type);