From 8d0cddd21e7461a4bc26cac0c9d391dcfcf9a891 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 13 Jun 2018 18:37:37 +0000 Subject: [PATCH 01/66] Add DFS ior/mdtest driver Signed-off-by: Mohamad Chaarawi --- configure.ac | 19 ++ src/Makefile.am | 4 + src/aiori-DFS.c | 562 ++++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 4 + src/ior.c | 11 +- src/mdtest.c | 10 + 7 files changed, 612 insertions(+), 1 deletion(-) create mode 100755 src/aiori-DFS.c diff --git a/configure.ac b/configure.ac index 4f9461c..af6e49a 100755 --- a/configure.ac +++ b/configure.ac @@ -142,8 +142,27 @@ AM_COND_IF([USE_POSIX_AIORI],[ AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI]) ]) +# DFS IO support +AC_ARG_WITH([daos], + [AS_HELP_STRING([--with-daos], + [support IO with DFS backend @<:@default=no@:>@])], + [], + [with_daos=no]) +AS_IF([test "x$with_daos" != xno], + DAOS="yes" + LDFLAGS="$LDFLAGS -L$with_daos/lib" + CPPFLAGS="$CPPFLAGS -I$with_daos/include" + AC_CHECK_HEADERS(daos_types.h,, [unset DAOS]) + AC_CHECK_LIB([uuid], [uuid_generate],, [unset DAOS]) + AC_CHECK_LIB([daos_common], [daos_sgl_init],, [unset DAOS]) + AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) + AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS])) +AM_CONDITIONAL([USE_DFS_AIORI], [test x$DAOS = xyes]) +AM_COND_IF([USE_DFS_AIORI],[ + AC_DEFINE([USE_DFS_AIORI], [], [Build DFS backend AIORI]) +]) # aws4c is needed for the S3 backend (see --with-S3, below). 
# Version 0.5.2 of aws4c is available at https://github.com/jti-lanl/aws4c.git diff --git a/src/Makefile.am b/src/Makefile.am index 7d2575b..aea1824 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -53,6 +53,10 @@ if USE_POSIX_AIORI extraSOURCES += aiori-POSIX.c endif +if USE_DFS_AIORI +extraSOURCES += aiori-DFS.c +endif + if USE_S3_AIORI extraSOURCES += aiori-S3.c diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c new file mode 100755 index 0000000..ad0c712 --- /dev/null +++ b/src/aiori-DFS.c @@ -0,0 +1,562 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/******************************************************************************\ +* * +* Copyright (c) 2003, The Regents of the University of California * +* See the file COPYRIGHT for a complete copyright notice and license. * +* * +******************************************************************************** +* +* Implement of abstract I/O interface for DFS. +* +\******************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ior.h" +#include "aiori.h" +#include "iordef.h" +#include "utilities.h" + +dfs_t *dfs; + +static int +parse_filename(const char *path, char **_obj_name, char **_cont_name) +{ + char *f1 = NULL; + char *f2 = NULL; + char *fname = NULL; + char *cont_name = NULL; + int rc = 0; + + if (path == NULL || _obj_name == NULL || _cont_name == NULL) + return -EINVAL; + + if (strcmp(path, "/") == 0) { + *_cont_name = strdup("/"); + if (*_cont_name == NULL) + return -ENOMEM; + *_obj_name = NULL; + return 0; + } + + f1 = strdup(path); + if (f1 == NULL) + D_GOTO(out, rc = -ENOMEM); + + f2 = strdup(path); + if (f2 == NULL) + D_GOTO(out, rc = -ENOMEM); + + fname = basename(f1); + cont_name = dirname(f2); + + if (cont_name[0] == '.' 
|| cont_name[0] != '/') { + char *cwd; + + //getcwd(cwd, 1024); + cwd = strdup("/"); + if (strcmp(cont_name, ".") == 0) { + cont_name = strdup(cwd); + if (cont_name == NULL) + D_GOTO(out, rc = -ENOMEM); + } else { + char *new_dir = calloc(strlen(cwd) + strlen(cont_name) + + 1, sizeof(char)); + if (new_dir == NULL) + D_GOTO(out, rc = -ENOMEM); + + strcpy(new_dir, cwd); + if (cont_name[0] == '.') { + strcat(new_dir, &cont_name[1]); + } else { + strcat(new_dir, "/"); + strcat(new_dir, cont_name); + } + cont_name = new_dir; + } + *_cont_name = cont_name; + } else { + *_cont_name = strdup(cont_name); + if (*_cont_name == NULL) + D_GOTO(out, rc = -ENOMEM); + } + + *_obj_name = strdup(fname); + if (*_obj_name == NULL) { + free(*_cont_name); + *_cont_name = NULL; + D_GOTO(out, rc = -ENOMEM); + } + +out: + if (f1) + free(f1); + if (f2) + free(f2); + return rc; +} + +/**************************** P R O T O T Y P E S *****************************/ +static void *DFS_Create(char *, IOR_param_t *); +static void *DFS_Open(char *, IOR_param_t *); +static IOR_offset_t DFS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void DFS_Close(void *, IOR_param_t *); +static void DFS_Delete(char *, IOR_param_t *); +static void DFS_SetVersion(IOR_param_t *); +static void DFS_Fsync(void *, IOR_param_t *); +static IOR_offset_t DFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int DFS_Statfs (const char *, ior_aiori_statfs_t *, IOR_param_t *); +static int DFS_Stat (const char *, struct stat *, IOR_param_t *); +static int DFS_Mkdir (const char *, mode_t, IOR_param_t *); +static int DFS_Rmdir (const char *, IOR_param_t *); +static int DFS_Access (const char *, int, IOR_param_t *); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t dfs_aiori = { + .name = "DFS", + .create = DFS_Create, + .open = DFS_Open, + .xfer = DFS_Xfer, + .close = DFS_Close, + .delete = DFS_Delete, + .set_version = DFS_SetVersion, + .fsync = 
DFS_Fsync, + .get_file_size = DFS_GetFileSize, + .statfs = DFS_Statfs, + .mkdir = DFS_Mkdir, + .rmdir = DFS_Rmdir, + .access = DFS_Access, + .stat = DFS_Stat, +}; + +/***************************** F U N C T I O N S ******************************/ + +int +dfs_init(void) { + int rc; + + rc = daos_init(); + if (rc) { + fprintf(stderr, "daos_init() failed with %d\n", rc); + return rc; + } + + rc = dfs_mount(&dfs); + if (rc) { + fprintf(stderr, "dfs_mount failed (%d)\n", rc); + return 1; + } + + return rc; +} + +int dfs_finalize(void) +{ + dfs_umount(dfs); + daos_fini(); + return 0; +} + +/* + * Creat and open a file through the DFS interface. + */ +static void * +DFS_Create(char *testFileName, IOR_param_t *param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *obj = NULL, *parent = NULL; + mode_t pmode; + int fd_oflag = 0; + int rc; + + fd_oflag |= O_CREAT | O_RDWR; + + rc = parse_filename(testFileName, &name, &dir_name); + if (rc) + goto out; + + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + mode_t mode = S_IFREG | param->mode; + rc = dfs_open(dfs, parent, name, mode, fd_oflag, NULL, &obj); + if (rc) + goto out; + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); + + return ((void *)obj); +} + +/* + * Open a file through the DFS interface. 
+ */ +static void *DFS_Open(char *testFileName, IOR_param_t *param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *obj = NULL, *parent = NULL; + mode_t pmode; + int rc; + int fd_oflag = 0; + + fd_oflag |= O_RDWR; + + rc = parse_filename(testFileName, &name, &dir_name); + if (rc) + goto out; + + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + rc = dfs_open(dfs, parent, name, S_IFREG, fd_oflag, NULL, &obj); + if (rc) + goto out; + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); + + return ((void *)obj); +} + +/* + * Write or read access to file using the DFS interface. + */ +static IOR_offset_t +DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, + IOR_param_t *param) +{ + int xferRetries = 0; + long long remaining = (long long)length; + char *ptr = (char *)buffer; + daos_size_t ret; + int rc; + dfs_obj_t *obj; + + obj = (dfs_obj_t *)file; + + while (remaining > 0) { + daos_iov_t iov; + daos_sg_list_t sgl; + + /** set memory location */ + sgl.sg_nr = 1; + sgl.sg_nr_out = 0; + daos_iov_set(&iov, (void *)ptr, remaining); + sgl.sg_iovs = &iov; + + /* write/read file */ + if (access == WRITE) { + rc = dfs_write(dfs, obj, sgl, param->offset); + if (rc) + ERR("write() failed"); + ret = remaining; + } else { + rc = dfs_read(dfs, obj, sgl, param->offset, &ret); + if (rc || ret == 0) + ERR("read() failed"); + } + + if (ret < remaining) { + if (param->singleXferAttempt == TRUE) + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), + "barrier error"); + if (xferRetries > MAX_RETRY) + ERR("too many retries -- aborting"); + } + + assert(ret >= 0); + assert(ret <= remaining); + remaining -= ret; + ptr += ret; + xferRetries++; + } + + return (length); +} + +/* + * Perform fsync(). + */ +static void DFS_Fsync(void *fd, IOR_param_t * param) +{ + return; +} + +/* + * Close a file through the DFS interface. 
+ */ +static void DFS_Close(void *fd, IOR_param_t * param) +{ + dfs_release((dfs_obj_t *)fd); +} + +/* + * Delete a file through the DFS interface. + */ +static void DFS_Delete(char *testFileName, IOR_param_t * param) +{ + char *name = NULL, *dir_name = NULL; + dfs_obj_t *parent = NULL; + mode_t pmode; + int rc; + + rc = parse_filename(testFileName, &name, &dir_name); + if (rc) + goto out; + + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + rc = dfs_remove(dfs, parent, name); + if (rc) + goto out; + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); +} + +/* + * Determine api version. + */ +static void DFS_SetVersion(IOR_param_t * test) +{ + strcpy(test->apiVersion, test->api); +} + +/* + * Use DFS stat() to return aggregate file size. + */ +static IOR_offset_t DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, + char *testFileName) +{ + dfs_obj_t *obj; + daos_size_t fsize, tmpMin, tmpMax, tmpSum; + int rc; + + rc = dfs_lookup(dfs, testFileName, &obj, NULL); + if (rc) + return -1; + + rc = dfs_get_size(dfs, obj, &fsize); + if (rc) + return -1; + + dfs_release(obj); + + if (test->filePerProc == TRUE) { + MPI_CHECK(MPI_Allreduce(&fsize, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, testComm), + "cannot total data moved"); + fsize = tmpSum; + } else { + MPI_CHECK(MPI_Allreduce(&fsize, &tmpMin, 1, + MPI_LONG_LONG_INT, MPI_MIN, testComm), + "cannot total data moved"); + MPI_CHECK(MPI_Allreduce(&fsize, &tmpMax, 1, + MPI_LONG_LONG_INT, MPI_MAX, testComm), + "cannot total data moved"); + if (tmpMin != tmpMax) { + if (rank == 0) { + WARN("inconsistent file size by different tasks"); + } + /* incorrect, but now consistent across tasks */ + fsize = tmpMin; + } + } + + return (fsize); +} + +static int +DFS_Statfs(const char *path, ior_aiori_statfs_t *sfs, IOR_param_t * param) +{ + return 0; +} + +static int +DFS_Mkdir (const char *path, mode_t 
mode, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; + mode_t pmode; + char *name = NULL, *dir_name = NULL; + int rc; + + rc = parse_filename(path, &name, &dir_name); + if (rc) + return rc; + + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + rc = dfs_mkdir(dfs, parent, name, mode); + if (rc) + goto out; + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); + return rc; +} + +static int +DFS_Rmdir (const char *path, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; + mode_t pmode; + char *name = NULL, *dir_name = NULL; + int rc; + + rc = parse_filename(path, &name, &dir_name); + if (rc) + return rc; + + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + rc = dfs_remove(dfs, parent, name); + if (rc) + goto out; + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); + return rc; +} + +static int +DFS_Access (const char *path, int mode, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; + mode_t pmode; + char *name = NULL, *dir_name = NULL; + struct stat stbuf; + int rc; + + rc = parse_filename(path, &name, &dir_name); + if (rc) + return rc; + + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + if (strcmp(name, ".") == 0) { + free(name); + name = NULL; + } + rc = dfs_stat(dfs, parent, name, &stbuf); + if (rc) { + rc = -1; + errno = -ENOENT; + goto out; + } + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); + return rc; +} + +static int +DFS_Stat (const char *path, struct stat *buf, IOR_param_t * param) +{ + dfs_obj_t *parent = NULL; + mode_t pmode; + char *name = NULL, *dir_name = NULL; + int rc; + + rc = parse_filename(path, &name, &dir_name); + if (rc) + return rc; 
+ + assert(dir_name); + assert(name); + + rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + if (rc || !S_ISDIR(pmode)) + goto out; + + rc = dfs_stat(dfs, parent, name, buf); + if (rc) + goto out; + +out: + if (name) + free(name); + if (dir_name) + free(dir_name); + if (parent) + dfs_release(parent); + return rc; +} diff --git a/src/aiori.c b/src/aiori.c index 677c1ea..a40cbd6 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -51,6 +51,9 @@ ior_aiori_t *available_aiori[] = { &s3_aiori, &s3_plus_aiori, &s3_emc_aiori, +#endif +#ifdef USE_DFS_AIORI + &dfs_aiori, #endif NULL }; diff --git a/src/aiori.h b/src/aiori.h index 4ee400a..9e5695e 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -88,11 +88,15 @@ extern ior_aiori_t mmap_aiori; extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_emc_aiori; +extern ior_aiori_t dfs_aiori; const ior_aiori_t *aiori_select (const char *api); int aiori_count (void); const char *aiori_default (void); +int dfs_init(void); +int dfs_finalize(void); + IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName); diff --git a/src/ior.c b/src/ior.c index b92b40d..e4dc03b 100755 --- a/src/ior.c +++ b/src/ior.c @@ -733,7 +733,7 @@ static void DisplayUsage(char **argv) { char *opts[] = { "OPTIONS:", - " -a S api -- API for I/O [POSIX|MMAP|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]", + " -a S api -- API for I/O [POSIX|DFS|MMAP|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]", " -A N refNum -- user supplied reference number to include in the summary", " -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", " -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers", @@ -2037,6 +2037,11 @@ static void TestIoSys(IOR_test_t *test) /* bind I/O calls to specific API */ AioriBind(params->api, params); +#ifdef USE_DFS_AIORI + if (strcmp(params->api, "DFS") == 0) + dfs_init(); +#endif + /* show test setup */ if (rank == 0 && verbose >= VERBOSE_0) ShowSetup(params); @@ -2310,6 +2315,10 @@ static 
void TestIoSys(IOR_test_t *test) /* Sync with the tasks that did not participate in this test */ MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); +#ifdef USE_DFS_AIORI + if (strcmp(params->api, "DFS") == 0) + dfs_finalize(); +#endif } /* diff --git a/src/mdtest.c b/src/mdtest.c index aadbf78..3245c47 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1936,6 +1936,11 @@ int main(int argc, char **argv) { } } +#ifdef USE_DFS_AIORI + if (strcmp(backend_name, "DFS") == 0) + dfs_init(); +#endif + if (!create_only && !stat_only && !read_only && !remove_only) { create_only = stat_only = read_only = remove_only = 1; if (( rank == 0 ) && ( verbose >= 1 )) { @@ -2411,6 +2416,11 @@ int main(int argc, char **argv) { free(rand_array); } +#ifdef USE_DFS_AIORI + if (strcmp(backend_name, "DFS") == 0) + dfs_finalize(); +#endif + MPI_Finalize(); exit(0); } From 1768eff5529f4b99cb322b096d2dd52546af8ac3 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 20 Jun 2018 21:25:22 +0000 Subject: [PATCH 02/66] update DFS plugin Signed-off-by: Mohamad Chaarawi --- .gitignore | 4 ++ configure.ac | 14 +++++ src/aiori-DFS.c | 132 +++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 73dd929..5ba0f00 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,8 @@ Makefile.in aclocal.m4 config.log config.status +COPYING +INSTALL config/compile config/config.guess config/config.sub @@ -11,11 +13,13 @@ config/install-sh config/missing configure contrib/.deps/ +contrib/cbif contrib/Makefile contrib/Makefile.in doc/Makefile doc/Makefile.in src/.deps/ +src/mdtest src/Makefile src/Makefile.in src/config.h diff --git a/configure.ac b/configure.ac index af6e49a..9394f59 100755 --- a/configure.ac +++ b/configure.ac @@ -142,6 +142,18 @@ AM_COND_IF([USE_POSIX_AIORI],[ AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI]) ]) +AC_ARG_WITH([cart], + [AS_HELP_STRING([--with-cart], + [Build DAOS ROMIO 
driver[default=no]])],, + [with_cart=no]) + +AS_IF([test "x$with_cart" != xno], + CART="yes" + LDFLAGS="$LDFLAGS -L$with_cart/lib" + CPPFLAGS="$CPPFLAGS -I$with_cart/include/" + AC_CHECK_HEADERS(gurt/common.h,, [unset CART]) + AC_CHECK_LIB([gurt], [d_rank_list_alloc],, [unset CART])) + # DFS IO support AC_ARG_WITH([daos], [AS_HELP_STRING([--with-daos], @@ -159,6 +171,8 @@ AS_IF([test "x$with_daos" != xno], AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS])) +AS_IF([test "x$CART" != xyes], [unset DAOS]) + AM_CONDITIONAL([USE_DFS_AIORI], [test x$DAOS = xyes]) AM_COND_IF([USE_DFS_AIORI],[ AC_DEFINE([USE_DFS_AIORI], [], [Build DFS backend AIORI]) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index ad0c712..4e70d3b 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -36,6 +36,7 @@ #include "utilities.h" dfs_t *dfs; +daos_handle_t poh, coh; static int parse_filename(const char *path, char **_obj_name, char **_cont_name) @@ -151,20 +152,121 @@ ior_aiori_t dfs_aiori = { /***************************** F U N C T I O N S ******************************/ +/* MSC - Make a generic DAOS function instead */ +static d_rank_list_t * +daos_rank_list_parse(const char *str, const char *sep) +{ + d_rank_t *buf; + int cap = 8; + d_rank_list_t *ranks = NULL; + char *s, *p; + int n = 0; + + buf = malloc(sizeof(d_rank_t) * cap); + if (buf == NULL) + goto out; + s = strdup(str); + if (s == NULL) + goto out_buf; + + while ((s = strtok_r(s, sep, &p)) != NULL) { + if (n == cap) { + d_rank_t *buf_new; + int cap_new; + + /* Double the buffer. 
*/ + cap_new = cap * 2; + buf_new = malloc(sizeof(d_rank_t) * cap_new); + if (buf_new == NULL) + goto out_s; + memcpy(buf_new, buf, sizeof(d_rank_t) * n); + free(buf); + buf = buf_new; + cap = cap_new; + } + buf[n] = atoi(s); + n++; + s = NULL; + } + + ranks = d_rank_list_alloc(n); + if (ranks == NULL) + goto out_s; + memcpy(ranks->rl_ranks, buf, sizeof(*buf) * n); + +out_s: + if (s) + free(s); +out_buf: + free(buf); +out: + return ranks; +} + int dfs_init(void) { - int rc; - + char *pool_str, *svcl_str, *group_str; + uuid_t pool_uuid, co_uuid; + daos_pool_info_t pool_info; + daos_cont_info_t co_info; + d_rank_list_t *svcl = NULL; + int rc; + rc = daos_init(); if (rc) { fprintf(stderr, "daos_init() failed with %d\n", rc); return rc; } - rc = dfs_mount(&dfs); + pool_str = getenv("DAOS_POOL"); + if (!pool_str) { + fprintf(stderr, "missing pool uuid\n"); + return -1; + } + if (uuid_parse(pool_str, pool_uuid) < 0) { + fprintf(stderr, "Invalid pool uuid\n"); + return -1; + } + + svcl_str = getenv("DAOS_SVCL"); + if (!svcl_str) { + fprintf(stderr, "missing pool service rank list\n"); + return -1; + } + svcl = daos_rank_list_parse(svcl_str, ":"); + if (svcl == NULL) { + fprintf(stderr, "Invalid pool service rank list\n"); + return -1; + } + + group_str = getenv("DAOS_GROUP"); + + /** Connect to DAOS pool */ + rc = daos_pool_connect(pool_uuid, group_str, svcl, DAOS_PC_RW, + &poh, &pool_info, NULL); + if (rc < 0) { + fprintf(stderr, "Failed to connect to pool %s %s (%d)\n", + pool_str, svcl_str, rc); + return -1; + } + + uuid_generate(co_uuid); + rc = daos_cont_create(poh, co_uuid, NULL); + if (rc) { + fprintf(stderr, "Failed to create container (%d)\n", rc); + return -1; + } + + rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL); + if (rc) { + fprintf(stderr, "Failed to open container (%d)\n", rc); + return -1; + } + + rc = dfs_mount(poh, coh, &dfs); if (rc) { fprintf(stderr, "dfs_mount failed (%d)\n", rc); - return 1; + return -1; } return rc; @@ -173,6 
+275,8 @@ dfs_init(void) { int dfs_finalize(void) { dfs_umount(dfs); + daos_cont_close(coh, NULL); + daos_pool_disconnect(poh, NULL); daos_fini(); return 0; } @@ -198,7 +302,7 @@ DFS_Create(char *testFileName, IOR_param_t *param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; @@ -238,7 +342,7 @@ static void *DFS_Open(char *testFileName, IOR_param_t *param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; @@ -346,11 +450,11 @@ static void DFS_Delete(char *testFileName, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; - rc = dfs_remove(dfs, parent, name); + rc = dfs_remove(dfs, parent, name, false); if (rc) goto out; @@ -381,7 +485,7 @@ static IOR_offset_t DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, daos_size_t fsize, tmpMin, tmpMax, tmpSum; int rc; - rc = dfs_lookup(dfs, testFileName, &obj, NULL); + rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL); if (rc) return -1; @@ -436,7 +540,7 @@ DFS_Mkdir (const char *path, mode_t mode, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; @@ -469,11 +573,11 @@ DFS_Rmdir (const char *path, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; - rc = dfs_remove(dfs, parent, name); + rc = dfs_remove(dfs, parent, name, false); if (rc) goto out; @@ -503,7 +607,7 @@ DFS_Access (const char *path, int mode, IOR_param_t * 
param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; @@ -543,7 +647,7 @@ DFS_Stat (const char *path, struct stat *buf, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, &parent, &pmode); + rc = dfs_lookup(dfs, dir_name, O_RDONLY, &parent, &pmode); if (rc || !S_ISDIR(pmode)) goto out; From 07ec65c0d58adeacac4dd989190201350182b771 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 19 Jul 2018 21:36:29 +0000 Subject: [PATCH 03/66] remove rank_list_parse as it is exposed by DAOS API now. Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 51 ------------------------------------------------- 1 file changed, 51 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 4e70d3b..6f0c07b 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -152,57 +152,6 @@ ior_aiori_t dfs_aiori = { /***************************** F U N C T I O N S ******************************/ -/* MSC - Make a generic DAOS function instead */ -static d_rank_list_t * -daos_rank_list_parse(const char *str, const char *sep) -{ - d_rank_t *buf; - int cap = 8; - d_rank_list_t *ranks = NULL; - char *s, *p; - int n = 0; - - buf = malloc(sizeof(d_rank_t) * cap); - if (buf == NULL) - goto out; - s = strdup(str); - if (s == NULL) - goto out_buf; - - while ((s = strtok_r(s, sep, &p)) != NULL) { - if (n == cap) { - d_rank_t *buf_new; - int cap_new; - - /* Double the buffer. 
*/ - cap_new = cap * 2; - buf_new = malloc(sizeof(d_rank_t) * cap_new); - if (buf_new == NULL) - goto out_s; - memcpy(buf_new, buf, sizeof(d_rank_t) * n); - free(buf); - buf = buf_new; - cap = cap_new; - } - buf[n] = atoi(s); - n++; - s = NULL; - } - - ranks = d_rank_list_alloc(n); - if (ranks == NULL) - goto out_s; - memcpy(ranks->rl_ranks, buf, sizeof(*buf) * n); - -out_s: - if (s) - free(s); -out_buf: - free(buf); -out: - return ranks; -} - int dfs_init(void) { char *pool_str, *svcl_str, *group_str; From 5fb850c8108eb3c06fa3d0201908b3bc074a20bb Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 23 Aug 2018 21:58:53 +0000 Subject: [PATCH 04/66] - update the DFS driver to latest DFS API. - update cmd line options to add DAOS Pool and Container uuid and SVCL - Add init/finalize backend functions. Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 195 +++++++++++++++++++++++++++++--------------- src/aiori.h | 5 +- src/ior.c | 17 ++-- src/ior.h | 6 ++ src/mdtest.c | 95 +++++++++++++++++---- src/parse_options.c | 9 +- 6 files changed, 231 insertions(+), 96 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 6f0c07b..f22bbbd 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -6,6 +6,14 @@ * Copyright (c) 2003, The Regents of the University of California * * See the file COPYRIGHT for a complete copyright notice and license. * * * +* Copyright (C) 2018 Intel Corporation +* +* GOVERNMENT LICENSE RIGHTS-OPEN SOURCE SOFTWARE +* The Government's rights to use, modify, reproduce, release, perform, display, +* or disclose this software are subject to the terms of the Apache License as +* provided in Contract No. 8F-30005. +* Any reproduction of computer software, computer software documentation, or +* portions thereof marked with this legend must also reproduce the markings. ******************************************************************************** * * Implement of abstract I/O interface for DFS. 
@@ -70,10 +78,11 @@ parse_filename(const char *path, char **_obj_name, char **_cont_name) cont_name = dirname(f2); if (cont_name[0] == '.' || cont_name[0] != '/') { - char *cwd; + char cwd[1024]; + + if (getcwd(cwd, 1024) == NULL) + D_GOTO(out, rc = -ENOMEM); - //getcwd(cwd, 1024); - cwd = strdup("/"); if (strcmp(cont_name, ".") == 0) { cont_name = strdup(cwd); if (cont_name == NULL) @@ -130,6 +139,8 @@ static int DFS_Stat (const char *, struct stat *, IOR_param_t *); static int DFS_Mkdir (const char *, mode_t, IOR_param_t *); static int DFS_Rmdir (const char *, IOR_param_t *); static int DFS_Access (const char *, int, IOR_param_t *); +static int DFS_Init(IOR_param_t *param); +static int DFS_Finalize(IOR_param_t *param); /************************** D E C L A R A T I O N S ***************************/ @@ -148,85 +159,122 @@ ior_aiori_t dfs_aiori = { .rmdir = DFS_Rmdir, .access = DFS_Access, .stat = DFS_Stat, + .init = DFS_Init, + .finalize = DFS_Finalize, }; /***************************** F U N C T I O N S ******************************/ -int -dfs_init(void) { - char *pool_str, *svcl_str, *group_str; +static int +DFS_Init(IOR_param_t *param) { uuid_t pool_uuid, co_uuid; daos_pool_info_t pool_info; daos_cont_info_t co_info; d_rank_list_t *svcl = NULL; + bool cont_created = false; int rc; - + + if (uuid_parse(param->daosPool, pool_uuid) < 0) { + fprintf(stderr, "Invalid pool uuid\n"); + return -1; + } + + if (uuid_parse(param->daosCont, co_uuid) < 0) { + fprintf(stderr, "Invalid pool uuid\n"); + return -1; + } + + svcl = daos_rank_list_parse(param->daosPoolSvc, ":"); + if (svcl == NULL) { + fprintf(stderr, "Invalid pool service rank list\n"); + return -1; + } + + printf("Pool uuid = %s, SVCL = %s\n", param->daosPool, + param->daosPoolSvc); + + printf("DFS Container namespace uuid = %s\n", param->daosCont); + rc = daos_init(); if (rc) { fprintf(stderr, "daos_init() failed with %d\n", rc); return rc; } - pool_str = getenv("DAOS_POOL"); - if (!pool_str) { - 
fprintf(stderr, "missing pool uuid\n"); - return -1; - } - if (uuid_parse(pool_str, pool_uuid) < 0) { - fprintf(stderr, "Invalid pool uuid\n"); - return -1; - } - - svcl_str = getenv("DAOS_SVCL"); - if (!svcl_str) { - fprintf(stderr, "missing pool service rank list\n"); - return -1; - } - svcl = daos_rank_list_parse(svcl_str, ":"); - if (svcl == NULL) { - fprintf(stderr, "Invalid pool service rank list\n"); - return -1; - } - - group_str = getenv("DAOS_GROUP"); - /** Connect to DAOS pool */ - rc = daos_pool_connect(pool_uuid, group_str, svcl, DAOS_PC_RW, - &poh, &pool_info, NULL); + rc = daos_pool_connect(pool_uuid, + strlen(param->daosGroup) ? param->daosGroup : NULL, + svcl, DAOS_PC_RW, &poh, &pool_info, NULL); if (rc < 0) { - fprintf(stderr, "Failed to connect to pool %s %s (%d)\n", - pool_str, svcl_str, rc); - return -1; + fprintf(stderr, "Failed to connect to pool (%d)\n", rc); + goto err_daos; } - uuid_generate(co_uuid); - rc = daos_cont_create(poh, co_uuid, NULL); + rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL); + /* If NOEXIST we create it */ + if (rc == -DER_NONEXIST) { + printf("Creating DFS Container ...\n"); + rc = daos_cont_create(poh, co_uuid, NULL); + if (rc == 0) { + cont_created = true; + rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, + &co_info, NULL); + } + } if (rc) { fprintf(stderr, "Failed to create container (%d)\n", rc); - return -1; + goto err_pool; } - rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL); - if (rc) { - fprintf(stderr, "Failed to open container (%d)\n", rc); - return -1; - } - - rc = dfs_mount(poh, coh, &dfs); + rc = dfs_mount(poh, coh, O_RDWR, &dfs); if (rc) { fprintf(stderr, "dfs_mount failed (%d)\n", rc); - return -1; + goto err_cont; } +out: + daos_rank_list_free(svcl); return rc; +err_cont: + daos_cont_close(coh, NULL); +err_pool: + if (cont_created) + daos_cont_destroy(poh, co_uuid, 1, NULL); + daos_pool_disconnect(poh, NULL); +err_daos: + daos_fini(); + goto out; } -int 
dfs_finalize(void) +int +DFS_Finalize(IOR_param_t *param) { - dfs_umount(dfs); - daos_cont_close(coh, NULL); + int rc; + + rc = dfs_umount(dfs, true); + if (rc) { + fprintf(stderr, "dfs_umount() failed (%d)\n", rc); + return -1; + } + + rc = daos_cont_close(coh, NULL); + if (rc) { + fprintf(stderr, "daos_cont_close() failed (%d)\n", rc); + return -1; + } + daos_pool_disconnect(poh, NULL); - daos_fini(); + if (rc) { + fprintf(stderr, "daos_pool_disconnect() failed (%d)\n", rc); + return -1; + } + + rc = daos_fini(); + if (rc) { + fprintf(stderr, "daos_fini() failed (%d)\n", rc); + return -1; + } + return 0; } @@ -238,11 +286,14 @@ DFS_Create(char *testFileName, IOR_param_t *param) { char *name = NULL, *dir_name = NULL; dfs_obj_t *obj = NULL, *parent = NULL; - mode_t pmode; + mode_t pmode, mode; int fd_oflag = 0; int rc; + assert(param); + fd_oflag |= O_CREAT | O_RDWR; + mode = S_IFREG | param->mode; rc = parse_filename(testFileName, &name, &dir_name); if (rc) @@ -255,8 +306,8 @@ DFS_Create(char *testFileName, IOR_param_t *param) if (rc || !S_ISDIR(pmode)) goto out; - mode_t mode = S_IFREG | param->mode; - rc = dfs_open(dfs, parent, name, mode, fd_oflag, NULL, &obj); + rc = dfs_open(dfs, parent, name, mode, fd_oflag, DAOS_OC_LARGE_RW, + NULL, &obj); if (rc) goto out; @@ -274,7 +325,8 @@ out: /* * Open a file through the DFS interface. */ -static void *DFS_Open(char *testFileName, IOR_param_t *param) +static void * +DFS_Open(char *testFileName, IOR_param_t *param) { char *name = NULL, *dir_name = NULL; dfs_obj_t *obj = NULL, *parent = NULL; @@ -295,7 +347,7 @@ static void *DFS_Open(char *testFileName, IOR_param_t *param) if (rc || !S_ISDIR(pmode)) goto out; - rc = dfs_open(dfs, parent, name, S_IFREG, fd_oflag, NULL, &obj); + rc = dfs_open(dfs, parent, name, S_IFREG, fd_oflag, 0, NULL, &obj); if (rc) goto out; @@ -369,15 +421,18 @@ DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, /* * Perform fsync(). 
*/ -static void DFS_Fsync(void *fd, IOR_param_t * param) +static void +DFS_Fsync(void *fd, IOR_param_t * param) { + dfs_sync(dfs); return; } /* * Close a file through the DFS interface. */ -static void DFS_Close(void *fd, IOR_param_t * param) +static void +DFS_Close(void *fd, IOR_param_t * param) { dfs_release((dfs_obj_t *)fd); } @@ -385,7 +440,8 @@ static void DFS_Close(void *fd, IOR_param_t * param) /* * Delete a file through the DFS interface. */ -static void DFS_Delete(char *testFileName, IOR_param_t * param) +static void +DFS_Delete(char *testFileName, IOR_param_t * param) { char *name = NULL, *dir_name = NULL; dfs_obj_t *parent = NULL; @@ -419,7 +475,8 @@ out: /* * Determine api version. */ -static void DFS_SetVersion(IOR_param_t * test) +static void +DFS_SetVersion(IOR_param_t * test) { strcpy(test->apiVersion, test->api); } @@ -427,8 +484,8 @@ static void DFS_SetVersion(IOR_param_t * test) /* * Use DFS stat() to return aggregate file size. */ -static IOR_offset_t DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, - char *testFileName) +static IOR_offset_t +DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) { dfs_obj_t *obj; daos_size_t fsize, tmpMin, tmpMax, tmpSum; @@ -475,7 +532,7 @@ DFS_Statfs(const char *path, ior_aiori_statfs_t *sfs, IOR_param_t * param) } static int -DFS_Mkdir (const char *path, mode_t mode, IOR_param_t * param) +DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) { dfs_obj_t *parent = NULL; mode_t pmode; @@ -504,11 +561,13 @@ out: free(dir_name); if (parent) dfs_release(parent); + if (rc) + return -1; return rc; } static int -DFS_Rmdir (const char *path, IOR_param_t * param) +DFS_Rmdir(const char *path, IOR_param_t * param) { dfs_obj_t *parent = NULL; mode_t pmode; @@ -537,11 +596,13 @@ out: free(dir_name); if (parent) dfs_release(parent); + if (rc) + return -1; return rc; } static int -DFS_Access (const char *path, int mode, IOR_param_t * param) +DFS_Access(const char *path, int mode, 
IOR_param_t * param) { dfs_obj_t *parent = NULL; mode_t pmode; @@ -578,11 +639,13 @@ out: free(dir_name); if (parent) dfs_release(parent); + if (rc) + return -1; return rc; } static int -DFS_Stat (const char *path, struct stat *buf, IOR_param_t * param) +DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param) { dfs_obj_t *parent = NULL; mode_t pmode; @@ -611,5 +674,7 @@ out: free(dir_name); if (parent) dfs_release(parent); + if (rc) + return -1; return rc; } diff --git a/src/aiori.h b/src/aiori.h index 9e5695e..b2c4818 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -77,6 +77,8 @@ typedef struct ior_aiori { int (*rmdir) (const char *path, IOR_param_t * param); int (*access) (const char *path, int mode, IOR_param_t * param); int (*stat) (const char *path, struct stat *buf, IOR_param_t * param); + int (*init)(IOR_param_t *); + int (*finalize)(IOR_param_t *); } ior_aiori_t; extern ior_aiori_t hdf5_aiori; @@ -94,9 +96,6 @@ const ior_aiori_t *aiori_select (const char *api); int aiori_count (void); const char *aiori_default (void); -int dfs_init(void); -int dfs_finalize(void); - IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName); diff --git a/src/ior.c b/src/ior.c index e4dc03b..aa64436 100755 --- a/src/ior.c +++ b/src/ior.c @@ -2037,10 +2037,10 @@ static void TestIoSys(IOR_test_t *test) /* bind I/O calls to specific API */ AioriBind(params->api, params); -#ifdef USE_DFS_AIORI - if (strcmp(params->api, "DFS") == 0) - dfs_init(); -#endif + /* initialize API session */ + if (backend->init != NULL) + if (backend->init(params) != 0) + ERR("Could not init backend"); /* show test setup */ if (rank == 0 && verbose >= VERBOSE_0) @@ -2312,13 +2312,12 @@ static void TestIoSys(IOR_test_t *test) free(timer[i]); } + /* finalize API session */ + if (backend->finalize != NULL) + backend->finalize(params); + /* Sync with the tasks that did not participate in this test */ MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); - -#ifdef 
USE_DFS_AIORI - if (strcmp(params->api, "DFS") == 0) - dfs_finalize(); -#endif } /* diff --git a/src/ior.h b/src/ior.h index ce1b4ec..31b962a 100755 --- a/src/ior.h +++ b/src/ior.h @@ -213,6 +213,12 @@ typedef struct int beegfs_numTargets; /* number storage targets to use */ int beegfs_chunkSize; /* srtipe pattern for new files */ + /* daos variables */ + char daosGroup[MAX_STR]; /* group name */ + char daosPool[37]; /* pool UUID */ + char daosPoolSvc[MAX_STR]; /* pool service ranks */ + char daosCont[37]; /* Container UUID */ + int id; /* test's unique ID */ int intraTestBarriers; /* barriers between open/op and op/close */ } IOR_param_t; diff --git a/src/mdtest.c b/src/mdtest.c index 3245c47..b95211d 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1801,6 +1801,59 @@ void create_remove_directory_tree(int create, } } +/* + * Set flags from commandline string/value pairs. + */ +static void +DecodeDirective(char *line, IOR_param_t *params) +{ + char option[MAX_STR]; + char value[MAX_STR]; + int rc; + + rc = sscanf(line, " %[^=# \t\r\n] = %[^# \t\r\n] ", option, value); + if (rc != 2 && rank == 0) { + fprintf(stdout, "Syntax error in configuration options: %s\n", + line); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if (strcasecmp(option, "daospool") == 0) { + strcpy(params->daosPool, value); + } else if (strcasecmp(option, "daospoolsvc") == 0) { + strcpy(params->daosPoolSvc, value); + } else if (strcasecmp(option, "daosgroup") == 0) { + strcpy(params->daosGroup, value); + } else if (strcasecmp(option, "daoscont") == 0) { + strcpy(params->daosCont, value); + } + else { + if (rank == 0) + fprintf(stdout, "Unrecognized parameter \"%s\"\n", + option); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } +} + +/* + * Parse a single line, which may contain multiple comma-seperated directives + */ +static void +ParseLine(char *line, IOR_param_t * test) +{ + char *start, *end; + + start = line; + do { + end = strchr(start, ','); + 
if (end != NULL) + *end = '\0'; + DecodeDirective(start, test); + start = end + 1; + } while (end != NULL); + +} + int main(int argc, char **argv) { int i, j, k, c; int nodeCount; @@ -1853,7 +1906,7 @@ int main(int argc, char **argv) { /* Parse command line options */ while (1) { - c = getopt(argc, argv, "a:b:BcCd:De:Ef:Fhi:I:l:Ln:N:p:rR::s:StTuvV:w:yz:"); + c = getopt(argc, argv, "a:b:BcCd:De:Ef:Fhi:I:l:Ln:N:O:p:rR::s:StTuvV:w:yz:"); if (c == -1) { break; } @@ -1898,6 +1951,9 @@ int main(int argc, char **argv) { //items = atoi(optarg); break; case 'N': nstride = atoi(optarg); break; + case 'O': + ParseLine(optarg, &param); + break; case 'p': pre_delay = atoi(optarg); break; case 'r': @@ -1936,11 +1992,6 @@ int main(int argc, char **argv) { } } -#ifdef USE_DFS_AIORI - if (strcmp(backend_name, "DFS") == 0) - dfs_init(); -#endif - if (!create_only && !stat_only && !read_only && !remove_only) { create_only = stat_only = read_only = remove_only = 1; if (( rank == 0 ) && ( verbose >= 1 )) { @@ -2082,6 +2133,11 @@ int main(int argc, char **argv) { FAIL("Could not find suitable backend to use"); } + /* initialize API session */ + if (backend->init != NULL) + if (backend->init(&param) != 0) + FAIL("Could not init backend"); + /* if directory does not exist, create it */ if ((rank < path_count) && backend->access(testdirpath, F_OK, &param) != 0) { if (backend->mkdir(testdirpath, DIRMODE, &param) != 0) { @@ -2090,16 +2146,20 @@ int main(int argc, char **argv) { } /* display disk usage */ - if (verbose >= 3 && rank == 0) { - printf( "V-3: main (before display_freespace): testdirpath is \"%s\"\n", testdirpath ); - fflush( stdout ); - } + if (strcmp(backend->name, "DFS")) { + if (verbose >= 3 && rank == 0) { + printf( "V-3: main (before display_freespace): testdirpath is \"%s\"\n", + testdirpath ); + fflush( stdout ); + } - if (rank == 0) display_freespace(testdirpath); + if (rank == 0) display_freespace(testdirpath); - if (verbose >= 3 && rank == 0) { - printf( "V-3: main (after 
display_freespace): testdirpath is \"%s\"\n", testdirpath ); - fflush( stdout ); + if (verbose >= 3 && rank == 0) { + printf( "V-3: main (after display_freespace): testdirpath is \"%s\"\n", + testdirpath ); + fflush( stdout ); + } } if (rank == 0) { @@ -2416,10 +2476,9 @@ int main(int argc, char **argv) { free(rand_array); } -#ifdef USE_DFS_AIORI - if (strcmp(backend_name, "DFS") == 0) - dfs_finalize(); -#endif + /* finalize API session */ + if (backend->finalize != NULL) + backend->finalize(&param); MPI_Finalize(); exit(0); diff --git a/src/parse_options.c b/src/parse_options.c index cfa388a..1126559 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -327,7 +327,14 @@ void DecodeDirective(char *line, IOR_param_t *params) RecalculateExpectedFileSize(params); } else if (strcasecmp(option, "summaryalways") == 0) { params->summary_every_test = atoi(value); - } else { + } else if (strcasecmp(option, "daospool") == 0) { + strcpy(params->daosPool, value); + } else if (strcasecmp(option, "daospoolsvc") == 0) { + strcpy(params->daosPoolSvc, value); + } else if (strcasecmp(option, "daosgroup") == 0) { + strcpy(params->daosGroup, value); + } + else { if (rank == 0) fprintf(stdout, "Unrecognized parameter \"%s\"\n", option); From f4b03efd72dee9ecbead5a2d39baf8d0bbecdb4d Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 27 Aug 2018 17:22:38 +0000 Subject: [PATCH 05/66] Add the DAOS ior driver. 
Signed-off-by: Mohamad Chaarawi --- .gitignore | 9 + README_DAOS | 48 +++ configure.ac | 10 + doc/USER_GUIDE | 37 ++ src/Makefile.am | 5 + src/aiori-DAOS.c | 889 ++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 11 +- src/aiori.h | 9 +- src/ior.c | 22 +- src/ior.h | 15 + src/list.h | 556 +++++++++++++++++++++++++++ src/mdtest-main.c | 4 +- src/parse_options.c | 36 +- 13 files changed, 1631 insertions(+), 20 deletions(-) create mode 100644 README_DAOS create mode 100644 src/aiori-DAOS.c create mode 100644 src/list.h diff --git a/.gitignore b/.gitignore index 73dd929..327640c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +tags Makefile Makefile.in aclocal.m4 @@ -9,10 +10,12 @@ config/config.sub config/depcomp config/install-sh config/missing +config/test-driver configure contrib/.deps/ contrib/Makefile contrib/Makefile.in +contrib/cbif doc/Makefile doc/Makefile.in src/.deps/ @@ -28,7 +31,13 @@ contrib/cbif.o src/*.o src/*.i src/*.s +src/*.a src/ior +src/mdtest +src/testlib +src/test/.deps/ +src/test/.dirstamp +src/test/lib.o doc/doxygen/build doc/sphinx/_*/ diff --git a/README_DAOS b/README_DAOS new file mode 100644 index 0000000..ba4fc3e --- /dev/null +++ b/README_DAOS @@ -0,0 +1,48 @@ +Building with DAOS API +---------------------- + +At step 1 above, one must specify "--with-daos". If the DAOS +headers and libraries are not installed at respective system +default locations, then one may also need to set CPPFLAGS and +LDFLAGS accordingly. + +Running with DAOS API +--------------------- + +One must specify an existing pool using "-O +daospool=<pool_uuid>". IOR must be launched in a way that +attaches the IOR process group to the DAOS server process group. + +One must also specify a container UUID using "-o +<container_uuid>". If the "-E" option is given, then this UUID +shall denote an existing container created by a "matching" IOR +run. Otherwise, IOR will create a new container with this UUID. 
+In the latter case, one may use uuidgen(1) to generate the UUID +of the new container. + +When benchmarking write performance, one likely does not want +"-W", which causes the write phase to do one additional memory +copy for every I/O. This is due to IOR's assumption that when a +DAOS_Xfer() call returns the buffer may be released. Therefore, +random data is written when "-W" is absent, while data is copied +from IOR buffers when "-W" is present. + +See doc/USER_GUIDE for all options and directives. Note that not +all combinations of options are supported. + +Examples that should work include: + + - "ior -a DAOS -w -W -o <container_uuid> -O + daospool=<pool_uuid>,daospoolsvc=<svc_ranks>" writes into a new container + and verifies the data, using default daosRecordSize, transferSize, + daosStripeSize, blockSize, daosAios, etc. + + - "ior -a DAOS -w -W -r -R -o <container_uuid> -b 1g -t 4m -C -O + daospool=<pool_uuid>,daospoolsvc=<svc_ranks>,daosrecordsize=1m, + daosstripesize=4m, daosstripecount=256,daosaios=8" does all IOR tests and + shifts ranks during checkWrite and checkRead. + + - "ior -a DAOS -w -r -o <container_uuid> -b 8g -t 1m -C -O + daospool=<pool_uuid>,daospoolsvc=<svc_ranks>,daosrecordsize=1m,daosstripesize=4m, + daosstripecount=256,daosaios=8" may be a base to be tuned for performance + benchmarking. 
diff --git a/configure.ac b/configure.ac index 7042355..bb643d9 100755 --- a/configure.ac +++ b/configure.ac @@ -166,6 +166,16 @@ AM_COND_IF([USE_RADOS_AIORI],[ AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI]) ]) +# DAOS support +AC_ARG_WITH([daos], + [AS_HELP_STRING([--with-daos], + [support IO with DAOS backend @<:@default=no@:>@])], + [], + [with_daos=no]) +AM_CONDITIONAL([USE_DAOS_AIORI], [test x$with_daos = xyes]) +AM_COND_IF([USE_DAOS_AIORI],[ + AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backend AIORI]) +]) # aws4c is needed for the S3 backend (see --with-S3, below). 
diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index 7ea6e49..d76266d 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -367,6 +367,43 @@ BeeGFS-SPECIFIC (POSIX only): * beegfsChunkSize - set the striping chunk size. Must be a power of two, and greater than 64kiB, (e.g.: 256k, 1M, ...) +DAOS-ONLY: +========== + * daosGroup - group name [NULL] + + * daosPool - UUID of the pool [] + + * daosPoolSvc - pool service replica ranks (e.g., 1:2:3:4:5) [] + + * daosRecordSize - size (in bytes) of an akey record [256k] + NOTE: must divide transferSize + + * daosStripeSize - size (in bytes) of a chunk in a stripe [512k] + NOTE: must be a multiple of transferSize + + * daosStripeCount - number of stripes [64 * number of targets] + NOTE: i.e., number of dkeys + + * daosStripeMax - max length of each stripe [0] + NOTE: must be a multiple of daosStripeSize + NOTE: for write testing with small storage + NOTE: offsets in a stripe larger than daosStripeMax + are mapped to offset % daosStripeMax + + * daosAios - max number of asynchronous I/Os [1] + + * daosWriteOnly - skip flushing and committing [0=FALSE] + + * daosEpoch - epoch to read or write [0] + NOTE: 0 denotes reading GHCE or writing GHCE + 1 + + * daosWait - epoch to wait when opening the container [0] + + * daosKill - kill a target in the middle of the test [0] + NOTE: must also specify a replicated daosObjectClass (R2, R2S, R3, R3S, R4, R4S or repl_max) + + * daosObjectClass - object class (tiny, small, large, echo, R2, R2S, R3, + R3S, R4, R4S, repl_max) [large] *********************** * 5. 
VERBOSITY LEVELS * diff --git a/src/Makefile.am b/src/Makefile.am index 0e0b916..32db201 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -70,6 +70,11 @@ extraSOURCES += aiori-RADOS.c extraLDADD += -lrados endif +if USE_DAOS_AIORI +extraSOURCES += aiori-DAOS.c list.h +extraLDADD += -ldaos -ldaos_common -luuid +endif + if USE_S3_AIORI extraSOURCES += aiori-S3.c if AWS4C_DIR diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c new file mode 100644 index 0000000..9175a5a --- /dev/null +++ b/src/aiori-DAOS.c @@ -0,0 +1,889 @@ +/* + * -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/* + * SPECIAL LICENSE RIGHTS-OPEN SOURCE SOFTWARE + * The Government's rights to use, modify, reproduce, release, perform, display, + * or disclose this software are subject to the terms of Contract No. B599860, + * and the terms of the GNU General Public License version 2. + * Any reproduction of computer software, computer software documentation, or + * portions thereof marked with this legend must also reproduce the markings. + */ +/* + * Copyright (c) 2013, 2016 Intel Corporation. + */ +/* + * This file implements the abstract I/O interface for DAOS. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ior.h" +#include "aiori.h" +#include "iordef.h" +#include "list.h" + +/**************************** P R O T O T Y P E S *****************************/ + +static void DAOS_Init(IOR_param_t *); +static void DAOS_Fini(IOR_param_t *); +static void *DAOS_Create(char *, IOR_param_t *); +static void *DAOS_Open(char *, IOR_param_t *); +static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void DAOS_Close(void *, IOR_param_t *); +static void DAOS_Delete(char *, IOR_param_t *); +static char* DAOS_GetVersion(); +static void DAOS_Fsync(void *, IOR_param_t *); +static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t daos_aiori = { + .name = "DAOS", + .create = DAOS_Create, + .open = DAOS_Open, + .xfer = DAOS_Xfer, + .close = DAOS_Close, + .delete = DAOS_Delete, + .get_version = DAOS_GetVersion, + .fsync = DAOS_Fsync, + .get_file_size = DAOS_GetFileSize, + .initialize = DAOS_Init, + .finalize = DAOS_Fini, +}; + +enum handleType { + POOL_HANDLE, + CONTAINER_HANDLE +}; + +struct fileDescriptor { + daos_handle_t container; + daos_cont_info_t containerInfo; + daos_handle_t object; + daos_epoch_t epoch; +}; + +struct aio { + cfs_list_t a_list; + char a_dkeyBuf[32]; + daos_key_t a_dkey; + daos_recx_t a_recx; + unsigned char a_csumBuf[32]; + daos_csum_buf_t a_csum; + daos_epoch_range_t a_epochRange; + daos_iod_t a_iod; + daos_iov_t a_iov; + daos_sg_list_t a_sgl; + struct daos_event a_event; +}; + +static daos_handle_t eventQueue; +static struct daos_event **events; +static unsigned char *buffers; +static int nAios; +static daos_handle_t pool; +static daos_pool_info_t poolInfo; +static daos_oclass_id_t objectClass = DAOS_OC_LARGE_RW; +static CFS_LIST_HEAD(aios); +static IOR_offset_t 
total_size; + +/***************************** F U N C T I O N S ******************************/ + +/* For DAOS methods. */ +#define DCHECK(rc, format, ...) \ +do { \ + int _rc = (rc); \ + \ + if (_rc < 0) { \ + fprintf(stdout, "ior ERROR (%s:%d): %d: %d: " \ + format"\n", __FILE__, __LINE__, rank, _rc, \ + ##__VA_ARGS__); \ + fflush(stdout); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ + } \ +} while (0) + +#define INFO(level, param, format, ...) \ +do { \ + if (param->verbose >= level) \ + printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ +} while (0) + +/* For generic errors like invalid command line options. */ +#define GERR(format, ...) \ +do { \ + fprintf(stdout, format"\n", ##__VA_ARGS__); \ + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ +} while (0) + +/* Distribute process 0's pool or container handle to others. */ +static void HandleDistribute(daos_handle_t *handle, enum handleType type, + IOR_param_t *param) +{ + daos_iov_t global; + int rc; + + assert(type == POOL_HANDLE || !daos_handle_is_inval(pool)); + + global.iov_buf = NULL; + global.iov_buf_len = 0; + global.iov_len = 0; + + if (rank == 0) { + /* Get the global handle size. 
*/ + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else + rc = daos_cont_local2global(*handle, &global); + DCHECK(rc, "Failed to get global handle size"); + } + + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, + param->testComm), + "Failed to bcast global handle buffer size"); + + global.iov_buf = malloc(global.iov_buf_len); + if (global.iov_buf == NULL) + ERR("Failed to allocate global handle buffer"); + + if (rank == 0) { + if (type == POOL_HANDLE) + rc = daos_pool_local2global(*handle, &global); + else + rc = daos_cont_local2global(*handle, &global); + DCHECK(rc, "Failed to create global handle"); + } + + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, + param->testComm), + "Failed to bcast global pool handle"); + + if (rank != 0) { + /* A larger-than-actual length works just fine. */ + global.iov_len = global.iov_buf_len; + + if (type == POOL_HANDLE) + rc = daos_pool_global2local(global, handle); + else + rc = daos_cont_global2local(pool, global, handle); + DCHECK(rc, "Failed to get local handle"); + } + + free(global.iov_buf); +} + +static void ContainerOpen(char *testFileName, IOR_param_t *param, + daos_handle_t *container, daos_cont_info_t *info) +{ + int rc; + + if (rank == 0) { + uuid_t uuid; + unsigned int dFlags; + + rc = uuid_parse(testFileName, uuid); + DCHECK(rc, "Failed to parse 'testFile': %s", testFileName); + + if (param->open == WRITE && + param->useExistingTestFile == FALSE) { + INFO(VERBOSE_2, param, "Creating container %s", + testFileName); + + rc = daos_cont_create(pool, uuid, NULL /* ev */); + DCHECK(rc, "Failed to create container %s", + testFileName); + } + + INFO(VERBOSE_2, param, "Openning container %s", testFileName); + + if (param->open == WRITE) + dFlags = DAOS_COO_RW; + else + dFlags = DAOS_COO_RO; + + rc = daos_cont_open(pool, uuid, dFlags, container, info, + NULL /* ev */); + DCHECK(rc, "Failed to open container %s", testFileName); + + INFO(VERBOSE_2, param, 
"Container epoch state:"); + INFO(VERBOSE_2, param, " HCE: %lu", + info->ci_epoch_state.es_hce); + INFO(VERBOSE_2, param, " LRE: %lu", + info->ci_epoch_state.es_lre); + INFO(VERBOSE_2, param, " LHE: %lu (%lx)", + info->ci_epoch_state.es_lhe, info->ci_epoch_state.es_lhe); + INFO(VERBOSE_2, param, " GHCE: %lu", + info->ci_epoch_state.es_ghce); + INFO(VERBOSE_2, param, " GLRE: %lu", + info->ci_epoch_state.es_glre); + INFO(VERBOSE_2, param, " GHPCE: %lu", + info->ci_epoch_state.es_ghpce); + +#if 0 + if (param->open != WRITE && param->daosWait != 0) { + daos_epoch_t e; + + e = param->daosWait; + + INFO(VERBOSE_2, param, "Waiting for epoch %lu", e); + + rc = daos_epoch_wait(*container, &e, + NULL /* ignore HLE */, + NULL /* synchronous */); + DCHECK(rc, "Failed to wait for epoch %lu", + param->daosWait); + } + + if (param->open == WRITE && + param->useExistingTestFile == FALSE) { + daos_oclass_attr_t attr = { + .ca_schema = DAOS_OS_STRIPED, + .ca_resil_degree = 0, + .ca_resil = DAOS_RES_REPL, + .ca_grp_nr = 4, + .u.repl = { + .r_method = 0, + .r_num = 2 + } + }; + + INFO(VERBOSE_2, param, "Registering object class"); + + rc = daos_oclass_register(container, objectClass, &attr, + NULL /* ev */); + DCHECK(rc, "Failed to register object class"); + } +#endif + } + + HandleDistribute(container, CONTAINER_HANDLE, param); + + MPI_CHECK(MPI_Bcast(info, sizeof *info, MPI_BYTE, 0, param->testComm), + "Failed to broadcast container info"); +} + +static void ContainerClose(daos_handle_t container, IOR_param_t *param) +{ + int rc; + + if (rank != 0) { + rc = daos_cont_close(container, NULL /* ev */); + DCHECK(rc, "Failed to close container"); + } + + /* An MPI_Gather() call would probably be more efficient. 
*/ + MPI_CHECK(MPI_Barrier(param->testComm), + "Failed to synchronize processes"); + + if (rank == 0) { + rc = daos_cont_close(container, NULL /* ev */); + DCHECK(rc, "Failed to close container"); + } +} + +static void ObjectOpen(daos_handle_t container, daos_handle_t *object, + daos_epoch_t epoch, IOR_param_t *param) +{ + daos_obj_id_t oid; + unsigned int flags; + int rc; + + oid.hi = 0; + oid.lo = 1; + daos_obj_id_generate(&oid, 0, objectClass); + +#if 0 + /** declaring object not implemented commenting it */ + if (rank == 0 && param->open == WRITE && + param->useExistingTestFile == FALSE) { + INFO(VERBOSE_2, param, "Declaring object"); + + rc = daos_obj_declare(container, oid, epoch, NULL /* oa */, + NULL /* ev */); + DCHECK(rc, "Failed to declare object"); + } +#endif + /* An MPI_Bcast() call would probably be more efficient. */ + MPI_CHECK(MPI_Barrier(param->testComm), + "Failed to synchronize processes"); + + if (param->open == WRITE) + flags = DAOS_OO_RW; + else + flags = DAOS_OO_RO; + + rc = daos_obj_open(container, oid, epoch, flags, object, NULL /* ev */); + DCHECK(rc, "Failed to open object"); +} + +static void ObjectClose(daos_handle_t object) +{ + int rc; + + rc = daos_obj_close(object, NULL /* ev */); + DCHECK(rc, "Failed to close object"); +} + +static void AIOInit(IOR_param_t *param) +{ + struct aio *aio; + int i; + int rc; + + rc = posix_memalign((void **) &buffers, sysconf(_SC_PAGESIZE), + param->transferSize * param->daosAios); + DCHECK(rc, "Failed to allocate buffer array"); + + for (i = 0; i < param->daosAios; i++) { + aio = malloc(sizeof *aio); + if (aio == NULL) + ERR("Failed to allocate aio array"); + + memset(aio, 0, sizeof *aio); + + aio->a_dkey.iov_buf = aio->a_dkeyBuf; + aio->a_dkey.iov_buf_len = sizeof aio->a_dkeyBuf; + + aio->a_recx.rx_nr = 1; + + aio->a_csum.cs_csum = &aio->a_csumBuf; + aio->a_csum.cs_buf_len = sizeof aio->a_csumBuf; + aio->a_csum.cs_len = aio->a_csum.cs_buf_len; + + aio->a_epochRange.epr_hi = DAOS_EPOCH_MAX; + + 
aio->a_iod.iod_name.iov_buf = "data"; + aio->a_iod.iod_name.iov_buf_len = + strlen(aio->a_iod.iod_name.iov_buf) + 1; + aio->a_iod.iod_name.iov_len = aio->a_iod.iod_name.iov_buf_len; + aio->a_iod.iod_nr = 1; + aio->a_iod.iod_type = DAOS_IOD_ARRAY; + aio->a_iod.iod_recxs = &aio->a_recx; + aio->a_iod.iod_csums = &aio->a_csum; + aio->a_iod.iod_eprs = &aio->a_epochRange; + aio->a_iod.iod_size = param->transferSize; + + aio->a_iov.iov_buf = buffers + param->transferSize * i; + aio->a_iov.iov_buf_len = param->transferSize; + aio->a_iov.iov_len = aio->a_iov.iov_buf_len; + + aio->a_sgl.sg_nr = 1; + aio->a_sgl.sg_iovs = &aio->a_iov; + + rc = daos_event_init(&aio->a_event, eventQueue, + NULL /* parent */); + DCHECK(rc, "Failed to initialize event for aio[%d]", i); + + cfs_list_add(&aio->a_list, &aios); + + INFO(VERBOSE_3, param, "Allocated AIO %p: buffer %p", aio, + aio->a_iov.iov_buf); + } + + nAios = param->daosAios; + + events = malloc((sizeof *events) * param->daosAios); + if (events == NULL) + ERR("Failed to allocate events array"); +} + +static void AIOFini(IOR_param_t *param) +{ + struct aio *aio; + struct aio *tmp; + + free(events); + + cfs_list_for_each_entry_safe(aio, tmp, &aios, a_list) { + INFO(VERBOSE_3, param, "Freeing AIO %p: buffer %p", aio, + aio->a_iov.iov_buf); + cfs_list_del_init(&aio->a_list); + daos_event_fini(&aio->a_event); + free(aio); + } + + free(buffers); +} + +static void AIOWait(IOR_param_t *param) +{ + struct aio *aio; + int i; + int rc; + + rc = daos_eq_poll(eventQueue, 0, DAOS_EQ_WAIT, param->daosAios, + events); + DCHECK(rc, "Failed to poll event queue"); + assert(rc <= param->daosAios - nAios); + + for (i = 0; i < rc; i++) { + int ret; + + aio = (struct aio *) + ((char *) events[i] - + (char *) (&((struct aio *) 0)->a_event)); + + DCHECK(aio->a_event.ev_error, "Failed to transfer (%lu, %lu)", + aio->a_iod.iod_recxs->rx_idx, + aio->a_iod.iod_recxs->rx_nr); + + daos_event_fini(&aio->a_event); + ret = daos_event_init(&aio->a_event, eventQueue, 
+ NULL /* parent */); + DCHECK(ret, "Failed to reinitialize event for AIO %p", aio); + + cfs_list_move(&aio->a_list, &aios); + nAios++; + + if (param->verbose >= VERBOSE_3) + INFO(VERBOSE_3, param, "Completed AIO %p: buffer %p", aio, + aio->a_iov.iov_buf); + } + + INFO(VERBOSE_3, param, "Found %d completed AIOs (%d free %d busy)", rc, + nAios, param->daosAios - nAios); +} + +static void ObjectClassParse(const char *string) +{ + if (strcasecmp(string, "tiny") == 0) + objectClass = DAOS_OC_TINY_RW; + else if (strcasecmp(string, "small") == 0) + objectClass = DAOS_OC_SMALL_RW; + else if (strcasecmp(string, "large") == 0) + objectClass = DAOS_OC_LARGE_RW; + else if (strcasecmp(string, "echo") == 0) + objectClass = DAOS_OC_ECHO_RW; + else if (strcasecmp(string, "R2") == 0) + objectClass = DAOS_OC_R2_RW; + else if (strcasecmp(string, "R2S") == 0) + objectClass = DAOS_OC_R2S_RW; + else if (strcasecmp(string, "R3S") == 0) + objectClass = DAOS_OC_R3S_RW; + else if (strcasecmp(string, "R3") == 0) + objectClass = DAOS_OC_R3_RW; + else if (strcasecmp(string, "R4") == 0) + objectClass = DAOS_OC_R4_RW; + else if (strcasecmp(string, "R4S") == 0) + objectClass = DAOS_OC_R4S_RW; + else if (strcasecmp(string, "repl_max") == 0) + objectClass = DAOS_OC_REPL_MAX_RW; + else + GERR("Invalid 'daosObjectClass' argument: '%s'", string); +} + +static const char *GetGroup(IOR_param_t *param) +{ + if (strlen(param->daosGroup) == 0) + return NULL; + return param->daosGroup; +} + +static void ParseService(IOR_param_t *param, int max, d_rank_list_t *ranks) +{ + char *buf, *s; + + buf = s = strdup(param->daosPoolSvc); + if (buf == NULL) + GERR("failed to duplicate argument"); + ranks->rl_nr = 0; + while ((s = strtok(s, ":")) != NULL) { + if (ranks->rl_nr >= max) { + free(buf); + GERR("at most %d pool service replicas supported", max); + } + ranks->rl_ranks[ranks->rl_nr] = atoi(s); + ranks->rl_nr++; + s = NULL; + } + free(buf); +} + +static void DAOS_Init(IOR_param_t *param) +{ + int rc; + + if 
(strlen(param->daosObjectClass) != 0) + ObjectClassParse(param->daosObjectClass); + + if (param->filePerProc) + GERR("'filePerProc' not yet supported"); + if (param->daosStripeMax % param->daosStripeSize != 0) + GERR("'daosStripeMax' must be a multiple of 'daosStripeSize'"); + if (param->daosStripeSize % param->transferSize != 0) + GERR("'daosStripeSize' must be a multiple of 'transferSize'"); + if (param->transferSize % param->daosRecordSize != 0) + GERR("'transferSize' must be a multiple of 'daosRecordSize'"); + if (param->daosKill && ((objectClass != DAOS_OC_R2_RW) && + (objectClass != DAOS_OC_R3_RW) && + (objectClass != DAOS_OC_R4_RW) && + (objectClass != DAOS_OC_R2S_RW) && + (objectClass != DAOS_OC_R3S_RW) && + (objectClass != DAOS_OC_R4S_RW) && + (objectClass != DAOS_OC_REPL_MAX_RW))) + GERR("'daosKill' only makes sense with 'daosObjectClass=repl'"); + + if (rank == 0) + INFO(VERBOSE_0, param, "WARNING: USING daosStripeMax CAUSES READS TO RETURN INVALID DATA"); + + rc = daos_init(); + DCHECK(rc, "Failed to initialize daos"); + + rc = daos_eq_create(&eventQueue); + DCHECK(rc, "Failed to create event queue"); + + if (rank == 0) { + uuid_t uuid; + d_rank_t svcRanks[13]; + d_rank_list_t ranks; + + if (strlen(param->daosPool) == 0) + GERR("'daosPool' must be specified"); + if (strlen(param->daosPoolSvc) == 0) + GERR("'daosPoolSvc' must be specified"); + + INFO(VERBOSE_2, param, "Connecting to pool %s %s", + param->daosPool, param->daosPoolSvc); + + rc = uuid_parse(param->daosPool, uuid); + DCHECK(rc, "Failed to parse 'daosPool': %s", param->daosPool); + ranks.rl_ranks = svcRanks; + ParseService(param, sizeof(svcRanks) / sizeof(svcRanks[0]), &ranks); + + rc = daos_pool_connect(uuid, GetGroup(param), &ranks, + DAOS_PC_RW, &pool, &poolInfo, + NULL /* ev */); + DCHECK(rc, "Failed to connect to pool %s", param->daosPool); + } + + HandleDistribute(&pool, POOL_HANDLE, param); + + MPI_CHECK(MPI_Bcast(&poolInfo, sizeof poolInfo, MPI_BYTE, 0, + param->testComm), + "Failed to bcast pool 
info"); + + if (param->daosStripeCount == -1) + param->daosStripeCount = poolInfo.pi_ntargets * 64UL; +} + +static void DAOS_Fini(IOR_param_t *param) +{ + int rc; + + rc = daos_pool_disconnect(pool, NULL /* ev */); + DCHECK(rc, "Failed to disconnect from pool %s", param->daosPool); + + rc = daos_eq_destroy(eventQueue, 0 /* flags */); + DCHECK(rc, "Failed to destroy event queue"); + + rc = daos_fini(); + DCHECK(rc, "Failed to finalize daos"); +} + +static void *DAOS_Create(char *testFileName, IOR_param_t *param) +{ + return DAOS_Open(testFileName, param); +} + +static void *DAOS_Open(char *testFileName, IOR_param_t *param) +{ + struct fileDescriptor *fd; + daos_epoch_t ghce; + + fd = malloc(sizeof *fd); + if (fd == NULL) + ERR("Failed to allocate fd"); + + ContainerOpen(testFileName, param, &fd->container, &fd->containerInfo); + + ghce = fd->containerInfo.ci_epoch_state.es_ghce; + if (param->open == WRITE) { + if (param->daosEpoch == 0) + fd->epoch = ghce + 1; + else if (param->daosEpoch <= ghce) + GERR("Can't modify committed epoch\n"); + else + fd->epoch = param->daosEpoch; + } else { + if (param->daosEpoch == 0) { + if (param->daosWait == 0) + fd->epoch = ghce; + else + fd->epoch = param->daosWait; + } else if (param->daosEpoch > ghce) { + GERR("Can't read uncommitted epoch\n"); + } else { + fd->epoch = param->daosEpoch; + } + } + + if (rank == 0) + INFO(VERBOSE_2, param, "Accessing epoch %lu", fd->epoch); + + if (rank == 0 && param->open == WRITE) { + daos_epoch_t e = fd->epoch; + int rc; + + INFO(VERBOSE_2, param, "Holding epoch %lu", fd->epoch); + + rc = daos_epoch_hold(fd->container, &fd->epoch, + NULL /* state */, NULL /* ev */); + DCHECK(rc, "Failed to hold epoch"); + assert(fd->epoch == e); + } + + ObjectOpen(fd->container, &fd->object, fd->epoch, param); + + AIOInit(param); + + return fd; +} + +static void +kill_daos_server(IOR_param_t *param) +{ + daos_pool_info_t info; + d_rank_t rank, svc_ranks[13]; + d_rank_list_t svc, targets; + uuid_t uuid; + char 
*s; + int rc; + + rc = daos_pool_query(pool, NULL, &info, NULL); + DCHECK(rc, "Error in querying pool\n"); + + if (info.pi_ntargets - info.pi_ndisabled <= 1) + return; + /* choose the last alive one */ + rank = info.pi_ntargets - 1 - info.pi_ndisabled; + + rc = uuid_parse(param->daosPool, uuid); + DCHECK(rc, "Failed to parse 'daosPool': %s", param->daosPool); + + if (rc != 0) + printf("Killing tgt rank: %d (total of %d of %d already disabled)\n", + rank, info.pi_ndisabled, info.pi_ntargets); + fflush(stdout); + + rc = daos_mgmt_svc_rip(GetGroup(param), rank, true, NULL); + DCHECK(rc, "Error in killing server\n"); + + targets.rl_nr = 1; + targets.rl_ranks = &rank; + + svc.rl_ranks = svc_ranks; + ParseService(param, sizeof(svc_ranks)/ sizeof(svc_ranks[0]), &svc); + + rc = daos_pool_exclude(uuid, NULL, &svc, &targets, NULL); + DCHECK(rc, "Error in excluding pool from poolmap\n"); + + rc = daos_pool_query(pool, NULL, &info, NULL); + DCHECK(rc, "Error in querying pool\n"); + + printf("%d targets succesfully disabled\n", + info.pi_ndisabled); + +} + +static void +kill_and_sync(IOR_param_t *param) +{ + double start, end; + + start = MPI_Wtime(); + if (rank == 0) + kill_daos_server(param); + + if (rank == 0) + printf("Done killing and excluding\n"); + + MPI_CHECK(MPI_Barrier(param->testComm), + "Failed to synchronize processes"); + + end = MPI_Wtime(); + if (rank == 0) + printf("Time spent inducing failure: %lf\n", (end - start)); +} + +static IOR_offset_t DAOS_Xfer(int access, void *file, IOR_size_t *buffer, + IOR_offset_t length, IOR_param_t *param) +{ + struct fileDescriptor *fd = file; + struct aio *aio; + uint64_t stripe; + IOR_offset_t stripeOffset; + uint64_t round; + int rc; + + assert(length == param->transferSize); + assert(param->offset % length == 0); + + /** + * Currently killing only during writes + * Kills once when 1/2 of blocksize is + * written + **/ + total_size += length; + if (param->daosKill && (access == WRITE) && + ((param->blockSize)/2) == 
total_size) { + /** More than half written lets kill */ + if (rank == 0) + printf("Killing and Syncing\n", rank); + kill_and_sync(param); + param->daosKill = 0; + } + + /* + * Find an available AIO descriptor. If none, wait for one. + */ + while (nAios == 0) + AIOWait(param); + aio = cfs_list_entry(aios.next, struct aio, a_list); + cfs_list_move_tail(&aio->a_list, &aios); + nAios--; + + stripe = (param->offset / param->daosStripeSize) % + param->daosStripeCount; + rc = snprintf(aio->a_dkeyBuf, sizeof aio->a_dkeyBuf, "%lu", stripe); + assert(rc < sizeof aio->a_dkeyBuf); + aio->a_dkey.iov_len = strlen(aio->a_dkeyBuf) + 1; + round = param->offset / (param->daosStripeSize * param->daosStripeCount); + stripeOffset = param->daosStripeSize * round + + param->offset % param->daosStripeSize; + if (param->daosStripeMax != 0) + stripeOffset %= param->daosStripeMax; + aio->a_recx.rx_idx = stripeOffset / param->daosRecordSize; + aio->a_epochRange.epr_lo = fd->epoch; + + /* + * If the data written will be checked later, we have to copy in valid + * data instead of writing random bytes. If the data being read is for + * checking purposes, poison the buffer first. 
+ */ + if (access == WRITE && param->checkWrite) + memcpy(aio->a_iov.iov_buf, buffer, length); + else if (access == WRITECHECK || access == READCHECK) + memset(aio->a_iov.iov_buf, '#', length); + + INFO(VERBOSE_3, param, "Starting AIO %p (%d free %d busy): access %d " + "dkey '%s' iod <%llu, %llu> sgl <%p, %lu>", aio, nAios, + param->daosAios - nAios, access, (char *) aio->a_dkey.iov_buf, + (unsigned long long) aio->a_iod.iod_recxs->rx_idx, + (unsigned long long) aio->a_iod.iod_recxs->rx_nr, + aio->a_sgl.sg_iovs->iov_buf, + (unsigned long long) aio->a_sgl.sg_iovs->iov_buf_len); + + if (access == WRITE) { + rc = daos_obj_update(fd->object, fd->epoch, &aio->a_dkey, + 1 /* nr */, &aio->a_iod, &aio->a_sgl, + &aio->a_event); + DCHECK(rc, "Failed to start update operation"); + } else { + rc = daos_obj_fetch(fd->object, fd->epoch, &aio->a_dkey, + 1 /* nr */, &aio->a_iod, &aio->a_sgl, + NULL /* maps */, &aio->a_event); + DCHECK(rc, "Failed to start fetch operation"); + } + + /* + * If this is a WRITECHECK or READCHECK, we are expected to fill data + * into the buffer before returning. Note that if this is a READ, we + * don't have to return valid data as WriteOrRead() doesn't care. + */ + if (access == WRITECHECK || access == READCHECK) { + while (param->daosAios - nAios > 0) + AIOWait(param); + memcpy(buffer, aio->a_sgl.sg_iovs->iov_buf, length); + } + + return length; +} + +static void DAOS_Close(void *file, IOR_param_t *param) +{ + struct fileDescriptor *fd = file; + int rc; + + while (param->daosAios - nAios > 0) + AIOWait(param); + AIOFini(param); + + ObjectClose(fd->object); + + if (param->open == WRITE && !param->daosWriteOnly) { + /* Wait for everybody for to complete the writes. 
*/ + MPI_CHECK(MPI_Barrier(param->testComm), + "Failed to synchronize processes"); + + if (rank == 0) { + INFO(VERBOSE_2, param, "Flushing epoch %lu", fd->epoch); + + rc = daos_epoch_flush(fd->container, fd->epoch, + NULL /* state */, NULL /* ev */); + DCHECK(rc, "Failed to flush epoch"); + + INFO(VERBOSE_2, param, "Committing epoch %lu", + fd->epoch); + + rc = daos_epoch_commit(fd->container, fd->epoch, + NULL /* state */, NULL /* ev */); + DCHECK(rc, "Failed to commit object write"); + } + } + + ContainerClose(fd->container, param); + + free(fd); +} + +static void DAOS_Delete(char *testFileName, IOR_param_t *param) +{ + uuid_t uuid; + int rc; + + INFO(VERBOSE_2, param, "Deleting container %s", testFileName); + + rc = uuid_parse(testFileName, uuid); + DCHECK(rc, "Failed to parse 'testFile': %s", testFileName); + + rc = daos_cont_destroy(pool, uuid, 1 /* force */, NULL /* ev */); + if (rc != -DER_NONEXIST) + DCHECK(rc, "Failed to destroy container %s", testFileName); +} + +static char* DAOS_GetVersion() +{ + static char ver[1024] = {}; + + sprintf(ver, "%s", "DAOS"); + return ver; +} + +static void DAOS_Fsync(void *file, IOR_param_t *param) +{ + while (param->daosAios - nAios > 0) + AIOWait(param); +} + +static IOR_offset_t DAOS_GetFileSize(IOR_param_t *test, MPI_Comm testComm, + char *testFileName) +{ + /* + * Sizes are inapplicable to containers at the moment. 
+ */ + return 0; +} diff --git a/src/aiori.c b/src/aiori.c index f5d5719..5978ed2 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -57,6 +57,9 @@ ior_aiori_t *available_aiori[] = { #endif #ifdef USE_RADOS_AIORI &rados_aiori, +#endif +#ifdef USE_DAOS_AIORI + &daos_aiori, #endif NULL }; @@ -133,7 +136,7 @@ char* aiori_get_version() static int is_initialized = FALSE; -void aiori_initialize(){ +void aiori_initialize(IOR_test_t *tests_head){ if (is_initialized) return; is_initialized = TRUE; @@ -145,18 +148,18 @@ void aiori_initialize(){ for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) { if((*tmp)->initialize){ - (*tmp)->initialize(); + (*tmp)->initialize(tests_head ? &tests_head->params : NULL); } } } -void aiori_finalize(){ +void aiori_finalize(IOR_test_t *tests_head){ if (! is_initialized) return; is_initialized = FALSE; for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) { if((*tmp)->finalize){ - (*tmp)->finalize(); + (*tmp)->finalize(tests_head ? &tests_head->params : NULL); } } } diff --git a/src/aiori.h b/src/aiori.h index 0b0ffda..c1f63e5 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -79,8 +79,8 @@ typedef struct ior_aiori { int (*rmdir) (const char *path, IOR_param_t * param); int (*access) (const char *path, int mode, IOR_param_t * param); int (*stat) (const char *path, struct stat *buf, IOR_param_t * param); - void (*initialize)(); /* called once per program before MPI is started */ - void (*finalize)(); /* called once per program after MPI is shutdown */ + void (*initialize)(IOR_param_t *); /* called once per program before MPI is started */ + void (*finalize)(IOR_param_t *); /* called once per program after MPI is shutdown */ option_help * (*get_options)(); } ior_aiori_t; @@ -96,9 +96,10 @@ extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_emc_aiori; extern ior_aiori_t rados_aiori; +extern ior_aiori_t daos_aiori; -void aiori_initialize(); -void aiori_finalize(); +void aiori_initialize(IOR_test_t 
*th); +void aiori_finalize(IOR_test_t *th); const ior_aiori_t *aiori_select (const char *api); int aiori_count (void); void aiori_supported_apis(char * APIs); diff --git a/src/ior.c b/src/ior.c index 1fed65e..fbb6922 100755 --- a/src/ior.c +++ b/src/ior.c @@ -98,8 +98,6 @@ int ior_main(int argc, char **argv) out_logfile = stdout; out_resultfile = stdout; - aiori_initialize(); - /* * check -h option from commandline without starting MPI; */ @@ -125,6 +123,8 @@ int ior_main(int argc, char **argv) PrintHeader(argc, argv); + aiori_initialize(tests_head); + /* perform each test */ for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { verbose = tptr->params.verbose; @@ -143,6 +143,8 @@ int ior_main(int argc, char **argv) ShowTestEnd(tptr); } + aiori_finalize(tests_head); + if (verbose < 0) /* always print final summary */ verbose = 0; @@ -155,8 +157,6 @@ int ior_main(int argc, char **argv) MPI_CHECK(MPI_Finalize(), "cannot finalize MPI"); - aiori_finalize(); - return totalErrorCount; } @@ -199,6 +199,11 @@ void init_IOR_Param_t(IOR_param_t * p) p->setAlignment = 1; p->lustre_start_ost = -1; + p->daosRecordSize = 262144; + p->daosStripeSize = 524288; + p->daosStripeCount = -1; + p->daosAios = 1; + hdfs_user = getenv("USER"); if (!hdfs_user) hdfs_user = ""; @@ -297,7 +302,8 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep) 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); - if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { + if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0 && + strcasecmp(params->api, "DAOS") != 0) { if (verbose >= VERBOSE_0 && rank == 0) { if ((params->expectedAggFileSize != results[rep].aggFileSizeFromXfer) @@ -913,7 +919,8 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) rankOffset = 0; GetTestFileName(testFileName, test); } - if (backend->access(testFileName, F_OK, test) == 0) { + if 
(backend->access(testFileName, F_OK, test) == 0 || + strcasecmp(test->api, "DAOS") == 0) { backend->delete(testFileName, test); } if (test->reorderTasksRandom == TRUE) { @@ -921,7 +928,8 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) GetTestFileName(testFileName, test); } } else { - if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0)) { + if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0 || + strcasecmp(test->api, "DAOS"))) { backend->delete(testFileName, test); } } diff --git a/src/ior.h b/src/ior.h index 43177fd..4fa9052 100755 --- a/src/ior.h +++ b/src/ior.h @@ -190,6 +190,21 @@ typedef struct int lustre_set_striping; /* flag that we need to set lustre striping */ int lustre_ignore_locks; + /* DAOS variables */ + char daosGroup[MAX_STR]; /* group name */ + char daosPool[37]; /* pool UUID */ + char daosPoolSvc[MAX_STR]; /* pool service ranks */ + int daosRecordSize; /* size of akey record (i.e., rx_rsize) */ + int daosStripeSize; + unsigned long daosStripeCount; + unsigned long daosStripeMax; /* max length of a stripe */ + int daosAios; /* max number of concurrent async I/Os */ + int daosWriteOnly; /* write only, no flush and commit */ + unsigned long daosEpoch; /* epoch to access */ + unsigned long daosWait; /* epoch to wait for before reading */ + int daosKill; /* kill a target while running IOR */ + char daosObjectClass[MAX_STR]; /* object class */ + /* gpfs variables */ int gpfs_hint_access; /* use gpfs "access range" hint */ int gpfs_release_token; /* immediately release GPFS tokens after diff --git a/src/list.h b/src/list.h new file mode 100644 index 0000000..dbe052c --- /dev/null +++ b/src/list.h @@ -0,0 +1,556 @@ +/** + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * GPL HEADER END + */ +#ifndef __DAOS_LIST_H__ +#define __DAOS_LIST_H__ + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +#define prefetch(a) ((void)a) + +struct cfs_list_head { + struct cfs_list_head *next, *prev; +}; + +typedef struct cfs_list_head cfs_list_t; + +#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) } + +#define CFS_LIST_HEAD(name) \ + cfs_list_t name = CFS_LIST_HEAD_INIT(name) + +#define CFS_INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/** + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __cfs_list_add(cfs_list_t * new, + cfs_list_t * prev, + cfs_list_t * next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * Insert an entry at the start of a list. 
+ * \param new new entry to be inserted + * \param head list to add it to + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void cfs_list_add(cfs_list_t *new, + cfs_list_t *head) +{ + __cfs_list_add(new, head, head->next); +} + +/** + * Insert an entry at the end of a list. + * \param new new entry to be inserted + * \param head list to add it to + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void cfs_list_add_tail(cfs_list_t *new, + cfs_list_t *head) +{ + __cfs_list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __cfs_list_del(cfs_list_t *prev, + cfs_list_t *next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * Remove an entry from the list it is currently in. + * \param entry the entry to remove + * Note: list_empty(entry) does not return true after this, the entry is in an + * undefined state. + */ +static inline void cfs_list_del(cfs_list_t *entry) +{ + __cfs_list_del(entry->prev, entry->next); +} + +/** + * Remove an entry from the list it is currently in and reinitialize it. + * \param entry the entry to remove. + */ +static inline void cfs_list_del_init(cfs_list_t *entry) +{ + __cfs_list_del(entry->prev, entry->next); + CFS_INIT_LIST_HEAD(entry); +} + +/** + * Remove an entry from the list it is currently in and insert it at the start + * of another list. + * \param list the entry to move + * \param head the list to move it to + */ +static inline void cfs_list_move(cfs_list_t *list, + cfs_list_t *head) +{ + __cfs_list_del(list->prev, list->next); + cfs_list_add(list, head); +} + +/** + * Remove an entry from the list it is currently in and insert it at the end of + * another list. 
+ * \param list the entry to move + * \param head the list to move it to + */ +static inline void cfs_list_move_tail(cfs_list_t *list, + cfs_list_t *head) +{ + __cfs_list_del(list->prev, list->next); + cfs_list_add_tail(list, head); +} + +/** + * Test whether a list is empty + * \param head the list to test. + */ +static inline int cfs_list_empty(cfs_list_t *head) +{ + return head->next == head; +} + +/** + * Test whether a list is empty and not being modified + * \param head the list to test + * + * Tests whether a list is empty _and_ checks that no other CPU might be + * in the process of modifying either member (next or prev) + * + * NOTE: using cfs_list_empty_careful() without synchronization + * can only be safe if the only activity that can happen + * to the list entry is cfs_list_del_init(). Eg. it cannot be used + * if another CPU could re-list_add() it. + */ +static inline int cfs_list_empty_careful(const cfs_list_t *head) +{ + cfs_list_t *next = head->next; + return (next == head) && (next == head->prev); +} + +static inline void __cfs_list_splice(cfs_list_t *list, + cfs_list_t *head) +{ + cfs_list_t *first = list->next; + cfs_list_t *last = list->prev; + cfs_list_t *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * Join two lists + * \param list the new list to add. + * \param head the place to add it in the first list. + * + * The contents of \a list are added at the start of \a head. \a list is in an + * undefined state on return. + */ +static inline void cfs_list_splice(cfs_list_t *list, + cfs_list_t *head) +{ + if (!cfs_list_empty(list)) + __cfs_list_splice(list, head); +} + +/** + * Join two lists and reinitialise the emptied list. + * \param list the new list to add. + * \param head the place to add it in the first list. + * + * The contents of \a list are added at the start of \a head. \a list is empty + * on return. 
+ */ +static inline void cfs_list_splice_init(cfs_list_t *list, + cfs_list_t *head) +{ + if (!cfs_list_empty(list)) { + __cfs_list_splice(list, head); + CFS_INIT_LIST_HEAD(list); + } +} + +/** + * Get the container of a list + * \param ptr the embedded list. + * \param type the type of the struct this is embedded in. + * \param member the member name of the list within the struct. + */ +#define cfs_list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) + +/** + * Iterate over a list + * \param pos the iterator + * \param head the list to iterate over + * + * Behaviour is undefined if \a pos is removed from the list in the body of the + * loop. + */ +#define cfs_list_for_each(pos, head) \ + for (pos = (head)->next, prefetch(pos->next); pos != (head); \ + pos = pos->next, prefetch(pos->next)) + +/** + * Iterate over a list safely + * \param pos the iterator + * \param n temporary storage + * \param head the list to iterate over + * + * This is safe to use if \a pos could be removed from the list in the body of + * the loop. + */ +#define cfs_list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * Iterate over a list continuing after existing point + * \param pos the type * to use as a loop counter + * \param head the list head + * \param member the name of the list_struct within the struct + */ +#define cfs_list_for_each_entry_continue(pos, head, member) \ + for (pos = cfs_list_entry(pos->member.next, typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = cfs_list_entry(pos->member.next, typeof(*pos), member)) + +/** + * \defgroup hlist Hash List + * Double linked lists with a single pointer list head. + * Mostly useful for hash tables where the two pointer list head is too + * wasteful. You lose the ability to access the tail in O(1). 
+ * @{ + */ + +typedef struct cfs_hlist_node { + struct cfs_hlist_node *next, **pprev; +} cfs_hlist_node_t; + +typedef struct cfs_hlist_head { + cfs_hlist_node_t *first; +} cfs_hlist_head_t; + +/* @} */ + +/* + * "NULL" might not be defined at this point + */ +#ifdef NULL +#define NULL_P NULL +#else +#define NULL_P ((void *)0) +#endif + +/** + * \addtogroup hlist + * @{ + */ + +#define CFS_HLIST_HEAD_INIT { NULL_P } +#define CFS_HLIST_HEAD(name) cfs_hlist_head_t name = { NULL_P } +#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P) +#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P) + +static inline int cfs_hlist_unhashed(const cfs_hlist_node_t *h) +{ + return !h->pprev; +} + +static inline int cfs_hlist_empty(const cfs_hlist_head_t *h) +{ + return !h->first; +} + +static inline void __cfs_hlist_del(cfs_hlist_node_t *n) +{ + cfs_hlist_node_t *next = n->next; + cfs_hlist_node_t **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void cfs_hlist_del(cfs_hlist_node_t *n) +{ + __cfs_hlist_del(n); +} + +static inline void cfs_hlist_del_init(cfs_hlist_node_t *n) +{ + if (n->pprev) { + __cfs_hlist_del(n); + CFS_INIT_HLIST_NODE(n); + } +} + +static inline void cfs_hlist_add_head(cfs_hlist_node_t *n, + cfs_hlist_head_t *h) +{ + cfs_hlist_node_t *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +/* next must be != NULL */ +static inline void cfs_hlist_add_before(cfs_hlist_node_t *n, + cfs_hlist_node_t *next) +{ + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void cfs_hlist_add_after(cfs_hlist_node_t *n, + cfs_hlist_node_t *next) +{ + next->next = n->next; + n->next = next; + next->pprev = &n->next; + + if(next->next) + next->next->pprev = &next->next; +} + +#define cfs_hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define cfs_hlist_for_each(pos, head) \ + 
for (pos = (head)->first; pos && (prefetch(pos->next), 1); \ + pos = pos->next) + +#define cfs_hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && (n = pos->next, 1); \ + pos = n) + +/** + * Iterate over an hlist of given type + * \param tpos the type * to use as a loop counter. + * \param pos the &struct hlist_node to use as a loop counter. + * \param head the head for your list. + * \param member the name of the hlist_node within the struct. + */ +#define cfs_hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * Iterate over an hlist continuing after existing point + * \param tpos the type * to use as a loop counter. + * \param pos the &struct hlist_node to use as a loop counter. + * \param member the name of the hlist_node within the struct. + */ +#define cfs_hlist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * Iterate over an hlist continuing from an existing point + * \param tpos the type * to use as a loop counter. + * \param pos the &struct hlist_node to use as a loop counter. + * \param member the name of the hlist_node within the struct. + */ +#define cfs_hlist_for_each_entry_from(tpos, pos, member) \ + for (; pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * Iterate over an hlist of given type safe against removal of list entry + * \param tpos the type * to use as a loop counter. + * \param pos the &struct hlist_node to use as a loop counter. + * \param n another &struct hlist_node to use as temporary storage + * \param head the head for your list. + * \param member the name of the hlist_node within the struct. 
+ */ +#define cfs_hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +/* @} */ + +#ifndef cfs_list_for_each_prev +/** + * Iterate over a list in reverse order + * \param pos the &struct list_head to use as a loop counter. + * \param head the head for your list. + */ +#define cfs_list_for_each_prev(pos, head) \ + for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ + pos = pos->prev, prefetch(pos->prev)) + +#endif /* cfs_list_for_each_prev */ + +#ifndef cfs_list_for_each_entry +/** + * Iterate over a list of given type + * \param pos the type * to use as a loop counter. + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define cfs_list_for_each_entry(pos, head, member) \ + for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \ + prefetch(pos->member.next); \ + &pos->member != (head); \ + pos = cfs_list_entry(pos->member.next, typeof(*pos), member), \ + prefetch(pos->member.next)) +#endif /* cfs_list_for_each_entry */ + +#ifndef cfs_list_for_each_entry_rcu +#define cfs_list_for_each_entry_rcu(pos, head, member) \ + list_for_each_entry(pos, head, member) +#endif + +#ifndef cfs_list_for_each_entry_rcu +#define cfs_list_for_each_entry_rcu(pos, head, member) \ + list_for_each_entry(pos, head, member) +#endif + +#ifndef cfs_list_for_each_entry_reverse +/** + * Iterate backwards over a list of given type. + * \param pos the type * to use as a loop counter. + * \param head the head for your list. + * \param member the name of the list_struct within the struct. 
+ */ +#define cfs_list_for_each_entry_reverse(pos, head, member) \ + for (pos = cfs_list_entry((head)->prev, typeof(*pos), member); \ + prefetch(pos->member.prev), &pos->member != (head); \ + pos = cfs_list_entry(pos->member.prev, typeof(*pos), member)) +#endif /* cfs_list_for_each_entry_reverse */ + +#ifndef cfs_list_for_each_entry_safe +/** + * Iterate over a list of given type safe against removal of list entry + * \param pos the type * to use as a loop counter. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + */ +#define cfs_list_for_each_entry_safe(pos, n, head, member) \ + for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \ + n = cfs_list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member)) + +#endif /* cfs_list_for_each_entry_safe */ + +#ifndef cfs_list_for_each_entry_safe_from +/** + * Iterate over a list continuing from an existing point + * \param pos the type * to use as a loop cursor. + * \param n another type * to use as temporary storage + * \param head the head for your list. + * \param member the name of the list_struct within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. 
+ */ +#define cfs_list_for_each_entry_safe_from(pos, n, head, member) \ + for (n = cfs_list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member)) +#endif /* cfs_list_for_each_entry_safe_from */ + +#define cfs_list_for_each_entry_typed(pos, head, type, member) \ + for (pos = cfs_list_entry((head)->next, type, member), \ + prefetch(pos->member.next); \ + &pos->member != (head); \ + pos = cfs_list_entry(pos->member.next, type, member), \ + prefetch(pos->member.next)) + +#define cfs_list_for_each_entry_reverse_typed(pos, head, type, member) \ + for (pos = cfs_list_entry((head)->prev, type, member); \ + prefetch(pos->member.prev), &pos->member != (head); \ + pos = cfs_list_entry(pos->member.prev, type, member)) + +#define cfs_list_for_each_entry_safe_typed(pos, n, head, type, member) \ + for (pos = cfs_list_entry((head)->next, type, member), \ + n = cfs_list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = cfs_list_entry(n->member.next, type, member)) + +#define cfs_list_for_each_entry_safe_from_typed(pos, n, head, type, member) \ + for (n = cfs_list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = cfs_list_entry(n->member.next, type, member)) + +#define cfs_hlist_for_each_entry_typed(tpos, pos, head, type, member) \ + for (pos = (head)->first; \ + pos && (prefetch(pos->next), 1) && \ + (tpos = cfs_hlist_entry(pos, type, member), 1); \ + pos = pos->next) + +#define cfs_hlist_for_each_entry_safe_typed(tpos, pos, n, head, type, member) \ + for (pos = (head)->first; \ + pos && (n = pos->next, 1) && \ + (tpos = cfs_hlist_entry(pos, type, member), 1); \ + pos = n) + +#endif /* __DAOS_LIST_H__ */ diff --git a/src/mdtest-main.c b/src/mdtest-main.c index 854456f..f54cf66 100644 --- a/src/mdtest-main.c +++ b/src/mdtest-main.c @@ -2,12 +2,12 @@ #include "aiori.h" int main(int argc, char **argv) { - aiori_initialize(); 
MPI_Init(&argc, &argv); + aiori_initialize(NULL); mdtest_run(argc, argv, MPI_COMM_WORLD, stdout); + aiori_finalize(NULL); MPI_Finalize(); - aiori_finalize(); return 0; } diff --git a/src/parse_options.c b/src/parse_options.c index 80b99a3..0f251ef 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -300,6 +300,34 @@ void DecodeDirective(char *line, IOR_param_t *params) params->numTasks = atoi(value); } else if (strcasecmp(option, "summaryalways") == 0) { params->summary_every_test = atoi(value); + } else if (strcasecmp(option, "daosgroup") == 0) { + strcpy(params->daosGroup, value); + } else if (strcasecmp(option, "daospool") == 0) { + strcpy(params->daosPool, value); + } else if (strcasecmp(option, "daospoolsvc") == 0) { + strcpy(params->daosPoolSvc, value); + } else if (strcasecmp(option, "daosrecordsize") == 0) { + params->daosRecordSize = string_to_bytes(value); + } else if (strcasecmp(option, "daosstripesize") == 0) { + printf("HERE %s\n", value); + params->daosStripeSize = string_to_bytes(value); + printf("HERE %d\n", params->daosStripeSize); + } else if (strcasecmp(option, "daosstripecount") == 0) { + params->daosStripeCount = atoi(value); + } else if (strcasecmp(option, "daosstripemax") == 0) { + params->daosStripeMax = string_to_bytes(value); + } else if (strcasecmp(option, "daosaios") == 0) { + params->daosAios = atoi(value); + } else if (strcasecmp(option, "daosepoch") == 0) { + params->daosEpoch = atoi(value); + } else if (strcasecmp(option, "daoswait") == 0) { + params->daosWait = atoi(value); + } else if (strcasecmp(option, "daoswriteonly") == 0) { + params->daosWriteOnly = atoi(value); + } else if (strcasecmp(option, "daoskill") == 0) { + params->daosKill = atoi(value); + } else if (strcasecmp(option, "daosobjectclass") == 0) { + strcpy(params->daosObjectClass, value); } else { if (rank == 0) fprintf(out_logfile, "Unrecognized parameter \"%s\"\n", @@ -311,11 +339,13 @@ void DecodeDirective(char *line, IOR_param_t *params) /* * Parse a 
single line, which may contain multiple comma-seperated directives */ -void ParseLine(char *line, IOR_param_t * test) +void ParseLine(const char *line, IOR_param_t * test) { char *start, *end; - start = line; + start = strdup(line); + if (start == NULL) + ERR("failed to duplicate line"); do { end = strchr(start, ','); if (end != NULL) @@ -422,7 +452,7 @@ IOR_test_t *ReadConfigScript(char *scriptName) static IOR_param_t * parameters; static void decodeDirectiveWrapper(char *line){ - DecodeDirective(line, parameters); + ParseLine(line, parameters); } /* From 98ae5df16df69ddb7aee72cf634e0b662d073db0 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 28 Aug 2018 19:45:18 +0000 Subject: [PATCH 06/66] remove printf added by accident. Signed-off-by: Mohamad Chaarawi --- src/parse_options.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parse_options.c b/src/parse_options.c index 0f251ef..5769d74 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -309,9 +309,7 @@ void DecodeDirective(char *line, IOR_param_t *params) } else if (strcasecmp(option, "daosrecordsize") == 0) { params->daosRecordSize = string_to_bytes(value); } else if (strcasecmp(option, "daosstripesize") == 0) { - printf("HERE %s\n", value); params->daosStripeSize = string_to_bytes(value); - printf("HERE %d\n", params->daosStripeSize); } else if (strcasecmp(option, "daosstripecount") == 0) { params->daosStripeCount = atoi(value); } else if (strcasecmp(option, "daosstripemax") == 0) { From 14d67c19d9b4abb9c382f8b8f9a840a223ead1a8 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 29 Aug 2018 22:25:48 +0000 Subject: [PATCH 07/66] fix DAOS plugin options passing. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 45 ++++++++++++++++++++++----------------------- src/ior.c | 2 +- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 73d7837..3aadba9 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -98,21 +98,23 @@ static void DAOS_Delete(char *, IOR_param_t *); static char* DAOS_GetVersion(); static void DAOS_Fsync(void *, IOR_param_t *); static IOR_offset_t DAOS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static option_help * DAOS_options(); /************************** D E C L A R A T I O N S ***************************/ ior_aiori_t daos_aiori = { - .name = "DAOS", - .create = DAOS_Create, - .open = DAOS_Open, - .xfer = DAOS_Xfer, - .close = DAOS_Close, - .delete = DAOS_Delete, - .get_version = DAOS_GetVersion, - .fsync = DAOS_Fsync, - .get_file_size = DAOS_GetFileSize, - .initialize = DAOS_Init, - .finalize = DAOS_Fini, + .name = "DAOS", + .create = DAOS_Create, + .open = DAOS_Open, + .xfer = DAOS_Xfer, + .close = DAOS_Close, + .delete = DAOS_Delete, + .get_version = DAOS_GetVersion, + .fsync = DAOS_Fsync, + .get_file_size = DAOS_GetFileSize, + .initialize = DAOS_Init, + .finalize = DAOS_Fini, + .get_options = DAOS_options, }; enum handleType { @@ -536,13 +538,6 @@ static void ObjectClassParse(const char *string) GERR("Invalid 'daosObjectClass' argument: '%s'", string); } -static const char *GetGroup(IOR_param_t *param) -{ - if (strlen(o.daosGroup) == 0) - return NULL; - return o.daosGroup; -} - static void ParseService(IOR_param_t *param, int max, d_rank_list_t *ranks) { char *s; @@ -563,11 +558,15 @@ static void ParseService(IOR_param_t *param, int max, d_rank_list_t *ranks) free(s); } +static option_help * DAOS_options(){ + return options; +} + static void DAOS_Init(IOR_param_t *param) { int rc; - if (strlen(o.daosObjectClass) != 0) + if (o.daosObjectClass) ObjectClassParse(o.daosObjectClass); if (param->filePerProc) @@ -601,9 +600,9 @@ static void 
DAOS_Init(IOR_param_t *param) d_rank_t rank[13]; d_rank_list_t ranks; - if (strlen(o.daosPool) == 0) + if (o.daosPool == NULL) GERR("'daosPool' must be specified"); - if (strlen(o.daosPoolSvc) == 0) + if (o.daosPoolSvc == NULL) GERR("'daosPoolSvc' must be specified"); INFO(VERBOSE_2, param, "Connecting to pool %s %s", @@ -614,7 +613,7 @@ static void DAOS_Init(IOR_param_t *param) ranks.rl_ranks = rank; ParseService(param, sizeof(rank) / sizeof(rank[0]), &ranks); - rc = daos_pool_connect(uuid, GetGroup(param), &ranks, + rc = daos_pool_connect(uuid, o.daosGroup, &ranks, DAOS_PC_RW, &pool, &poolInfo, NULL /* ev */); DCHECK(rc, "Failed to connect to pool %s", o.daosPool); @@ -729,7 +728,7 @@ kill_daos_server(IOR_param_t *param) rank, info.pi_ndisabled, info.pi_ntargets); fflush(stdout); - rc = daos_mgmt_svc_rip(GetGroup(param), rank, true, NULL); + rc = daos_mgmt_svc_rip(o.daosGroup, rank, true, NULL); DCHECK(rc, "Error in killing server\n"); targets.rl_nr = 1; diff --git a/src/ior.c b/src/ior.c index 7c7dffe..781a134 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1187,7 +1187,7 @@ static void TestIoSys(IOR_test_t *test) backend = aiori_select(params->api); if (backend->initialize) - backend->initialize(NULL); + backend->initialize(params); /* show test setup */ if (rank == 0 && verbose >= VERBOSE_0) From 2668363fb14d844c5b36b5531ef106220f150928 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 30 Aug 2018 14:47:17 +0000 Subject: [PATCH 08/66] - update README for DAOS drivers. 
- update flag options in DAOS API - make svcl option in DFS API the same as DAOS API Signed-off-by: Mohamad Chaarawi --- README_DAOS | 104 +++++++++++++++++++++++++++++++++++++---------- src/aiori-DAOS.c | 4 +- src/aiori-DFS.c | 2 +- 3 files changed, 85 insertions(+), 25 deletions(-) diff --git a/README_DAOS b/README_DAOS index ba4fc3e..409923d 100644 --- a/README_DAOS +++ b/README_DAOS @@ -1,25 +1,55 @@ -Building with DAOS API +Building ---------------------- -At step 1 above, one must specify "--with-daos". If the DAOS -headers and libraries are not installed at respective system -default locations, then one may also needs to set CPPFLAGS and -LDFLAGS accordingly. +The DAOS library must be installed on the system. + +./bootstrap +./configure --prefix=iorInstallDir --with-daos=DIR + +One must specify "--with-daos=/path/to/daos/install". When that is specified +the DAOS and DFS drivers will be built. + +The DAOS driver uses the DAOS API to create a container (file) and update/fetch +an object to the container using the daos object API. The DAOS driver works with +IOR only (no mdtest support yet). + +The DFS (DAOS File System) driver creates an encapsulated namespace and emulates +the POSIX driver using the DFS API directly on top of DAOS. The DFS driver works +with both IOR and mdtest. Running with DAOS API --------------------- -One must specify an existing pool using "-O -daospool=". IOR must be launched in a way that -attaches the IOR process group to the DAOS server process group. +Driver specific options are specified at the end after "--". For example: -One must also specify a container UUID using "-o -". If the "-E" option is given, then this UUID +ior -a DAOS [ior_options] -- [daos_options] + +In the IOR options, the file name should be specified as a container uuid using +"-o ". If the "-E" option is given, then this UUID shall denote an existing container created by a "matching" IOR run. Otherwise, IOR will create a new container with this UUID.
In the latter case, one may use uuidgen(1) to generate the UUID of the new container. +The DAOS options include: + +Required Options: +-p : pool uuid to connect to (has to be created beforehand) +-v : pool svcl list (: separated) + +Optional Options: +-g : group name of servers with the pool +-r : object record size for IO +-s +-c +-m +-a : number of concurrent async IOs +-w : Flag to indicate no commit, just update +-e +-t : wait for specific epoch before read +-k : flag to kill a rank during IO +-o : specific object class + When benchmarking write performance, one likely do not want "-W", which causes the write phase to do one additional memory copy for every I/O. This is due to IOR's assumption that when a @@ -32,17 +62,47 @@ all combinations of options are supported. Examples that should work include: - - "ior -a DAOS -w -W -o -O - daospool=,daospoolsvc=" writes into a new container - and verifies the data, using default daosRecordSize, transferSize, - daosStripeSize, blockSize, daosAios, etc. + - "ior -a DAOS -w -W -o -- -p -v " + writes into a new container and verifies the data, using default + daosRecordSize, transferSize, daosStripeSize, blockSize, daosAios, etc. - - "ior -a DAOS -w -W -r -R -o -b 1g -t 4m -C -O - daospool=,daospoolsvc=,daosrecordsize=1m, - daosstripesize=4m, daosstripecount=256,daosaios=8" does all IOR tests and - shifts ranks during checkWrite and checkRead. + - "ior -a DAOS -w -W -r -R -o -b 1g -t 4m -C -- + -p -v -r 1m -s 4m -c 256 -a 8" + does all IOR tests and shifts ranks during checkWrite and checkRead. - - "ior -a DAOS -w -r -o -b 8g -t 1m -C -O - daospool=,daospoolsvc=,daosrecordsize=1m,daosstripesize=4m, - daosstripecount=256,daosaios=8" may be a base to be tuned for performance - benchmarking. + - "ior -a DAOS -w -r -o -b 8g -t 1m -C -- + -p -v -r 1m -s 4m -c 256 -a 8" + may be a base to be tuned for performance benchmarking. 
+ + +Running with DFS API +--------------------- + +Driver specific options are specified at the end after "--". For example: + +ior -a DFS [ior_options] -- [dfs_options] +mdtest -a DFS [mdtest_options] -- [dfs_options] + +Required Options: +-p : pool uuid to connect to (has to be created beforehand) +-v : pool svcl list (: separated) +-c : container uuid that will hold the encapsulated namespace + +Optional Options: +-g : group name of servers with the pool + +In the IOR options, the file name should be specified on the root dir directly +since ior does not create directories and the DFS container representing the +encapsulated namespace is not the same as the system namespace the user is +executing from. + +Examples that should work include: + - "ior -a DFS -w -W -o /test1 -- -p -v -c " + - "ior -a DFS -w -W -r -R -o /test2 -b 1g -t 4m -C -- -p -v -c " + - "ior -a DFS -w -r -o /test3 -b 8g -t 1m -C -- -p -v -c " + +Running mdtest, the user needs to specify a directory with -d where the test +tree will be created. 
Some examples: + - "mdtest -a DFS -n 100 -F -D -d /bla -- -p -v -c " + - "mdtest -a DFS -n 1000 -F -C -d /bla -- -p -v -c " + - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d /bla -- -p -v -c " diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 3aadba9..e3830d9 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -77,10 +77,10 @@ static option_help options [] = { {'c', "daosStripeCount", "Stripe Count", OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeCount}, {'m', "daosStripeMax", "Max Stripe",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeMax}, {'a', "daosAios", "Concurrent Async IOs",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosAios}, - {'w', "daosWriteOnly", "Write Only, no commit",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosWriteOnly}, + {'w', "daosWriteOnly", "Write Only, no commit",OPTION_FLAG, 'd', &o.daosWriteOnly}, {'e', "daosEpoch", "Epoch Number to Access",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosEpoch}, {'t', "daosWait", "Epoch to wait for before read",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosWait}, - {'k', "daosKill", "Kill target while running",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosKill}, + {'k', "daosKill", "Kill target while running",OPTION_FLAG, 'd', &o.daosKill}, {'o', "daosObjectClass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosObjectClass}, LAST_OPTION }; diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index f33ed94..aead758 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -63,7 +63,7 @@ static struct dfs_options o = { static option_help options [] = { {'p', "pool", "DAOS pool uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.pool}, - {'s', "svcl", "DAOS pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', & o.svcl}, + {'v', "svcl", "DAOS pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', & o.svcl}, {'g', "group", "DAOS server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group}, {'c', "cont", "DFS container uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.cont}, LAST_OPTION From 2f713e947d4627c2dc1716b2e262f1968c6b24fb Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 12 Sep 2018 
22:53:42 +0000 Subject: [PATCH 09/66] fix bug in driver name check for DAOS. Signed-off-by: Mohamad Chaarawi --- src/ior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index 781a134..7158210 100755 --- a/src/ior.c +++ b/src/ior.c @@ -921,7 +921,7 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) } } else { if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0 || - strcasecmp(test->api, "DAOS"))) { + strcasecmp(test->api, "DAOS") == 0)) { backend->delete(testFileName, test); } } From 0a7b1e38ea64d70c094305df6d64d5f604010083 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 13 Sep 2018 13:30:10 +0000 Subject: [PATCH 10/66] fix verbose output in DAOS & DFS drivers Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 4 ++-- src/aiori-DFS.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index e3830d9..657bc1d 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -171,7 +171,7 @@ do { \ #define INFO(level, param, format, ...) 
\ do { \ - if (param->verbose >= level) \ + if (verbose >= level) \ printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ } while (0) @@ -260,7 +260,7 @@ static void ContainerOpen(char *testFileName, IOR_param_t *param, testFileName); } - INFO(VERBOSE_2, param, "Openning container %s", testFileName); + INFO(VERBOSE_2, param, "Opening container %s", testFileName); if (param->open == WRITE) dFlags = DAOS_COO_RW; diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index aead758..46edeb6 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -243,7 +243,7 @@ DFS_Init(IOR_param_t *param) { if (svcl == NULL) ERR("Failed to allocate svcl"); - if (verbose >= 3) { + if (verbose >= VERBOSE_1) { printf("Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl); printf("DFS Container namespace uuid = %s\n", o.cont); } @@ -259,7 +259,7 @@ DFS_Init(IOR_param_t *param) { rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL); /* If NOEXIST we create it */ if (rc == -DER_NONEXIST) { - if (verbose >= 3) + if (verbose >= VERBOSE_1) printf("Creating DFS Container ...\n"); rc = daos_cont_create(poh, co_uuid, NULL); if (rc == 0) { From ab71b4fc2abdcd6c4575987be744b848a5410695 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 13 Sep 2018 13:36:36 +0000 Subject: [PATCH 11/66] update DAOS README Signed-off-by: Mohamad Chaarawi --- README_DAOS | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/README_DAOS b/README_DAOS index 409923d..fb3c68e 100644 --- a/README_DAOS +++ b/README_DAOS @@ -25,11 +25,10 @@ Driver specific options are specified at the end after "--". For example: ior -a DAOS [ior_options] -- [daos_options] In the IOR options, the file name should be specified as a container uuid using -"-o ". If the "-E" option is given, then this UUID -shall denote an existing container created by a "matching" IOR -run. Otherwise, IOR will create a new container with this UUID. 
-In the latter case, one may use uuidgen(1) to generate the UUID -of the new container. +"-o ". If the "-E" option is given, then this UUID shall denote +an existing container created by a "matching" IOR run. Otherwise, IOR will +create a new container with this UUID. In the latter case, one may use +uuidgen(1) to generate the UUID of the new container. The DAOS options include: @@ -50,15 +49,18 @@ Optional Options: -k : flag to kill a rank during IO -o : specific object class -When benchmarking write performance, one likely do not want -"-W", which causes the write phase to do one additional memory -copy for every I/O. This is due to IOR's assumption that when a -DAOS_Xfer() call returns the buffer may be released. Therefore, -random data is written when "-W" is absent, while data is copied -from IOR buffers when "-W" is present. +When benchmarking write performance, one likely does not want "-W", which causes +the write phase to do one additional memory copy for every I/O. This is due to +IOR's assumption that when a DAOS_Xfer() call returns the buffer may be +released. Therefore, random data is written when "-W" is absent, while data is +copied from IOR buffers when "-W" is present. -See doc/USER_GUIDE for all options and directives. Note that not -all combinations of options are supported. +See doc/USER_GUIDE for all options and directives. Note that not all +combinations of options are supported. For example specifying an epoch to access +and running ior with multiple iterations would cause all iterations other than +first one to fail because the epoch will be committed in the first iteration. In +that case, the epoch should not be specified and the DAOS driver would choose +the epoch to access automatically on each iteration. 
Examples that should work include: From 72c487013a096b98483ad91459b1432d699c10b8 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 4 Dec 2018 22:59:15 +0000 Subject: [PATCH 12/66] rebase with latest DAOS Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 26 ++++++++++++++++---------- src/aiori-DFS.c | 10 +++++----- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 657bc1d..e498b8f 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -518,8 +518,14 @@ static void ObjectClassParse(const char *string) objectClass = DAOS_OC_SMALL_RW; else if (strcasecmp(string, "large") == 0) objectClass = DAOS_OC_LARGE_RW; - else if (strcasecmp(string, "echo") == 0) - objectClass = DAOS_OC_ECHO_RW; + else if (strcasecmp(string, "echo_tiny") == 0) + objectClass = DAOS_OC_ECHO_TINY_RW; + else if (strcasecmp(string, "echo_R2S") == 0) + objectClass = DAOS_OC_ECHO_R2S_RW; + else if (strcasecmp(string, "echo_R3S") == 0) + objectClass = DAOS_OC_ECHO_R3S_RW; + else if (strcasecmp(string, "echo_R4S") == 0) + objectClass = DAOS_OC_ECHO_R4S_RW; else if (strcasecmp(string, "R2") == 0) objectClass = DAOS_OC_R2_RW; else if (strcasecmp(string, "R2S") == 0) @@ -597,7 +603,7 @@ static void DAOS_Init(IOR_param_t *param) if (rank == 0) { uuid_t uuid; - d_rank_t rank[13]; + d_rank_t d_rank[13]; d_rank_list_t ranks; if (o.daosPool == NULL) @@ -610,8 +616,8 @@ static void DAOS_Init(IOR_param_t *param) rc = uuid_parse(o.daosPool, uuid); DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool); - ranks.rl_ranks = rank; - ParseService(param, sizeof(rank) / sizeof(rank[0]), &ranks); + ranks.rl_ranks = d_rank; + ParseService(param, sizeof(d_rank) / sizeof(d_rank[0]), &ranks); rc = daos_pool_connect(uuid, o.daosGroup, &ranks, DAOS_PC_RW, &pool, &poolInfo, @@ -706,7 +712,7 @@ static void kill_daos_server(IOR_param_t *param) { daos_pool_info_t info; - d_rank_t rank, svc_ranks[13]; + d_rank_t d_rank, svc_ranks[13]; d_rank_list_t svc, targets; 
uuid_t uuid; char *s; @@ -718,21 +724,21 @@ kill_daos_server(IOR_param_t *param) if (info.pi_ntargets - info.pi_ndisabled <= 1) return; /* choose the last alive one */ - rank = info.pi_ntargets - 1 - info.pi_ndisabled; + d_rank = info.pi_ntargets - 1 - info.pi_ndisabled; rc = uuid_parse(o.daosPool, uuid); DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool); if (rc != 0) printf("Killing tgt rank: %d (total of %d of %d already disabled)\n", - rank, info.pi_ndisabled, info.pi_ntargets); + d_rank, info.pi_ndisabled, info.pi_ntargets); fflush(stdout); - rc = daos_mgmt_svc_rip(o.daosGroup, rank, true, NULL); + rc = daos_mgmt_svc_rip(o.daosGroup, d_rank, true, NULL); DCHECK(rc, "Error in killing server\n"); targets.rl_nr = 1; - targets.rl_ranks = &rank; + targets.rl_ranks = &d_rank; svc.rl_ranks = svc_ranks; ParseService(param, sizeof(svc_ranks)/ sizeof(svc_ranks[0]), &svc); diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 46edeb6..06d452b 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -39,8 +39,8 @@ #include #include "ior.h" -#include "aiori.h" #include "iordef.h" +#include "aiori.h" #include "utilities.h" dfs_t *dfs; @@ -491,7 +491,7 @@ static char* DFS_GetVersion() * Use DFS stat() to return aggregate file size. 
*/ static IOR_offset_t -DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) +DFS_GetFileSize(IOR_param_t * test, MPI_Comm comm, char *testFileName) { dfs_obj_t *obj; daos_size_t fsize, tmpMin, tmpMax, tmpSum; @@ -511,15 +511,15 @@ DFS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) if (test->filePerProc == TRUE) { MPI_CHECK(MPI_Allreduce(&fsize, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, testComm), + MPI_LONG_LONG_INT, MPI_SUM, comm), "cannot total data moved"); fsize = tmpSum; } else { MPI_CHECK(MPI_Allreduce(&fsize, &tmpMin, 1, - MPI_LONG_LONG_INT, MPI_MIN, testComm), + MPI_LONG_LONG_INT, MPI_MIN, comm), "cannot total data moved"); MPI_CHECK(MPI_Allreduce(&fsize, &tmpMax, 1, - MPI_LONG_LONG_INT, MPI_MAX, testComm), + MPI_LONG_LONG_INT, MPI_MAX, comm), "cannot total data moved"); if (tmpMin != tmpMax) { if (rank == 0) { From d2a0023765bcffa2b3b2afe605f436dc5ce7f937 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 7 Jan 2019 18:28:59 +0000 Subject: [PATCH 13/66] remove epoch usage and rebase with latest DAOS API that switches to transactions. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 148 +++++------------------------------------------ src/aiori-DFS.c | 2 +- 2 files changed, 16 insertions(+), 134 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index e498b8f..6846f1c 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -46,8 +46,6 @@ struct daos_options{ uint64_t daosStripeMax; /* max length of a stripe */ int daosAios; /* max number of concurrent async I/Os */ int daosWriteOnly; /* write only, no flush and commit */ - uint64_t daosEpoch; /* epoch to access */ - uint64_t daosWait; /* epoch to wait for before reading */ int daosKill; /* kill a target while running IOR */ char *daosObjectClass; /* object class */ }; @@ -62,8 +60,6 @@ static struct daos_options o = { .daosStripeMax = 0, .daosAios = 1, .daosWriteOnly = 0, - .daosEpoch = 0, - .daosWait = 0, .daosKill = 0, .daosObjectClass = NULL, }; @@ -78,8 +74,6 @@ static option_help options [] = { {'m', "daosStripeMax", "Max Stripe",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeMax}, {'a', "daosAios", "Concurrent Async IOs",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosAios}, {'w', "daosWriteOnly", "Write Only, no commit",OPTION_FLAG, 'd', &o.daosWriteOnly}, - {'e', "daosEpoch", "Epoch Number to Access",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosEpoch}, - {'t', "daosWait", "Epoch to wait for before read",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosWait}, {'k', "daosKill", "Kill target while running",OPTION_FLAG, 'd', &o.daosKill}, {'o', "daosObjectClass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosObjectClass}, LAST_OPTION @@ -126,7 +120,6 @@ struct fileDescriptor { daos_handle_t container; daos_cont_info_t containerInfo; daos_handle_t object; - daos_epoch_t epoch; }; struct aio { @@ -136,7 +129,6 @@ struct aio { daos_recx_t a_recx; unsigned char a_csumBuf[32]; daos_csum_buf_t a_csum; - daos_epoch_range_t a_epochRange; daos_iod_t a_iod; daos_iov_t a_iov; daos_sg_list_t a_sgl; @@ -270,56 +262,6 @@ static void ContainerOpen(char 
*testFileName, IOR_param_t *param, rc = daos_cont_open(pool, uuid, dFlags, container, info, NULL /* ev */); DCHECK(rc, "Failed to open container %s", testFileName); - - INFO(VERBOSE_2, param, "Container epoch state:"); - INFO(VERBOSE_2, param, " HCE: %lu", - info->ci_epoch_state.es_hce); - INFO(VERBOSE_2, param, " LRE: %lu", - info->ci_epoch_state.es_lre); - INFO(VERBOSE_2, param, " LHE: %lu (%lx)", - info->ci_epoch_state.es_lhe, info->ci_epoch_state.es_lhe); - INFO(VERBOSE_2, param, " GHCE: %lu", - info->ci_epoch_state.es_ghce); - INFO(VERBOSE_2, param, " GLRE: %lu", - info->ci_epoch_state.es_glre); - INFO(VERBOSE_2, param, " GHPCE: %lu", - info->ci_epoch_state.es_ghpce); - -#if 0 - if (param->open != WRITE && o.daosWait != 0) { - daos_epoch_t e; - - e = o.daosWait; - - INFO(VERBOSE_2, param, "Waiting for epoch %lu", e); - - rc = daos_epoch_wait(*container, &e, - NULL /* ignore HLE */, - NULL /* synchronous */); - DCHECK(rc, "Failed to wait for epoch %lu", - o.daosWait); - } - - if (param->open == WRITE && - param->useExistingTestFile == FALSE) { - daos_oclass_attr_t attr = { - .ca_schema = DAOS_OS_STRIPED, - .ca_resil_degree = 0, - .ca_resil = DAOS_RES_REPL, - .ca_grp_nr = 4, - .u.repl = { - .r_method = 0, - .r_num = 2 - } - }; - - INFO(VERBOSE_2, param, "Registering object class"); - - rc = daos_oclass_register(container, objectClass, &attr, - NULL /* ev */); - DCHECK(rc, "Failed to register object class"); - } -#endif } HandleDistribute(container, CONTAINER_HANDLE, param); @@ -348,7 +290,7 @@ static void ContainerClose(daos_handle_t container, IOR_param_t *param) } static void ObjectOpen(daos_handle_t container, daos_handle_t *object, - daos_epoch_t epoch, IOR_param_t *param) + IOR_param_t *param) { daos_obj_id_t oid; unsigned int flags; @@ -356,29 +298,14 @@ static void ObjectOpen(daos_handle_t container, daos_handle_t *object, oid.hi = 0; oid.lo = 1; - daos_obj_id_generate(&oid, 0, objectClass); - -#if 0 - /** declaring object not implemented commenting it */ 
- if (rank == 0 && param->open == WRITE && - param->useExistingTestFile == FALSE) { - INFO(VERBOSE_2, param, "Declaring object"); - - rc = daos_obj_declare(container, oid, epoch, NULL /* oa */, - NULL /* ev */); - DCHECK(rc, "Failed to declare object"); - } -#endif - /* An MPI_Bcast() call would probably be more efficient. */ - MPI_CHECK(MPI_Barrier(param->testComm), - "Failed to synchronize processes"); + daos_obj_generate_id(&oid, 0, objectClass); if (param->open == WRITE) flags = DAOS_OO_RW; else flags = DAOS_OO_RO; - rc = daos_obj_open(container, oid, epoch, flags, object, NULL /* ev */); + rc = daos_obj_open(container, oid, flags, object, NULL /* ev */); DCHECK(rc, "Failed to open object"); } @@ -416,8 +343,6 @@ static void AIOInit(IOR_param_t *param) aio->a_csum.cs_buf_len = sizeof aio->a_csumBuf; aio->a_csum.cs_len = aio->a_csum.cs_buf_len; - aio->a_epochRange.epr_hi = DAOS_EPOCH_MAX; - aio->a_iod.iod_name.iov_buf = "data"; aio->a_iod.iod_name.iov_buf_len = strlen(aio->a_iod.iod_name.iov_buf) + 1; @@ -426,7 +351,7 @@ static void AIOInit(IOR_param_t *param) aio->a_iod.iod_type = DAOS_IOD_ARRAY; aio->a_iod.iod_recxs = &aio->a_recx; aio->a_iod.iod_csums = &aio->a_csum; - aio->a_iod.iod_eprs = &aio->a_epochRange; + aio->a_iod.iod_eprs = NULL; aio->a_iod.iod_size = param->transferSize; aio->a_iov.iov_buf = buffers + param->transferSize * i; @@ -657,52 +582,13 @@ static void *DAOS_Create(char *testFileName, IOR_param_t *param) static void *DAOS_Open(char *testFileName, IOR_param_t *param) { struct fileDescriptor *fd; - daos_epoch_t ghce; fd = malloc(sizeof *fd); if (fd == NULL) ERR("Failed to allocate fd"); ContainerOpen(testFileName, param, &fd->container, &fd->containerInfo); - - ghce = fd->containerInfo.ci_epoch_state.es_ghce; - if (param->open == WRITE) { - if (o.daosEpoch == 0) - fd->epoch = ghce + 1; - else if (o.daosEpoch <= ghce) - GERR("Can't modify committed epoch\n"); - else - fd->epoch = o.daosEpoch; - } else { - if (o.daosEpoch == 0) { - if 
(o.daosWait == 0) - fd->epoch = ghce; - else - fd->epoch = o.daosWait; - } else if (o.daosEpoch > ghce) { - GERR("Can't read uncommitted epoch\n"); - } else { - fd->epoch = o.daosEpoch; - } - } - - if (rank == 0) - INFO(VERBOSE_2, param, "Accessing epoch %lu", fd->epoch); - - if (rank == 0 && param->open == WRITE) { - daos_epoch_t e = fd->epoch; - int rc; - - INFO(VERBOSE_2, param, "Holding epoch %lu", fd->epoch); - - rc = daos_epoch_hold(fd->container, &fd->epoch, - NULL /* state */, NULL /* ev */); - DCHECK(rc, "Failed to hold epoch"); - assert(fd->epoch == e); - } - - ObjectOpen(fd->container, &fd->object, fd->epoch, param); - + ObjectOpen(fd->container, &fd->object, param); AIOInit(param); return fd; @@ -822,7 +708,6 @@ static IOR_offset_t DAOS_Xfer(int access, void *file, IOR_size_t *buffer, if (o.daosStripeMax != 0) stripeOffset %= o.daosStripeMax; aio->a_recx.rx_idx = stripeOffset / o.daosRecordSize; - aio->a_epochRange.epr_lo = fd->epoch; /* * If the data written will be checked later, we have to copy in valid @@ -843,12 +728,12 @@ static IOR_offset_t DAOS_Xfer(int access, void *file, IOR_size_t *buffer, (unsigned long long) aio->a_sgl.sg_iovs->iov_buf_len); if (access == WRITE) { - rc = daos_obj_update(fd->object, fd->epoch, &aio->a_dkey, + rc = daos_obj_update(fd->object, DAOS_TX_NONE, &aio->a_dkey, 1 /* nr */, &aio->a_iod, &aio->a_sgl, &aio->a_event); DCHECK(rc, "Failed to start update operation"); } else { - rc = daos_obj_fetch(fd->object, fd->epoch, &aio->a_dkey, + rc = daos_obj_fetch(fd->object, DAOS_TX_NONE, &aio->a_dkey, 1 /* nr */, &aio->a_iod, &aio->a_sgl, NULL /* maps */, &aio->a_event); DCHECK(rc, "Failed to start fetch operation"); @@ -884,19 +769,16 @@ static void DAOS_Close(void *file, IOR_param_t *param) MPI_CHECK(MPI_Barrier(param->testComm), "Failed to synchronize processes"); + /* MSC - temp hack to commit since close will rollback */ if (rank == 0) { - INFO(VERBOSE_2, param, "Flushing epoch %lu", fd->epoch); + daos_handle_t th; - rc = 
daos_epoch_flush(fd->container, fd->epoch, - NULL /* state */, NULL /* ev */); - DCHECK(rc, "Failed to flush epoch"); - - INFO(VERBOSE_2, param, "Committing epoch %lu", - fd->epoch); - - rc = daos_epoch_commit(fd->container, fd->epoch, - NULL /* state */, NULL /* ev */); - DCHECK(rc, "Failed to commit object write"); + rc = daos_tx_open(fd->container, &th, NULL); + DCHECK(rc, "Failed sync"); + rc = daos_tx_commit(th, NULL); + DCHECK(rc, "Failed sync"); + rc = daos_tx_close(th, NULL); + DCHECK(rc, "Failed sync"); } } diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 06d452b..ea24bc8 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -279,7 +279,7 @@ DFS_Finalize(IOR_param_t *param) { int rc; - rc = dfs_umount(dfs, true); + rc = dfs_umount(dfs); DCHECK(rc, "Failed to umount DFS namespace"); rc = daos_cont_close(coh, NULL); From 334bf737974e5dffcfd92d647f9e6967358a4449 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 17 Jan 2019 20:58:58 +0000 Subject: [PATCH 14/66] remove transaction commit and options to do that. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 6846f1c..d790b9b 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -45,7 +45,6 @@ struct daos_options{ uint64_t daosStripeCount; uint64_t daosStripeMax; /* max length of a stripe */ int daosAios; /* max number of concurrent async I/Os */ - int daosWriteOnly; /* write only, no flush and commit */ int daosKill; /* kill a target while running IOR */ char *daosObjectClass; /* object class */ }; @@ -59,7 +58,6 @@ static struct daos_options o = { .daosStripeCount = -1, .daosStripeMax = 0, .daosAios = 1, - .daosWriteOnly = 0, .daosKill = 0, .daosObjectClass = NULL, }; @@ -73,7 +71,6 @@ static option_help options [] = { {'c', "daosStripeCount", "Stripe Count", OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeCount}, {'m', "daosStripeMax", "Max Stripe",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeMax}, {'a', "daosAios", "Concurrent Async IOs",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosAios}, - {'w', "daosWriteOnly", "Write Only, no commit",OPTION_FLAG, 'd', &o.daosWriteOnly}, {'k', "daosKill", "Kill target while running",OPTION_FLAG, 'd', &o.daosKill}, {'o', "daosObjectClass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosObjectClass}, LAST_OPTION @@ -521,7 +518,8 @@ static void DAOS_Init(IOR_param_t *param) INFO(VERBOSE_0, param, "WARNING: USING daosStripeMax CAUSES READS TO RETURN INVALID DATA"); rc = daos_init(); - DCHECK(rc, "Failed to initialize daos"); + if (rc != -DER_ALREADY) + DCHECK(rc, "Failed to initialize daos"); rc = daos_eq_create(&eventQueue); DCHECK(rc, "Failed to create event queue"); @@ -764,24 +762,6 @@ static void DAOS_Close(void *file, IOR_param_t *param) ObjectClose(fd->object); - if (param->open == WRITE && !o.daosWriteOnly) { - /* Wait for everybody for to complete the writes. 
*/ - MPI_CHECK(MPI_Barrier(param->testComm), - "Failed to synchronize processes"); - - /* MSC - temp hack to commit since close will rollback */ - if (rank == 0) { - daos_handle_t th; - - rc = daos_tx_open(fd->container, &th, NULL); - DCHECK(rc, "Failed sync"); - rc = daos_tx_commit(th, NULL); - DCHECK(rc, "Failed sync"); - rc = daos_tx_close(th, NULL); - DCHECK(rc, "Failed sync"); - } - } - ContainerClose(fd->container, param); free(fd); From 29090df2833f5653748b902a3dc4edeb916eda90 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 8 Feb 2019 18:00:33 +0000 Subject: [PATCH 15/66] update DAOS and DFS drivers with new DAOS API changes. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 18 ++++++++++-------- src/aiori-DFS.c | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 4b35881..9035600 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -244,7 +244,7 @@ static void ContainerOpen(char *testFileName, IOR_param_t *param, param->useExistingTestFile == FALSE) { INFO(VERBOSE_2, "Creating container %s", testFileName); - rc = daos_cont_create(pool, uuid, NULL /* ev */); + rc = daos_cont_create(pool, uuid, NULL, NULL); DCHECK(rc, "Failed to create container %s", testFileName); } @@ -594,12 +594,14 @@ kill_daos_server(IOR_param_t *param) { daos_pool_info_t info; d_rank_t d_rank, svc_ranks[13]; - d_rank_list_t svc, targets; + d_rank_list_t svc; + struct d_tgt_list targets; + int tgt_idx = -1; uuid_t uuid; char *s; int rc; - rc = daos_pool_query(pool, NULL, &info, NULL); + rc = daos_pool_query(pool, NULL, &info, NULL, NULL); DCHECK(rc, "Error in querying pool\n"); if (info.pi_ntargets - info.pi_ndisabled <= 1) @@ -618,16 +620,16 @@ kill_daos_server(IOR_param_t *param) rc = daos_mgmt_svc_rip(o.daosGroup, d_rank, true, NULL); DCHECK(rc, "Error in killing server\n"); - targets.rl_nr = 1; - targets.rl_ranks = &d_rank; - + targets.tl_nr = 1; + targets.tl_ranks = &d_rank; + targets.tl_tgts = &tgt_idx; 
svc.rl_ranks = svc_ranks; ParseService(sizeof(svc_ranks)/ sizeof(svc_ranks[0]), &svc); - rc = daos_pool_exclude(uuid, NULL, &svc, &targets, NULL); + rc = daos_pool_tgt_exclude(uuid, NULL, &svc, &targets, NULL); DCHECK(rc, "Error in excluding pool from poolmap\n"); - rc = daos_pool_query(pool, NULL, &info, NULL); + rc = daos_pool_query(pool, NULL, &info, NULL, NULL); DCHECK(rc, "Error in querying pool\n"); printf("%d targets succesfully disabled\n", diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index e51d045..9b8f60f 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -261,7 +261,7 @@ DFS_Init() { if (rc == -DER_NONEXIST) { if (verbose >= VERBOSE_1) printf("Creating DFS Container ...\n"); - rc = daos_cont_create(poh, co_uuid, NULL); + rc = daos_cont_create(poh, co_uuid, NULL, NULL); if (rc == 0) { cont_created = true; rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, From db19141c1998c49083bcf7dbdbe66a3c916a042f Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 1 Mar 2019 20:41:01 +0000 Subject: [PATCH 16/66] remove usage of GURT's D_GOTO from DFS plugin. Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 9b8f60f..dd9bc7f 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -25,6 +25,7 @@ #endif #include +#include #include #include #include @@ -160,12 +161,16 @@ parse_filename(const char *path, char **_obj_name, char **_cont_name) } f1 = strdup(path); - if (f1 == NULL) - D_GOTO(out, rc = -ENOMEM); + if (f1 == NULL) { + rc = -ENOMEM; + goto out; + } f2 = strdup(path); - if (f2 == NULL) - D_GOTO(out, rc = -ENOMEM); + if (f2 == NULL) { + rc = -ENOMEM; + goto out; + } fname = basename(f1); cont_name = dirname(f2); @@ -173,18 +178,24 @@ parse_filename(const char *path, char **_obj_name, char **_cont_name) if (cont_name[0] == '.' 
|| cont_name[0] != '/') { char cwd[1024]; - if (getcwd(cwd, 1024) == NULL) - D_GOTO(out, rc = -ENOMEM); + if (getcwd(cwd, 1024) == NULL) { + rc = -ENOMEM; + goto out; + } if (strcmp(cont_name, ".") == 0) { cont_name = strdup(cwd); - if (cont_name == NULL) - D_GOTO(out, rc = -ENOMEM); + if (cont_name == NULL) { + rc = -ENOMEM; + goto out; + } } else { char *new_dir = calloc(strlen(cwd) + strlen(cont_name) + 1, sizeof(char)); - if (new_dir == NULL) - D_GOTO(out, rc = -ENOMEM); + if (new_dir == NULL) { + rc = -ENOMEM; + goto out; + } strcpy(new_dir, cwd); if (cont_name[0] == '.') { @@ -198,15 +209,18 @@ parse_filename(const char *path, char **_obj_name, char **_cont_name) *_cont_name = cont_name; } else { *_cont_name = strdup(cont_name); - if (*_cont_name == NULL) - D_GOTO(out, rc = -ENOMEM); + if (*_cont_name == NULL) { + rc = -ENOMEM; + goto out; + } } *_obj_name = strdup(fname); if (*_obj_name == NULL) { free(*_cont_name); *_cont_name = NULL; - D_GOTO(out, rc = -ENOMEM); + rc = -ENOMEM; + goto out; } out: From c35f64237ffb8ef151abeff1b9669bc754ea160e Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 14 Mar 2019 16:55:54 +0000 Subject: [PATCH 17/66] initialize data and akey checksum to NULL since they are not being used. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 9035600..f06c917 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -336,18 +336,12 @@ static void AIOInit(IOR_param_t *param) aio->a_recx.rx_nr = 1; - aio->a_csum.cs_csum = &aio->a_csumBuf; - aio->a_csum.cs_buf_len = sizeof aio->a_csumBuf; - aio->a_csum.cs_len = aio->a_csum.cs_buf_len; - - aio->a_iod.iod_name.iov_buf = "data"; - aio->a_iod.iod_name.iov_buf_len = - strlen(aio->a_iod.iod_name.iov_buf) + 1; - aio->a_iod.iod_name.iov_len = aio->a_iod.iod_name.iov_buf_len; + daos_iov_set(&aio->a_iod.iod_name, "data", strlen("data")); + daos_csum_set(&aio->a_iod.iod_kcsum, NULL, 0); aio->a_iod.iod_nr = 1; aio->a_iod.iod_type = DAOS_IOD_ARRAY; aio->a_iod.iod_recxs = &aio->a_recx; - aio->a_iod.iod_csums = &aio->a_csum; + aio->a_iod.iod_csums = NULL; aio->a_iod.iod_eprs = NULL; aio->a_iod.iod_size = param->transferSize; From 39eca1bb088c960299c8a14a90d25640ff0e3cfc Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 14 Mar 2019 21:17:37 +0000 Subject: [PATCH 18/66] add DAOS_Access routine to check if a container exists before deleting it. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 26 ++++++++++++++++++++++++-- src/ior.c | 6 ++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index f06c917..1555c57 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -83,6 +83,7 @@ static void DAOS_Init(); static void DAOS_Fini(); static void *DAOS_Create(char *, IOR_param_t *); static void *DAOS_Open(char *, IOR_param_t *); +static int DAOS_Access(const char *, int, IOR_param_t *); static IOR_offset_t DAOS_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); static void DAOS_Close(void *, IOR_param_t *); @@ -98,6 +99,7 @@ ior_aiori_t daos_aiori = { .name = "DAOS", .create = DAOS_Create, .open = DAOS_Open, + .access = DAOS_Access, .xfer = DAOS_Xfer, .close = DAOS_Close, .delete = DAOS_Delete, @@ -568,6 +570,26 @@ static void *DAOS_Create(char *testFileName, IOR_param_t *param) return DAOS_Open(testFileName, param); } +static int +DAOS_Access(const char *testFileName, int mode, IOR_param_t * param) +{ + uuid_t uuid; + unsigned int dFlags; + daos_handle_t coh; + daos_cont_info_t info; + int rc; + + rc = uuid_parse(testFileName, uuid); + DCHECK(rc, "Failed to parse 'testFile': %s", testFileName); + + rc = daos_cont_open(pool, uuid, DAOS_COO_RO, &coh, &info, NULL); + if (rc) + return rc; + + rc = daos_cont_close(coh, NULL); + return rc; +} + static void *DAOS_Open(char *testFileName, IOR_param_t *param) { struct fileDescriptor *fd; @@ -785,8 +807,8 @@ static void DAOS_Delete(char *testFileName, IOR_param_t *param) DCHECK(rc, "Failed to parse 'testFile': %s", testFileName); rc = daos_cont_destroy(pool, uuid, 1 /* force */, NULL /* ev */); - if (rc != -DER_NONEXIST) - DCHECK(rc, "Failed to destroy container %s", testFileName); + if (rc) + DCHECK(rc, "Failed to destroy container %s (%d)", testFileName, rc); } static char* DAOS_GetVersion() diff --git a/src/ior.c b/src/ior.c index 2a5532a..8dba156 100755 --- a/src/ior.c +++ b/src/ior.c @@ -910,8 
+910,7 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) rankOffset = 0; GetTestFileName(testFileName, test); } - if (backend->access(testFileName, F_OK, test) == 0 || - strcasecmp(test->api, "DAOS") == 0) { + if (backend->access(testFileName, F_OK, test) == 0) { backend->delete(testFileName, test); } if (test->reorderTasksRandom == TRUE) { @@ -919,8 +918,7 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) GetTestFileName(testFileName, test); } } else { - if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0 || - strcasecmp(test->api, "DAOS") == 0)) { + if (rank == 0 && backend->access(testFileName, F_OK, test) == 0) { backend->delete(testFileName, test); } } From 8b23c5038900f09661b6be0af048bee95955151f Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 22 Mar 2019 16:53:34 +0000 Subject: [PATCH 19/66] update dfs_open for API change to expose chunk size selection Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index dd9bc7f..86bd175 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -333,7 +333,7 @@ DFS_Create(char *testFileName, IOR_param_t *param) DERR(rc, "dfs_lookup() of %s Failed", dir_name); rc = dfs_open(dfs, parent, name, mode, fd_oflag, DAOS_OC_LARGE_RW, - NULL, &obj); + 0, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); out: @@ -371,7 +371,7 @@ DFS_Open(char *testFileName, IOR_param_t *param) rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); DERR(rc, "dfs_lookup() of %s Failed", dir_name); - rc = dfs_open(dfs, parent, name, mode, fd_oflag, 0, NULL, &obj); + rc = dfs_open(dfs, parent, name, mode, fd_oflag, 0, 0, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); out: From 4a788a0c2374e8cff3404eeee5114b19e6ce9e0e Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 30 Mar 2019 19:11:44 +0000 Subject: [PATCH 20/66] re-write the DAOS IOR 
driver to use the Array API Signed-off-by: Mohamad Chaarawi --- README_DAOS | 50 +-- configure.ac | 15 +- src/aiori-DAOS.c | 844 ++++++++++++++++------------------------------- 3 files changed, 322 insertions(+), 587 deletions(-) diff --git a/README_DAOS b/README_DAOS index eea57a8..33d9464 100644 --- a/README_DAOS +++ b/README_DAOS @@ -4,14 +4,16 @@ Building The DAOS library must be installed on the system. ./bootsrap -./configure --prefix=iorInstallDir --with-daos=DIR +./configure --prefix=iorInstallDir --with-daos=DIR --with-cart=DIR -One must specify "--with-daos=/path/to/daos/install". When that is specified -the DAOS and DFS driver will be built. +One must specify "--with-daos=/path/to/daos/install" and "--with-cart=/path/to/cart/install". When both +are specified the DAOS and DFS drivers will be built. -The DAOS driver uses the DAOS API to create a container (file) and update/fetch -an object to the container using the daos object API. The DAOS driver works with -IOR only (no mdtest support yet). +The DAOS driver uses the DAOS API to open a container (or create it if it +doesn't exist first) then create an array object in that container (file) and +read/write to the array object using the daos Array API. The DAOS driver works +with IOR only (no mdtest support yet). The file name used by IOR (passed by -o +option) is hashed to an object ID that is used as the array oid. The DFS (DAOS File System) driver creates an encapsulated namespace and emulates the POSIX driver using the DFS API directly on top of DAOS. 
The DFS driver works @@ -33,40 +35,22 @@ The DAOS options include: Required Options: --daos.pool : pool uuid to connect to (has to be created beforehand) --daos.svcl : pool svcl list (: separated) +--daos.cont : container for the IOR files/objects (can use `uuidgen`) Optional Options: --daos.group : group name of servers with the pool ---daos.recordSize : object record size for IO ---daos.stripeSize ---daos.stripeCount ---daos.stripeMax ---daos.aios : number of concurrent async IOs ---daos.kill flag to kill a rank during IO ---daos.objectClass : specific object class - -When benchmarking write performance, one likely does not want "-W", which causes -the write phase to do one additional memory copy for every I/O. This is due to -IOR's assumption that when a DAOS_Xfer() call returns the buffer may be -released. Therefore, random data is written when "-W" is absent, while data is -copied from IOR buffers when "-W" is present. - -See doc/USER_GUIDE for all options and directives. Note that not all -combinations of options are supported. For example specifying an epoch to access -and running ior with multiple iterations would cause all iterations other than -first one to fail because the epoch will be committed in the first iteration. In -that case, the epoch should not be specified and the DAOS driver would choose -the epoch to access automatically on each iteration. +--daos.chunk_size : Chunk size of the array object controlling striping over DKEYs +--daos.destroy flag to destroy the container on finalize +--daos.oclass : specific object class for array object Examples that should work include: - - "ior -a DAOS -w -W -o --daos.pool --daos.svcl " - writes into a new container and verifies the data, using default - daosRecordSize, transferSize, daosStripeSize, blockSize, daosAios, etc. 
+ - "ior -a DAOS -w -W -o file_name --daos.pool --daos.svcl \ + --daos.cont " - - "ior -a DAOS -w -W -r -R -o -b 1g -t 4m -C \ - --daos.pool --daos.svcl --daos.recordSize 1m --daos.stripeSize 4m\ - --daos.stripeCount 256 --daos.aios 8 - does all IOR tests and shifts ranks during checkWrite and checkRead. + - "ior -a DAOS -w -W -r -R -o file_name -b 1g -t 4m \ + --daos.pool --daos.svcl --daos.cont \ + --daos.chunk_size 1024 --daos.oclass R2" Running with DFS API --------------------- diff --git a/configure.ac b/configure.ac index 395c0ab..d38b2a3 100755 --- a/configure.ac +++ b/configure.ac @@ -185,7 +185,20 @@ AM_COND_IF([USE_RADOS_AIORI],[ AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI]) ]) -# DAOS Backends (DAOS and DFS) IO support +# DAOS Backends (DAOS and DFS) IO support require DAOS and CART/GURT +AC_ARG_WITH([cart], + [AS_HELP_STRING([--with-cart], + [support IO with DAOS backends @<:@default=no@:>@])], + [], + [with_daos=no]) + +AS_IF([test "x$with_cart" != xno], + CART="yes" + LDFLAGS="$LDFLAGS -L$with_cart/lib" + CPPFLAGS="$CPPFLAGS -I$with_cart/include/" + AC_CHECK_HEADERS(gurt/common.h,, [unset CART]) + AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset CART])) + AC_ARG_WITH([daos], [AS_HELP_STRING([--with-daos], [support IO with DAOS backends @<:@default=no@:>@])], diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 1555c57..3e7f9c7 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include "ior.h" #include "aiori.h" @@ -38,42 +40,33 @@ /************************** O P T I O N S *****************************/ struct daos_options{ - char *daosPool; - char *daosPoolSvc; - char *daosGroup; - int daosRecordSize; - int daosStripeSize; - uint64_t daosStripeCount; - uint64_t daosStripeMax; /* max length of a stripe */ - int daosAios; /* max number of concurrent async I/Os */ - int daosKill; /* kill a target while running IOR */ - char *daosObjectClass; /* object 
class */ + char *pool; + char *svcl; + char *group; + char *cont; + int chunk_size; + int destroy; + char *oclass; }; static struct daos_options o = { - .daosPool = NULL, - .daosPoolSvc = NULL, - .daosGroup = NULL, - .daosRecordSize = 262144, - .daosStripeSize = 524288, - .daosStripeCount = -1, - .daosStripeMax = 0, - .daosAios = 1, - .daosKill = 0, - .daosObjectClass = NULL, + .pool = NULL, + .svcl = NULL, + .group = NULL, + .cont = NULL, + .chunk_size = 1048576, + .destroy = 0, + .oclass = NULL, }; static option_help options [] = { - {0, "daos.pool", "pool uuid", OPTION_REQUIRED_ARGUMENT, 's', &o.daosPool}, - {0, "daos.svcl", "pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', &o.daosPoolSvc}, - {0, "daos.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosGroup}, - {0, "daos.recordSize", "Record Size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosRecordSize}, - {0, "daos.stripeSize", "Stripe Size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosStripeSize}, - {0, "daos.stripeCount", "Stripe Count", OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeCount}, - {0, "daos.stripeMax", "Max Stripe",OPTION_OPTIONAL_ARGUMENT, 'u', &o.daosStripeMax}, - {0, "daos.aios", "Concurrent Async IOs",OPTION_OPTIONAL_ARGUMENT, 'd', &o.daosAios}, - {0, "daos.kill", "Kill target while running",OPTION_FLAG, 'd', &o.daosKill}, - {0, "daos.objectClass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.daosObjectClass}, + {0, "daos.pool", "pool uuid", OPTION_REQUIRED_ARGUMENT, 's', &o.pool}, + {0, "daos.svcl", "pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', &o.svcl}, + {0, "daos.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o.group}, + {0, "daos.cont", "container uuid", OPTION_REQUIRED_ARGUMENT, 's', &o.cont}, + {0, "daos.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, + {0, "daos.destroy", "Destroy Container", OPTION_FLAG, 'd', &o.destroy}, + {0, "daos.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, LAST_OPTION }; @@ -111,40 +104,19 @@ ior_aiori_t 
daos_aiori = { .get_options = DAOS_options, }; +#define IOR_DAOS_MUR_SEED 0xDEAD10CC + enum handleType { POOL_HANDLE, - CONTAINER_HANDLE + CONT_HANDLE, + ARRAY_HANDLE }; -struct fileDescriptor { - daos_handle_t container; - daos_cont_info_t containerInfo; - daos_handle_t object; -}; - -struct aio { - cfs_list_t a_list; - char a_dkeyBuf[32]; - daos_key_t a_dkey; - daos_recx_t a_recx; - unsigned char a_csumBuf[32]; - daos_csum_buf_t a_csum; - daos_iod_t a_iod; - daos_iov_t a_iov; - daos_sg_list_t a_sgl; - struct daos_event a_event; -}; - -static daos_handle_t eventQueue; -static struct daos_event **events; -static unsigned char *buffers; -static int nAios; -static daos_handle_t pool; -static daos_pool_info_t poolInfo; -static daos_oclass_id_t objectClass = DAOS_OC_LARGE_RW; -static CFS_LIST_HEAD(aios); -static IOR_offset_t total_size; -static bool daos_initialized = false; +static daos_handle_t poh; +static daos_handle_t coh; +static daos_handle_t aoh; +static daos_oclass_id_t objectClass = DAOS_OC_LARGE_RW; +static bool daos_initialized = false; /***************************** F U N C T I O N S ******************************/ @@ -176,13 +148,12 @@ do { \ } while (0) /* Distribute process 0's pool or container handle to others. */ -static void HandleDistribute(daos_handle_t *handle, enum handleType type) +static void +HandleDistribute(daos_handle_t *handle, enum handleType type) { daos_iov_t global; int rc; - assert(type == POOL_HANDLE || !daos_handle_is_inval(pool)); - global.iov_buf = NULL; global.iov_buf_len = 0; global.iov_len = 0; @@ -191,8 +162,10 @@ static void HandleDistribute(daos_handle_t *handle, enum handleType type) /* Get the global handle size. 
*/ if (type == POOL_HANDLE) rc = daos_pool_local2global(*handle, &global); - else + else if (type == CONT_HANDLE) rc = daos_cont_local2global(*handle, &global); + else + rc = daos_array_local2global(*handle, &global); DCHECK(rc, "Failed to get global handle size"); } @@ -207,8 +180,10 @@ static void HandleDistribute(daos_handle_t *handle, enum handleType type) if (rank == 0) { if (type == POOL_HANDLE) rc = daos_pool_local2global(*handle, &global); - else + else if (type == CONT_HANDLE) rc = daos_cont_local2global(*handle, &global); + else + rc = daos_array_local2global(*handle, &global); DCHECK(rc, "Failed to create global handle"); } @@ -222,213 +197,18 @@ static void HandleDistribute(daos_handle_t *handle, enum handleType type) if (type == POOL_HANDLE) rc = daos_pool_global2local(global, handle); - else - rc = daos_cont_global2local(pool, global, handle); + else if (type == CONT_HANDLE) + rc = daos_cont_global2local(poh, global, handle); + else + rc = daos_array_global2local(coh, global, handle); DCHECK(rc, "Failed to get local handle"); } free(global.iov_buf); } -static void ContainerOpen(char *testFileName, IOR_param_t *param, - daos_handle_t *container, daos_cont_info_t *info) -{ - int rc; - - if (rank == 0) { - uuid_t uuid; - unsigned int dFlags; - - rc = uuid_parse(testFileName, uuid); - DCHECK(rc, "Failed to parse 'testFile': %s", testFileName); - - if (param->open == WRITE && - param->useExistingTestFile == FALSE) { - INFO(VERBOSE_2, "Creating container %s", testFileName); - - rc = daos_cont_create(pool, uuid, NULL, NULL); - DCHECK(rc, "Failed to create container %s", - testFileName); - } - - INFO(VERBOSE_2, "Opening container %s", testFileName); - - if (param->open == WRITE) - dFlags = DAOS_COO_RW; - else - dFlags = DAOS_COO_RO; - - rc = daos_cont_open(pool, uuid, dFlags, container, info, - NULL /* ev */); - DCHECK(rc, "Failed to open container %s", testFileName); - } - - HandleDistribute(container, CONTAINER_HANDLE); - - MPI_CHECK(MPI_Bcast(info, sizeof 
*info, MPI_BYTE, 0, param->testComm), - "Failed to broadcast container info"); -} - -static void ContainerClose(daos_handle_t container, IOR_param_t *param) -{ - int rc; - - if (rank != 0) { - rc = daos_cont_close(container, NULL /* ev */); - DCHECK(rc, "Failed to close container"); - } - - /* An MPI_Gather() call would probably be more efficient. */ - MPI_CHECK(MPI_Barrier(param->testComm), - "Failed to synchronize processes"); - - if (rank == 0) { - rc = daos_cont_close(container, NULL /* ev */); - DCHECK(rc, "Failed to close container"); - } -} - -static void ObjectOpen(daos_handle_t container, daos_handle_t *object, - IOR_param_t *param) -{ - daos_obj_id_t oid; - unsigned int flags; - int rc; - - oid.hi = 0; - oid.lo = 1; - daos_obj_generate_id(&oid, 0, objectClass); - - if (param->open == WRITE) - flags = DAOS_OO_RW; - else - flags = DAOS_OO_RO; - - rc = daos_obj_open(container, oid, flags, object, NULL /* ev */); - DCHECK(rc, "Failed to open object"); -} - -static void ObjectClose(daos_handle_t object) -{ - int rc; - - rc = daos_obj_close(object, NULL /* ev */); - DCHECK(rc, "Failed to close object"); -} - -static void AIOInit(IOR_param_t *param) -{ - struct aio *aio; - int i; - int rc; - - rc = posix_memalign((void **) &buffers, sysconf(_SC_PAGESIZE), - param->transferSize * o.daosAios); - DCHECK(rc, "Failed to allocate buffer array"); - - for (i = 0; i < o.daosAios; i++) { - aio = malloc(sizeof *aio); - if (aio == NULL) - ERR("Failed to allocate aio array"); - - memset(aio, 0, sizeof *aio); - - aio->a_dkey.iov_buf = aio->a_dkeyBuf; - aio->a_dkey.iov_buf_len = sizeof aio->a_dkeyBuf; - - aio->a_recx.rx_nr = 1; - - daos_iov_set(&aio->a_iod.iod_name, "data", strlen("data")); - daos_csum_set(&aio->a_iod.iod_kcsum, NULL, 0); - aio->a_iod.iod_nr = 1; - aio->a_iod.iod_type = DAOS_IOD_ARRAY; - aio->a_iod.iod_recxs = &aio->a_recx; - aio->a_iod.iod_csums = NULL; - aio->a_iod.iod_eprs = NULL; - aio->a_iod.iod_size = param->transferSize; - - aio->a_iov.iov_buf = buffers 
+ param->transferSize * i; - aio->a_iov.iov_buf_len = param->transferSize; - aio->a_iov.iov_len = aio->a_iov.iov_buf_len; - - aio->a_sgl.sg_nr = 1; - aio->a_sgl.sg_iovs = &aio->a_iov; - - rc = daos_event_init(&aio->a_event, eventQueue, - NULL /* parent */); - DCHECK(rc, "Failed to initialize event for aio[%d]", i); - - cfs_list_add(&aio->a_list, &aios); - - INFO(VERBOSE_3, "Allocated AIO %p: buffer %p", aio, - aio->a_iov.iov_buf); - } - - nAios = o.daosAios; - - events = malloc((sizeof *events) * o.daosAios); - if (events == NULL) - ERR("Failed to allocate events array"); -} - -static void AIOFini(IOR_param_t *param) -{ - struct aio *aio; - struct aio *tmp; - - free(events); - - cfs_list_for_each_entry_safe(aio, tmp, &aios, a_list) { - INFO(VERBOSE_3, "Freeing AIO %p: buffer %p", aio, - aio->a_iov.iov_buf); - cfs_list_del_init(&aio->a_list); - daos_event_fini(&aio->a_event); - free(aio); - } - - free(buffers); -} - -static void AIOWait(IOR_param_t *param) -{ - struct aio *aio; - int i; - int rc; - - rc = daos_eq_poll(eventQueue, 0, DAOS_EQ_WAIT, o.daosAios, - events); - DCHECK(rc, "Failed to poll event queue"); - assert(rc <= o.daosAios - nAios); - - for (i = 0; i < rc; i++) { - int ret; - - aio = (struct aio *) - ((char *) events[i] - - (char *) (&((struct aio *) 0)->a_event)); - - DCHECK(aio->a_event.ev_error, "Failed to transfer (%lu, %lu)", - aio->a_iod.iod_recxs->rx_idx, - aio->a_iod.iod_recxs->rx_nr); - - daos_event_fini(&aio->a_event); - ret = daos_event_init(&aio->a_event, eventQueue, - NULL /* parent */); - DCHECK(ret, "Failed to reinitialize event for AIO %p", aio); - - cfs_list_move(&aio->a_list, &aios); - nAios++; - - if (param->verbose >= VERBOSE_3) - INFO(VERBOSE_3, "Completed AIO %p: buffer %p", aio, - aio->a_iov.iov_buf); - } - - INFO(VERBOSE_3, "Found %d completed AIOs (%d free %d busy)", rc, - nAios, o.daosAios - nAios); -} - -static void ObjectClassParse(const char *string) +static void +ObjectClassParse(const char *string) { if 
(strcasecmp(string, "tiny") == 0) objectClass = DAOS_OC_TINY_RW; @@ -459,105 +239,107 @@ static void ObjectClassParse(const char *string) else if (strcasecmp(string, "repl_max") == 0) objectClass = DAOS_OC_REPL_MAX_RW; else - GERR("Invalid 'daosObjectClass' argument: '%s'", string); + GERR("Invalid 'oclass' argument: '%s'", string); } -static void ParseService(int max, d_rank_list_t *ranks) +static option_help * +DAOS_options() { - char *s; - - s = strdup(o.daosPoolSvc); - if (s == NULL) - GERR("failed to duplicate argument"); - ranks->rl_nr = 0; - while ((s = strtok(s, ":")) != NULL) { - if (ranks->rl_nr >= max) { - free(s); - GERR("at most %d pool service replicas supported", max); - } - ranks->rl_ranks[ranks->rl_nr] = atoi(s); - ranks->rl_nr++; - s = NULL; - } - free(s); -} - -static option_help * DAOS_options(){ return options; } -static void DAOS_Init() +static void +DAOS_Init() { int rc; if (daos_initialized) return; - if (o.daosPool == NULL || o.daosPoolSvc == NULL) - return; - if (o.daosObjectClass) - ObjectClassParse(o.daosObjectClass); - if (o.daosStripeMax % o.daosStripeSize != 0) - GERR("'daosStripeMax' must be a multiple of 'daosStripeSize'"); - if (o.daosKill && ((objectClass != DAOS_OC_R2_RW) || - (objectClass != DAOS_OC_R3_RW) || - (objectClass != DAOS_OC_R4_RW) || - (objectClass != DAOS_OC_R2S_RW) || - (objectClass != DAOS_OC_R3S_RW) || - (objectClass != DAOS_OC_R4S_RW) || - (objectClass != DAOS_OC_REPL_MAX_RW))) - GERR("'daosKill' only makes sense with 'daosObjectClass=repl'"); - if (rank == 0) - INFO(VERBOSE_0, "WARNING: USING daosStripeMax CAUSES READS TO RETURN INVALID DATA"); + if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) + return; + if (o.oclass) + ObjectClassParse(o.oclass); rc = daos_init(); - if (rc != -DER_ALREADY) + if (rc) DCHECK(rc, "Failed to initialize daos"); - rc = daos_eq_create(&eventQueue); - DCHECK(rc, "Failed to create event queue"); - if (rank == 0) { - uuid_t uuid; - d_rank_t d_rank[13]; - d_rank_list_t ranks; + 
uuid_t uuid; + d_rank_list_t *svcl = NULL; + d_rank_list_t ranks; + static daos_pool_info_t po_info; + static daos_cont_info_t co_info; - INFO(VERBOSE_2, "Connecting to pool %s %s", o.daosPool, o.daosPoolSvc); + INFO(VERBOSE_1, "Connecting to pool %s", o.pool); - rc = uuid_parse(o.daosPool, uuid); - DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool); - ranks.rl_ranks = d_rank; - ParseService(sizeof(d_rank) / sizeof(d_rank[0]), &ranks); + rc = uuid_parse(o.pool, uuid); + DCHECK(rc, "Failed to parse 'pool': %s", o.pool); - rc = daos_pool_connect(uuid, o.daosGroup, &ranks, - DAOS_PC_RW, &pool, &poolInfo, - NULL /* ev */); - DCHECK(rc, "Failed to connect to pool %s", o.daosPool); + svcl = daos_rank_list_parse(o.svcl, ":"); + if (svcl == NULL) + ERR("Failed to allocate svcl"); + + rc = daos_pool_connect(uuid, o.group, svcl, DAOS_PC_RW, + &poh, &po_info, NULL); + daos_rank_list_free(svcl); + DCHECK(rc, "Failed to connect to pool %s", o.pool); + + INFO(VERBOSE_1, "Create/Open Container %s", o.cont); + + uuid_clear(uuid); + rc = uuid_parse(o.cont, uuid); + DCHECK(rc, "Failed to parse 'cont': %s", o.cont); + + rc = daos_cont_open(poh, uuid, DAOS_COO_RW, &coh, &co_info, + NULL); + /* If NOEXIST we create it */ + if (rc == -DER_NONEXIST) { + INFO(VERBOSE_2, "Creating DAOS Container...\n"); + rc = daos_cont_create(poh, uuid, NULL, NULL); + if (rc == 0) + rc = daos_cont_open(poh, uuid, DAOS_COO_RW, + &coh, &co_info, NULL); + } + DCHECK(rc, "Failed to create container"); } - HandleDistribute(&pool, POOL_HANDLE); - - MPI_CHECK(MPI_Bcast(&poolInfo, sizeof poolInfo, MPI_BYTE, 0, MPI_COMM_WORLD), - "Failed to bcast pool info"); - - if (o.daosStripeCount == -1) - o.daosStripeCount = poolInfo.pi_ntargets * 64UL; + HandleDistribute(&poh, POOL_HANDLE); + HandleDistribute(&coh, CONT_HANDLE); + aoh.cookie = 0; daos_initialized = true; } -static void DAOS_Fini() +static void +DAOS_Fini() { int rc; if (!daos_initialized) return; - rc = daos_pool_disconnect(pool, NULL /* ev */); - 
DCHECK(rc, "Failed to disconnect from pool %s", o.daosPool); + rc = daos_cont_close(coh, NULL); + DCHECK(rc, "Failed to close container\n"); - rc = daos_eq_destroy(eventQueue, 0 /* flags */); - DCHECK(rc, "Failed to destroy event queue"); + if (o.destroy) { + if (rank == 0) { + uuid_t uuid; + + INFO(VERBOSE_1, "Destroying Container %s", o.cont); + uuid_parse(o.cont, uuid); + rc = daos_cont_destroy(poh, o.cont, 1, NULL); + } + + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rc) + DCHECK(rc, "Failed to destroy container %s (%d)", + o.cont, rc); + } + + rc = daos_pool_disconnect(poh, NULL); + DCHECK(rc, "Failed to disconnect from pool %s", o.pool); rc = daos_fini(); DCHECK(rc, "Failed to finalize daos"); @@ -565,253 +347,180 @@ static void DAOS_Fini() daos_initialized = false; } -static void *DAOS_Create(char *testFileName, IOR_param_t *param) +static void +gen_oid(const char *name, daos_obj_id_t *oid) { - return DAOS_Open(testFileName, param); + daos_ofeat_t feat = 0; + + oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); + oid->hi = 0; + + feat = DAOS_OF_DKEY_UINT64 | DAOS_OF_AKEY_HASHED; + daos_obj_generate_id(oid, feat, objectClass); +} + +static void * +DAOS_Create(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + int rc; + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** Create the array */ + if (param->filePerProc || rank == 0) { + printf("Chunk size = %zu\n", o.chunk_size); + rc = daos_array_create(coh, oid, DAOS_TX_NONE, 1, o.chunk_size, + &aoh, NULL); + DCHECK(rc, "Failed to create array object\n"); + } + + /** Distribute the array handle if not FPP */ + if (!param->filePerProc) + HandleDistribute(&aoh, ARRAY_HANDLE); } static int DAOS_Access(const char *testFileName, int mode, IOR_param_t * param) { - uuid_t uuid; - unsigned int dFlags; - daos_handle_t coh; - daos_cont_info_t info; + daos_obj_id_t oid; int rc; - rc = uuid_parse(testFileName, uuid); - DCHECK(rc, "Failed to parse 
'testFile': %s", testFileName); + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); - rc = daos_cont_open(pool, uuid, DAOS_COO_RO, &coh, &info, NULL); - if (rc) - return rc; + /** open the array to verify it exists */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + if (rc) + return rc; + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_close(aoh, NULL); + aoh.cookie = 0; + } + + if (!param->filePerProc) + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); - rc = daos_cont_close(coh, NULL); return rc; } -static void *DAOS_Open(char *testFileName, IOR_param_t *param) +static void * +DAOS_Open(char *testFileName, IOR_param_t *param) { - struct fileDescriptor *fd; + daos_obj_id_t oid; - fd = malloc(sizeof *fd); - if (fd == NULL) - ERR("Failed to allocate fd"); + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); - ContainerOpen(testFileName, param, &fd->container, &fd->containerInfo); - ObjectOpen(fd->container, &fd->object, param); - AIOInit(param); + /** Open the array */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + int rc; - return fd; + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "Failed to create array object\n"); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + } + + /** Distribute the array handle if not FPP */ + if (!param->filePerProc) + HandleDistribute(&aoh, ARRAY_HANDLE); } -static void -kill_daos_server(IOR_param_t *param) +static IOR_offset_t +DAOS_Xfer(int access, void *file, IOR_size_t *buffer, + IOR_offset_t length, IOR_param_t *param) { - daos_pool_info_t info; - d_rank_t d_rank, svc_ranks[13]; - d_rank_list_t svc; - struct d_tgt_list targets; - int tgt_idx = -1; - uuid_t uuid; - char *s; - int rc; + daos_array_iod_t iod; + daos_range_t 
rg; + daos_sg_list_t sgl; + daos_iov_t iov; + int rc; - rc = daos_pool_query(pool, NULL, &info, NULL, NULL); - DCHECK(rc, "Error in querying pool\n"); + /** set array location */ + iod.arr_nr = 1; + rg.rg_len = length; + rg.rg_idx = param->offset; + iod.arr_rgs = &rg; - if (info.pi_ntargets - info.pi_ndisabled <= 1) - return; - /* choose the last alive one */ - d_rank = info.pi_ntargets - 1 - info.pi_ndisabled; - - rc = uuid_parse(o.daosPool, uuid); - DCHECK(rc, "Failed to parse 'daosPool': %s", o.daosPool); - - if (rc != 0) - printf("Killing tgt rank: %d (total of %d of %d already disabled)\n", - d_rank, info.pi_ndisabled, info.pi_ntargets); - fflush(stdout); - - rc = daos_mgmt_svc_rip(o.daosGroup, d_rank, true, NULL); - DCHECK(rc, "Error in killing server\n"); - - targets.tl_nr = 1; - targets.tl_ranks = &d_rank; - targets.tl_tgts = &tgt_idx; - svc.rl_ranks = svc_ranks; - ParseService(sizeof(svc_ranks)/ sizeof(svc_ranks[0]), &svc); - - rc = daos_pool_tgt_exclude(uuid, NULL, &svc, &targets, NULL); - DCHECK(rc, "Error in excluding pool from poolmap\n"); - - rc = daos_pool_query(pool, NULL, &info, NULL, NULL); - DCHECK(rc, "Error in querying pool\n"); - - printf("%d targets succesfully disabled\n", - info.pi_ndisabled); - -} - -static void -kill_and_sync(IOR_param_t *param) -{ - double start, end; - - start = MPI_Wtime(); - if (rank == 0) - kill_daos_server(param); - - if (rank == 0) - printf("Done killing and excluding\n"); - - MPI_CHECK(MPI_Barrier(param->testComm), - "Failed to synchronize processes"); - - end = MPI_Wtime(); - if (rank == 0) - printf("Time spent inducing failure: %lf\n", (end - start)); -} - -static IOR_offset_t DAOS_Xfer(int access, void *file, IOR_size_t *buffer, - IOR_offset_t length, IOR_param_t *param) -{ - struct fileDescriptor *fd = file; - struct aio *aio; - uint64_t stripe; - IOR_offset_t stripeOffset; - uint64_t round; - int rc; - - if (!daos_initialized) - GERR("DAOS is not initialized!"); - if (param->filePerProc) - GERR("'filePerProc' 
not yet supported"); - if (o.daosStripeSize % param->transferSize != 0) - GERR("'daosStripeSize' must be a multiple of 'transferSize'"); - if (param->transferSize % o.daosRecordSize != 0) - GERR("'transferSize' must be a multiple of 'daosRecordSize'"); - - assert(length == param->transferSize); - assert(param->offset % length == 0); - - /** - * Currently killing only during writes - * Kills once when 1/2 of blocksize is - * written - **/ - total_size += length; - if (o.daosKill && (access == WRITE) && - ((param->blockSize)/2) == total_size) { - /** More than half written lets kill */ - if (rank == 0) - printf("Killing and Syncing\n", rank); - kill_and_sync(param); - o.daosKill = 0; - } - - /* - * Find an available AIO descriptor. If none, wait for one. - */ - while (nAios == 0) - AIOWait(param); - aio = cfs_list_entry(aios.next, struct aio, a_list); - cfs_list_move_tail(&aio->a_list, &aios); - nAios--; - - stripe = (param->offset / o.daosStripeSize) % - o.daosStripeCount; - rc = snprintf(aio->a_dkeyBuf, sizeof aio->a_dkeyBuf, "%lu", stripe); - assert(rc < sizeof aio->a_dkeyBuf); - aio->a_dkey.iov_len = strlen(aio->a_dkeyBuf) + 1; - round = param->offset / (o.daosStripeSize * o.daosStripeCount); - stripeOffset = o.daosStripeSize * round + - param->offset % o.daosStripeSize; - if (o.daosStripeMax != 0) - stripeOffset %= o.daosStripeMax; - aio->a_recx.rx_idx = stripeOffset / o.daosRecordSize; - - /* - * If the data written will be checked later, we have to copy in valid - * data instead of writing random bytes. If the data being read is for - * checking purposes, poison the buffer first. 
- */ - if (access == WRITE && param->checkWrite) - memcpy(aio->a_iov.iov_buf, buffer, length); - else if (access == WRITECHECK || access == READCHECK) - memset(aio->a_iov.iov_buf, '#', length); - - INFO(VERBOSE_3, "Starting AIO %p (%d free %d busy): access %d " - "dkey '%s' iod <%llu, %llu> sgl <%p, %lu>", aio, nAios, - o.daosAios - nAios, access, (char *) aio->a_dkey.iov_buf, - (unsigned long long) aio->a_iod.iod_recxs->rx_idx, - (unsigned long long) aio->a_iod.iod_recxs->rx_nr, - aio->a_sgl.sg_iovs->iov_buf, - (unsigned long long) aio->a_sgl.sg_iovs->iov_buf_len); + /** set memory location */ + sgl.sg_nr = 1; + daos_iov_set(&iov, buffer, length); + sgl.sg_iovs = &iov; if (access == WRITE) { - rc = daos_obj_update(fd->object, DAOS_TX_NONE, &aio->a_dkey, - 1 /* nr */, &aio->a_iod, &aio->a_sgl, - &aio->a_event); - DCHECK(rc, "Failed to start update operation"); - } else { - rc = daos_obj_fetch(fd->object, DAOS_TX_NONE, &aio->a_dkey, - 1 /* nr */, &aio->a_iod, &aio->a_sgl, - NULL /* maps */, &aio->a_event); - DCHECK(rc, "Failed to start fetch operation"); - } + rc = daos_array_write(aoh, DAOS_TX_NONE, &iod, &sgl, NULL, NULL); + DCHECK(rc, "daos_array_write() failed (%d).", rc); + } else { + rc = daos_array_read(aoh, DAOS_TX_NONE, &iod, &sgl, NULL, NULL); + DCHECK(rc, "daos_array_read() failed (%d).", rc); + } - /* - * If this is a WRITECHECK or READCHECK, we are expected to fill data - * into the buffer before returning. Note that if this is a READ, we - * don't have to return valid data as WriteOrRead() doesn't care. 
- */ - if (access == WRITECHECK || access == READCHECK) { - while (o.daosAios - nAios > 0) - AIOWait(param); - memcpy(buffer, aio->a_sgl.sg_iovs->iov_buf, length); - } - - return length; + return length; } -static void DAOS_Close(void *file, IOR_param_t *param) +static void +DAOS_Close(void *file, IOR_param_t *param) { - struct fileDescriptor *fd = file; - int rc; - - if (!daos_initialized) - return; - while (o.daosAios - nAios > 0) - AIOWait(param); - AIOFini(param); - - ObjectClose(fd->object); - - ContainerClose(fd->container, param); - - free(fd); -} - -static void DAOS_Delete(char *testFileName, IOR_param_t *param) -{ - uuid_t uuid; - int rc; + int rc; if (!daos_initialized) GERR("DAOS is not initialized!"); - INFO(VERBOSE_2, "Deleting container %s", testFileName); + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); - rc = uuid_parse(testFileName, uuid); - DCHECK(rc, "Failed to parse 'testFile': %s", testFileName); - - rc = daos_cont_destroy(pool, uuid, 1 /* force */, NULL /* ev */); - if (rc) - DCHECK(rc, "Failed to destroy container %s (%d)", testFileName, rc); + aoh.cookie = 0; } -static char* DAOS_GetVersion() +static void +DAOS_Delete(char *testFileName, IOR_param_t *param) +{ + daos_obj_id_t oid; + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** open the array to verify it exists */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_destroy(aoh, DAOS_TX_NONE, NULL); + DCHECK(rc, "daos_array_destroy() failed (%d).", rc); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; + } + + if 
(!param->filePerProc) + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); +} + +static char * +DAOS_GetVersion() { static char ver[1024] = {}; @@ -819,17 +528,46 @@ static char* DAOS_GetVersion() return ver; } -static void DAOS_Fsync(void *file, IOR_param_t *param) +static void +DAOS_Fsync(void *file, IOR_param_t *param) { - while (o.daosAios - nAios > 0) - AIOWait(param); + return; } -static IOR_offset_t DAOS_GetFileSize(IOR_param_t *test, MPI_Comm testComm, - char *testFileName) +static IOR_offset_t +DAOS_GetFileSize(IOR_param_t *param, MPI_Comm testComm, char *testFileName) { - /* - * Sizes are inapplicable to containers at the moment. - */ - return 0; + daos_obj_id_t oid; + daos_size_t size; + int rc; + + if (!daos_initialized) + GERR("DAOS is not initialized!"); + + /** Convert file name into object ID */ + gen_oid(testFileName, &oid); + + /** open the array to verify it exists */ + if (param->filePerProc || rank == 0) { + daos_size_t cell_size, chunk_size; + + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); + + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + + rc = daos_array_get_size(aoh, DAOS_TX_NONE, &size, NULL); + DCHECK(rc, "daos_array_get_size() failed (%d).", rc); + + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; + } + + if (!param->filePerProc) + MPI_Bcast(&size, 1, MPI_LONG, 0, MPI_COMM_WORLD); + + return size; } From e295a4e45f4d161d32a55164769c63e9cdf0f19d Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 1 Apr 2019 22:58:00 +0000 Subject: [PATCH 21/66] remove printf of chunk size. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 3e7f9c7..4fc7cc3 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -370,7 +370,6 @@ DAOS_Create(char *testFileName, IOR_param_t *param) /** Create the array */ if (param->filePerProc || rank == 0) { - printf("Chunk size = %zu\n", o.chunk_size); rc = daos_array_create(coh, oid, DAOS_TX_NONE, 1, o.chunk_size, &aoh, NULL); DCHECK(rc, "Failed to create array object\n"); From 5797624d56c45d1b14f35807f8e0ecaa78b297a0 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 15 Apr 2019 22:14:33 +0000 Subject: [PATCH 22/66] fix bug in file delete and access where only rank 0 is calling those functions. remove the bcasts for the error code since the other ranks are not calling it. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 58 ++++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 4fc7cc3..751e5a7 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -173,6 +173,7 @@ HandleDistribute(daos_handle_t *handle, enum handleType type) MPI_COMM_WORLD), "Failed to bcast global handle buffer size"); + global.iov_len = global.iov_buf_len; global.iov_buf = malloc(global.iov_buf_len); if (global.iov_buf == NULL) ERR("Failed to allocate global handle buffer"); @@ -192,9 +193,6 @@ HandleDistribute(daos_handle_t *handle, enum handleType type) "Failed to bcast global pool handle"); if (rank != 0) { - /* A larger-than-actual length works just fine. 
*/ - global.iov_len = global.iov_buf_len; - if (type == POOL_HANDLE) rc = daos_pool_global2local(global, handle); else if (type == CONT_HANDLE) @@ -384,30 +382,22 @@ static int DAOS_Access(const char *testFileName, int mode, IOR_param_t * param) { daos_obj_id_t oid; + daos_size_t cell_size, chunk_size; int rc; /** Convert file name into object ID */ gen_oid(testFileName, &oid); - /** open the array to verify it exists */ - if (param->filePerProc || rank == 0) { - daos_size_t cell_size, chunk_size; + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, + &cell_size, &chunk_size, &aoh, NULL); + if (rc) + return rc; - rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, - &cell_size, &chunk_size, &aoh, NULL); - if (rc) - return rc; - - if (cell_size != 1) - GERR("Invalid DAOS Array object.\n"); - - rc = daos_array_close(aoh, NULL); - aoh.cookie = 0; - } - - if (!param->filePerProc) - MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); + rc = daos_array_close(aoh, NULL); + aoh.cookie = 0; return rc; } @@ -487,6 +477,7 @@ static void DAOS_Delete(char *testFileName, IOR_param_t *param) { daos_obj_id_t oid; + daos_size_t cell_size, chunk_size; int rc; if (!daos_initialized) @@ -496,26 +487,19 @@ DAOS_Delete(char *testFileName, IOR_param_t *param) gen_oid(testFileName, &oid); /** open the array to verify it exists */ - if (param->filePerProc || rank == 0) { - daos_size_t cell_size, chunk_size; + rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, + &cell_size, &chunk_size, &aoh, NULL); + DCHECK(rc, "daos_array_open() failed (%d).", rc); - rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, - &cell_size, &chunk_size, &aoh, NULL); - DCHECK(rc, "daos_array_open() failed (%d).", rc); + if (cell_size != 1) + GERR("Invalid DAOS Array object.\n"); - if (cell_size != 1) - GERR("Invalid DAOS Array object.\n"); + rc = daos_array_destroy(aoh, DAOS_TX_NONE, NULL); + DCHECK(rc, "daos_array_destroy() failed 
(%d).", rc); - rc = daos_array_destroy(aoh, DAOS_TX_NONE, NULL); - DCHECK(rc, "daos_array_destroy() failed (%d).", rc); - - rc = daos_array_close(aoh, NULL); - DCHECK(rc, "daos_array_close() failed (%d).", rc); - aoh.cookie = 0; - } - - if (!param->filePerProc) - MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + rc = daos_array_close(aoh, NULL); + DCHECK(rc, "daos_array_close() failed (%d).", rc); + aoh.cookie = 0; } static char * From 13a17352ec0d5dabb64c0f523f7e7292449abd07 Mon Sep 17 00:00:00 2001 From: Vaclav Hapla Date: Thu, 16 May 2019 20:48:28 +0200 Subject: [PATCH 23/66] Update tutorial.rst "Defeating Page Cache" should be a heading I guess. --- doc/sphinx/userDoc/tutorial.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/sphinx/userDoc/tutorial.rst b/doc/sphinx/userDoc/tutorial.rst index 9556ceb..5fa6814 100644 --- a/doc/sphinx/userDoc/tutorial.rst +++ b/doc/sphinx/userDoc/tutorial.rst @@ -181,6 +181,7 @@ again, using this option changes our performance measurement quite a bit:: and we finally have a believable bandwidth measurement for our file system. Defeating Page Cache +-------------------- Since IOR is specifically designed to benchmark I/O, it provides these options that make it as easy as possible to ensure that you are actually measuring the performance of your file system and not your compute nodes' memory. That being From af1aa627ebe1ed8581b8ddb3472ff51689e73fd4 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 7 Jun 2019 16:59:11 +0000 Subject: [PATCH 24/66] use d_ version of function instead of daos_ since some of those are going to be removed from the DAOS API. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 10 +++++----- src/aiori-DFS.c | 9 +++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 751e5a7..922975d 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -151,7 +151,7 @@ do { \ static void HandleDistribute(daos_handle_t *handle, enum handleType type) { - daos_iov_t global; + d_iov_t global; int rc; global.iov_buf = NULL; @@ -281,7 +281,7 @@ DAOS_Init() rc = daos_pool_connect(uuid, o.group, svcl, DAOS_PC_RW, &poh, &po_info, NULL); - daos_rank_list_free(svcl); + d_rank_list_free(svcl); DCHECK(rc, "Failed to connect to pool %s", o.pool); INFO(VERBOSE_1, "Create/Open Container %s", o.cont); @@ -433,8 +433,8 @@ DAOS_Xfer(int access, void *file, IOR_size_t *buffer, { daos_array_iod_t iod; daos_range_t rg; - daos_sg_list_t sgl; - daos_iov_t iov; + d_sg_list_t sgl; + d_iov_t iov; int rc; /** set array location */ @@ -445,7 +445,7 @@ DAOS_Xfer(int access, void *file, IOR_size_t *buffer, /** set memory location */ sgl.sg_nr = 1; - daos_iov_set(&iov, buffer, length); + d_iov_set(&iov, buffer, length); sgl.sg_iovs = &iov; if (access == WRITE) { diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 86bd175..f9f945c 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -269,7 +270,7 @@ DFS_Init() { rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, &poh, &pool_info, NULL); DCHECK(rc, "Failed to connect to pool"); - + d_rank_list_free(svcl); rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, NULL); /* If NOEXIST we create it */ if (rc == -DER_NONEXIST) { @@ -402,13 +403,13 @@ DFS_Xfer(int access, void *file, IOR_size_t *buffer, IOR_offset_t length, obj = (dfs_obj_t *)file; while (remaining > 0) { - daos_iov_t iov; - daos_sg_list_t sgl; + d_iov_t iov; + d_sg_list_t sgl; /** set memory location */ sgl.sg_nr = 1; sgl.sg_nr_out = 0; - daos_iov_set(&iov, (void *)ptr, 
remaining); + d_iov_set(&iov, (void *)ptr, remaining); sgl.sg_iovs = &iov; /* write/read file */ From 915283bae85813984da7247200b68f0fe725fed3 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 17 Jun 2019 20:38:02 +0000 Subject: [PATCH 25/66] update array_g2l usage with DAOS API change Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 922975d..6557d13 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -198,7 +198,7 @@ HandleDistribute(daos_handle_t *handle, enum handleType type) else if (type == CONT_HANDLE) rc = daos_cont_global2local(poh, global, handle); else - rc = daos_array_global2local(coh, global, handle); + rc = daos_array_global2local(coh, global, 0, handle); DCHECK(rc, "Failed to get local handle"); } From 6cae44b512ce9eae77bccf2e1b5a24e45ce80637 Mon Sep 17 00:00:00 2001 From: Osamu Tatebe Date: Fri, 21 Jun 2019 21:05:41 +0900 Subject: [PATCH 26/66] abstract I/O interface for Gfarm file system --- configure.ac | 20 ++- src/Makefile.am | 5 + src/aiori-Gfarm.c | 316 ++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 1 + 5 files changed, 344 insertions(+), 1 deletion(-) create mode 100644 src/aiori-Gfarm.c diff --git a/configure.ac b/configure.ac index 80e8533..7d8e4a7 100755 --- a/configure.ac +++ b/configure.ac @@ -185,7 +185,25 @@ AM_COND_IF([USE_RADOS_AIORI],[ AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI]) ]) - +# Gfarm support +AC_MSG_CHECKING([for Gfarm file system]) +AC_ARG_WITH([gfarm], + [AS_HELP_STRING([--with-gfarm=GFARM_ROOT], + [support IO with libgfarm backend @<:@default=no@:>@])], + [], [with_gfarm=no]) +AC_MSG_RESULT([$with_gfarm]) +AM_CONDITIONAL([USE_GFARM_AIORI], [test x$with_gfarm != xno]) +if test x$with_gfarm != xno; then + AC_DEFINE([USE_GFARM_AIORI], [], [Build Gfarm backend AIORI]) + case x$with_gfarm in + xyes) ;; + *) + CPPFLAGS="$CPPFLAGS -I$with_gfarm/include" + 
LDFLAGS="$LDFLAGS -L$with_gfarm/lib" ;; + esac + AC_CHECK_LIB([gfarm], [gfarm_initialize],, [AC_MSG_ERROR([libgfarm not found])]) + AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec]) +fi # aws4c is needed for the S3 backend (see --with-S3, below). # Version 0.5.2 of aws4c is available at https://github.com/jti-lanl/aws4c.git diff --git a/src/Makefile.am b/src/Makefile.am index 51fb873..74dcd31 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -70,6 +70,11 @@ extraSOURCES += aiori-RADOS.c extraLDADD += -lrados endif +if USE_GFARM_AIORI +extraSOURCES += aiori-Gfarm.c +extraLDADD += -lgfarm +endif + if USE_S3_AIORI extraSOURCES += aiori-S3.c if AWS4C_DIR diff --git a/src/aiori-Gfarm.c b/src/aiori-Gfarm.c new file mode 100644 index 0000000..cc835f1 --- /dev/null +++ b/src/aiori-Gfarm.c @@ -0,0 +1,316 @@ +#include +#include +#include +#include +#include +#undef PACKAGE_NAME +#undef PACKAGE_STRING +#undef PACKAGE_TARNAME +#undef PACKAGE_VERSION +#include "ior.h" +#include "aiori.h" + +struct gfarm_file { + GFS_File gf; +}; + +void +Gfarm_initialize() +{ + gfarm_initialize(NULL, NULL); +} + +void +Gfarm_finalize() +{ + gfarm_terminate(); +} + +void * +Gfarm_create(char *fn, IOR_param_t *param) +{ + GFS_File gf; + struct gfarm_file *fp; + gfarm_error_t e; + + if (param->dryRun) + return (NULL); + + e = gfs_pio_create(fn, GFARM_FILE_RDWR, 0664, &gf); + if (e != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_create failed"); + GFARM_MALLOC(fp); + if (fp == NULL) + ERR("no memory"); + fp->gf = gf; + return (fp); +} + +void * +Gfarm_open(char *fn, IOR_param_t *param) +{ + GFS_File gf; + struct gfarm_file *fp; + gfarm_error_t e; + + if (param->dryRun) + return (NULL); + + e = gfs_pio_open(fn, GFARM_FILE_RDWR, &gf); + if (e != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_open failed"); + GFARM_MALLOC(fp); + if (fp == NULL) + ERR("no memory"); + fp->gf = gf; + return (fp); +} + +IOR_offset_t +Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, + IOR_param_t *param) +{ + struct 
gfarm_file *fp = fd; + IOR_offset_t rem = len; + gfarm_off_t off; + gfarm_error_t e; +#define MAX_SZ (1024 * 1024 * 1024) + int sz, n; + char *buf = (char *)buffer; + + if (param->dryRun) + return (len); + + if (len > MAX_SZ) + sz = MAX_SZ; + else + sz = len; + + e = gfs_pio_seek(fp->gf, param->offset, GFARM_SEEK_SET, &off); + if (e != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_seek failed"); + while (rem > 0) { + if (access == WRITE) + e = gfs_pio_write(fp->gf, buf, sz, &n); + else + e = gfs_pio_read(fp->gf, buf, sz, &n); + if (e != GFARM_ERR_NO_ERROR) + ERR("xfer failed"); + if (n == 0) + ERR("EOF encountered"); + rem -= n; + buf += n; + } + return (len); +} + +void +Gfarm_close(void *fd, IOR_param_t *param) +{ + struct gfarm_file *fp = fd; + + if(param->dryRun) + return; + + if (gfs_pio_close(fp->gf) != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_close failed"); + free(fp); +} + +void +Gfarm_delete(char *fn, IOR_param_t *param) +{ + gfarm_error_t e; + + if (param->dryRun) + return; + + e = gfs_unlink(fn); + if (e != GFARM_ERR_NO_ERROR) + errno = gfarm_error_to_errno(e); +} + +char * +Gfarm_version() +{ + return ((char *)gfarm_version()); +} + +void +Gfarm_fsync(void *fd, IOR_param_t *param) +{ + struct gfarm_file *fp = fd; + + if(param->dryRun) + return; + + if (gfs_pio_sync(fp->gf) != GFARM_ERR_NO_ERROR) + ERR("gfs_pio_sync failed"); +} + +IOR_offset_t +Gfarm_get_file_size(IOR_param_t *param, MPI_Comm comm, char *fn) +{ + struct gfs_stat st; + IOR_offset_t size, sum, min, max; + + if (param->dryRun) + return (0); + + if (gfs_stat(fn, &st) != GFARM_ERR_NO_ERROR) + ERR("gfs_stat failed"); + size = st.st_size; + gfs_stat_free(&st); + + if (param->filePerProc == TRUE) { + MPI_CHECK(MPI_Allreduce(&size, &sum, 1, MPI_LONG_LONG_INT, + MPI_SUM, comm), "cannot total data moved"); + size = sum; + } else { + MPI_CHECK(MPI_Allreduce(&size, &min, 1, MPI_LONG_LONG_INT, + MPI_MIN, comm), "cannot total data moved"); + MPI_CHECK(MPI_Allreduce(&size, &max, 1, MPI_LONG_LONG_INT, + MPI_MAX, comm), 
"cannot total data moved"); + if (min != max) { + if (rank == 0) + WARN("inconsistent file size by different " + "tasks"); + /* incorrect, but now consistent across tasks */ + size = min; + } + } + return (size); +} + +int +Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, IOR_param_t *param) +{ + gfarm_off_t used, avail, files; + gfarm_error_t e; + int bsize = 4096; + + if (param->dryRun) + return (0); + + e = gfs_statfs_by_path(fn, &used, &avail, &files); + if (e != GFARM_ERR_NO_ERROR) { + errno = gfarm_error_to_errno(e); + return (-1); + } + st->f_bsize = bsize; + st->f_blocks = (used + avail) / bsize; + st->f_bfree = avail / bsize; + st->f_files = 2 * files; /* XXX */ + st->f_ffree = files; /* XXX */ + return (0); +} + +int +Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) +{ + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_mkdir(fn, mode); + if (e == GFARM_ERR_NO_ERROR) + return (0); + errno = gfarm_error_to_errno(e); + return (-1); +} + +int +Gfarm_rmdir(const char *fn, IOR_param_t *param) +{ + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_rmdir(fn); + if (e == GFARM_ERR_NO_ERROR) + return (0); + errno = gfarm_error_to_errno(e); + return (-1); +} + +int +Gfarm_access(const char *fn, int mode, IOR_param_t *param) +{ + struct gfs_stat st; + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_stat(fn, &st); + if (e != GFARM_ERR_NO_ERROR) { + errno = gfarm_error_to_errno(e); + return (-1); + } + gfs_stat_free(&st); + return (0); +} + +/* XXX FIXME */ +#define GFS_DEV ((dev_t)-1) +#define GFS_BLKSIZE 8192 +#define STAT_BLKSIZ 512 /* for st_blocks */ + +int +Gfarm_stat(const char *fn, struct stat *buf, IOR_param_t *param) +{ + struct gfs_stat st; + gfarm_error_t e; + + if (param->dryRun) + return (0); + + e = gfs_stat(fn, &st); + if (e != GFARM_ERR_NO_ERROR) { + errno = gfarm_error_to_errno(e); + return (-1); + } + buf->st_dev = GFS_DEV; + buf->st_ino = st.st_ino; + buf->st_mode = st.st_mode; + 
buf->st_nlink = st.st_nlink; + buf->st_uid = getuid(); /* XXX */ + buf->st_gid = getgid(); /* XXX */ + buf->st_size = st.st_size; + buf->st_blksize = GFS_BLKSIZE; + buf->st_blocks = (st.st_size + STAT_BLKSIZ - 1) / STAT_BLKSIZ; + buf->st_atime = st.st_atimespec.tv_sec; + buf->st_mtime = st.st_mtimespec.tv_sec; + buf->st_ctime = st.st_ctimespec.tv_sec; +#if defined(HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC) + buf->st_atim.tv_nsec = st.st_atimespec.tv_nsec; + buf->st_mtim.tv_nsec = st.st_mtimespec.tv_nsec; + buf->st_ctim.tv_nsec = st.st_ctimespec.tv_nsec; +#endif + gfs_stat_free(&st); + return (0); +} + +ior_aiori_t gfarm_aiori = { + .name = "Gfarm", + .name_legacy = NULL, + .create = Gfarm_create, + .open = Gfarm_open, + .xfer = Gfarm_xfer, + .close = Gfarm_close, + .delete = Gfarm_delete, + .get_version = Gfarm_version, + .fsync = Gfarm_fsync, + .get_file_size = Gfarm_get_file_size, + .statfs = Gfarm_statfs, + .mkdir = Gfarm_mkdir, + .rmdir = Gfarm_rmdir, + .access = Gfarm_access, + .stat = Gfarm_stat, + .initialize = Gfarm_initialize, + .finalize = Gfarm_finalize, + .get_options = NULL, + .enable_mdtest = true, +}; diff --git a/src/aiori.c b/src/aiori.c index 52001c8..41773d9 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -68,6 +68,9 @@ ior_aiori_t *available_aiori[] = { #endif #ifdef USE_RADOS_AIORI &rados_aiori, +#endif +#ifdef USE_GFARM_AIORI + &gfarm_aiori, #endif NULL }; diff --git a/src/aiori.h b/src/aiori.h index c2074c2..56717fe 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -105,6 +105,7 @@ extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_emc_aiori; extern ior_aiori_t rados_aiori; +extern ior_aiori_t gfarm_aiori; void aiori_initialize(IOR_test_t * tests); void aiori_finalize(IOR_test_t * tests); From a822363ee0d92beba47f359271f370608163cd07 Mon Sep 17 00:00:00 2001 From: Osamu Tatebe Date: Sat, 22 Jun 2019 10:48:41 +0900 Subject: [PATCH 27/66] style fix --- configure.ac | 4 ++-- src/aiori-Gfarm.c | 10 +++++----- 2 files changed, 7 
insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 7d8e4a7..af400ab 100755 --- a/configure.ac +++ b/configure.ac @@ -189,8 +189,8 @@ AM_COND_IF([USE_RADOS_AIORI],[ AC_MSG_CHECKING([for Gfarm file system]) AC_ARG_WITH([gfarm], [AS_HELP_STRING([--with-gfarm=GFARM_ROOT], - [support IO with libgfarm backend @<:@default=no@:>@])], - [], [with_gfarm=no]) + [support IO with Gfarm backend @<:@default=no@:>@])], + [], [with_gfarm=no]) AC_MSG_RESULT([$with_gfarm]) AM_CONDITIONAL([USE_GFARM_AIORI], [test x$with_gfarm != xno]) if test x$with_gfarm != xno; then diff --git a/src/aiori-Gfarm.c b/src/aiori-Gfarm.c index cc835f1..a7af0ea 100644 --- a/src/aiori-Gfarm.c +++ b/src/aiori-Gfarm.c @@ -77,7 +77,7 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, #define MAX_SZ (1024 * 1024 * 1024) int sz, n; char *buf = (char *)buffer; - + if (param->dryRun) return (len); @@ -109,7 +109,7 @@ Gfarm_close(void *fd, IOR_param_t *param) { struct gfarm_file *fp = fd; - if(param->dryRun) + if (param->dryRun) return; if (gfs_pio_close(fp->gf) != GFARM_ERR_NO_ERROR) @@ -141,7 +141,7 @@ Gfarm_fsync(void *fd, IOR_param_t *param) { struct gfarm_file *fp = fd; - if(param->dryRun) + if (param->dryRun) return; if (gfs_pio_sync(fp->gf) != GFARM_ERR_NO_ERROR) @@ -213,7 +213,7 @@ Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) if (param->dryRun) return (0); - e = gfs_mkdir(fn, mode); + e = gfs_mkdir(fn, mode); if (e == GFARM_ERR_NO_ERROR) return (0); errno = gfarm_error_to_errno(e); @@ -228,7 +228,7 @@ Gfarm_rmdir(const char *fn, IOR_param_t *param) if (param->dryRun) return (0); - e = gfs_rmdir(fn); + e = gfs_rmdir(fn); if (e == GFARM_ERR_NO_ERROR) return (0); errno = gfarm_error_to_errno(e); From 960005351d06c22faf47ea9d8432b6fb2967d160 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 25 Jun 2019 22:41:30 +0000 Subject: [PATCH 28/66] update DAOS and DFS markings. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 14 +++++++------- src/aiori-DFS.c | 32 ++++++++++++++------------------ 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 7210ae3..c4128d1 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -3,18 +3,18 @@ * vim:expandtab:shiftwidth=8:tabstop=8: */ /* - * SPECIAL LICENSE RIGHTS-OPEN SOURCE SOFTWARE + * Copyright (C) 2018-2019 Intel Corporation + * + * GOVERNMENT LICENSE RIGHTS-OPEN SOURCE SOFTWARE * The Government's rights to use, modify, reproduce, release, perform, display, - * or disclose this software are subject to the terms of Contract No. B599860, - * and the terms of the GNU General Public License version 2. + * or disclose this software are subject to the terms of the Apache License as + * provided in Contract No. 8F-30005. * Any reproduction of computer software, computer software documentation, or * portions thereof marked with this legend must also reproduce the markings. */ + /* - * Copyright (c) 2013, 2016 Intel Corporation. - */ -/* - * This file implements the abstract I/O interface for DAOS. + * This file implements the abstract I/O interface for DAOS Array API. */ #ifdef HAVE_CONFIG_H diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 0a400b9..fe27d0b 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -1,24 +1,20 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -/******************************************************************************\ -* * -* Copyright (c) 2003, The Regents of the University of California * -* See the file COPYRIGHT for a complete copyright notice and license. * -* * -* Copyright (C) 2018 Intel Corporation -* -* GOVERNMENT LICENSE RIGHTS-OPEN SOURCE SOFTWARE -* The Government's rights to use, modify, reproduce, release, perform, display, -* or disclose this software are subject to the terms of the Apache License as -* provided in Contract No. 
8F-30005. -* Any reproduction of computer software, computer software documentation, or -* portions thereof marked with this legend must also reproduce the markings. -******************************************************************************** -* -* Implement of abstract I/O interface for DFS. -* -\******************************************************************************/ +/* + * Copyright (C) 2018-2019 Intel Corporation + * + * GOVERNMENT LICENSE RIGHTS-OPEN SOURCE SOFTWARE + * The Government's rights to use, modify, reproduce, release, perform, display, + * or disclose this software are subject to the terms of the Apache License as + * provided in Contract No. 8F-30005. + * Any reproduction of computer software, computer software documentation, or + * portions thereof marked with this legend must also reproduce the markings. + */ + +/* + * This file implements the abstract I/O interface for DAOS FS API. + */ #ifdef HAVE_CONFIG_H #include "config.h" From 2ae7c29de21490b1393856f4a405ac1cea2c9324 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 27 Jun 2019 21:31:07 +0000 Subject: [PATCH 29/66] cache the parent dir in a hashtable so we don't have to do a lookup on every access. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 141 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 102 insertions(+), 39 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index fe27d0b..d319ebd 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -43,6 +44,13 @@ dfs_t *dfs; daos_handle_t poh, coh; +static struct d_hash_table *dir_hash; + +struct aiori_dir_hdl { + d_list_t entry; + dfs_obj_t *oh; + char name[PATH_MAX]; +}; enum handleType { POOL_HANDLE, @@ -146,6 +154,36 @@ do { \ } \ } while (0) +static inline struct aiori_dir_hdl * +hdl_obj(d_list_t *rlink) +{ + return container_of(rlink, struct aiori_dir_hdl, entry); +} + +static bool +key_cmp(struct d_hash_table *htable, d_list_t *rlink, + const void *key, unsigned int ksize) +{ + struct aiori_dir_hdl *hdl = hdl_obj(rlink); + + return (strcmp(hdl->name, (const char *)key) == 0); +} + +static void +rec_free(struct d_hash_table *htable, d_list_t *rlink) +{ + struct aiori_dir_hdl *hdl = hdl_obj(rlink); + + assert(d_hash_rec_unlinked(&hdl->entry)); + dfs_release(hdl->oh); + free(hdl); +} + +static d_hash_table_ops_t hdl_hash_ops = { + .hop_key_cmp = key_cmp, + .hop_rec_free = rec_free +}; + /* Distribute process 0's pool or container handle to others. 
*/ static void HandleDistribute(daos_handle_t *handle, enum handleType type) @@ -290,6 +328,39 @@ out: return rc; } +static dfs_obj_t * +lookup_insert_dir(const char *name) +{ + struct aiori_dir_hdl *hdl; + d_list_t *rlink; + mode_t mode; + int rc; + + rlink = d_hash_rec_find(dir_hash, name, strlen(name)); + if (rlink != NULL) { + hdl = hdl_obj(rlink); + return hdl->oh; + } + + hdl = calloc(1, sizeof(struct aiori_dir_hdl)); + if (hdl == NULL) + DERR(ENOMEM, "failed to alloc dir handle"); + + strncpy(hdl->name, name, PATH_MAX-1); + hdl->name[PATH_MAX-1] = '\0'; + + rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, &mode); + DERR(rc, "dfs_lookup() of %s Failed", name); + + rc = d_hash_rec_insert(dir_hash, hdl->name, strlen(hdl->name), + &hdl->entry, true); + DERR(rc, "Failed to insert dir handle in hashtable"); + + return hdl->oh; +out: + return NULL; +} + static option_help * DFS_options(){ return options; } @@ -304,6 +375,9 @@ DFS_Init() { rc = daos_init(); DCHECK(rc, "Failed to initialize daos"); + rc = d_hash_table_create(0, 16, NULL, &hdl_hash_ops, &dir_hash); + DCHECK(rc, "Failed to initialize dir hashtable"); + if (rank == 0) { uuid_t pool_uuid, co_uuid; d_rank_list_t *svcl = NULL; @@ -359,6 +433,8 @@ DFS_Finalize() { int rc; + d_hash_table_destroy(dir_hash, true /* force */); + rc = dfs_umount(dfs); DCHECK(rc, "Failed to umount DFS namespace"); @@ -393,22 +469,22 @@ DFS_Create(char *testFileName, IOR_param_t *param) { char *name = NULL, *dir_name = NULL; dfs_obj_t *obj = NULL, *parent = NULL; - mode_t pmode, mode; + mode_t mode; int fd_oflag = 0; int rc; assert(param); - rc = parse_filename(testFileName, &name, &dir_name); + rc = parse_filename(testFileName, &name, &dir_name); DERR(rc, "Failed to parse path %s", testFileName); + assert(dir_name); + assert(name); - assert(dir_name); - assert(name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); - rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); - 
DERR(rc, "dfs_lookup() of %s Failed", dir_name); mode = S_IFREG | param->mode; - if (param->filePerProc || rank == 0) { fd_oflag |= O_CREAT | O_RDWR | O_EXCL; @@ -431,8 +507,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); return ((void *)obj); } @@ -445,7 +519,7 @@ DFS_Open(char *testFileName, IOR_param_t *param) { char *name = NULL, *dir_name = NULL; dfs_obj_t *obj = NULL, *parent = NULL; - mode_t pmode, mode; + mode_t mode; int rc; int fd_oflag = 0; @@ -458,8 +532,9 @@ DFS_Open(char *testFileName, IOR_param_t *param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); - DERR(rc, "dfs_lookup() of %s Failed", dir_name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); rc = dfs_open(dfs, parent, name, mode, fd_oflag, 0, 0, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); @@ -469,8 +544,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); return ((void *)obj); } @@ -559,7 +632,6 @@ DFS_Delete(char *testFileName, IOR_param_t * param) { char *name = NULL, *dir_name = NULL; dfs_obj_t *parent = NULL; - mode_t pmode; int rc; rc = parse_filename(testFileName, &name, &dir_name); @@ -568,8 +640,9 @@ DFS_Delete(char *testFileName, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); - DERR(rc, "dfs_lookup() of %s Failed", dir_name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); rc = dfs_remove(dfs, parent, name, false); DERR(rc, "dfs_remove() of %s Failed", name); @@ -579,8 +652,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); } static char* DFS_GetVersion() @@ -647,7 +718,6 @@ static int DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) { dfs_obj_t *parent = NULL; - mode_t pmode; char *name = NULL, *dir_name = NULL; int rc; @@ 
-657,8 +727,9 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); - DERR(rc, "dfs_lookup() of %s Failed", dir_name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); rc = dfs_mkdir(dfs, parent, name, mode); DERR(rc, "dfs_mkdir() of %s Failed", name); @@ -668,8 +739,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); if (rc) return -1; return rc; @@ -679,7 +748,6 @@ static int DFS_Rmdir(const char *path, IOR_param_t * param) { dfs_obj_t *parent = NULL; - mode_t pmode; char *name = NULL, *dir_name = NULL; int rc; @@ -689,8 +757,9 @@ DFS_Rmdir(const char *path, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); - DERR(rc, "dfs_lookup() of %s Failed", dir_name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); rc = dfs_remove(dfs, parent, name, false); DERR(rc, "dfs_remove() of %s Failed", name); @@ -700,8 +769,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); if (rc) return -1; return rc; @@ -711,7 +778,6 @@ static int DFS_Access(const char *path, int mode, IOR_param_t * param) { dfs_obj_t *parent = NULL; - mode_t pmode; char *name = NULL, *dir_name = NULL; struct stat stbuf; int rc; @@ -721,8 +787,9 @@ DFS_Access(const char *path, int mode, IOR_param_t * param) assert(dir_name); - rc = dfs_lookup(dfs, dir_name, O_RDWR, &parent, &pmode); - DERR(rc, "dfs_lookup() of %s Failed", dir_name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); if (name && strcmp(name, ".") == 0) { free(name); @@ -735,8 +802,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); if (rc) return -1; return rc; @@ -746,7 +811,6 @@ static int DFS_Stat(const char 
*path, struct stat *buf, IOR_param_t * param) { dfs_obj_t *parent = NULL; - mode_t pmode; char *name = NULL, *dir_name = NULL; int rc; @@ -756,8 +820,9 @@ DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param) assert(dir_name); assert(name); - rc = dfs_lookup(dfs, dir_name, O_RDONLY, &parent, &pmode); - DERR(rc, "dfs_lookup() of %s Failed", dir_name); + parent = lookup_insert_dir(dir_name); + if (parent == NULL) + DERR(rc, "Failed to lookup parent dir"); rc = dfs_stat(dfs, parent, name, buf); DERR(rc, "dfs_stat() of %s Failed", name); @@ -767,8 +832,6 @@ out: free(name); if (dir_name) free(dir_name); - if (parent) - dfs_release(parent); if (rc) return -1; return rc; From 9f9730d6fddd6359acbd9e1eb390a5f8ddfc5996 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 28 Jun 2019 16:20:56 +0000 Subject: [PATCH 30/66] should abort on daos init if pool/cont/svcl not specified. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index c4128d1..3b5c80c 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -258,8 +258,11 @@ DAOS_Init() if (daos_initialized) return; - if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) + if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) { + GERR("Invalid DAOS pool/cont\n"); return; + } + if (o.oclass) ObjectClassParse(o.oclass); From 52ed766df43114caa1a5bb6c59505ebe295a71a3 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 8 Jul 2019 15:33:06 +0000 Subject: [PATCH 31/66] update DAOS and DFS drivers to new DAOS API changes. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 97 +++++++++++++++++++++++++++++++----------------- src/aiori-DFS.c | 10 ++--- 2 files changed, 67 insertions(+), 40 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 3b5c80c..6b03c44 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -28,9 +28,9 @@ #include #include #include + #include #include -#include #include #include "ior.h" @@ -119,7 +119,7 @@ enum handleType { static daos_handle_t poh; static daos_handle_t coh; static daos_handle_t aoh; -static daos_oclass_id_t objectClass = DAOS_OC_LARGE_RW; +static daos_oclass_id_t objectClass = OC_SX; static bool daos_initialized = false; /***************************** F U N C T I O N S ******************************/ @@ -212,34 +212,58 @@ HandleDistribute(daos_handle_t *handle, enum handleType type) static void ObjectClassParse(const char *string) { - if (strcasecmp(string, "tiny") == 0) - objectClass = DAOS_OC_TINY_RW; - else if (strcasecmp(string, "small") == 0) - objectClass = DAOS_OC_SMALL_RW; - else if (strcasecmp(string, "large") == 0) - objectClass = DAOS_OC_LARGE_RW; - else if (strcasecmp(string, "echo_tiny") == 0) - objectClass = DAOS_OC_ECHO_TINY_RW; - else if (strcasecmp(string, "echo_R2S") == 0) - objectClass = DAOS_OC_ECHO_R2S_RW; - else if (strcasecmp(string, "echo_R3S") == 0) - objectClass = DAOS_OC_ECHO_R3S_RW; - else if (strcasecmp(string, "echo_R4S") == 0) - objectClass = DAOS_OC_ECHO_R4S_RW; - else if (strcasecmp(string, "R2") == 0) - objectClass = DAOS_OC_R2_RW; - else if (strcasecmp(string, "R2S") == 0) - objectClass = DAOS_OC_R2S_RW; - else if (strcasecmp(string, "R3S") == 0) - objectClass = DAOS_OC_R3S_RW; - else if (strcasecmp(string, "R3") == 0) - objectClass = DAOS_OC_R3_RW; - else if (strcasecmp(string, "R4") == 0) - objectClass = DAOS_OC_R4_RW; - else if (strcasecmp(string, "R4S") == 0) - objectClass = DAOS_OC_R4S_RW; - else if (strcasecmp(string, "repl_max") == 0) - objectClass = DAOS_OC_REPL_MAX_RW; + if 
(strcasecmp(string, "oc_s1") == 0) + objectClass = OC_S1; + else if (strcasecmp(string, "oc_s2") == 0) + objectClass = OC_S2; + else if (strcasecmp(string, "oc_s4") == 0) + objectClass = OC_S4; + else if (strcasecmp(string, "oc_sx") == 0) + objectClass = OC_SX; + else if (strcasecmp(string, "oc_tiny") == 0) + objectClass = OC_TINY; + else if (strcasecmp(string, "oc_small") == 0) + objectClass = OC_SMALL; + else if (strcasecmp(string, "oc_large") == 0) + objectClass = OC_LARGE; + else if (strcasecmp(string, "oc_max") == 0) + objectClass = OC_MAX; + else if (strcasecmp(string, "oc_rp_tiny") == 0) + objectClass = OC_RP_TINY; + else if (strcasecmp(string, "oc_rp_small") == 0) + objectClass = OC_RP_SMALL; + else if (strcasecmp(string, "oc_rp_large") == 0) + objectClass = OC_RP_LARGE; + else if (strcasecmp(string, "oc_rp_max") == 0) + objectClass = OC_RP_MAX; + else if (strcasecmp(string, "oc_rp_sf_tiny") == 0) + objectClass = OC_RP_SF_TINY; + else if (strcasecmp(string, "oc_rp_sf_small") == 0) + objectClass = OC_RP_SF_SMALL; + else if (strcasecmp(string, "oc_rp_sf_large") == 0) + objectClass = OC_RP_SF_LARGE; + else if (strcasecmp(string, "oc_rp_sf_max") == 0) + objectClass = OC_RP_SF_MAX; + else if (strcasecmp(string, "oc_ec_tiny") == 0) + objectClass = OC_EC_TINY; + else if (strcasecmp(string, "oc_ec_small") == 0) + objectClass = OC_EC_SMALL; + else if (strcasecmp(string, "oc_ec_large") == 0) + objectClass = OC_EC_LARGE; + else if (strcasecmp(string, "oc_ec_max") == 0) + objectClass = OC_EC_MAX; + else if (strcasecmp(string, "oc_rp_2g1") == 0) + objectClass = OC_RP_2G1; + else if (strcasecmp(string, "oc_rp_2g4") == 0) + objectClass = OC_RP_2G4; + else if (strcasecmp(string, "oc_rp_2gx") == 0) + objectClass = OC_RP_2GX; + else if (strcasecmp(string, "oc_rp_3g1") == 0) + objectClass = OC_RP_3G1; + else if (strcasecmp(string, "oc_rp_3g4") == 0) + objectClass = OC_RP_3G4; + else if (strcasecmp(string, "oc_rp_3gx") == 0) + objectClass = OC_RP_3GX; else GERR("Invalid 
'oclass' argument: '%s'", string); } @@ -338,9 +362,12 @@ DAOS_Fini() } MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (rc) - DCHECK(rc, "Failed to destroy container %s (%d)", - o.cont, rc); + if (rc) { + if (rank == 0) + DCHECK(rc, "Failed to destroy container %s (%d)", + o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } } rc = daos_pool_disconnect(poh, NULL); @@ -360,8 +387,8 @@ gen_oid(const char *name, daos_obj_id_t *oid) oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); oid->hi = 0; - feat = DAOS_OF_DKEY_UINT64 | DAOS_OF_AKEY_HASHED; - daos_obj_generate_id(oid, feat, objectClass); + feat = DAOS_OF_DKEY_UINT64; + daos_obj_generate_id(oid, feat, objectClass, 0); } static void * diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index d319ebd..cd66ee0 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -33,8 +33,7 @@ #include #include -#include -#include +#include #include #include "ior.h" @@ -489,7 +488,7 @@ DFS_Create(char *testFileName, IOR_param_t *param) fd_oflag |= O_CREAT | O_RDWR | O_EXCL; rc = dfs_open(dfs, parent, name, mode, fd_oflag, - DAOS_OC_LARGE_RW, 0, NULL, &obj); + OC_SX, 0, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); MPI_CHECK(MPI_Barrier(testComm), "barrier error"); @@ -498,7 +497,7 @@ DFS_Create(char *testFileName, IOR_param_t *param) fd_oflag |= O_RDWR; rc = dfs_open(dfs, parent, name, mode, fd_oflag, - DAOS_OC_LARGE_RW, 0, NULL, &obj); + OC_SX, 0, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); } @@ -725,7 +724,8 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) DERR(rc, "Failed to parse path %s", path); assert(dir_name); - assert(name); + if (!name) + return 0; parent = lookup_insert_dir(dir_name); if (parent == NULL) From 04e3df0d42eb43dc534f550e8834baf418c33595 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 8 Jul 2019 17:02:30 +0000 Subject: [PATCH 32/66] use the daos provided function for name to object class Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 61 
+----------------------------------------------- 1 file changed, 1 insertion(+), 60 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 6b03c44..8d80a69 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -209,65 +209,6 @@ HandleDistribute(daos_handle_t *handle, enum handleType type) free(global.iov_buf); } -static void -ObjectClassParse(const char *string) -{ - if (strcasecmp(string, "oc_s1") == 0) - objectClass = OC_S1; - else if (strcasecmp(string, "oc_s2") == 0) - objectClass = OC_S2; - else if (strcasecmp(string, "oc_s4") == 0) - objectClass = OC_S4; - else if (strcasecmp(string, "oc_sx") == 0) - objectClass = OC_SX; - else if (strcasecmp(string, "oc_tiny") == 0) - objectClass = OC_TINY; - else if (strcasecmp(string, "oc_small") == 0) - objectClass = OC_SMALL; - else if (strcasecmp(string, "oc_large") == 0) - objectClass = OC_LARGE; - else if (strcasecmp(string, "oc_max") == 0) - objectClass = OC_MAX; - else if (strcasecmp(string, "oc_rp_tiny") == 0) - objectClass = OC_RP_TINY; - else if (strcasecmp(string, "oc_rp_small") == 0) - objectClass = OC_RP_SMALL; - else if (strcasecmp(string, "oc_rp_large") == 0) - objectClass = OC_RP_LARGE; - else if (strcasecmp(string, "oc_rp_max") == 0) - objectClass = OC_RP_MAX; - else if (strcasecmp(string, "oc_rp_sf_tiny") == 0) - objectClass = OC_RP_SF_TINY; - else if (strcasecmp(string, "oc_rp_sf_small") == 0) - objectClass = OC_RP_SF_SMALL; - else if (strcasecmp(string, "oc_rp_sf_large") == 0) - objectClass = OC_RP_SF_LARGE; - else if (strcasecmp(string, "oc_rp_sf_max") == 0) - objectClass = OC_RP_SF_MAX; - else if (strcasecmp(string, "oc_ec_tiny") == 0) - objectClass = OC_EC_TINY; - else if (strcasecmp(string, "oc_ec_small") == 0) - objectClass = OC_EC_SMALL; - else if (strcasecmp(string, "oc_ec_large") == 0) - objectClass = OC_EC_LARGE; - else if (strcasecmp(string, "oc_ec_max") == 0) - objectClass = OC_EC_MAX; - else if (strcasecmp(string, "oc_rp_2g1") == 0) - objectClass = OC_RP_2G1; - else if 
(strcasecmp(string, "oc_rp_2g4") == 0) - objectClass = OC_RP_2G4; - else if (strcasecmp(string, "oc_rp_2gx") == 0) - objectClass = OC_RP_2GX; - else if (strcasecmp(string, "oc_rp_3g1") == 0) - objectClass = OC_RP_3G1; - else if (strcasecmp(string, "oc_rp_3g4") == 0) - objectClass = OC_RP_3G4; - else if (strcasecmp(string, "oc_rp_3gx") == 0) - objectClass = OC_RP_3GX; - else - GERR("Invalid 'oclass' argument: '%s'", string); -} - static option_help * DAOS_options() { @@ -288,7 +229,7 @@ DAOS_Init() } if (o.oclass) - ObjectClassParse(o.oclass); + objectClass = daos_oclass_name2id(o.oclass); rc = daos_init(); if (rc) From ad61fdd89c364888cdd645fee68ec86008135e18 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 8 Jul 2019 18:18:24 +0000 Subject: [PATCH 33/66] add proper check for DAOS object class and exit if unknown. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 8d80a69..cce5f53 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -228,8 +228,11 @@ DAOS_Init() return; } - if (o.oclass) + if (o.oclass) { objectClass = daos_oclass_name2id(o.oclass); + if (objectClass == OC_UNKNOWN) + GERR("Invalid DAOS Object class %s\n", o.oclass); + } rc = daos_init(); if (rc) From 1c640716c7e821c5be63d54a4abd697aaa69c735 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 8 Jul 2019 21:31:00 +0000 Subject: [PATCH 34/66] Use MPI Info object for all MPI_File_open calls. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-MPIIO.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 6bf991b..46f6247 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -74,6 +74,7 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) } MPI_File fd; int mpi_mode = MPI_MODE_UNIQUE_OPEN; + MPI_Info mpiHints = MPI_INFO_NULL; if ((mode & W_OK) && (mode & R_OK)) mpi_mode |= MPI_MODE_RDWR; @@ -82,12 +83,15 @@ int MPIIO_Access(const char *path, int mode, IOR_param_t *param) else mpi_mode |= MPI_MODE_RDONLY; - int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, - MPI_INFO_NULL, &fd); + SetHints(&mpiHints, param->hintsFileName); + + int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, mpiHints, &fd); if (!ret) MPI_File_close(&fd); + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); return ret; } @@ -497,6 +501,7 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; MPI_File fd; MPI_Comm comm; + MPI_Info mpiHints = MPI_INFO_NULL; if (test->filePerProc == TRUE) { comm = MPI_COMM_SELF; @@ -504,12 +509,15 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, comm = testComm; } + SetHints(&mpiHints, test->hintsFileName); MPI_CHECK(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY, - MPI_INFO_NULL, &fd), + mpiHints, &fd), "cannot open file to get file size"); MPI_CHECK(MPI_File_get_size(fd, (MPI_Offset *) & aggFileSizeFromStat), "cannot get file size"); MPI_CHECK(MPI_File_close(&fd), "cannot close file"); + if (mpiHints != MPI_INFO_NULL) + MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); if (test->filePerProc == TRUE) { MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, From 92939e4fbdf33485627b9bae1ddcc6bec355ff60 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 31 Jul 2019 17:22:20 +0000 Subject: [PATCH 
35/66] update for DAOS API changes Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 5 +---- src/aiori-DFS.c | 4 ---- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index cce5f53..71aff90 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -31,7 +31,6 @@ #include #include -#include #include "ior.h" #include "aiori.h" @@ -326,13 +325,11 @@ DAOS_Fini() static void gen_oid(const char *name, daos_obj_id_t *oid) { - daos_ofeat_t feat = 0; oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); oid->hi = 0; - feat = DAOS_OF_DKEY_UINT64; - daos_obj_generate_id(oid, feat, objectClass, 0); + daos_array_generate_id(oid, objectClass, true, 0); } static void * diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index cd66ee0..c88b80e 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -490,11 +490,7 @@ DFS_Create(char *testFileName, IOR_param_t *param) rc = dfs_open(dfs, parent, name, mode, fd_oflag, OC_SX, 0, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); - - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); } else { - MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - fd_oflag |= O_RDWR; rc = dfs_open(dfs, parent, name, mode, fd_oflag, OC_SX, 0, NULL, &obj); From b1b66962acc1f1ecfdcd9e89965c5a3a24516f84 Mon Sep 17 00:00:00 2001 From: Osamu Tatebe Date: Fri, 2 Aug 2019 13:03:59 +0900 Subject: [PATCH 36/66] incorrect warning --- src/ior.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ior.c b/src/ior.c index 3ef411c..1ebcda8 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1580,6 +1580,7 @@ static void ValidateTests(IOR_param_t * test) && (strcasecmp(test->api, "MPIIO") != 0) && (strcasecmp(test->api, "MMAP") != 0) && (strcasecmp(test->api, "HDFS") != 0) + && (strcasecmp(test->api, "Gfarm") != 0) && (strcasecmp(test->api, "RADOS") != 0)) && test->fsync) WARN_RESET("fsync() not supported in selected backend", test, &defaults, fsync); From f16ef9ace517a471bd59fcfddecaf9d80bde16a1 Mon Sep 17 00:00:00 
2001 From: Mohamad Chaarawi Date: Sat, 3 Aug 2019 05:07:13 +0000 Subject: [PATCH 37/66] update dfs_lookup() call for extra parameter. Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index c88b80e..35870e1 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -332,7 +332,6 @@ lookup_insert_dir(const char *name) { struct aiori_dir_hdl *hdl; d_list_t *rlink; - mode_t mode; int rc; rlink = d_hash_rec_find(dir_hash, name, strlen(name)); @@ -348,7 +347,7 @@ lookup_insert_dir(const char *name) strncpy(hdl->name, name, PATH_MAX-1); hdl->name[PATH_MAX-1] = '\0'; - rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, &mode); + rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, NULL, NULL); DERR(rc, "dfs_lookup() of %s Failed", name); rc = d_hash_rec_insert(dir_hash, hdl->name, strlen(hdl->name), @@ -667,7 +666,7 @@ DFS_GetFileSize(IOR_param_t * test, MPI_Comm comm, char *testFileName) daos_size_t fsize, tmpMin, tmpMax, tmpSum; int rc; - rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL); + rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); if (rc) { fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); return -1; From c4ff3d7c4e35edc4022e81e779d09f38b00f1954 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sat, 3 Aug 2019 09:12:48 +0100 Subject: [PATCH 38/66] Trivial fix for #168 --- src/mdtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index 1ff204d..24f3ec1 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -79,7 +79,7 @@ #define FILEMODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH #define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH #define RELEASE_VERS META_VERSION -#define TEST_DIR "#test-dir" +#define TEST_DIR "test-dir" #define ITEM_COUNT 25000 #define LLU "%lu" From 361a3261d1f8be2621bce8be8ed41ee9f6d0bf1f Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Sat, 3 Aug 2019 09:15:34 +0100 Subject: [PATCH 39/66] Updated test patterns --- testing/mdtest-patterns/advanced/3.txt | 188 +++++++++++++------------ testing/mdtest-patterns/advanced/4.txt | 102 +++++++------- testing/mdtest-patterns/advanced/5.txt | 154 ++++++++++---------- testing/mdtest-patterns/basic/0.txt | 51 +++---- testing/mdtest-patterns/basic/1.txt | 51 +++---- testing/mdtest-patterns/basic/2.txt | 54 +++---- testing/mdtest-patterns/basic/3.txt | 65 +++++---- 7 files changed, 341 insertions(+), 324 deletions(-) diff --git a/testing/mdtest-patterns/advanced/3.txt b/testing/mdtest-patterns/advanced/3.txt index a51000a..4c45941 100644 --- a/testing/mdtest-patterns/advanced/3.txt +++ b/testing/mdtest-patterns/advanced/3.txt @@ -1,93 +1,95 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" -V-3: main: Using unique_mk_dir, "mdtest_tree.0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0", to topdir -V-3: directory_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3" -V-3: 
create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19" -V-3: file_test: create path is 
"/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.0" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.1" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.2" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.3" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.4" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.5" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.6" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.7" -V-3: create_remove_items_helper (non-collective, shared): open... 
-V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.8" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.9" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.10" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.11" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.12" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.13" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.14" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.15" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... 
-V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.16" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.17" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.18" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.19" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: main: Using testdir, "/dev/shm/mdest/#test-dir.0-0" +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 Line 288 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0' +V-3: Rank 0 Line 348 create_remove_items_helper 
(non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/advanced/4.txt b/testing/mdtest-patterns/advanced/4.txt index 8df166a..5d3b7da 100644 --- a/testing/mdtest-patterns/advanced/4.txt +++ b/testing/mdtest-patterns/advanced/4.txt @@ -1,50 +1,52 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main: Using unique_mk_dir, "mdtest_tree.0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0", to topdir -V-3: directory_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -V-3: mdtest_stat dir : 
/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -V-3: file_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 -V-3: mdtest_stat file: 
/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 -V-3: main: Using testdir, "/dev/shm/mdest/#test-dir.0-0" +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... +V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 Line 588 
mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 +V-3: Rank 0 Line 588 mdtest_stat file: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/advanced/5.txt b/testing/mdtest-patterns/advanced/5.txt index 300bce6..e87ae0a 100644 --- a/testing/mdtest-patterns/advanced/5.txt +++ b/testing/mdtest-patterns/advanced/5.txt @@ -1,77 +1,77 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" -V-3: main: Using unique_mk_dir, "mdtest_tree.0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0", to topdir -V-3: directory_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: 
create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14" -V-3: create_remove_items_helper (dirs create): curr_item is 
"/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19" -V-3: directory_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -V-3: mdtest_stat dir : 
/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -V-3: directory_test: read path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: remove directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10" -V-3: create_remove_items_helper (dirs remove): curr_item is 
"/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18" -V-3: create_remove_items_helper (dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19" -V-3: directory_test: remove unique directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: main: Using testdir, "/dev/shm/mdest/#test-dir.0-0" -V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 Line 288 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 Line 588 mdtest_stat dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 Line 288 
create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/0.txt b/testing/mdtest-patterns/basic/0.txt index 7ff10fc..ebe0f14 100644 --- a/testing/mdtest-patterns/basic/0.txt +++ b/testing/mdtest-patterns/basic/0.txt @@ -1,24 +1,27 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" -V-3: main: Using unique_mk_dir, "mdtest_tree.0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0", to topdir -V-3: directory_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: 
directory_test: read path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: remove directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: remove unique directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: read path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: rm directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: rm unique directories path is "mdtest_tree.0" -V-3: main: Using testdir, "/dev/shm/mdest/#test-dir.0-0" -V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/1.txt b/testing/mdtest-patterns/basic/1.txt index 7ff10fc..ebe0f14 100644 --- a/testing/mdtest-patterns/basic/1.txt +++ b/testing/mdtest-patterns/basic/1.txt @@ -1,24 +1,27 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" -V-3: main: Using unique_mk_dir, "mdtest_tree.0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0", to topdir -V-3: directory_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: read path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: remove directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: directory_test: remove unique directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" 
-V-3: file_test: read path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: rm directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0" -V-3: file_test: rm unique directories path is "mdtest_tree.0" -V-3: main: Using testdir, "/dev/shm/mdest/#test-dir.0-0" -V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... +V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 +V-3: 
Rank 0 Line 990 Entering file_test on mdtest_tree.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' +V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/2.txt b/testing/mdtest-patterns/basic/2.txt index a21f5a3..77f5c78 100644 --- a/testing/mdtest-patterns/basic/2.txt +++ b/testing/mdtest-patterns/basic/2.txt @@ -1,25 +1,29 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0.0", to topdir -V-3: file_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (start): temp_path is 
"/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (for loop): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: file_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 -V-3: file_test: rm directories path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (for loop): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: create_remove_items_helper (non-dirs remove): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: file_test: rm unique directories path is "/dev/shm/mdest/#test-dir.0-0/" -V-3: main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
+V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1694 i 1 nstride 0 +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 +V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0 +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 310 create_remove_items_helper (non-dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 1158 file_test: rm unique directories path is '/dev/shm/mdest/test-dir.0-0/' +V-3: Rank 0 Line 1754 main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/3.txt b/testing/mdtest-patterns/basic/3.txt index 3e4aa78..eafadc1 100644 --- a/testing/mdtest-patterns/basic/3.txt +++ b/testing/mdtest-patterns/basic/3.txt @@ -1,31 +1,34 @@ -V-3: main (before display_freespace): testdirpath is "/dev/shm/mdest" -V-3: testdirpath is "/dev/shm/mdest" -V-3: Before show_file_system_size, dirpath is "/dev/shm" -V-3: After show_file_system_size, dirpath is "/dev/shm" -V-3: main (after display_freespace): testdirpath is "/dev/shm/mdest" -V-3: main (create hierarchical directory loop-!collective_creates): Calling 
create_remove_directory_tree with "/dev/shm/mdest/#test-dir.0-0" -V-3: main: Copied unique_mk_dir, "mdtest_tree.0.0", to topdir -V-3: directory_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (for loop): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: create_remove_items_helper (dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: directory_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 -V-3: mdtest_stat dir : /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 -V-3: file_test: create path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0" -V-3: create_remove_items_helper (non-collective, shared): open... -V-3: create_remove_items_helper: close... -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: create_remove_items (for loop): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: create_remove_items_helper (non-dirs create): curr_item is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1" -V-3: create_remove_items_helper (non-collective, shared): open... 
-V-3: create_remove_items_helper: close... -V-3: create_remove_items (start): temp_path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/" -V-3: file_test: stat path is "/dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0" -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 -V-3: mdtest_stat file: /dev/shm/mdest/#test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 +V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... +V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' +V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' +V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 Line 1694 i 1 nstride 0 +V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir +V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 833 stat path is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 +V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 +V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... +V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 Line 373 create_remove_items_helper: close... 
+V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 +V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 From 8cb878507e73f57ebe0809ee623dcf95a5417221 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 12 Aug 2019 14:25:05 +0000 Subject: [PATCH 40/66] Add dfs chunk_size and oclass options. update dfs_remove for API change. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 2 +- src/aiori-DFS.c | 64 +++++++++++++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 71aff90..fc339ca 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -211,7 +211,7 @@ HandleDistribute(daos_handle_t *handle, enum handleType type) static option_help * DAOS_options() { - return options; + return options; } static void diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 35870e1..2acd9bb 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -42,7 +42,8 @@ #include "utilities.h" dfs_t *dfs; -daos_handle_t poh, coh; +static daos_handle_t poh, coh; +static daos_oclass_id_t objectClass = OC_SX; static struct d_hash_table *dir_hash; struct aiori_dir_hdl { @@ -63,6 +64,8 @@ struct dfs_options{ char *svcl; char *group; char *cont; + int chunk_size; + char *oclass; int destroy; }; @@ -71,14 +74,18 @@ static struct dfs_options o = { .svcl = NULL, .group = NULL, .cont = NULL, + .chunk_size = 1048576, + .oclass = NULL, .destroy = 0, }; static option_help options [] = { - {0, "dfs.pool", "DAOS pool uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.pool}, - {0, "dfs.svcl", "DAOS pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', & o.svcl}, - {0, "dfs.group", "DAOS server group", 
OPTION_OPTIONAL_ARGUMENT, 's', & o.group}, + {0, "dfs.pool", "pool uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.pool}, + {0, "dfs.svcl", "pool SVCL", OPTION_REQUIRED_ARGUMENT, 's', & o.svcl}, + {0, "dfs.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', & o.group}, {0, "dfs.cont", "DFS container uuid", OPTION_REQUIRED_ARGUMENT, 's', & o.cont}, + {0, "dfs.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o.chunk_size}, + {0, "dfs.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o.oclass}, {0, "dfs.destroy", "Destroy DFS Container", OPTION_FLAG, 'd', &o.destroy}, LAST_OPTION }; @@ -156,31 +163,31 @@ do { \ static inline struct aiori_dir_hdl * hdl_obj(d_list_t *rlink) { - return container_of(rlink, struct aiori_dir_hdl, entry); + return container_of(rlink, struct aiori_dir_hdl, entry); } static bool key_cmp(struct d_hash_table *htable, d_list_t *rlink, const void *key, unsigned int ksize) { - struct aiori_dir_hdl *hdl = hdl_obj(rlink); + struct aiori_dir_hdl *hdl = hdl_obj(rlink); - return (strcmp(hdl->name, (const char *)key) == 0); + return (strcmp(hdl->name, (const char *)key) == 0); } static void rec_free(struct d_hash_table *htable, d_list_t *rlink) { - struct aiori_dir_hdl *hdl = hdl_obj(rlink); + struct aiori_dir_hdl *hdl = hdl_obj(rlink); - assert(d_hash_rec_unlinked(&hdl->entry)); - dfs_release(hdl->oh); - free(hdl); + assert(d_hash_rec_unlinked(&hdl->entry)); + dfs_release(hdl->oh); + free(hdl); } static d_hash_table_ops_t hdl_hash_ops = { - .hop_key_cmp = key_cmp, - .hop_rec_free = rec_free + .hop_key_cmp = key_cmp, + .hop_rec_free = rec_free }; /* Distribute process 0's pool or container handle to others. 
*/ @@ -360,7 +367,7 @@ out: } static option_help * DFS_options(){ - return options; + return options; } static void @@ -370,6 +377,12 @@ DFS_Init() { if (o.pool == NULL || o.svcl == NULL || o.cont == NULL) ERR("Invalid pool or container options\n"); + if (o.oclass) { + objectClass = daos_oclass_name2id(o.oclass); + if (objectClass == OC_UNKNOWN) + DCHECK(-1, "Invalid DAOS Object class %s\n", o.oclass); + } + rc = daos_init(); DCHECK(rc, "Failed to initialize daos"); @@ -487,14 +500,18 @@ DFS_Create(char *testFileName, IOR_param_t *param) fd_oflag |= O_CREAT | O_RDWR | O_EXCL; rc = dfs_open(dfs, parent, name, mode, fd_oflag, - OC_SX, 0, NULL, &obj); - DERR(rc, "dfs_open() of %s Failed", name); - } else { - fd_oflag |= O_RDWR; - rc = dfs_open(dfs, parent, name, mode, fd_oflag, - OC_SX, 0, NULL, &obj); + objectClass, o.chunk_size, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); } + if (!param->filePerProc) { + MPI_Barrier(MPI_COMM_WORLD); + if (rank != 0) { + fd_oflag |= O_RDWR; + rc = dfs_open(dfs, parent, name, mode, fd_oflag, + objectClass, o.chunk_size, NULL, &obj); + DERR(rc, "dfs_open() of %s Failed", name); + } + } out: if (name) @@ -530,7 +547,8 @@ DFS_Open(char *testFileName, IOR_param_t *param) if (parent == NULL) DERR(rc, "Failed to lookup parent dir"); - rc = dfs_open(dfs, parent, name, mode, fd_oflag, 0, 0, NULL, &obj); + rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, + o.chunk_size, NULL, &obj); DERR(rc, "dfs_open() of %s Failed", name); out: @@ -638,7 +656,7 @@ DFS_Delete(char *testFileName, IOR_param_t * param) if (parent == NULL) DERR(rc, "Failed to lookup parent dir"); - rc = dfs_remove(dfs, parent, name, false); + rc = dfs_remove(dfs, parent, name, false, NULL); DERR(rc, "dfs_remove() of %s Failed", name); out: @@ -756,7 +774,7 @@ DFS_Rmdir(const char *path, IOR_param_t * param) if (parent == NULL) DERR(rc, "Failed to lookup parent dir"); - rc = dfs_remove(dfs, parent, name, false); + rc = dfs_remove(dfs, parent, name, false, 
NULL); DERR(rc, "dfs_remove() of %s Failed", name); out: From 1320aa279cdb18150d5f01e8838062793dbf0877 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 12 Aug 2019 16:04:20 +0000 Subject: [PATCH 41/66] add some barriers before cont close and destroy to make sure all ranks are done. Signed-off-by: Mohamad Chaarawi --- .gitignore | 1 + src/aiori-DAOS.c | 10 +++++++--- src/aiori-DFS.c | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 8c0ab90..eb59cf7 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ src/testlib src/test/.deps/ src/test/.dirstamp src/test/lib.o +build/ doc/doxygen/build doc/sphinx/_*/ diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index fc339ca..9f4270b 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -292,8 +292,13 @@ DAOS_Fini() if (!daos_initialized) return; + MPI_Barrier(MPI_COMM_WORLD); rc = daos_cont_close(coh, NULL); - DCHECK(rc, "Failed to close container\n"); + if (rc) { + DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } + MPI_Barrier(MPI_COMM_WORLD); if (o.destroy) { if (rank == 0) { @@ -307,8 +312,7 @@ DAOS_Fini() MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); if (rc) { if (rank == 0) - DCHECK(rc, "Failed to destroy container %s (%d)", - o.cont, rc); + DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); MPI_Abort(MPI_COMM_WORLD, -1); } } diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 2acd9bb..9c4ca25 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -444,13 +444,16 @@ DFS_Finalize() { int rc; + MPI_Barrier(MPI_COMM_WORLD); d_hash_table_destroy(dir_hash, true /* force */); rc = dfs_umount(dfs); DCHECK(rc, "Failed to umount DFS namespace"); + MPI_Barrier(MPI_COMM_WORLD); rc = daos_cont_close(coh, NULL); - DCHECK(rc, "Failed to close container"); + DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); + MPI_Barrier(MPI_COMM_WORLD); if (rank == 0 && o.destroy) { uuid_t uuid; From 
b3663bd29a7bb15f60d0714faa898945181c11a6 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 14 Aug 2019 13:32:51 +0000 Subject: [PATCH 42/66] add sleep depending on MPI rank to avoid all ranks calling daos_fini() at once (issue with PSM2). Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 5 +++++ src/aiori-DFS.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 9f4270b..870aa68 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -17,6 +17,8 @@ * This file implements the abstract I/O interface for DAOS Array API. */ +#define _BSD_SOURCE + #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -320,6 +322,9 @@ DAOS_Fini() rc = daos_pool_disconnect(poh, NULL); DCHECK(rc, "Failed to disconnect from pool %s", o.pool); + MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + usleep(20000 * rank); + rc = daos_fini(); DCHECK(rc, "Failed to finalize daos"); diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 9c4ca25..ef89f41 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -16,6 +16,8 @@ * This file implements the abstract I/O interface for DAOS FS API. */ +#define _BSD_SOURCE + #ifdef HAVE_CONFIG_H #include "config.h" #endif @@ -471,6 +473,9 @@ DFS_Finalize() daos_pool_disconnect(poh, NULL); DCHECK(rc, "Failed to disconnect from pool"); + MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + usleep(20000 * rank); + rc = daos_fini(); DCHECK(rc, "Failed to finalize DAOS"); } From a4068be551c38e327fb9b054a91cbeb778a9bd76 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 26 Aug 2019 15:57:13 +0100 Subject: [PATCH 43/66] Improved help for fsync. 
--- src/parse_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parse_options.c b/src/parse_options.c index af30c36..47f9920 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -477,7 +477,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, - {'e', NULL, "fsync -- perform sync operation after each block write", OPTION_FLAG, 'd', & params->fsync}, + {'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync}, {'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & params->useExistingTestFile}, {'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts}, {'F', NULL, "filePerProc -- file-per-process", OPTION_FLAG, 'd', & params->filePerProc}, From 4df051bf283675faefcb8523dc034b7f817419c6 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 26 Aug 2019 18:57:14 +0100 Subject: [PATCH 44/66] New option -Y to invoke the sync command. 
--- src/mdtest.c | 47 ++++++++++++++++++++++------------------------- src/utilities.c | 17 ++++++++++++----- src/utilities.h | 1 + 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 77b2759..e07613b 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -148,6 +148,7 @@ static size_t write_bytes; static int stone_wall_timer_seconds; static size_t read_bytes; static int sync_file; +static int call_sync; static int path_count; static int nstride; /* neighbor stride */ static int make_node = 0; @@ -263,6 +264,16 @@ static void prep_testdir(int j, int dir_iter){ pos += sprintf(& testdir[pos], ".%d-%d", j, dir_iter); } +static void phase_end(){ + if (call_sync){ + call_sync_cmd(); + } + + if (barriers) { + MPI_Barrier(testComm); + } +} + /* * This function copies the unique directory name for a given option to * the "to" parameter. Some memory must be allocated to the "to" parameter. @@ -836,9 +847,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[1] = GetTimeStamp(); /* stat phase */ @@ -864,10 +873,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } } - - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[2] = GetTimeStamp(); /* read phase */ @@ -894,9 +900,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[3] = GetTimeStamp(); if (remove_only) { @@ -924,9 +928,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[4] = GetTimeStamp(); if (remove_only) { @@ -1082,9 +1084,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[1] = GetTimeStamp(); /* stat phase */ @@ -1107,9 +1107,7 @@ 
void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[2] = GetTimeStamp(); /* read phase */ @@ -1136,9 +1134,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[3] = GetTimeStamp(); if (remove_only) { @@ -1168,9 +1164,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } - if (barriers) { - MPI_Barrier(testComm); - } + phase_end(); t[4] = GetTimeStamp(); if (remove_only) { if (unique_dir_per_task) { @@ -1853,6 +1847,7 @@ void mdtest_init_args(){ stone_wall_timer_seconds = 0; read_bytes = 0; sync_file = 0; + call_sync = 0; path_count = 0; nstride = 0; make_node = 0; @@ -1925,6 +1920,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & verify_read}, {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & sync_file}, + {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, LAST_OPTION @@ -2008,6 +2004,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * VERBOSE(1,-1, "unique_dir_per_task : %s", ( unique_dir_per_task ? "True" : "False" )); VERBOSE(1,-1, "write_bytes : "LLU"", write_bytes ); VERBOSE(1,-1, "sync_file : %s", ( sync_file ? "True" : "False" )); + VERBOSE(1,-1, "call_sync : %s", ( call_sync ? 
"True" : "False" )); VERBOSE(1,-1, "depth : %d", depth ); VERBOSE(1,-1, "make_node : %d", make_node ); diff --git a/src/utilities.c b/src/utilities.c index cdb090e..a2e4b0d 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -77,15 +77,15 @@ void* safeMalloc(uint64_t size){ } void FailMessage(int rank, const char *location, char *format, ...) { - char msg[4096]; + char msg[4096]; va_list args; va_start(args, format); vsnprintf(msg, 4096, format, args); va_end(args); - fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", - PrintTimestamp(), rank, location, msg, strerror(errno)); - fflush(out_logfile); - MPI_Abort(testComm, 1); + fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", + PrintTimestamp(), rank, location, msg, strerror(errno)); + fflush(out_logfile); + MPI_Abort(testComm, 1); } size_t NodeMemoryStringToBytes(char *size_str) @@ -809,3 +809,10 @@ char *HumanReadable(IOR_offset_t value, int base) } return valueStr; } + +void call_sync_cmd(){ + int ret = system("sync"); + if (ret != 0){ + FAIL("Error executing the sync command, ensure it exists."); + } +} diff --git a/src/utilities.h b/src/utilities.h index d2c9962..b85f957 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -60,6 +60,7 @@ int QueryNodeMapping(MPI_Comm comm, int print_nodemap); void DelaySecs(int delay); void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); size_t NodeMemoryStringToBytes(char *size_str); +void call_sync_cmd(); /* Returns -1, if cannot be read */ int64_t ReadStoneWallingIterations(char * const filename); From 93730771fd599fde7b57fb78ad346d655602cb6a Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 28 Aug 2019 17:01:51 +0000 Subject: [PATCH 45/66] add some verbose messages on finalize routines for DAOS and DFS drivers. 
Signed-off-by: Mohamad Chaarawi --- README_DAOS | 2 +- src/aiori-DAOS.c | 8 +++++++- src/aiori-DFS.c | 12 +++++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/README_DAOS b/README_DAOS index 33d9464..9467989 100644 --- a/README_DAOS +++ b/README_DAOS @@ -3,7 +3,7 @@ Building The DAOS library must be installed on the system. -./bootsrap +./bootstrap ./configure --prefix=iorInstallDir --with-daos=DIR --with-cart=DIR One must specify "--with-daos=/path/to/daos/install and --with-cart". When that diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 870aa68..16d386a 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -306,7 +306,7 @@ DAOS_Fini() if (rank == 0) { uuid_t uuid; - INFO(VERBOSE_1, "Destroying Container %s", o.cont); + INFO(VERBOSE_1, "Destroying DAOS Container %s", o.cont); uuid_parse(o.cont, uuid); rc = daos_cont_destroy(poh, uuid, 1, NULL); } @@ -319,12 +319,18 @@ DAOS_Fini() } } + if (rank == 0) + INFO(VERBOSE_1, "Disconnecting from DAOS POOL.."); + rc = daos_pool_disconnect(poh, NULL); DCHECK(rc, "Failed to disconnect from pool %s", o.pool); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); usleep(20000 * rank); + if (rank == 0) + INFO(VERBOSE_1, "Finalizing DAOS.."); + rc = daos_fini(); DCHECK(rc, "Failed to finalize daos"); diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index ef89f41..17a344d 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -140,7 +140,7 @@ ior_aiori_t dfs_aiori = { do { \ int _rc = (rc); \ \ - if (_rc < 0) { \ + if (_rc != 0) { \ fprintf(stderr, "ERROR (%s:%d): %d: %d: " \ format"\n", __FILE__, __LINE__, rank, _rc, \ ##__VA_ARGS__); \ @@ -153,7 +153,7 @@ do { \ do { \ int _rc = (rc); \ \ - if (_rc < 0) { \ + if (_rc != 0) { \ fprintf(stderr, "ERROR (%s:%d): %d: %d: " \ format"\n", __FILE__, __LINE__, rank, _rc, \ ##__VA_ARGS__); \ @@ -470,12 +470,18 @@ DFS_Finalize() if (rc) DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); - daos_pool_disconnect(poh, NULL); + if (rank == 0 && 
verbose >= VERBOSE_1) + printf("Disconnecting from DAOS POOL\n"); + + rc = daos_pool_disconnect(poh, NULL); DCHECK(rc, "Failed to disconnect from pool"); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); usleep(20000 * rank); + if (rank == 0 && verbose >= VERBOSE_1) + printf("Finalizing DAOS..\n"); + rc = daos_fini(); DCHECK(rc, "Failed to finalize DAOS"); } From 32db1cd9029df0b69fa8f480d83ef412223dd06c Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 29 Aug 2019 03:24:48 +0000 Subject: [PATCH 46/66] add timing for container close. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 5 +++++ src/aiori-DFS.c | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 16d386a..18dd689 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -305,10 +305,15 @@ DAOS_Fini() if (o.destroy) { if (rank == 0) { uuid_t uuid; + double t1, t2; INFO(VERBOSE_1, "Destroying DAOS Container %s", o.cont); uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0) + INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); } MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 17a344d..df956e8 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -459,11 +459,16 @@ DFS_Finalize() if (rank == 0 && o.destroy) { uuid_t uuid; + double t1, t2; if (verbose >= VERBOSE_1) printf("Destorying DFS Container: %s\n", o.cont); uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0 && verbose >= VERBOSE_1) + printf("Container Destroy time = %f secs", t2-t1); } MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); From 0b809b36e292267c0c22bf7077769641e0b7c1fd Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 30 Aug 2019 20:35:15 +0000 Subject: [PATCH 47/66] fix README_DAOS for DFS plugin Signed-off-by: Mohamad Chaarawi --- README_DAOS | 6 +++--- 1 file changed, 
3 insertions(+), 3 deletions(-) diff --git a/README_DAOS b/README_DAOS index 9467989..b4e6dba 100644 --- a/README_DAOS +++ b/README_DAOS @@ -78,6 +78,6 @@ Examples that should work include: Running mdtest, the user needs to specify a directory with -d where the test tree will be created. Some examples: - - "mdtest -a DFS -n 100 -F -D -d /bla --daos.pool --daos.svcl --daos.cont " - - "mdtest -a DFS -n 1000 -F -C -d /bla --daos.pool --daos.svcl --daos.cont " - - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d /bla --daos.pool --daos.svcl --daos.cont " + - "mdtest -a DFS -n 100 -F -D -d /bla --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -n 1000 -F -C -d /bla --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d /bla --dfs.pool --dfs.svcl --dfs.cont " From 0bd979637e8b8c340fb7475897c84aa8172d7117 Mon Sep 17 00:00:00 2001 From: Josh Schwartz <52082483+jschwartz-cray@users.noreply.github.com> Date: Fri, 30 Aug 2019 15:11:25 -0600 Subject: [PATCH 48/66] Added some extra debug including ERRF, WARNF, and MPI_CHECKF format string macros. --- src/aiori-MPIIO.c | 14 ++++++------ src/aiori-POSIX.c | 54 +++++++++++++++++++++++++---------------------- src/ior.c | 17 +++++++++------ src/iordef.h | 52 +++++++++++++++++++++++++++++++++------------ 4 files changed, 85 insertions(+), 52 deletions(-) diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 6bf991b..707b3a7 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -178,8 +178,8 @@ static void *MPIIO_Open(char *testFileName, IOR_param_t * param) fprintf(stdout, "}\n"); } if(! 
param->dryRun){ - MPI_CHECK(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd), - "cannot open file"); + MPI_CHECKF(MPI_File_open(comm, testFileName, fd_mode, mpiHints, fd), + "cannot open file: %s", testFileName); } /* show hints actually attached to file handle */ @@ -428,8 +428,8 @@ void MPIIO_Delete(char *testFileName, IOR_param_t * param) { if(param->dryRun) return; - MPI_CHECK(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL), - "cannot delete file"); + MPI_CHECKF(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL), + "cannot delete file: %s", testFileName); } /* @@ -504,9 +504,9 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, comm = testComm; } - MPI_CHECK(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY, - MPI_INFO_NULL, &fd), - "cannot open file to get file size"); + MPI_CHECKF(MPI_File_open(comm, testFileName, MPI_MODE_RDONLY, + MPI_INFO_NULL, &fd), + "cannot open file to get file size: %s", testFileName); MPI_CHECK(MPI_File_get_size(fd, (MPI_Offset *) & aggFileSizeFromStat), "cannot get file size"); MPI_CHECK(MPI_File_close(&fd), "cannot close file"); diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index d78cd40..00e974c 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -146,7 +146,7 @@ void gpfs_free_all_locks(int fd) rc = gpfs_fcntl(fd, &release_all); if (verbose >= VERBOSE_0 && rc != 0) { - EWARN("gpfs_fcntl release all locks hint failed."); + EWARNF("gpfs_fcntl(%d, ...) release all locks hint failed.", fd); } } void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int access) @@ -169,7 +169,7 @@ void gpfs_access_start(int fd, IOR_offset_t length, IOR_param_t *param, int acce rc = gpfs_fcntl(fd, &take_locks); if (verbose >= VERBOSE_2 && rc != 0) { - EWARN("gpfs_fcntl access range hint failed."); + EWARNF("gpfs_fcntl(fd, ...) 
access range hint failed.", fd); } } @@ -193,7 +193,7 @@ void gpfs_access_end(int fd, IOR_offset_t length, IOR_param_t *param, int access rc = gpfs_fcntl(fd, &free_locks); if (verbose >= VERBOSE_2 && rc != 0) { - EWARN("gpfs_fcntl free range hint failed."); + EWARNF("gpfs_fcntl(fd, ...) free range hint failed.", fd); } } @@ -260,14 +260,14 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun char* dir = dirname(dirTmp); DIR* parentDirS = opendir(dir); if (!parentDirS) { - ERR("Failed to get directory"); + ERRF("Failed to get directory: %s", dir); } else { int parentDirFd = dirfd(parentDirS); if (parentDirFd < 0) { - ERR("Failed to get directory descriptor"); + ERRF("Failed to get directory descriptor: %s", dir); } else { @@ -319,6 +319,7 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun void *POSIX_Create(char *testFileName, IOR_param_t * param) { int fd_oflag = O_BINARY; + int mode = 0664; int *fd; fd = (int *)malloc(sizeof(int)); @@ -346,9 +347,10 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) if (!param->filePerProc && rank != 0) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); fd_oflag |= O_RDWR; - *fd = open64(testFileName, fd_oflag, 0664); + *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) - ERR("open64() failed"); + ERRF("open64(\"%s\", %d, %#o) failed", + testFileName, fd_oflag, mode); } else { struct lov_user_md opts = { 0 }; @@ -363,7 +365,7 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; - *fd = open64(testFileName, fd_oflag, 0664); + *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) { fprintf(stdout, "\nUnable to open '%s': %s\n", testFileName, strerror(errno)); @@ -392,7 +394,7 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) if (beegfs_isOptionSet(param->beegfs_chunkSize) || beegfs_isOptionSet(param->beegfs_numTargets)) { bool result = 
beegfs_createFilePath(testFileName, - 0664, + mode, param->beegfs_numTargets, param->beegfs_chunkSize); if (result) { @@ -403,9 +405,10 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) } #endif /* HAVE_BEEGFS_BEEGFS_H */ - *fd = open64(testFileName, fd_oflag, 0664); + *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) - ERR("open64() failed"); + ERRF("open64(\"%s\", %d, %#o) failed", + testFileName, fd_oflag, mode); #ifdef HAVE_LUSTRE_LUSTRE_USER_H } @@ -413,7 +416,7 @@ void *POSIX_Create(char *testFileName, IOR_param_t * param) if (param->lustre_ignore_locks) { int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) - ERR("ioctl(LL_IOC_SETFLAGS) failed"); + ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); } #endif /* HAVE_LUSTRE_LUSTRE_USER_H */ @@ -469,7 +472,7 @@ void *POSIX_Open(char *testFileName, IOR_param_t * param) *fd = open64(testFileName, fd_oflag); if (*fd < 0) - ERR("open64 failed"); + ERRF("open64(\"%s\", %d) failed", testFileName, fd_oflag); #ifdef HAVE_LUSTRE_LUSTRE_USER_H if (param->lustre_ignore_locks) { @@ -479,7 +482,7 @@ void *POSIX_Open(char *testFileName, IOR_param_t * param) "** Disabling lustre range locking **\n"); } if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) - ERR("ioctl(LL_IOC_SETFLAGS) failed"); + ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) 
failed", *fd); } #endif /* HAVE_LUSTRE_LUSTRE_USER_H */ @@ -517,7 +520,7 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, /* seek to offset */ if (lseek64(fd, param->offset, SEEK_SET) == -1) - ERR("lseek64() failed"); + ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, param->offset); while (remaining > 0) { /* write/read file */ @@ -530,7 +533,8 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, } rc = write(fd, ptr, remaining); if (rc == -1) - ERR("write() failed"); + ERRF("write(%d, %p, %lld) failed", + fd, (void*)ptr, remaining); if (param->fsyncPerWrite == TRUE) POSIX_Fsync(&fd, param); } else { /* READ or CHECK */ @@ -542,9 +546,11 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, } rc = read(fd, ptr, remaining); if (rc == 0) - ERR("read() returned EOF prematurely"); + ERRF("read(%d, %p, %lld) returned EOF prematurely", + fd, (void*)ptr, remaining); if (rc == -1) - ERR("read() failed"); + ERRF("read(%d, %p, %lld) failed", + fd, (void*)ptr, remaining); } if (rc < remaining) { fprintf(stdout, @@ -579,7 +585,7 @@ static IOR_offset_t POSIX_Xfer(int access, void *file, IOR_size_t * buffer, static void POSIX_Fsync(void *fd, IOR_param_t * param) { if (fsync(*(int *)fd) != 0) - EWARN("fsync() failed"); + EWARNF("fsync(%d) failed", *(int *)fd); } /* @@ -590,7 +596,7 @@ void POSIX_Close(void *fd, IOR_param_t * param) if(param->dryRun) return; if (close(*(int *)fd) != 0) - ERR("close() failed"); + ERRF("close(%d) failed", *(int *)fd); free(fd); } @@ -602,10 +608,8 @@ void POSIX_Delete(char *testFileName, IOR_param_t * param) if(param->dryRun) return; if (unlink(testFileName) != 0){ - char errmsg[256]; - sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n", - rank, testFileName); - EWARN(errmsg); + EWARNF("[RANK %03d]: unlink() of file \"%s\" failed\n", + rank, testFileName); } } @@ -621,7 +625,7 @@ IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, 
IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; if (stat(testFileName, &stat_buf) != 0) { - ERR("stat() failed"); + ERRF("stat(\"%s\", ...) failed", testFileName); } aggFileSizeFromStat = stat_buf.st_size; diff --git a/src/ior.c b/src/ior.c index 4cd5571..f693321 100755 --- a/src/ior.c +++ b/src/ior.c @@ -822,14 +822,14 @@ static char *PrependDir(IOR_param_t * test, char *rootDir) /* dir doesn't exist, so create */ if (backend->access(dir, F_OK, test) != 0) { if (backend->mkdir(dir, S_IRWXU, test) < 0) { - ERR("cannot create directory"); + ERRF("cannot create directory: %s", dir); } /* check if correct permissions */ } else if (backend->access(dir, R_OK, test) != 0 || backend->access(dir, W_OK, test) != 0 || backend->access(dir, X_OK, test) != 0) { - ERR("invalid directory permissions"); + ERRF("invalid directory permissions: %s", dir); } /* concatenate dir and file names */ @@ -900,6 +900,10 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) GetTestFileName(testFileName, test); } if (backend->access(testFileName, F_OK, test) == 0) { + if (verbose >= VERBOSE_3) { + fprintf(out_logfile, "task %d removing %s\n", rank, + testFileName); + } backend->delete(testFileName, test); } if (test->reorderTasksRandom == TRUE) { @@ -908,6 +912,10 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) } } else { if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0)) { + if (verbose >= VERBOSE_3) { + fprintf(out_logfile, "task %d removing %s\n", rank, + testFileName); + } backend->delete(testFileName, test); } } @@ -1667,11 +1675,8 @@ static void ValidateTests(IOR_param_t * test) #if (H5_VERS_MAJOR > 0 && H5_VERS_MINOR > 5) ; #else - char errorString[MAX_STR]; - sprintf(errorString, - "'no fill' option not available in %s", + ERRF("'no fill' option not available in %s", test->apiVersion); - ERR(errorString); #endif #else WARN("unable to determine HDF5 version for 'no fill' usage"); diff --git 
a/src/iordef.h b/src/iordef.h index 1d1f216..78cf1d3 100755 --- a/src/iordef.h +++ b/src/iordef.h @@ -151,28 +151,41 @@ typedef long long int IOR_size_t; fflush(stdout); \ } while (0) -/* warning with errno printed */ -#define EWARN(MSG) do { \ + +/* warning with format string and errno printed */ +#define EWARNF(FORMAT, ...) do { \ if (verbose > VERBOSE_2) { \ - fprintf(stdout, "ior WARNING: %s, errno %d, %s (%s:%d).\n", \ - MSG, errno, strerror(errno), __FILE__, __LINE__); \ + fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s (%s:%d).\n", \ + __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ } else { \ - fprintf(stdout, "ior WARNING: %s, errno %d, %s \n", \ - MSG, errno, strerror(errno)); \ + fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s \n", \ + __VA_ARGS__, errno, strerror(errno)); \ } \ fflush(stdout); \ } while (0) -/* display error message and terminate execution */ -#define ERR(MSG) do { \ - fprintf(stdout, "ior ERROR: %s, errno %d, %s (%s:%d)\n", \ - MSG, errno, strerror(errno), __FILE__, __LINE__); \ +/* warning with errno printed */ +#define EWARN(MSG) do { \ + EWARNF("%s", MSG); \ +} while (0) + + +/* display error message with format string and terminate execution */ +#define ERRF(FORMAT, ...) do { \ + fprintf(stdout, "ior ERROR: " FORMAT ", errno %d, %s (%s:%d)\n", \ + __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ fflush(stdout); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } while (0) +/* display error message and terminate execution */ +#define ERR(MSG) do { \ + ERRF("%s", MSG); \ +} while (0) + + /* display a simple error message (i.e. 
errno is not set) and terminate execution */ #define ERR_SIMPLE(MSG) do { \ fprintf(stdout, "ior ERROR: %s, (%s:%d)\n", \ @@ -184,24 +197,35 @@ typedef long long int IOR_size_t; /******************************************************************************/ /* - * MPI_CHECK will display a custom error message as well as an error string + * MPI_CHECKF will display a custom format string as well as an error string * from the MPI_STATUS and then exit the program */ -#define MPI_CHECK(MPI_STATUS, MSG) do { \ +#define MPI_CHECKF(MPI_STATUS, FORMAT, ...) do { \ char resultString[MPI_MAX_ERROR_STRING]; \ int resultLength; \ \ if (MPI_STATUS != MPI_SUCCESS) { \ MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ - fprintf(stdout, "ior ERROR: %s, MPI %s, (%s:%d)\n", \ - MSG, resultString, __FILE__, __LINE__); \ + fprintf(stdout, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ + __VA_ARGS__, resultString, __FILE__, __LINE__); \ fflush(stdout); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } \ } while(0) +/******************************************************************************/ +/* + * MPI_CHECK will display a custom error message as well as an error string + * from the MPI_STATUS and then exit the program + */ + +#define MPI_CHECK(MPI_STATUS, MSG) do { \ + MPI_CHECKF(MPI_STATUS, "%s", MSG); \ +} while(0) + + /******************************************************************************/ /* * System info for Windows. From 4c3d96bfed7f783acd7b2beb7195eee579487257 Mon Sep 17 00:00:00 2001 From: Josh Schwartz <52082483+jschwartz-cray@users.noreply.github.com> Date: Fri, 30 Aug 2019 15:31:23 -0600 Subject: [PATCH 49/66] Fix #179. -u (uniqueDir) will once again use the full file path specified by the client instead of truncating it. This was caused by a broken sprintf which was trying to read/write overlapping buffers. 
From the glibc sprintf() documentation: "The behavior of this function is undefined if copying takes place between objects that overlap" --- src/ior.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/ior.c b/src/ior.c index 4cd5571..e173807 100755 --- a/src/ior.c +++ b/src/ior.c @@ -785,8 +785,7 @@ void GetTestFileName(char *testFileName, IOR_param_t * test) static char *PrependDir(IOR_param_t * test, char *rootDir) { char *dir; - char fname[MAX_STR + 1]; - char *p; + char *fname; int i; dir = (char *)malloc(MAX_STR + 1); @@ -806,18 +805,10 @@ static char *PrependDir(IOR_param_t * test, char *rootDir) } /* get file name */ - strcpy(fname, rootDir); - p = fname; - while (i > 0) { - if (fname[i] == '\0' || fname[i] == '/') { - p = fname + (i + 1); - break; - } - i--; - } + fname = rootDir + i + 1; /* create directory with rank as subdirectory */ - sprintf(dir, "%s%d", dir, (rank + rankOffset) % test->numTasks); + sprintf(dir + i + 1, "%d", (rank + rankOffset) % test->numTasks); /* dir doesn't exist, so create */ if (backend->access(dir, F_OK, test) != 0) { @@ -834,7 +825,7 @@ static char *PrependDir(IOR_param_t * test, char *rootDir) /* concatenate dir and file names */ strcat(dir, "/"); - strcat(dir, p); + strcat(dir, fname); return dir; } From 0e952f0f8cc56aad2967d61e3d8777131fa2a040 Mon Sep 17 00:00:00 2001 From: Josh Schwartz <52082483+jschwartz-cray@users.noreply.github.com> Date: Fri, 30 Aug 2019 16:45:03 -0600 Subject: [PATCH 50/66] Fix #181. On systems where numTasks is not evenly divisible by 'tasksPerNode' we were seeing some nodes reading multiple files while others read none after reordering. Commonly all nodes have the same number of tasks but there is nothing requiring that to be the case. Imagine having 64 tasks running against 4 nodes which can run 20 tasks each. Here you get three groups of 20 and one group of 4. 
On this system nodes running in the group of 4 were previously getting tasksPerNode of 4 which meant they reordered tasks differently than the nodes which got tasksPerNode of 20. The key to fixing this is ensuring that every node reorders tasks the same way, which means ensuring they all use the same input values. Obviously on systems where the number of tasks per node is inconsistent the reordering will also be inconsistent (some tasks may end up on the same node, or not as far separated as desired, etc.) but at least this way you'll always end up with a 1:1 reordering. - Renamed nodes/nodeCount to numNodes - Renamed tasksPerNode to numTasksOnNode0 - Ensured that numTasksOnNode0 will always have the same value regardless of which node you're on - Removed inconsistently used globals numTasksWorld and tasksPerNode and replaced with per-test params equivalents - Added utility functions for setting these values: - numNodes -> GetNumNodes - numTasks -> GetNumTasks - numTasksOnNode0 -> GetNumTasksOnNode0 - Improved MPI_VERSION < 3 logic for GetNumNodes so it works when numTasks is not evenly divisible by numTasksOnNode0 - Left 'nodes' and 'tasksPerNode' in output alone to not break compatibility - Allowed command-line params to override numTasks, numNodes, and numTasksOnNode0 but default to using the MPI-calculated values --- NEWS | 2 +- src/aiori-NCMPI.c | 4 +- src/ior-output.c | 11 +++-- src/ior.c | 79 ++++++++++++++++++-------------- src/ior.h | 4 +- src/mdtest.c | 13 +++--- src/parse_options.c | 12 +++-- src/utilities.c | 109 ++++++++++++++++++++++++++++++++++++-------- src/utilities.h | 6 +-- 9 files changed, 164 insertions(+), 76 deletions(-) diff --git a/NEWS b/NEWS index 1f2cc74..9367112 100644 --- a/NEWS +++ b/NEWS @@ -120,7 +120,7 @@ Version 2.10.1 - Corrected IOR_GetFileSize() function to point to HDF5 and NCMPI versions of IOR_GetFileSize() calls - Changed the netcdf dataset from 1D array to 4D array, where the 4 dimensions - are: 
[segmentCount][numTasksWorld][numTransfers][transferSize] + are: [segmentCount][numTasks][numTransfers][transferSize] This patch from Wei-keng Liao allows for file sizes > 4GB (provided no single dimension is > 4GB). - Finalized random-capability release diff --git a/src/aiori-NCMPI.c b/src/aiori-NCMPI.c index 3607466..5fc1375 100755 --- a/src/aiori-NCMPI.c +++ b/src/aiori-NCMPI.c @@ -216,7 +216,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, param->blockSize / param->transferSize; /* reshape 1D array to 3D array: - [segmentCount*numTasksWorld][numTransfers][transferSize] + [segmentCount*numTasks][numTransfers][transferSize] Requirement: none of these dimensions should be > 4G, */ NCMPI_CHECK(ncmpi_def_dim @@ -267,7 +267,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, bufSize[1] = 1; bufSize[2] = param->transferSize; - offset[0] = segmentNum * numTasksWorld + rank; + offset[0] = segmentNum * param->numTasks + rank; offset[1] = transferNum; offset[2] = 0; diff --git a/src/ior-output.c b/src/ior-output.c index 7049a97..26600a3 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -339,10 +339,10 @@ void ShowTestStart(IOR_param_t *test) PrintKeyVal("options", test->options); PrintKeyValInt("dryRun", test->dryRun); - PrintKeyValInt("nodes", test->nodes); + PrintKeyValInt("nodes", test->numNodes); PrintKeyValInt("memoryPerTask", (unsigned long) test->memoryPerTask); PrintKeyValInt("memoryPerNode", (unsigned long) test->memoryPerNode); - PrintKeyValInt("tasksPerNode", tasksPerNode); + PrintKeyValInt("tasksPerNode", test->numTasksOnNode0); PrintKeyValInt("repetitions", test->repetitions); PrintKeyValInt("multiFile", test->multiFile); PrintKeyValInt("interTestDelay", test->interTestDelay); @@ -430,8 +430,9 @@ void ShowSetup(IOR_param_t *params) PrintKeyValInt("task offset", params->taskPerNodeOffset); PrintKeyValInt("reorder random seed", params->reorderTasksRandomSeed); } + PrintKeyValInt("nodes", 
params->numNodes); PrintKeyValInt("tasks", params->numTasks); - PrintKeyValInt("clients per node", params->tasksPerNode); + PrintKeyValInt("clients per node", params->numTasksOnNode0); if (params->memoryPerTask != 0){ PrintKeyVal("memoryPerTask", HumanReadable(params->memoryPerTask, BASE_TWO)); } @@ -571,7 +572,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) } fprintf(out_resultfile, "%5d ", params->id); fprintf(out_resultfile, "%6d ", params->numTasks); - fprintf(out_resultfile, "%3d ", params->tasksPerNode); + fprintf(out_resultfile, "%3d ", params->numTasksOnNode0); fprintf(out_resultfile, "%4d ", params->repetitions); fprintf(out_resultfile, "%3d ", params->filePerProc); fprintf(out_resultfile, "%5d ", params->reorderTasks); @@ -595,7 +596,7 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) PrintKeyValInt("blockSize", params->blockSize); PrintKeyValInt("transferSize", params->transferSize); PrintKeyValInt("numTasks", params->numTasks); - PrintKeyValInt("tasksPerNode", params->tasksPerNode); + PrintKeyValInt("tasksPerNode", params->numTasksOnNode0); PrintKeyValInt("repetitions", params->repetitions); PrintKeyValInt("filePerProc", params->filePerProc); PrintKeyValInt("reorderTasks", params->reorderTasks); diff --git a/src/ior.c b/src/ior.c index 4cd5571..b694c51 100755 --- a/src/ior.c +++ b/src/ior.c @@ -65,7 +65,6 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out out_resultfile = world_out; mpi_comm_world = world_com; - MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), "cannot get number of tasks"); MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); /* setup tests, and validate parameters */ @@ -113,8 +112,6 @@ int ior_main(int argc, char **argv) MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI"); mpi_comm_world = MPI_COMM_WORLD; - MPI_CHECK(MPI_Comm_size(mpi_comm_world, &numTasksWorld), - "cannot get number of tasks"); 
MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); /* set error-handling */ @@ -188,8 +185,14 @@ void init_IOR_Param_t(IOR_param_t * p) p->writeFile = p->readFile = FALSE; p->checkWrite = p->checkRead = FALSE; - p->nodes = 1; - p->tasksPerNode = 1; + /* + * These can be overridden from the command-line but otherwise will be + * set from MPI. + */ + p->numTasks = -1; + p->numNodes = -1; + p->numTasksOnNode0 = -1; + p->repetitions = 1; p->repCounter = -1; p->open = WRITE; @@ -919,12 +922,17 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) */ static void InitTests(IOR_test_t *tests, MPI_Comm com) { - int size; + int mpiNumNodes = 0; + int mpiNumTasks = 0; + int mpiNumTasksOnNode0 = 0; - MPI_CHECK(MPI_Comm_size(com, & size), "MPI_Comm_size() error"); - - /* count the tasks per node */ - tasksPerNode = CountTasksPerNode(com); + /* + * These default values are the same for every test and expensive to + * retrieve so just do it once. + */ + mpiNumNodes = GetNumNodes(com); + mpiNumTasks = GetNumTasks(com); + mpiNumTasksOnNode0 = GetNumTasksOnNode0(com); /* * Since there is no guarantee that anyone other than @@ -937,12 +945,28 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) while (tests != NULL) { IOR_param_t *params = & tests->params; params->testComm = com; - params->nodes = params->numTasks / tasksPerNode; - params->tasksPerNode = tasksPerNode; - params->tasksBlockMapping = QueryNodeMapping(com,false); - if (params->numTasks == 0) { - params->numTasks = size; + + /* use MPI values if not overridden on command-line */ + if (params->numNodes == -1) { + params->numNodes = mpiNumNodes; } + if (params->numTasks == -1) { + params->numTasks = mpiNumTasks; + } else if (params->numTasks > mpiNumTasks) { + if (rank == 0) { + fprintf(out_logfile, + "WARNING: More tasks requested (%d) than available (%d),", + params->numTasks, mpiNumTasks); + fprintf(out_logfile, " running with %d tasks.\n", + mpiNumTasks); + } + 
params->numTasks = mpiNumTasks; + } + if (params->numTasksOnNode0 == -1) { + params->numTasksOnNode0 = mpiNumTasksOnNode0; + } + + params->tasksBlockMapping = QueryNodeMapping(com,false); params->expectedAggFileSize = params->blockSize * params->segmentCount * params->numTasks; @@ -1090,7 +1114,7 @@ static void *HogMemory(IOR_param_t *params) if (verbose >= VERBOSE_3) fprintf(out_logfile, "This node hogging %ld bytes of memory\n", params->memoryPerNode); - size = params->memoryPerNode / params->tasksPerNode; + size = params->memoryPerNode / params->numTasksOnNode0; } else { return NULL; } @@ -1190,16 +1214,6 @@ static void TestIoSys(IOR_test_t *test) IOR_io_buffers ioBuffers; /* set up communicator for test */ - if (params->numTasks > numTasksWorld) { - if (rank == 0) { - fprintf(out_logfile, - "WARNING: More tasks requested (%d) than available (%d),", - params->numTasks, numTasksWorld); - fprintf(out_logfile, " running on %d tasks.\n", - numTasksWorld); - } - params->numTasks = numTasksWorld; - } MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group), "MPI_Comm_group() error"); range[0] = 0; /* first rank */ @@ -1226,7 +1240,6 @@ static void TestIoSys(IOR_test_t *test) "Using reorderTasks '-C' (useful to avoid read cache in client)\n"); fflush(out_logfile); } - params->tasksPerNode = CountTasksPerNode(testComm); backend = params->backend; /* show test setup */ if (rank == 0 && verbose >= VERBOSE_0) @@ -1363,7 +1376,7 @@ static void TestIoSys(IOR_test_t *test) /* move two nodes away from writing node */ int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */ if (params->tasksBlockMapping) { - shift = params->tasksPerNode; /* switch to by-slot (contiguous block) mapping */ + shift = params->numTasksOnNode0; /* switch to by-slot (contiguous block) mapping */ } rankOffset = (2 * shift) % params->numTasks; } @@ -1388,7 +1401,7 @@ static void TestIoSys(IOR_test_t *test) if(params->stoneWallingStatusFile){ params->stoneWallingWearOutIterations = 
ReadStoneWallingIterations(params->stoneWallingStatusFile); if(params->stoneWallingWearOutIterations == -1 && rank == 0){ - fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!"); + fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n"); params->stoneWallingWearOutIterations = 0; } } @@ -1403,7 +1416,7 @@ static void TestIoSys(IOR_test_t *test) /* move one node away from writing node */ int shift = 1; /* assume a by-node (round-robin) mapping of tasks to nodes */ if (params->tasksBlockMapping) { - shift=params->tasksPerNode; /* switch to a by-slot (contiguous block) mapping */ + shift=params->numTasksOnNode0; /* switch to a by-slot (contiguous block) mapping */ } rankOffset = (params->taskPerNodeOffset * shift) % params->numTasks; } @@ -1414,7 +1427,7 @@ static void TestIoSys(IOR_test_t *test) int nodeoffset; unsigned int iseed0; nodeoffset = params->taskPerNodeOffset; - nodeoffset = (nodeoffset < params->nodes) ? nodeoffset : params->nodes - 1; + nodeoffset = (nodeoffset < params->numNodes) ? 
nodeoffset : params->numNodes - 1; if (params->reorderTasksRandomSeed < 0) iseed0 = -1 * params->reorderTasksRandomSeed + rep; else @@ -1424,7 +1437,7 @@ static void TestIoSys(IOR_test_t *test) rankOffset = rand() % params->numTasks; } while (rankOffset < - (nodeoffset * params->tasksPerNode)) { + (nodeoffset * params->numTasksOnNode0)) { rankOffset = rand() % params->numTasks; } /* Get more detailed stats if requested by verbose level */ @@ -1454,7 +1467,7 @@ static void TestIoSys(IOR_test_t *test) "barrier error"); if (rank == 0 && verbose >= VERBOSE_1) { fprintf(out_logfile, - "Commencing read performance test: %s", + "Commencing read performance test: %s\n", CurrentTimeString()); } timer[2] = GetTimeStamp(); diff --git a/src/ior.h b/src/ior.h index e245b08..ccf47fa 100755 --- a/src/ior.h +++ b/src/ior.h @@ -98,8 +98,8 @@ typedef struct // intermediate options int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ int numTasks; /* number of tasks for test */ - int nodes; /* number of nodes for test */ - int tasksPerNode; /* number of tasks per node */ + int numNodes; /* number of nodes for test */ + int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */ int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */ int repetitions; /* number of repetitions of test */ int repCounter; /* rep counter */ diff --git a/src/mdtest.c b/src/mdtest.c index 77b2759..39edf8c 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1867,7 +1867,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * mdtest_init_args(); int i, j; - int nodeCount; + int numNodes; + int numTasksOnNode0 = 0; MPI_Group worldgroup, testgroup; struct { int first; @@ -1943,8 +1944,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * pid = getpid(); uid = getuid(); - tasksPerNode = CountTasksPerNode(testComm); - nodeCount = size / 
tasksPerNode; + numNodes = GetNumNodes(testComm); + numTasksOnNode0 = GetNumTasksOnNode0(testComm); char cmd_buffer[4096]; strncpy(cmd_buffer, argv[0], 4096); @@ -1953,7 +1954,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp()); - VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, nodeCount); + VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, numNodes); VERBOSE(0,-1,"Command line used: %s", cmd_buffer); /* adjust special variables */ @@ -2120,10 +2121,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* set the shift to mimic IOR and shift by procs per node */ if (nstride > 0) { - if ( nodeCount > 1 && tasksBlockMapping ) { + if ( numNodes > 1 && tasksBlockMapping ) { /* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */ - nstride *= tasksPerNode; + nstride *= numTasksOnNode0; } VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride); } diff --git a/src/parse_options.c b/src/parse_options.c index af30c36..ab9509d 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -151,8 +151,12 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt params->maxTimeDuration = atoi(value); } else if (strcasecmp(option, "outlierthreshold") == 0) { params->outlierThreshold = atoi(value); - } else if (strcasecmp(option, "nodes") == 0) { - params->nodes = atoi(value); + } else if (strcasecmp(option, "numnodes") == 0) { + params->numNodes = atoi(value); + } else if (strcasecmp(option, "numtasks") == 0) { + params->numTasks = atoi(value); + } else if (strcasecmp(option, "numtasksonnode0") == 0) { + params->numTasksOnNode0 = atoi(value); } else if 
(strcasecmp(option, "repetitions") == 0) { params->repetitions = atoi(value); } else if (strcasecmp(option, "intertestdelay") == 0) { @@ -286,8 +290,6 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt params->beegfs_chunkSize = string_to_bytes(value); if (!ISPOWEROFTWO(params->beegfs_chunkSize) || params->beegfs_chunkSize < (1<<16)) ERR("beegfsChunkSize must be a power of two and >64k"); - } else if (strcasecmp(option, "numtasks") == 0) { - params->numTasks = atoi(value); } else if (strcasecmp(option, "summaryalways") == 0) { params->summary_every_test = atoi(value); } else { @@ -498,7 +500,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile}, {'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr}, {'n', NULL, "noFill -- no fill in HDF5 file creation", OPTION_FLAG, 'd', & params->noFill}, - {'N', NULL, "numTasks -- number of tasks that should participate in the test", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, + {'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, {'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName}, {'O', NULL, "string of IOR directives (e.g. 
-O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, {'p', NULL, "preallocate -- preallocate file size", OPTION_FLAG, 'd', & params->preallocate}, diff --git a/src/utilities.c b/src/utilities.c index cdb090e..f7c073d 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -53,11 +53,9 @@ extern int errno; extern int numTasks; -/* globals used by other files, also defined "extern" in ior.h */ -int numTasksWorld = 0; +/* globals used by other files, also defined "extern" in utilities.h */ int rank = 0; int rankOffset = 0; -int tasksPerNode = 0; /* tasks per node */ int verbose = VERBOSE_0; /* verbose output */ MPI_Comm testComm; MPI_Comm mpi_comm_world; @@ -265,35 +263,108 @@ int QueryNodeMapping(MPI_Comm comm, int print_nodemap) { return ret; } +/* + * There is a more direct way to determine the node count in modern MPI + * versions so we use that if possible. + * + * For older versions we use a method which should still provide accurate + * results even if the total number of tasks is not evenly divisible by the + * tasks on node rank 0. + */ +int GetNumNodes(MPI_Comm comm) { #if MPI_VERSION >= 3 -int CountTasksPerNode(MPI_Comm comm) { - /* modern MPI provides a simple way to get the local process count */ - MPI_Comm shared_comm; - int count; + MPI_Comm shared_comm; + int shared_rank = 0; + int local_result = 0; + int numNodes = 0; + MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm), + "MPI_Comm_split_type() error"); + MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error"); + local_result = shared_rank == 0? 
1 : 0; + MPI_CHECK(MPI_Allreduce(&local_result, &numNodes, 1, MPI_INT, MPI_SUM, comm), + "MPI_Allreduce() error"); + MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error"); - MPI_Comm_split_type (comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm); - MPI_Comm_size (shared_comm, &count); - MPI_Comm_free (&shared_comm); + return numNodes; +#else + int numTasks = 0; + int numTasksOnNode0 = 0; - return count; + numTasks = GetNumTasks(comm); + numTasksOnNode0 = GetNumTasksOnNode0(comm); + + return ((numTasks - 1) / numTasksOnNode0) + 1; +#endif } + + +int GetNumTasks(MPI_Comm comm) { + int numTasks = 0; + + MPI_CHECK(MPI_Comm_size(comm, &numTasks), "cannot get number of tasks"); + + return numTasks; +} + + +/* + * It's very important that this method provide the same result to every + * process as it's used for redistributing which jobs read from which files. + * It was renamed accordingly. + * + * If different nodes get different results from this method then jobs get + * redistributed unevenly and you no longer have a 1:1 relationship with some + * nodes reading multiple files while others read none. + * + * In the common case the number of tasks on each node (MPI_Comm_size on an + * MPI_COMM_TYPE_SHARED communicator) will be the same. However, there is + * nothing which guarantees this. It's valid to have, for example, 64 jobs + * across 4 systems which can run 20 jobs each. In that scenario you end up + * with 3 MPI_COMM_TYPE_SHARED groups of 20, and one group of 4. + * + * In the (MPI_VERSION < 3) implementation of this method consistency is + * ensured by asking specifically about the number of tasks on the node with + * rank 0. In the original implementation for (MPI_VERSION >= 3) this was + * broken by using the LOCAL process count which differed depending on which + * node you were on. 
+ * + * This was corrected below by first splitting the comm into groups by node + * (MPI_COMM_TYPE_SHARED) and then having only the node with world rank 0 and + * shared rank 0 return the MPI_Comm_size of its shared subgroup. This yields + * the original consistent behavior no matter which node asks. + * + * In the common case where every node has the same number of tasks this + * method will return the same value it always has. + */ +int GetNumTasksOnNode0(MPI_Comm comm) { +#if MPI_VERSION >= 3 + MPI_Comm shared_comm; + int shared_rank = 0; + int tasks_on_node_rank0 = 0; + int local_result = 0; + + MPI_CHECK(MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &shared_comm), + "MPI_Comm_split_type() error"); + MPI_CHECK(MPI_Comm_rank(shared_comm, &shared_rank), "MPI_Comm_rank() error"); + if (rank == 0 && shared_rank == 0) { + MPI_CHECK(MPI_Comm_size(shared_comm, &local_result), "MPI_Comm_size() error"); + } + MPI_CHECK(MPI_Allreduce(&local_result, &tasks_on_node_rank0, 1, MPI_INT, MPI_SUM, comm), + "MPI_Allreduce() error"); + MPI_CHECK(MPI_Comm_free(&shared_comm), "MPI_Comm_free() error"); + + return tasks_on_node_rank0; #else /* - * Count the number of tasks that share a host. - * - * This function employees the gethostname() call, rather than using + * This version employs the gethostname() call, rather than using * MPI_Get_processor_name(). We are interested in knowing the number * of tasks that share a file system client (I/O node, compute node, * whatever that may be). However on machines like BlueGene/Q, * MPI_Get_processor_name() uniquely identifies a cpu in a compute node, * not the node where the I/O is function shipped to. gethostname() * is assumed to identify the shared filesystem client in more situations. - * - * NOTE: This also assumes that the task count on all nodes is equal - * to the task count on the host running MPI task 0. 
*/ -int CountTasksPerNode(MPI_Comm comm) { int size; MPI_Comm_size(comm, & size); /* for debugging and testing */ @@ -336,8 +407,8 @@ int CountTasksPerNode(MPI_Comm comm) { MPI_Bcast(&count, 1, MPI_INT, 0, comm); return(count); -} #endif +} /* diff --git a/src/utilities.h b/src/utilities.h index d2c9962..2a9abe3 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -18,10 +18,8 @@ #include #include "ior.h" -extern int numTasksWorld; extern int rank; extern int rankOffset; -extern int tasksPerNode; extern int verbose; extern MPI_Comm testComm; extern MPI_Comm mpi_comm_world; @@ -55,8 +53,10 @@ void SeedRandGen(MPI_Comm); void SetHints (MPI_Info *, char *); void ShowHints (MPI_Info *); char *HumanReadable(IOR_offset_t value, int base); -int CountTasksPerNode(MPI_Comm comm); int QueryNodeMapping(MPI_Comm comm, int print_nodemap); +int GetNumNodes(MPI_Comm); +int GetNumTasks(MPI_Comm); +int GetNumTasksOnNode0(MPI_Comm); void DelaySecs(int delay); void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); size_t NodeMemoryStringToBytes(char *size_str); From 10d3db1dc829dd78ea8ac2da97f233e9dc01ec06 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sat, 31 Aug 2019 17:28:08 +0100 Subject: [PATCH 51/66] MDTest: fixing the memset() to account for the number of iterations. 
--- src/mdtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index 77b2759..96555d2 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2148,7 +2148,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* setup summary table for recording results */ summary_table = (mdtest_results_t *) malloc(iterations * sizeof(mdtest_results_t)); - memset(summary_table, 0, sizeof(mdtest_results_t)); + memset(summary_table, 0, iterations * sizeof(mdtest_results_t)); for(int i=0; i < iterations; i++){ for(int j=0; j < MDTEST_LAST_NUM; j++){ summary_table[i].rate[j] = 0.0; From 60a641f911bb1b73685a10102b894f3a58341979 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 1 Sep 2019 15:29:12 +0100 Subject: [PATCH 52/66] sync help corrected. --- src/parse_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parse_options.c b/src/parse_options.c index af30c36..47f9920 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -477,7 +477,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, - {'e', NULL, "fsync -- perform sync operation after each block write", OPTION_FLAG, 'd', & params->fsync}, + {'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync}, {'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & 
params->useExistingTestFile}, {'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts}, {'F', NULL, "filePerProc -- file-per-process", OPTION_FLAG, 'd', & params->filePerProc}, From e3db1759b2736aaef826dae756283bfd7e58a1ee Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 1 Sep 2019 15:47:42 +0100 Subject: [PATCH 53/66] Moved sync() to aiori backend. --- src/aiori-POSIX.c | 12 ++++++++++++ src/aiori.h | 1 + src/mdtest.c | 5 ++++- src/utilities.c | 7 ------- src/utilities.h | 1 - 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 00e974c..99a68f1 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -71,6 +71,7 @@ static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); static void POSIX_Fsync(void *, IOR_param_t *); +static void POSIX_Sync(IOR_param_t * ); /************************** O P T I O N S *****************************/ typedef struct{ @@ -122,6 +123,7 @@ ior_aiori_t posix_aiori = { .stat = aiori_posix_stat, .get_options = POSIX_options, .enable_mdtest = true, + .sync = POSIX_Sync }; /***************************** F U N C T I O N S ******************************/ @@ -588,6 +590,16 @@ static void POSIX_Fsync(void *fd, IOR_param_t * param) EWARNF("fsync(%d) failed", *(int *)fd); } + +static void POSIX_Sync(IOR_param_t * param) +{ + int ret = system("sync"); + if (ret != 0){ + FAIL("Error executing the sync command, ensure it exists."); + } +} + + /* * Close a file through the POSIX interface.
*/ diff --git a/src/aiori.h b/src/aiori.h index c2074c2..8a5e207 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -86,6 +86,7 @@ typedef struct ior_aiori { void (*finalize)(); /* called once per program after MPI is shutdown */ option_help * (*get_options)(void ** init_backend_options, void* init_values); /* initializes the backend options as well and returns the pointer to the option help structure */ bool enable_mdtest; + void (*sync)(IOR_param_t * ); /* synchronize every pending operation for this storage */ } ior_aiori_t; enum bench_type { diff --git a/src/mdtest.c b/src/mdtest.c index e34496a..52efa34 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -266,7 +266,10 @@ static void prep_testdir(int j, int dir_iter){ static void phase_end(){ if (call_sync){ - call_sync_cmd(); + if(! backend->sync){ + FAIL("Error, backend does not provide the sync method, but your requested to use sync."); + } + backend->sync(& param); } if (barriers) { diff --git a/src/utilities.c b/src/utilities.c index a2e4b0d..c7e1c8c 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -809,10 +809,3 @@ char *HumanReadable(IOR_offset_t value, int base) } return valueStr; } - -void call_sync_cmd(){ - int ret = system("sync"); - if (ret != 0){ - FAIL("Error executing the sync command, ensure it exists."); - } -} diff --git a/src/utilities.h b/src/utilities.h index b85f957..d2c9962 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -60,7 +60,6 @@ int QueryNodeMapping(MPI_Comm comm, int print_nodemap); void DelaySecs(int delay); void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); size_t NodeMemoryStringToBytes(char *size_str); -void call_sync_cmd(); /* Returns -1, if cannot be read */ int64_t ReadStoneWallingIterations(char * const filename); From c83edfe39b49481db7472dce85f1dbe972e6bfc6 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 1 Sep 2019 15:59:52 +0100 Subject: [PATCH 54/66] Extracted check function into aiori. #24. 
#177 --- src/aiori-DUMMY.c | 5 +++++ src/aiori-S3.c | 21 ++++++++++++++++++++- src/aiori.h | 1 + src/ior.c | 15 ++++++--------- 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/aiori-DUMMY.c b/src/aiori-DUMMY.c index 0494cb4..90fec9a 100755 --- a/src/aiori-DUMMY.c +++ b/src/aiori-DUMMY.c @@ -143,6 +143,10 @@ static int DUMMY_stat (const char *path, struct stat *buf, IOR_param_t * param){ return 0; } +static int DUMMY_check_params(IOR_param_t * test){ + return 1; +} + ior_aiori_t dummy_aiori = { .name = "DUMMY", .name_legacy = NULL, @@ -163,4 +167,5 @@ ior_aiori_t dummy_aiori = { .finalize = NULL, .get_options = DUMMY_options, .enable_mdtest = true, + .check_params = DUMMY_check_params }; diff --git a/src/aiori-S3.c b/src/aiori-S3.c index 3fc1208..2c9a9af 100755 --- a/src/aiori-S3.c +++ b/src/aiori-S3.c @@ -159,6 +159,8 @@ static void S3_Fsync(void*, IOR_param_t*); static IOR_offset_t S3_GetFileSize(IOR_param_t*, MPI_Comm, char*); static void S3_init(); static void S3_finalize(); +static int S3_check_params(IOR_param_t *); + /************************** D E C L A R A T I O N S ***************************/ @@ -177,7 +179,8 @@ ior_aiori_t s3_aiori = { .fsync = S3_Fsync, .get_file_size = S3_GetFileSize, .initialize = S3_init, - .finalize = S3_finalize + .finalize = S3_finalize, + .check_params = S3_check_params }; // "S3", plus EMC-extensions enabled @@ -228,6 +231,22 @@ static void S3_finalize(){ aws_cleanup(); } +static int S3_check_params(IOR_param_t * test){ + /* N:1 and N:N */ + IOR_offset_t NtoN = test->filePerProc; + IOR_offset_t Nto1 = ! 
NtoN; + IOR_offset_t s = test->segmentCount; + IOR_offset_t t = test->transferSize; + IOR_offset_t b = test->blockSize; + + if (Nto1 && (s != 1) && (b != t)) { + ERR("N:1 (strided) requires xfer-size == block-size"); + return 0; + } + + return 1; +} + /* modelled on similar macros in iordef.h */ #define CURL_ERR(MSG, CURL_ERRNO, PARAM) \ do { \ diff --git a/src/aiori.h b/src/aiori.h index 8a5e207..06f9b31 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -86,6 +86,7 @@ typedef struct ior_aiori { void (*finalize)(); /* called once per program after MPI is shutdown */ option_help * (*get_options)(void ** init_backend_options, void* init_values); /* initializes the backend options as well and returns the pointer to the option help structure */ bool enable_mdtest; + int (*check_params)(IOR_param_t *); /* check whether the provided parameters for the given test and the module options are valid; if they are not, print a message and either exit(1) or return 0 (a non-zero return means the parameters are acceptable) */ void (*sync)(IOR_param_t * ); /* synchronize every pending operation for this storage */ } ior_aiori_t; diff --git a/src/ior.c b/src/ior.c index 1e66387..123a4b5 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1677,15 +1677,12 @@ static void ValidateTests(IOR_param_t * test) if (test->useExistingTestFile && test->lustre_set_striping) ERR("Lustre stripe options are incompatible with useExistingTestFile"); - /* N:1 and N:N */ - IOR_offset_t NtoN = test->filePerProc; - IOR_offset_t Nto1 = !
NtoN; - IOR_offset_t s = test->segmentCount; - IOR_offset_t t = test->transferSize; - IOR_offset_t b = test->blockSize; - - if (Nto1 && (s != 1) && (b != t)) { - ERR("N:1 (strided) requires xfer-size == block-size"); + /* allow the backend to validate the options */ + if(test->backend->check_params){ + int check = test->backend->check_params(test); + if (check == 0){ + ERR("The backend returned that the test parameters are invalid."); + } } } From 57a16ddda855193e0c117aa4c00b84d3888959ad Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 9 Sep 2019 19:14:35 +0000 Subject: [PATCH 55/66] - update debugging error checking in DFS. - remove usleep() before daos_fini() - fix README Signed-off-by: Mohamad Chaarawi --- README_DAOS | 17 ++++--- src/aiori-DAOS.c | 6 +-- src/aiori-DFS.c | 130 ++++++++++++++++++++++------------------------- 3 files changed, 72 insertions(+), 81 deletions(-) diff --git a/README_DAOS b/README_DAOS index b4e6dba..ed98bd6 100644 --- a/README_DAOS +++ b/README_DAOS @@ -59,12 +59,15 @@ ior -a DFS [ior_options] [dfs_options] mdtest -a DFS [mdtest_options] [dfs_options] Required Options: ---daos.pool : pool uuid to connect to (has to be created beforehand) ---daos.svcl : pool svcl list (: separated) ---daos.cont : container uuid that will hold the encapsulated namespace +--dfs.pool : pool uuid to connect to (has to be created beforehand) +--dfs.svcl : pool svcl list (: separated) +--dfs.cont : container uuid that will hold the encapsulated namespace Optional Options: ---daos.group : group name of servers with the pool +--dfs.group : group name of servers with the pool +--dfs.chunk_size : Chunk size of the files +--dfs.destroy : flag to destroy the container on finalize +--dfs.oclass : specific object class for files In the IOR options, the file name should be specified on the root dir directly since ior does not create directories and the DFS container representing the @@ -72,9 +75,9 @@ encapsulated namespace is not the same as the system
namespace the user is executing from. Examples that should work include: - - "ior -a DFS -w -W -o /test1 --daos.pool --daos.svcl --daos.cont " - - "ior -a DFS -w -W -r -R -o /test2 -b 1g -t 4m -C --daos.pool --daos.svcl --daos.cont " - - "ior -a DFS -w -r -o /test3 -b 8g -t 1m -C --daos.pool --daos.svcl --daos.cont " + - "ior -a DFS -w -W -o /test1 --dfs.pool --dfs.svcl --dfs.cont " + - "ior -a DFS -w -W -r -R -o /test2 -b 1g -t 4m -C --dfs.pool --dfs.svcl --dfs.cont " + - "ior -a DFS -w -r -o /test3 -b 8g -t 1m -C --dfs.pool --dfs.svcl --dfs.cont " Running mdtest, the user needs to specify a directory with -d where the test tree will be created. Some examples: diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 18dd689..a929758 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -131,7 +131,7 @@ do { \ int _rc = (rc); \ \ if (_rc < 0) { \ - fprintf(stdout, "ior ERROR (%s:%d): %d: %d: " \ + fprintf(stderr, "ior ERROR (%s:%d): %d: %d: " \ format"\n", __FILE__, __LINE__, rank, _rc, \ ##__VA_ARGS__); \ fflush(stdout); \ @@ -148,7 +148,7 @@ do { \ /* For generic errors like invalid command line options. */ #define GERR(format, ...) \ do { \ - fprintf(stdout, format"\n", ##__VA_ARGS__); \ + fprintf(stderr, format"\n", ##__VA_ARGS__); \ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ } while (0) @@ -331,8 +331,6 @@ DAOS_Fini() DCHECK(rc, "Failed to disconnect from pool %s", o.pool); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); - usleep(20000 * rank); - if (rank == 0) INFO(VERBOSE_1, "Finalizing DAOS.."); diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index df956e8..a49434e 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -149,17 +149,16 @@ do { \ } \ } while (0) -#define DERR(rc, format, ...) \ +#define INFO(level, format, ...) 
\ do { \ - int _rc = (rc); \ - \ - if (_rc != 0) { \ - fprintf(stderr, "ERROR (%s:%d): %d: %d: " \ - format"\n", __FILE__, __LINE__, rank, _rc, \ - ##__VA_ARGS__); \ - fflush(stderr); \ - goto out; \ - } \ + if (verbose >= level) \ + printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ +} while (0) + +#define GERR(format, ...) \ +do { \ + fprintf(stderr, format"\n", ##__VA_ARGS__); \ + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ } while (0) static inline struct aiori_dir_hdl * @@ -351,21 +350,19 @@ lookup_insert_dir(const char *name) hdl = calloc(1, sizeof(struct aiori_dir_hdl)); if (hdl == NULL) - DERR(ENOMEM, "failed to alloc dir handle"); + GERR("failed to alloc dir handle"); strncpy(hdl->name, name, PATH_MAX-1); hdl->name[PATH_MAX-1] = '\0'; rc = dfs_lookup(dfs, name, O_RDWR, &hdl->oh, NULL, NULL); - DERR(rc, "dfs_lookup() of %s Failed", name); + DCHECK(rc, "dfs_lookup() of %s Failed", name); rc = d_hash_rec_insert(dir_hash, hdl->name, strlen(hdl->name), &hdl->entry, true); - DERR(rc, "Failed to insert dir handle in hashtable"); + DCHECK(rc, "Failed to insert dir handle in hashtable"); return hdl->oh; -out: - return NULL; } static option_help * DFS_options(){ @@ -382,7 +379,7 @@ DFS_Init() { if (o.oclass) { objectClass = daos_oclass_name2id(o.oclass); if (objectClass == OC_UNKNOWN) - DCHECK(-1, "Invalid DAOS Object class %s\n", o.oclass); + GERR("Invalid DAOS Object class %s\n", o.oclass); } rc = daos_init(); @@ -407,10 +404,8 @@ DFS_Init() { if (svcl == NULL) ERR("Failed to allocate svcl"); - if (verbose >= VERBOSE_1) { - printf("Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl); - printf("DFS Container namespace uuid = %s\n", o.cont); - } + INFO(VERBOSE_1, "Pool uuid = %s, SVCL = %s\n", o.pool, o.svcl); + INFO(VERBOSE_1, "DFS Container namespace uuid = %s\n", o.cont); /** Connect to DAOS pool */ rc = daos_pool_connect(pool_uuid, o.group, svcl, DAOS_PC_RW, @@ -422,8 +417,7 @@ DFS_Init() { NULL); /* If NOEXIST we create it */ if (rc == 
-DER_NONEXIST) { - if (verbose >= VERBOSE_1) - printf("Creating DFS Container ...\n"); + INFO(VERBOSE_1, "Creating DFS Container ...\n"); rc = daos_cont_create(poh, co_uuid, NULL, NULL); if (rc == 0) { @@ -457,35 +451,38 @@ DFS_Finalize() DCHECK(rc, "Failed to close container %s (%d)", o.cont, rc); MPI_Barrier(MPI_COMM_WORLD); - if (rank == 0 && o.destroy) { - uuid_t uuid; - double t1, t2; + if (o.destroy) { + if (rank == 0) { + uuid_t uuid; + double t1, t2; - if (verbose >= VERBOSE_1) - printf("Destorying DFS Container: %s\n", o.cont); - uuid_parse(o.cont, uuid); - t1 = MPI_Wtime(); - rc = daos_cont_destroy(poh, uuid, 1, NULL); - t2 = MPI_Wtime(); - if (rc == 0 && verbose >= VERBOSE_1) - printf("Container Destroy time = %f secs", t2-t1); + INFO(VERBOSE_1, "Destorying DFS Container: %s\n", o.cont); + uuid_parse(o.cont, uuid); + t1 = MPI_Wtime(); + rc = daos_cont_destroy(poh, uuid, 1, NULL); + t2 = MPI_Wtime(); + if (rc == 0) + INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); + } + + MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + if (rc) { + if (rank == 0) + DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); + MPI_Abort(MPI_COMM_WORLD, -1); + } } - MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (rc) - DCHECK(rc, "Failed to destroy container %s (%d)", o.cont, rc); - - if (rank == 0 && verbose >= VERBOSE_1) - printf("Disconnecting from DAOS POOL\n"); + if (rank == 0) + INFO(VERBOSE_1, "Disconnecting from DAOS POOL\n"); rc = daos_pool_disconnect(poh, NULL); DCHECK(rc, "Failed to disconnect from pool"); MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); - usleep(20000 * rank); - if (rank == 0 && verbose >= VERBOSE_1) - printf("Finalizing DAOS..\n"); + if (rank == 0) + INFO(VERBOSE_1, "Finalizing DAOS..\n"); rc = daos_fini(); DCHECK(rc, "Failed to finalize DAOS"); @@ -506,13 +503,13 @@ DFS_Create(char *testFileName, IOR_param_t *param) assert(param); rc = parse_filename(testFileName, &name, &dir_name); - DERR(rc, "Failed to parse path 
%s", testFileName); + DCHECK(rc, "Failed to parse path %s", testFileName); assert(dir_name); assert(name); parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to lookup parent dir"); mode = S_IFREG | param->mode; if (param->filePerProc || rank == 0) { @@ -520,7 +517,7 @@ DFS_Create(char *testFileName, IOR_param_t *param) rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, o.chunk_size, NULL, &obj); - DERR(rc, "dfs_open() of %s Failed", name); + DCHECK(rc, "dfs_open() of %s Failed", name); } if (!param->filePerProc) { MPI_Barrier(MPI_COMM_WORLD); @@ -528,11 +525,10 @@ DFS_Create(char *testFileName, IOR_param_t *param) fd_oflag |= O_RDWR; rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, o.chunk_size, NULL, &obj); - DERR(rc, "dfs_open() of %s Failed", name); + DCHECK(rc, "dfs_open() of %s Failed", name); } } -out: if (name) free(name); if (dir_name) @@ -557,20 +553,19 @@ DFS_Open(char *testFileName, IOR_param_t *param) mode = S_IFREG | param->mode; rc = parse_filename(testFileName, &name, &dir_name); - DERR(rc, "Failed to parse path %s", testFileName); + DCHECK(rc, "Failed to parse path %s", testFileName); assert(dir_name); assert(name); parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to lookup parent dir"); rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, o.chunk_size, NULL, &obj); - DERR(rc, "dfs_open() of %s Failed", name); + DCHECK(rc, "dfs_open() of %s Failed", name); -out: if (name) free(name); if (dir_name) @@ -666,19 +661,18 @@ DFS_Delete(char *testFileName, IOR_param_t * param) int rc; rc = parse_filename(testFileName, &name, &dir_name); - DERR(rc, "Failed to parse path %s", testFileName); + DCHECK(rc, "Failed to parse path %s", testFileName); assert(dir_name); assert(name); parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to 
lookup parent dir"); rc = dfs_remove(dfs, parent, name, false, NULL); - DERR(rc, "dfs_remove() of %s Failed", name); + DCHECK(rc, "dfs_remove() of %s Failed", name); -out: if (name) free(name); if (dir_name) @@ -753,7 +747,7 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) int rc; rc = parse_filename(path, &name, &dir_name); - DERR(rc, "Failed to parse path %s", path); + DCHECK(rc, "Failed to parse path %s", path); assert(dir_name); if (!name) @@ -761,12 +755,11 @@ DFS_Mkdir(const char *path, mode_t mode, IOR_param_t * param) parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to lookup parent dir"); rc = dfs_mkdir(dfs, parent, name, mode); - DERR(rc, "dfs_mkdir() of %s Failed", name); + DCHECK(rc, "dfs_mkdir() of %s Failed", name); -out: if (name) free(name); if (dir_name) @@ -784,19 +777,18 @@ DFS_Rmdir(const char *path, IOR_param_t * param) int rc; rc = parse_filename(path, &name, &dir_name); - DERR(rc, "Failed to parse path %s", path); + DCHECK(rc, "Failed to parse path %s", path); assert(dir_name); assert(name); parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to lookup parent dir"); rc = dfs_remove(dfs, parent, name, false, NULL); - DERR(rc, "dfs_remove() of %s Failed", name); + DCHECK(rc, "dfs_remove() of %s Failed", name); -out: if (name) free(name); if (dir_name) @@ -815,13 +807,13 @@ DFS_Access(const char *path, int mode, IOR_param_t * param) int rc; rc = parse_filename(path, &name, &dir_name); - DERR(rc, "Failed to parse path %s", path); + DCHECK(rc, "Failed to parse path %s", path); assert(dir_name); parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to lookup parent dir"); if (name && strcmp(name, ".") == 0) { free(name); @@ -829,7 +821,6 @@ DFS_Access(const char *path, int mode, IOR_param_t * param) } rc = dfs_stat(dfs, parent, name, &stbuf); 
-out: if (name) free(name); if (dir_name) @@ -847,19 +838,18 @@ DFS_Stat(const char *path, struct stat *buf, IOR_param_t * param) int rc; rc = parse_filename(path, &name, &dir_name); - DERR(rc, "Failed to parse path %s", path); + DCHECK(rc, "Failed to parse path %s", path); assert(dir_name); assert(name); parent = lookup_insert_dir(dir_name); if (parent == NULL) - DERR(rc, "Failed to lookup parent dir"); + GERR("Failed to lookup parent dir"); rc = dfs_stat(dfs, parent, name, buf); - DERR(rc, "dfs_stat() of %s Failed", name); + DCHECK(rc, "dfs_stat() of Failed (%d)", rc); -out: if (name) free(name); if (dir_name) From d332d586bfe2460e819bf169dfeef35a5128c174 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 9 Sep 2019 20:07:55 +0000 Subject: [PATCH 56/66] remove unneeded code and doc Signed-off-by: Mohamad Chaarawi --- doc/USER_GUIDE | 37 ---- src/aiori-DAOS.c | 1 - src/list.h | 556 ----------------------------------------------- 3 files changed, 594 deletions(-) delete mode 100644 src/list.h diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index 7e0af0f..b8888b9 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -369,43 +369,6 @@ BeeGFS-SPECIFIC (POSIX only): * beegfsChunkSize - set the striping chunk size. Must be a power of two, and greater than 64kiB, (e.g.: 256k, 1M, ...)
-DAOS-ONLY: -========== - * daosGroup - group name [NULL] - - * daosPool - UUID of the pool [] - - * daosPoolSvc - pool service replica ranks (e.g., 1:2:3:4:5) [] - - * daosRecordSize - size (in bytes) of an akey record [256k] - NOTE: must divide transferSize - - * daosStripeSize - size (in bytes) of a chunk in a stripe [512k] - NOTE: must be a multiple of transferSize - - * daosStripeCount - number of stripes [64 * number of targets] - NOTE: i.e., number of dkeys - - * daosStripeMax - max length of each stripe [0] - NOTE: must be a multiple of daosStripeSize - NOTE: for write testing with small storage - NOTE: offsets in a stripe larger than daosStripeMax - are mapped to offset % daosStripeMax - - * daosAios - max number of asychonous I/Os [1] - - * daosWriteOnly - skip flushing and committing [0=FALSE] - - * daosEpoch - epoch to read or write [0] - NOTE: 0 denotes reading GHCE or writing GHCE + 1 - - * daosWait - epoch to wait when opening the container [0] - - * daosKill - kill a target in the middle of the test [0] - NOTE: must also specify daosObjectClass=repl - - * daosObjectClass - object class (tiny, small, large, repl, repl_max) - [large] *********************** * 5. VERBOSITY LEVELS * diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index a91fe08..21df9aa 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -37,7 +37,6 @@ #include "ior.h" #include "aiori.h" #include "iordef.h" -#include "list.h" /************************** O P T I O N S *****************************/ struct daos_options{ diff --git a/src/list.h b/src/list.h deleted file mode 100644 index dbe052c..0000000 --- a/src/list.h +++ /dev/null @@ -1,556 +0,0 @@ -/** - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * GPL HEADER END - */ -#ifndef __DAOS_LIST_H__ -#define __DAOS_LIST_H__ - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -#define prefetch(a) ((void)a) - -struct cfs_list_head { - struct cfs_list_head *next, *prev; -}; - -typedef struct cfs_list_head cfs_list_t; - -#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) } - -#define CFS_LIST_HEAD(name) \ - cfs_list_t name = CFS_LIST_HEAD_INIT(name) - -#define CFS_INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -/** - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __cfs_list_add(cfs_list_t * new, - cfs_list_t * prev, - cfs_list_t * next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * Insert an entry at the start of a list. - * \param new new entry to be inserted - * \param head list to add it to - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. 
- */ -static inline void cfs_list_add(cfs_list_t *new, - cfs_list_t *head) -{ - __cfs_list_add(new, head, head->next); -} - -/** - * Insert an entry at the end of a list. - * \param new new entry to be inserted - * \param head list to add it to - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void cfs_list_add_tail(cfs_list_t *new, - cfs_list_t *head) -{ - __cfs_list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __cfs_list_del(cfs_list_t *prev, - cfs_list_t *next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * Remove an entry from the list it is currently in. - * \param entry the entry to remove - * Note: list_empty(entry) does not return true after this, the entry is in an - * undefined state. - */ -static inline void cfs_list_del(cfs_list_t *entry) -{ - __cfs_list_del(entry->prev, entry->next); -} - -/** - * Remove an entry from the list it is currently in and reinitialize it. - * \param entry the entry to remove. - */ -static inline void cfs_list_del_init(cfs_list_t *entry) -{ - __cfs_list_del(entry->prev, entry->next); - CFS_INIT_LIST_HEAD(entry); -} - -/** - * Remove an entry from the list it is currently in and insert it at the start - * of another list. - * \param list the entry to move - * \param head the list to move it to - */ -static inline void cfs_list_move(cfs_list_t *list, - cfs_list_t *head) -{ - __cfs_list_del(list->prev, list->next); - cfs_list_add(list, head); -} - -/** - * Remove an entry from the list it is currently in and insert it at the end of - * another list. 
- * \param list the entry to move - * \param head the list to move it to - */ -static inline void cfs_list_move_tail(cfs_list_t *list, - cfs_list_t *head) -{ - __cfs_list_del(list->prev, list->next); - cfs_list_add_tail(list, head); -} - -/** - * Test whether a list is empty - * \param head the list to test. - */ -static inline int cfs_list_empty(cfs_list_t *head) -{ - return head->next == head; -} - -/** - * Test whether a list is empty and not being modified - * \param head the list to test - * - * Tests whether a list is empty _and_ checks that no other CPU might be - * in the process of modifying either member (next or prev) - * - * NOTE: using cfs_list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is cfs_list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - */ -static inline int cfs_list_empty_careful(const cfs_list_t *head) -{ - cfs_list_t *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __cfs_list_splice(cfs_list_t *list, - cfs_list_t *head) -{ - cfs_list_t *first = list->next; - cfs_list_t *last = list->prev; - cfs_list_t *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * Join two lists - * \param list the new list to add. - * \param head the place to add it in the first list. - * - * The contents of \a list are added at the start of \a head. \a list is in an - * undefined state on return. - */ -static inline void cfs_list_splice(cfs_list_t *list, - cfs_list_t *head) -{ - if (!cfs_list_empty(list)) - __cfs_list_splice(list, head); -} - -/** - * Join two lists and reinitialise the emptied list. - * \param list the new list to add. - * \param head the place to add it in the first list. - * - * The contents of \a list are added at the start of \a head. \a list is empty - * on return. 
- */ -static inline void cfs_list_splice_init(cfs_list_t *list, - cfs_list_t *head) -{ - if (!cfs_list_empty(list)) { - __cfs_list_splice(list, head); - CFS_INIT_LIST_HEAD(list); - } -} - -/** - * Get the container of a list - * \param ptr the embedded list. - * \param type the type of the struct this is embedded in. - * \param member the member name of the list within the struct. - */ -#define cfs_list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) - -/** - * Iterate over a list - * \param pos the iterator - * \param head the list to iterate over - * - * Behaviour is undefined if \a pos is removed from the list in the body of the - * loop. - */ -#define cfs_list_for_each(pos, head) \ - for (pos = (head)->next, prefetch(pos->next); pos != (head); \ - pos = pos->next, prefetch(pos->next)) - -/** - * Iterate over a list safely - * \param pos the iterator - * \param n temporary storage - * \param head the list to iterate over - * - * This is safe to use if \a pos could be removed from the list in the body of - * the loop. - */ -#define cfs_list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * Iterate over a list continuing after existing point - * \param pos the type * to use as a loop counter - * \param head the list head - * \param member the name of the list_struct within the struct - */ -#define cfs_list_for_each_entry_continue(pos, head, member) \ - for (pos = cfs_list_entry(pos->member.next, typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = cfs_list_entry(pos->member.next, typeof(*pos), member)) - -/** - * \defgroup hlist Hash List - * Double linked lists with a single pointer list head. - * Mostly useful for hash tables where the two pointer list head is too - * wasteful. You lose the ability to access the tail in O(1). 
- * @{ - */ - -typedef struct cfs_hlist_node { - struct cfs_hlist_node *next, **pprev; -} cfs_hlist_node_t; - -typedef struct cfs_hlist_head { - cfs_hlist_node_t *first; -} cfs_hlist_head_t; - -/* @} */ - -/* - * "NULL" might not be defined at this point - */ -#ifdef NULL -#define NULL_P NULL -#else -#define NULL_P ((void *)0) -#endif - -/** - * \addtogroup hlist - * @{ - */ - -#define CFS_HLIST_HEAD_INIT { NULL_P } -#define CFS_HLIST_HEAD(name) cfs_hlist_head_t name = { NULL_P } -#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P) -#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P) - -static inline int cfs_hlist_unhashed(const cfs_hlist_node_t *h) -{ - return !h->pprev; -} - -static inline int cfs_hlist_empty(const cfs_hlist_head_t *h) -{ - return !h->first; -} - -static inline void __cfs_hlist_del(cfs_hlist_node_t *n) -{ - cfs_hlist_node_t *next = n->next; - cfs_hlist_node_t **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void cfs_hlist_del(cfs_hlist_node_t *n) -{ - __cfs_hlist_del(n); -} - -static inline void cfs_hlist_del_init(cfs_hlist_node_t *n) -{ - if (n->pprev) { - __cfs_hlist_del(n); - CFS_INIT_HLIST_NODE(n); - } -} - -static inline void cfs_hlist_add_head(cfs_hlist_node_t *n, - cfs_hlist_head_t *h) -{ - cfs_hlist_node_t *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -/* next must be != NULL */ -static inline void cfs_hlist_add_before(cfs_hlist_node_t *n, - cfs_hlist_node_t *next) -{ - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; - *(n->pprev) = n; -} - -static inline void cfs_hlist_add_after(cfs_hlist_node_t *n, - cfs_hlist_node_t *next) -{ - next->next = n->next; - n->next = next; - next->pprev = &n->next; - - if(next->next) - next->next->pprev = &next->next; -} - -#define cfs_hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define cfs_hlist_for_each(pos, head) \ - 
for (pos = (head)->first; pos && (prefetch(pos->next), 1); \ - pos = pos->next) - -#define cfs_hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && (n = pos->next, 1); \ - pos = n) - -/** - * Iterate over an hlist of given type - * \param tpos the type * to use as a loop counter. - * \param pos the &struct hlist_node to use as a loop counter. - * \param head the head for your list. - * \param member the name of the hlist_node within the struct. - */ -#define cfs_hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * Iterate over an hlist continuing after existing point - * \param tpos the type * to use as a loop counter. - * \param pos the &struct hlist_node to use as a loop counter. - * \param member the name of the hlist_node within the struct. - */ -#define cfs_hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * Iterate over an hlist continuing from an existing point - * \param tpos the type * to use as a loop counter. - * \param pos the &struct hlist_node to use as a loop counter. - * \param member the name of the hlist_node within the struct. - */ -#define cfs_hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * Iterate over an hlist of given type safe against removal of list entry - * \param tpos the type * to use as a loop counter. - * \param pos the &struct hlist_node to use as a loop counter. - * \param n another &struct hlist_node to use as temporary storage - * \param head the head for your list. - * \param member the name of the hlist_node within the struct. 
- */ -#define cfs_hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = cfs_hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -/* @} */ - -#ifndef cfs_list_for_each_prev -/** - * Iterate over a list in reverse order - * \param pos the &struct list_head to use as a loop counter. - * \param head the head for your list. - */ -#define cfs_list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -#endif /* cfs_list_for_each_prev */ - -#ifndef cfs_list_for_each_entry -/** - * Iterate over a list of given type - * \param pos the type * to use as a loop counter. - * \param head the head for your list. - * \param member the name of the list_struct within the struct. - */ -#define cfs_list_for_each_entry(pos, head, member) \ - for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \ - prefetch(pos->member.next); \ - &pos->member != (head); \ - pos = cfs_list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) -#endif /* cfs_list_for_each_entry */ - -#ifndef cfs_list_for_each_entry_rcu -#define cfs_list_for_each_entry_rcu(pos, head, member) \ - list_for_each_entry(pos, head, member) -#endif - -#ifndef cfs_list_for_each_entry_rcu -#define cfs_list_for_each_entry_rcu(pos, head, member) \ - list_for_each_entry(pos, head, member) -#endif - -#ifndef cfs_list_for_each_entry_reverse -/** - * Iterate backwards over a list of given type. - * \param pos the type * to use as a loop counter. - * \param head the head for your list. - * \param member the name of the list_struct within the struct. 
- */ -#define cfs_list_for_each_entry_reverse(pos, head, member) \ - for (pos = cfs_list_entry((head)->prev, typeof(*pos), member); \ - prefetch(pos->member.prev), &pos->member != (head); \ - pos = cfs_list_entry(pos->member.prev, typeof(*pos), member)) -#endif /* cfs_list_for_each_entry_reverse */ - -#ifndef cfs_list_for_each_entry_safe -/** - * Iterate over a list of given type safe against removal of list entry - * \param pos the type * to use as a loop counter. - * \param n another type * to use as temporary storage - * \param head the head for your list. - * \param member the name of the list_struct within the struct. - */ -#define cfs_list_for_each_entry_safe(pos, n, head, member) \ - for (pos = cfs_list_entry((head)->next, typeof(*pos), member), \ - n = cfs_list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member)) - -#endif /* cfs_list_for_each_entry_safe */ - -#ifndef cfs_list_for_each_entry_safe_from -/** - * Iterate over a list continuing from an existing point - * \param pos the type * to use as a loop cursor. - * \param n another type * to use as temporary storage - * \param head the head for your list. - * \param member the name of the list_struct within the struct. - * - * Iterate over list of given type from current point, safe against - * removal of list entry. 
- */ -#define cfs_list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = cfs_list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = cfs_list_entry(n->member.next, typeof(*n), member)) -#endif /* cfs_list_for_each_entry_safe_from */ - -#define cfs_list_for_each_entry_typed(pos, head, type, member) \ - for (pos = cfs_list_entry((head)->next, type, member), \ - prefetch(pos->member.next); \ - &pos->member != (head); \ - pos = cfs_list_entry(pos->member.next, type, member), \ - prefetch(pos->member.next)) - -#define cfs_list_for_each_entry_reverse_typed(pos, head, type, member) \ - for (pos = cfs_list_entry((head)->prev, type, member); \ - prefetch(pos->member.prev), &pos->member != (head); \ - pos = cfs_list_entry(pos->member.prev, type, member)) - -#define cfs_list_for_each_entry_safe_typed(pos, n, head, type, member) \ - for (pos = cfs_list_entry((head)->next, type, member), \ - n = cfs_list_entry(pos->member.next, type, member); \ - &pos->member != (head); \ - pos = n, n = cfs_list_entry(n->member.next, type, member)) - -#define cfs_list_for_each_entry_safe_from_typed(pos, n, head, type, member) \ - for (n = cfs_list_entry(pos->member.next, type, member); \ - &pos->member != (head); \ - pos = n, n = cfs_list_entry(n->member.next, type, member)) - -#define cfs_hlist_for_each_entry_typed(tpos, pos, head, type, member) \ - for (pos = (head)->first; \ - pos && (prefetch(pos->next), 1) && \ - (tpos = cfs_hlist_entry(pos, type, member), 1); \ - pos = pos->next) - -#define cfs_hlist_for_each_entry_safe_typed(tpos, pos, n, head, type, member) \ - for (pos = (head)->first; \ - pos && (n = pos->next, 1) && \ - (tpos = cfs_hlist_entry(pos, type, member), 1); \ - pos = n) - -#endif /* __DAOS_LIST_H__ */ From 5622aabf467a3ec7110a678426a06c2469f60d80 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 9 Sep 2019 20:14:12 +0000 Subject: [PATCH 57/66] missed Makefile.am update Signed-off-by: Mohamad Chaarawi --- 
src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index 966508c..0de3b4b 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -71,7 +71,7 @@ extraLDADD += -lrados endif if USE_DAOS_AIORI -extraSOURCES += aiori-DAOS.c aiori-DFS.c list.h +extraSOURCES += aiori-DAOS.c aiori-DFS.c endif if USE_GFARM_AIORI From 73dbda09c6e3376d309918da079af7104975bb5f Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 9 Sep 2019 20:19:42 +0000 Subject: [PATCH 58/66] remove printf added Signed-off-by: Mohamad Chaarawi --- src/option.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/option.c b/src/option.c index 096e5c0..41ed09d 100644 --- a/src/option.c +++ b/src/option.c @@ -391,7 +391,6 @@ int option_parse(int argc, char ** argv, options_all_t * opt_all){ } if( requiredArgsSeen != requiredArgsNeeded ){ - printf("Seen = %d, needed = %d\n", requiredArgsSeen, requiredArgsNeeded); printf("Error: Missing some required arguments\n\n"); printhelp = 1; } From fe9d76ddf3a2662f53043b79e632bb175a392297 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 9 Sep 2019 22:03:55 +0000 Subject: [PATCH 59/66] fix segfault when no API is specified to mdtest Signed-off-by: Mohamad Chaarawi --- src/mdtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index 3a48bc9..17f6e5c 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1548,7 +1548,7 @@ void display_freespace(char *testdirpath) strcpy(dirpath, "."); } - if (strcasecmp(param.api, "DFS") == 0) + if (param.api && strcasecmp(param.api, "DFS") == 0) return; VERBOSE(3,5,"Before show_file_system_size, dirpath is '%s'", dirpath ); From c58ba8ffb581384561533218c05162e18ecae2df Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 10 Sep 2019 14:11:55 +0000 Subject: [PATCH 60/66] add break that was accidentally removed Signed-off-by: Mohamad Chaarawi --- src/option.c | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/option.c b/src/option.c index 41ed09d..26c7b2d 100644 --- a/src/option.c +++ b/src/option.c @@ -316,6 +316,7 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi if(strlen(arg) > 1){ printf("Error, ignoring remainder of string for option %c (%s).\n", o->shortVar, o->longVar); } + break; } case('l'):{ *(long long*) o->variable = string_to_bytes(arg); From 12284ae04af584e293dde105a4b2a564d551395b Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 10 Sep 2019 18:39:31 +0000 Subject: [PATCH 61/66] Add latency and iops numbers to each iteration. - Latency reported is computed by taking the average latency of all ops from a single task, then taking the minimum of that between all tasks. - IOPS is computed by taking the total number of ops across all tasks divided by the total access time to execute those ops. Signed-off-by: Mohamad Chaarawi --- src/ior-internal.h | 3 ++- src/ior-output.c | 11 +++++++---- src/ior.c | 30 +++++++++++++++++++++++------- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/ior-internal.h b/src/ior-internal.h index 6b89af1..7daf8de 100644 --- a/src/ior-internal.h +++ b/src/ior-internal.h @@ -20,7 +20,8 @@ void PrintLongSummaryOneTest(IOR_test_t *test); void DisplayFreespace(IOR_param_t * test); void GetTestFileName(char *, IOR_param_t *); void PrintRemoveTiming(double start, double finish, int rep); -void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep); +void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency, + double *diff_subset, double totalTime, int rep); void PrintTestEnds(); void PrintTableHeader(); /* End of ior-output */ diff --git a/src/ior-output.c b/src/ior-output.c index 560d995..c3e0cb2 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -18,8 +18,8 @@ static void PrintNextToken(); void PrintTableHeader(){ if (outputFormat == OUTPUT_DEFAULT){ fprintf(out_resultfile, 
"\n"); - fprintf(out_resultfile, "access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); - fprintf(out_resultfile, "------ --------- ---------- --------- -------- -------- -------- -------- ----\n"); + fprintf(out_resultfile, "access bw(MiB/s) IOPS Latency(s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); + fprintf(out_resultfile, "------ --------- ---- ---------- ---------- --------- -------- -------- -------- -------- ----\n"); } } @@ -219,10 +219,13 @@ void PrintTestEnds(){ PrintEndSection(); } -void PrintReducedResult(IOR_test_t *test, int access, double bw, double *diff_subset, double totalTime, int rep){ +void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, double latency, + double *diff_subset, double totalTime, int rep){ if (outputFormat == OUTPUT_DEFAULT){ fprintf(out_resultfile, "%-10s", access == WRITE ? "write" : "read"); PPDouble(1, bw / MEBIBYTE, " "); + PPDouble(1, iops, " "); + PPDouble(1, latency, " "); PPDouble(1, (double)test->params.blockSize / KIBIBYTE, " "); PPDouble(1, (double)test->params.transferSize / KIBIBYTE, " "); PPDouble(1, diff_subset[0], " "); @@ -772,7 +775,7 @@ void PrintRemoveTiming(double start, double finish, int rep) return; if (outputFormat == OUTPUT_DEFAULT){ - fprintf(out_resultfile, "remove - - - - - - "); + fprintf(out_resultfile, "remove - - - - - - - - "); PPDouble(1, finish-start, " "); fprintf(out_resultfile, "%-4d\n", rep); }else if (outputFormat == OUTPUT_JSON){ diff --git a/src/ior.c b/src/ior.c index 2d08234..ef8b44c 100755 --- a/src/ior.c +++ b/src/ior.c @@ -841,8 +841,9 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce { double reduced[IOR_NB_TIMERS] = { 0 }; double diff[IOR_NB_TIMERS / 2 + 1]; - double totalTime; - double bw; + double totalTime, accessTime; + IOR_param_t *params = &test->params; + double bw, iops, latency, minlatency; int i; MPI_Op op; @@ -856,15 +857,12 @@ ReduceIterResults(IOR_test_t 
*test, double *timer, const int rep, const int acce op, 0, testComm), "MPI_Reduce()"); } - /* Only rank 0 tallies and prints the results. */ - if (rank != 0) - return; - /* Calculate elapsed times and throughput numbers */ for (i = 0; i < IOR_NB_TIMERS / 2; i++) diff[i] = reduced[2 * i + 1] - reduced[2 * i]; totalTime = reduced[5] - reduced[0]; + accessTime = reduced[3] - reduced[2]; IOR_point_t *point = (access == WRITE) ? &test->results[rep].write : &test->results[rep].read; @@ -875,7 +873,25 @@ ReduceIterResults(IOR_test_t *test, double *timer, const int rep, const int acce return; bw = (double)point->aggFileSizeForBW / totalTime; - PrintReducedResult(test, access, bw, diff, totalTime, rep); + + /* For IOPS in this iteration, we divide the total amount of IOs from + * all ranks over the entire access time (first start -> last end). */ + iops = (point->aggFileSizeForBW / params->transferSize) / accessTime; + + /* For Latency, we divide the total access time for each task over the + * number of I/Os issued from that task; then reduce and display the + * minimum (best) latency achieved. So what is reported is the average + * latency of all ops from a single task, then taking the minimum of + * that between all tasks. */ + latency = (timer[3] - timer[2]) / (params->blockSize / params->transferSize); + MPI_CHECK(MPI_Reduce(&latency, &minlatency, 1, MPI_DOUBLE, + MPI_MIN, 0, testComm), "MPI_Reduce()"); + + /* Only rank 0 tallies and prints the results. */ + if (rank != 0) + return; + + PrintReducedResult(test, access, bw, iops, latency, diff, totalTime, rep); } /* From 0d0df855e689a9e494c165a2f956f3ce88d07a9f Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 18 Sep 2019 19:50:43 +0000 Subject: [PATCH 62/66] update user guide with IOPS and latency numbers for each iteration. 
Signed-off-by: Mohamad Chaarawi --- doc/USER_GUIDE | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index b8888b9..62a6802 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -550,6 +550,17 @@ HOW DOES IOR CALCULATE PERFORMANCE? operations (-g), the sum of the open, transfer, and close times may not equal the elapsed time from the first open to the last close. + After each iteration (-i) IOR reports performance for that iteration, and + those numbers include: + + - Bandwidth (described above) + + - IOPS: I/O rate (operations per second) achieved by all tasks given the total + time spent in reading and writing the data. + + - Latency: computed by taking the average latency of all I/O operations from a + single task. If ior is run with multiple tasks, then the latency reported is + the minimum that was computed between all tasks. HOW DO I ACCESS MULTIPLE FILE SYSTEMS IN IOR? From fca0a62ccba1d374a54d23fd6d95ee2b8ddb39a4 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 21 Sep 2019 22:05:54 +0000 Subject: [PATCH 63/66] update to new DAOS API for creating dfs containers. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 2838440..eb15b2a 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -419,13 +419,12 @@ DFS_Init() { if (rc == -DER_NONEXIST) { INFO(VERBOSE_1, "Creating DFS Container ...\n"); - rc = daos_cont_create(poh, co_uuid, NULL, NULL); - if (rc == 0) { - rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, - &coh, &co_info, NULL); - } + rc = dfs_cont_create(poh, co_uuid, NULL, &coh, NULL); + if (rc) + DCHECK(rc, "Failed to create container"); + } else if (rc) { + DCHECK(rc, "Failed to create container"); } - DCHECK(rc, "Failed to create container"); } HandleDistribute(&poh, POOL_HANDLE); From 2aea04c8cb148f1a9ecf78a7668b59c3fcd92b9c Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 21 Sep 2019 22:43:20 +0000 Subject: [PATCH 64/66] fix some compiler warnings. Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 21df9aa..19a9f64 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -241,7 +241,6 @@ DAOS_Init() if (rank == 0) { uuid_t uuid; d_rank_list_t *svcl = NULL; - d_rank_list_t ranks; static daos_pool_info_t po_info; static daos_cont_info_t co_info; @@ -368,6 +367,8 @@ DAOS_Create(char *testFileName, IOR_param_t *param) /** Distribute the array handle if not FPP */ if (!param->filePerProc) HandleDistribute(&aoh, ARRAY_HANDLE); + + return &aoh; } static int @@ -417,6 +418,8 @@ DAOS_Open(char *testFileName, IOR_param_t *param) /** Distribute the array handle if not FPP */ if (!param->filePerProc) HandleDistribute(&aoh, ARRAY_HANDLE); + + return &aoh; } static IOR_offset_t From a69a5916cf0a0572669ae5496fc85ca9038ef7de Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Wed, 16 Oct 2019 09:42:10 +0100 Subject: [PATCH 65/66] Hotfix for ior -F -u --- src/ior.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index d6b0418..30c3d28 100755 --- a/src/ior.c +++ b/src/ior.c @@ -130,7 +130,8 @@ int ior_main(int argc, char **argv) for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { verbose = tptr->params.verbose; if (rank == 0 && verbose >= VERBOSE_0) { - ShowTestStart(&tptr->params); + backend = tptr->params.backend; + ShowTestStart(&tptr->params); } // This is useful for trapping a running MPI process. While From dc82a1bf2fb49203879af6f37527b1f86cbfd653 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 16 Oct 2019 14:54:00 +0000 Subject: [PATCH 66/66] ior -R should memset the buffer being read, otherwise a read that does nothing will report success in the data verification phase. Signed-off-by: Mohamad Chaarawi --- src/ior.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ior.c b/src/ior.c index ef8b44c..658c3ae 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1882,14 +1882,16 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset *transferCount, test, WRITECHECK); } else if (access == READCHECK) { - amtXferred = backend->xfer(access, fd, buffer, transfer, test); + memset(checkBuffer, 'a', transfer); + + amtXferred = backend->xfer(access, fd, checkBuffer, transfer, test); if (amtXferred != transfer){ ERR("cannot read from file"); } if (test->storeFileOffset == TRUE) { FillBuffer(readCheckBuffer, test, test->offset, pretendRank); } - *errors += CompareBuffers(readCheckBuffer, buffer, transfer, *transferCount, test, READCHECK); + *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer, *transferCount, test, READCHECK); } return amtXferred; }