From 44ce8e5aaf8e60c4d655b49d968f8d77bd393064 Mon Sep 17 00:00:00 2001 From: Jean-Yves VET Date: Mon, 6 Aug 2018 12:04:26 +0200 Subject: [PATCH] Add backend for DDN's Infinite Memory Engine (IME) This patch adds the support of IME Native interface as a new AIORI backend. --- configure.ac | 11 ++ doc/USER_GUIDE | 8 +- src/Makefile.am | 7 ++ src/aiori-IME.c | 273 ++++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 1 + 6 files changed, 299 insertions(+), 4 deletions(-) create mode 100755 src/aiori-IME.c diff --git a/configure.ac b/configure.ac index 7b49a39..7042355 100755 --- a/configure.ac +++ b/configure.ac @@ -78,6 +78,17 @@ AS_IF([test "x$with_lustre" != xno], [ ]) ]) +# IME (DDN's Infinite Memory Engine) support +AC_ARG_WITH([ime], + [AS_HELP_STRING([--with-ime], + [support IO with IME backend @<:@default=no@:>@])], + [], + [with_ime=no]) +AM_CONDITIONAL([USE_IME_AIORI], [test x$with_ime = xyes]) +AM_COND_IF([USE_IME_AIORI],[ + AC_DEFINE([USE_IME_AIORI], [], [Build IME backend AIORI]) +]) + # HDF5 support AC_ARG_WITH([hdf5], [AS_HELP_STRING([--with-hdf5], diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index 8581d3e..dd02262 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -52,7 +52,7 @@ Two ways to run IOR: * 3. OPTIONS * ************** These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'. 
- -a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI] + -a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|IME|S3|S3_EMC|NCMPI] -A N refNum -- user reference number to include in long summary -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g) -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers @@ -121,8 +121,8 @@ GENERAL: * refNum - user supplied reference number, included in long summary [0] - * api - must be set to one of POSIX, MPIIO, HDF5, HDFS, S3, - S3_EMC, or NCMPI, depending on test [POSIX] + * api - must be set to one of POSIX, MPIIO, HDF5, HDFS, IME, + S3, S3_EMC, or NCMPI, depending on test [POSIX] * testFile - name of the output file [testFile] NOTE: with filePerProc set, the tasks can round @@ -396,7 +396,7 @@ various application codes. Details are included in each script as necessary. An example of a script: ===============> start script <=============== IOR START - api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI] + api=[POSIX|MPIIO|HDF5|HDFS|IME|S3|S3_EMC|NCMPI] testFile=testFile hintsFileName=hintsFile repetitions=8 diff --git a/src/Makefile.am b/src/Makefile.am index 94416cd..2508802 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -42,6 +42,13 @@ extraSOURCES += aiori-HDF5.c extraLDADD += -lhdf5 -lz endif +if USE_IME_AIORI +extraSOURCES += aiori-IME.c +extraCPPFLAGS += -I/opt/ddn/ime/include +extraLDFLAGS += -L/opt/ddn/ime/lib +extraLDADD += -lim_client +endif + if USE_MPIIO_AIORI extraSOURCES += aiori-MPIIO.c endif diff --git a/src/aiori-IME.c b/src/aiori-IME.c new file mode 100755 index 0000000..70b4f0f --- /dev/null +++ b/src/aiori-IME.c @@ -0,0 +1,273 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/******************************************************************************\ +* * +* Copyright (c) 2003, The Regents of the University of California. * +* Copyright (c) 2018, DataDirect Networks. 
*
+* See the file COPYRIGHT for a complete copyright notice and license.          *
+*                                                                              *
+********************************************************************************
+*
+* Implement abstract I/O interface for DDN Infinite Memory Engine (IME).
+*
+\******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include
+#include
+#include
+#include
+#include /* sys_errlist */
+#include /* IO operations */
+
+#include "ior.h"
+#include "iordef.h"
+#include "aiori.h"
+#include "utilities.h"
+#include "ime_native.h"
+
+#ifndef O_BINARY /* Required on Windows */
+# define O_BINARY 0
+#endif
+
+/**************************** P R O T O T Y P E S *****************************/
+
+static void *IME_Create(char *testFileName, IOR_param_t *param);
+static void *IME_Open(char *testFileName, IOR_param_t *param);
+static void IME_Close(void *fd, IOR_param_t *param);
+static void IME_Delete(char *testFileName, IOR_param_t *param);
+static void IME_SetVersion(IOR_param_t *test);
+static void IME_Fsync(void *fd, IOR_param_t *param);
+static int IME_Access(const char *path, int mode, IOR_param_t *param);
+static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm comm, char *name);
+static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer,
+                             IOR_offset_t length, IOR_param_t *param);
+
+/************************** D E C L A R A T I O N S ***************************/
+
+extern int rank;
+extern int rankOffset;
+extern int verbose;
+extern MPI_Comm testComm;
+
+ior_aiori_t ime_aiori = { /* function table of the IME backend */
+        .name = "IME",
+        .create = IME_Create,
+        .open = IME_Open,
+        .close = IME_Close,
+        .delete = IME_Delete,
+        .xfer = IME_Xfer,
+        .fsync = IME_Fsync,
+        .access = IME_Access,
+        .get_file_size = IME_GetFileSize,
+        .set_version = IME_SetVersion,
+};
+
+/***************************** F U N C T I O N S ******************************/
+
+/*
+ * Try to access a file through the IME interface.
+ */
+static int IME_Access(const char *path, int mode, IOR_param_t *param)
+{
+        (void)param; /* not needed by ime_native_access() */
+
+        return ime_native_access(path, mode);
+}
+
+/*
+ * Create and open a file: forwards to IME_Open() with the same arguments.
+ */
+static void *IME_Create(char *testFileName, IOR_param_t *param)
+{
+        return IME_Open(testFileName, param);
+}
+
+/*
+ * Open a file through the IME interface; returns a malloc'ed int holding it.
+ */
+static void *IME_Open(char *testFileName, IOR_param_t *param)
+{
+        int oflags = O_BINARY;
+        int *handle = malloc(sizeof(*handle));
+
+        /* heap-allocated int: its address is what this function returns */
+        if (handle == NULL)
+                ERR("Unable to malloc file descriptor");
+
+        if (param->useO_DIRECT)
+                set_o_direct_flag(&oflags);
+
+        if (param->openFlags & IOR_RDONLY)
+                oflags |= O_RDONLY;
+        if (param->openFlags & IOR_WRONLY)
+                oflags |= O_WRONLY;
+        if (param->openFlags & IOR_RDWR)
+                oflags |= O_RDWR;
+        if (param->openFlags & IOR_APPEND)
+                oflags |= O_APPEND;
+        if (param->openFlags & IOR_CREAT)
+                oflags |= O_CREAT;
+        if (param->openFlags & IOR_EXCL)
+                oflags |= O_EXCL;
+        if (param->openFlags & IOR_TRUNC)
+                oflags |= O_TRUNC;
+
+        *handle = ime_native_open(testFileName, oflags, 0664);
+        if (*handle < 0) {
+                free(handle);
+                ERR("cannot open file");
+        }
+
+        return handle;
+}
+
+/*
+ * Write or read access to file using the IME interface.
+ */
+static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer,
+                             IOR_offset_t length, IOR_param_t *param)
+{
+        int xferRetries = 0;
+        long long remaining = (long long)length;
+        char *ptr = (char *)buffer;
+        int fd = *(int *)file;
+        long long rc;
+
+        while (remaining > 0) {
+                /* after a partial transfer, continue at the first byte not
+                 * yet moved instead of restarting at param->offset */
+                long long offset = param->offset + length - remaining;
+
+                if (access == WRITE) { /* WRITE */
+                        if (verbose >= VERBOSE_4)
+                                fprintf(stdout, "task %d writing to offset %lld\n",
+                                        rank, offset);
+                        rc = ime_native_pwrite(fd, ptr, remaining, offset);
+                        if (rc < 0) /* never fold an error into remaining/ptr */
+                                ERR("write failed");
+
+                        if (param->fsyncPerWrite)
+                                IME_Fsync(&fd, param);
+                } else { /* READ or CHECK */
+                        if (verbose >= VERBOSE_4)
+                                fprintf(stdout, "task %d reading from offset %lld\n",
+                                        rank, offset);
+                        rc = ime_native_pread(fd, ptr, remaining, offset);
+                        if (rc == 0)
+                                ERR("hit EOF prematurely");
+                        else if (rc < 0)
+                                ERR("read failed");
+                }
+
+                if (rc < remaining) {
+                        fprintf(stdout, "WARNING: Task %d, partial %s, %lld of "
+                                "%lld bytes at offset %lld\n",
+                                rank, access == WRITE ? "write" : "read", rc,
+                                remaining, offset);
+
+                        if (param->singleXferAttempt) {
+                                MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
+                                          "barrier error");
+                        }
+
+                        if (xferRetries > MAX_RETRY) {
+                                ERR( "too many retries -- aborting" );
+                        }
+                } else if (rc > remaining) /* this should never happen */
+                        ERR("too many bytes transferred!?!");
+
+                assert(rc >= 0);
+                assert(rc <= remaining);
+                remaining -= rc;
+                ptr += rc;
+                xferRetries++;
+        }
+
+        return(length);
+}
+
+/*
+ * Perform fsync() on the descriptor behind fd; failure only triggers WARN().
+ */
+static void IME_Fsync(void *fd, IOR_param_t *param)
+{
+        if (ime_native_fsync(*(int *)fd) != 0)
+                WARN("cannot perform fsync on file");
+}
+
+/*
+ * Close a file through the IME interface and free the int that fd points to.
+ */
+static void IME_Close(void *fd, IOR_param_t *param)
+{
+        int rc = ime_native_close(*(int *)fd);
+
+        free(fd); /* freed whether or not the close succeeded */
+        if (rc != 0)
+                ERR("cannot close file");
+}
+
+/*
+ * Delete a file through the IME interface.
+ */
+static void IME_Delete(char *testFileName, IOR_param_t *param)
+{
+        char errmsg[256]; /* fixed size: the message below is truncated to fit */
+        snprintf(errmsg, sizeof(errmsg), "[RANK %03d]:cannot delete file %s\n",
+                 rank, testFileName);
+        if (ime_native_unlink(testFileName) != 0)
+                WARN(errmsg);
+}
+
+/*
+ * Determine API version: copies test->api into test->apiVersion.
+ */
+static void IME_SetVersion(IOR_param_t *test)
+{
+        strcpy(test->apiVersion, test->api);
+}
+
+/*
+ * Use IME stat() to return aggregate file size.
+ * With filePerProc the per-task sizes are summed over testComm; otherwise
+ * the minimum is returned when the tasks disagree on the size.
+ */
+static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
+                                    char *testFileName)
+{
+        struct stat stat_buf;
+        IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum;
+
+        if (ime_native_stat(testFileName, &stat_buf) != 0)
+                ERR("cannot get status of written file");
+        aggFileSizeFromStat = stat_buf.st_size;
+
+        if (test->filePerProc) {
+                MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
+                                        MPI_LONG_LONG_INT, MPI_SUM, testComm),
+                          "cannot total data moved");
+                aggFileSizeFromStat = tmpSum;
+        } else {
+                MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1,
+                                        MPI_LONG_LONG_INT, MPI_MIN, testComm),
+                          "cannot total data moved");
+                MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1,
+                                        MPI_LONG_LONG_INT, MPI_MAX, testComm),
+                          "cannot total data moved");
+
+                if (tmpMin != tmpMax) {
+                        if (rank == 0)
+                                WARN("inconsistent file size by different tasks");
+                        /* incorrect, but now consistent across tasks */
+                        aggFileSizeFromStat = tmpMin;
+                }
+        }
+
+        return(aggFileSizeFromStat);
+}
diff --git a/src/aiori.c b/src/aiori.c
index b886487..cc3ca8d 100644
--- a/src/aiori.c
+++ b/src/aiori.c
@@ -38,6 +38,9 @@ ior_aiori_t *available_aiori[] = {
 #ifdef USE_HDFS_AIORI
         &hdfs_aiori,
 #endif
+#ifdef USE_IME_AIORI
+        &ime_aiori,
+#endif
 #ifdef USE_MPIIO_AIORI
         &mpiio_aiori,
 #endif
diff --git a/src/aiori.h b/src/aiori.h
index 0046e1f..5ddd24d 100755
--- a/src/aiori.h
+++ b/src/aiori.h
@@ -87,6 +87,7 @@ typedef struct ior_aiori {
 extern ior_aiori_t dummy_aiori;
 extern ior_aiori_t hdf5_aiori;
 extern ior_aiori_t hdfs_aiori;
+extern
ior_aiori_t ime_aiori; extern ior_aiori_t mpiio_aiori; extern ior_aiori_t ncmpi_aiori; extern ior_aiori_t posix_aiori;