Add backend for DDN's Infinite Memory Engine (IME)

This patch adds support for the IME native interface
as a new AIORI backend.
master
Jean-Yves VET 2018-08-06 12:04:26 +02:00 committed by Julian M. Kunkel
parent e708681f19
commit 44ce8e5aaf
6 changed files with 299 additions and 4 deletions

View File

@ -78,6 +78,17 @@ AS_IF([test "x$with_lustre" != xno], [
])
])
# IME (DDN's Infinite Memory Engine) support
# Off by default. The backend is enabled only when with_ime is exactly yes.
# USE_IME_AIORI is then set as an automake conditional and as a C define.
AC_ARG_WITH([ime],
[AS_HELP_STRING([--with-ime],
[support IO with IME backend @<:@default=no@:>@])],
[],
[with_ime=no])
AM_CONDITIONAL([USE_IME_AIORI], [test x$with_ime = xyes])
AM_COND_IF([USE_IME_AIORI],[
AC_DEFINE([USE_IME_AIORI], [], [Build IME backend AIORI])
])
# HDF5 support
AC_ARG_WITH([hdf5],
[AS_HELP_STRING([--with-hdf5],

View File

@ -52,7 +52,7 @@ Two ways to run IOR:
* 3. OPTIONS *
**************
These options are to be used on the command line. E.g., 'IOR -a POSIX -b 4K'.
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
-a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|IME|S3|S3_EMC|NCMPI]
-A N refNum -- user reference number to include in long summary
-b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)
-B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers
@ -121,8 +121,8 @@ GENERAL:
* refNum - user supplied reference number, included in
long summary [0]
* api - must be set to one of POSIX, MPIIO, HDF5, HDFS, S3,
S3_EMC, or NCMPI, depending on test [POSIX]
* api - must be set to one of POSIX, MPIIO, HDF5, HDFS, IME,
S3, S3_EMC, or NCMPI, depending on test [POSIX]
* testFile - name of the output file [testFile]
NOTE: with filePerProc set, the tasks can round
@ -396,7 +396,7 @@ various application codes. Details are included in each script as necessary.
An example of a script:
===============> start script <===============
IOR START
api=[POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]
api=[POSIX|MPIIO|HDF5|HDFS|IME|S3|S3_EMC|NCMPI]
testFile=testFile
hintsFileName=hintsFile
repetitions=8

View File

@ -42,6 +42,13 @@ extraSOURCES += aiori-HDF5.c
extraLDADD += -lhdf5 -lz
endif
# IME backend: compiled and linked only when USE_IME_AIORI is set.
# Header and library search paths are hard-coded to /opt/ddn/ime.
if USE_IME_AIORI
extraSOURCES += aiori-IME.c
extraCPPFLAGS += -I/opt/ddn/ime/include
extraLDFLAGS += -L/opt/ddn/ime/lib
extraLDADD += -lim_client
endif
if USE_MPIIO_AIORI
extraSOURCES += aiori-MPIIO.c
endif

273
src/aiori-IME.c Executable file
View File

@ -0,0 +1,273 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
* *
* Copyright (c) 2003, The Regents of the University of California. *
* Copyright (c) 2018, DataDirect Networks. *
* See the file COPYRIGHT for a complete copyright notice and license. *
* *
********************************************************************************
*
* Implement abstract I/O interface for DDN Infinite Memory Engine (IME).
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <errno.h> /* sys_errlist */
#include <fcntl.h> /* IO operations */
#include "ior.h"
#include "iordef.h"
#include "aiori.h"
#include "utilities.h"
#include "ime_native.h"
#ifndef O_BINARY /* Required on Windows */
# define O_BINARY 0
#endif
/**************************** P R O T O T Y P E S *****************************/
/* File lifecycle */
static void        *IME_Create(char *testFileName, IOR_param_t *param);
static void        *IME_Open(char *testFileName, IOR_param_t *param);
static void         IME_Close(void *fd, IOR_param_t *param);
static void         IME_Delete(char *testFileName, IOR_param_t *param);

/* Data path */
static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer,
                             IOR_offset_t length, IOR_param_t *param);
static void         IME_Fsync(void *fd, IOR_param_t *param);

/* Queries */
static void         IME_SetVersion(IOR_param_t *test);
static int          IME_Access(const char *path, int mode, IOR_param_t *param);
static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
                                    char *testFileName);
/************************** D E C L A R A T I O N S ***************************/
/* Globals defined outside this file. */
extern int      rank;
extern int      rankOffset;
extern int      verbose;
extern MPI_Comm testComm;

/* Dispatch table registering the functions below under the API name "IME". */
ior_aiori_t ime_aiori = {
        .name          = "IME",

        /* file lifecycle */
        .create        = IME_Create,
        .open          = IME_Open,
        .close         = IME_Close,
        .delete        = IME_Delete,

        /* data path */
        .xfer          = IME_Xfer,
        .fsync         = IME_Fsync,

        /* queries */
        .set_version   = IME_SetVersion,
        .get_file_size = IME_GetFileSize,
        .access        = IME_Access,
};
/***************************** F U N C T I O N S ******************************/
/*
 * Probe a file through the IME interface.
 *
 * path:  file to check
 * mode:  access mode, passed unchanged to ime_native_access()
 * param: unused
 *
 * Returns the value reported by ime_native_access().
 */
static int IME_Access(const char *path, int mode, IOR_param_t *param)
{
        int status;

        (void)param;    /* required by the .access slot, not needed here */

        status = ime_native_access(path, mode);
        return status;
}
/*
 * Create and open a file through the IME interface.
 *
 * This is a plain forward to IME_Open(); whether the file is actually
 * created depends on IOR_CREAT being set in param->openFlags, which
 * IME_Open() translates to O_CREAT.
 *
 * Returns the handle produced by IME_Open().
 */
static void *IME_Create(char *testFileName, IOR_param_t *param)
{
        void *handle = IME_Open(testFileName, param);

        return handle;
}
/*
 * Open a file through the IME interface.
 *
 * testFileName: path handed to ime_native_open()
 * param:        test parameters; useO_DIRECT and openFlags are consulted
 *
 * The IOR_* bits in param->openFlags are translated to their O_*
 * counterparts and the file is opened with permission bits 0664.
 *
 * Returns a pointer to a malloc'ed int holding the IME descriptor
 * (released by IME_Close()). Allocation or open failures are reported
 * through ERR().
 */
static void *IME_Open(char *testFileName, IOR_param_t *param)
{
        /* IOR open flag -> matching O_* flag */
        const struct {
                unsigned int iorFlag;
                int          sysFlag;
        } flagMap[] = {
                { IOR_RDONLY, O_RDONLY },
                { IOR_WRONLY, O_WRONLY },
                { IOR_RDWR,   O_RDWR   },
                { IOR_APPEND, O_APPEND },
                { IOR_CREAT,  O_CREAT  },
                { IOR_EXCL,   O_EXCL   },
                { IOR_TRUNC,  O_TRUNC  },
        };
        const size_t mapLen = sizeof(flagMap) / sizeof(flagMap[0]);
        int oflag = O_BINARY;
        int *handle;
        size_t i;

        handle = (int *)malloc(sizeof(*handle));
        if (handle == NULL)
                ERR("Unable to malloc file descriptor");

        if (param->useO_DIRECT)
                set_o_direct_flag(&oflag);

        for (i = 0; i < mapLen; i++) {
                if (param->openFlags & flagMap[i].iorFlag)
                        oflag |= flagMap[i].sysFlag;
        }

        *handle = ime_native_open(testFileName, oflag, 0664);
        if (*handle < 0) {
                free(handle);
                ERR("cannot open file");
        }

        return (void *)handle;
}
/*
 * Write to or read from a file using the IME interface.
 *
 * access: WRITE to write; any other value is treated as a read
 *         (READ or CHECK)
 * file:   handle returned by IME_Open() (pointer to the IME descriptor)
 * buffer: data to write, or destination for the data read
 * length: number of bytes to transfer
 * param:  test parameters; offset, fsyncPerWrite and singleXferAttempt
 *         are consulted
 *
 * The transfer starts at param->offset. A short transfer is reported and
 * the rest is retried from where the previous attempt stopped, unless
 * param->singleXferAttempt is set (then the job is aborted) or more than
 * MAX_RETRY attempts have been made. A negative return from the IME call,
 * or a zero-byte read, is reported through ERR().
 *
 * Returns length.
 */
static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer,
                             IOR_offset_t length, IOR_param_t *param)
{
        int xferRetries = 0;
        long long remaining = (long long)length;
        char *ptr = (char *)buffer;
        int fd = *(int *)file;
        long long rc;

        while (remaining > 0) {
                /*
                 * The IME calls take an explicit file offset (pwrite(2) /
                 * pread(2) style), so after a short transfer the offset has
                 * to advance together with ptr; otherwise the retry would
                 * land on top of the bytes that were already transferred.
                 */
                long long curOffset =
                        (long long)(param->offset + length - remaining);

                /* write/read file */
                if (access == WRITE) {  /* WRITE */
                        if (verbose >= VERBOSE_4) {
                                fprintf(stdout, "task %d writing to offset %lld\n",
                                        rank, curOffset);
                        }

                        rc = ime_native_pwrite(fd, ptr, remaining, curOffset);
                        /* report a failed write as such, not as a partial one */
                        if (rc < 0)
                                ERR("write failed");

                        if (param->fsyncPerWrite)
                                IME_Fsync(&fd, param);
                } else {                /* READ or CHECK */
                        if (verbose >= VERBOSE_4) {
                                fprintf(stdout, "task %d reading from offset %lld\n",
                                        rank, curOffset);
                        }

                        rc = ime_native_pread(fd, ptr, remaining, curOffset);
                        if (rc == 0)
                                ERR("hit EOF prematurely");
                        else if (rc < 0)
                                ERR("read failed");
                }

                if (rc < remaining) {
                        fprintf(stdout, "WARNING: Task %d, partial %s, %lld of "
                                "%lld bytes at offset %lld\n",
                                rank, access == WRITE ? "write" : "read", rc,
                                remaining, curOffset);

                        if (param->singleXferAttempt) {
                                MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1),
                                          "barrier error");
                        }

                        if (xferRetries > MAX_RETRY) {
                                ERR( "too many retries -- aborting" );
                        }
                } else if (rc > remaining)      /* this should never happen */
                        ERR("too many bytes transferred!?!");

                assert(rc >= 0);
                assert(rc <= remaining);
                remaining -= rc;
                ptr += rc;
                xferRetries++;
        }

        return(length);
}
/*
 * Flush a file through the IME interface.
 *
 * fd:    handle returned by IME_Open() (pointer to the IME descriptor)
 * param: unused
 *
 * A non-zero result from ime_native_fsync() is reported with WARN().
 */
static void IME_Fsync(void *fd, IOR_param_t *param)
{
        const int imeFd = *(int *)fd;
        const int rc = ime_native_fsync(imeFd);

        if (rc != 0)
                WARN("cannot perform fsync on file");
}
/*
 * Close a file through the IME interface and release its handle.
 *
 * fd:    handle returned by IME_Open(); it is freed here whether or not
 *        the close succeeds
 * param: unused
 *
 * A non-zero result from ime_native_close() is reported with ERR().
 */
static void IME_Close(void *fd, IOR_param_t *param)
{
        const int rc = ime_native_close(*(int *)fd);

        /* the handle is released on both the success and the failure path */
        free(fd);

        if (rc != 0) {
                ERR("cannot close file");
        }
}
/*
 * Delete a file through the IME interface.
 *
 * testFileName: path handed to ime_native_unlink()
 * param:        unused
 *
 * A non-zero result from ime_native_unlink() is reported with WARN();
 * the message carries this task's rank and the file name, truncated if
 * it does not fit the local buffer.
 */
static void IME_Delete(char *testFileName, IOR_param_t *param)
{
        char errmsg[256];

        (void)param;

        /*
         * Bounded formatting: the path length is not limited here, so an
         * unbounded sprintf() could run past errmsg. The message is built
         * before the unlink so that nothing executes between a failed call
         * and WARN().
         */
        snprintf(errmsg, sizeof(errmsg), "[RANK %03d]:cannot delete file %s\n",
                 rank, testFileName);

        if (ime_native_unlink(testFileName) != 0)
                WARN(errmsg);
}
/*
 * Determine API version.
 *
 * The API name stored in test->api is copied into test->apiVersion.
 */
static void IME_SetVersion(IOR_param_t *test)
{
        char *dst = test->apiVersion;
        const char *src = test->api;

        strcpy(dst, src);
}
/*
 * Use IME stat() to return the aggregate file size.
 *
 * test:         test parameters; filePerProc selects the reduction
 * testComm:     communicator used for the reductions
 * testFileName: file to stat
 *
 * With filePerProc set, the per-task sizes are summed over testComm.
 * Otherwise the minimum and maximum over testComm are compared; if they
 * differ, rank 0 warns and the minimum is returned, else the local size.
 * A failing ime_native_stat() is reported with ERR().
 */
static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
                                    char *testFileName)
{
        struct stat st;
        IOR_offset_t localSize;
        IOR_offset_t total, smallest, largest;

        if (ime_native_stat(testFileName, &st) != 0) {
                ERR("cannot get status of written file");
        }
        localSize = st.st_size;

        /* one file per task: the aggregate is the sum of all sizes */
        if (test->filePerProc) {
                MPI_CHECK(MPI_Allreduce(&localSize, &total, 1,
                                        MPI_LONG_LONG_INT, MPI_SUM, testComm),
                          "cannot total data moved");
                return(total);
        }

        /* shared file: every task should report the same size */
        MPI_CHECK(MPI_Allreduce(&localSize, &smallest, 1,
                                MPI_LONG_LONG_INT, MPI_MIN, testComm),
                  "cannot total data moved");
        MPI_CHECK(MPI_Allreduce(&localSize, &largest, 1,
                                MPI_LONG_LONG_INT, MPI_MAX, testComm),
                  "cannot total data moved");

        if (smallest != largest) {
                if (rank == 0) {
                        WARN("inconsistent file size by different tasks");
                }
                /* incorrect, but now consistent across tasks */
                localSize = smallest;
        }

        return(localSize);
}

View File

@ -38,6 +38,9 @@ ior_aiori_t *available_aiori[] = {
#ifdef USE_HDFS_AIORI
&hdfs_aiori,
#endif
#ifdef USE_IME_AIORI
&ime_aiori,
#endif
#ifdef USE_MPIIO_AIORI
&mpiio_aiori,
#endif

View File

@ -87,6 +87,7 @@ typedef struct ior_aiori {
extern ior_aiori_t dummy_aiori;
extern ior_aiori_t hdf5_aiori;
extern ior_aiori_t hdfs_aiori;
extern ior_aiori_t ime_aiori;
extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori;