Add mmap as a new AIORI backend

It shares the create/open/delete/set_version/get_file_size
functions with POSIX backend.
The mmap backend also supports fsync and fsyncPerWrite options,
and it will use msync() instead and fsync().

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
master
Li Dongyang 2018-05-08 21:08:29 +10:00
parent 7a371cfeda
commit 6ba22a19e4
8 changed files with 190 additions and 17 deletions

View File

@ -120,6 +120,17 @@ AM_COND_IF([USE_NCMPI_AIORI],[
AC_DEFINE([USE_NCMPI_AIORI], [], [Build NCMPI backend AIORI]) AC_DEFINE([USE_NCMPI_AIORI], [], [Build NCMPI backend AIORI])
]) ])
# MMAP IO support
AC_ARG_WITH([mmap],
[AS_HELP_STRING([--with-mmap],
[support IO with MMAP backend @<:@default=yes@:>@])],
[],
[with_mmap=yes])
AM_CONDITIONAL([USE_MMAP_AIORI], [test x$with_mmap = xyes])
AM_COND_IF([USE_MMAP_AIORI],[
AC_DEFINE([USE_MMAP_AIORI], [], [Build MMAP backend AIORI])
])
# POSIX IO support # POSIX IO support
AC_ARG_WITH([posix], AC_ARG_WITH([posix],
[AS_HELP_STRING([--with-posix], [AS_HELP_STRING([--with-posix],

View File

@ -45,6 +45,10 @@ extraSOURCES += aiori-NCMPI.c
extraLDADD += -lpnetcdf extraLDADD += -lpnetcdf
endif endif
if USE_MMAP_AIORI
extraSOURCES += aiori-MMAP.c
endif
if USE_POSIX_AIORI if USE_POSIX_AIORI
extraSOURCES += aiori-POSIX.c extraSOURCES += aiori-POSIX.c
endif endif

144
src/aiori-MMAP.c Normal file
View File

@ -0,0 +1,144 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
*
* Implement of abstract I/O interface for MMAP.
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h> /* IO operations */
#include <sys/stat.h>
#include <sys/mman.h>
#include <assert.h>
#include "ior.h"
#include "aiori.h"
#include "iordef.h"
#include "utilities.h"
/**************************** P R O T O T Y P E S *****************************/
static void *MMAP_Create(char *, IOR_param_t *);
static void *MMAP_Open(char *, IOR_param_t *);
static IOR_offset_t MMAP_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void MMAP_Close(void *, IOR_param_t *);
static void MMAP_Fsync(void *, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t mmap_aiori = {
.name = "MMAP",
.create = MMAP_Create,
.open = MMAP_Open,
.xfer = MMAP_Xfer,
.close = MMAP_Close,
.delete = POSIX_Delete,
.set_version = POSIX_SetVersion,
.fsync = MMAP_Fsync,
.get_file_size = POSIX_GetFileSize,
};
/***************************** F U N C T I O N S ******************************/
static void ior_mmap_file(int *file, IOR_param_t *param)
{
int flags = PROT_READ;
IOR_offset_t size = param->expectedAggFileSize;
if (param->open == WRITE)
flags |= PROT_WRITE;
param->mmap_ptr = mmap(NULL, size, flags, MAP_SHARED,
*file, 0);
if (param->mmap_ptr == MAP_FAILED)
ERR("mmap() failed");
if (param->randomOffset)
flags = POSIX_MADV_RANDOM;
else
flags = POSIX_MADV_SEQUENTIAL;
if (posix_madvise(param->mmap_ptr, size, flags) != 0)
ERR("madvise() failed");
if (posix_madvise(param->mmap_ptr, size, POSIX_MADV_DONTNEED) != 0)
ERR("madvise() failed");
return;
}
/*
* Creat and open a file through the POSIX interface, then setup mmap.
*/
static void *MMAP_Create(char *testFileName, IOR_param_t * param)
{
int *fd;
fd = POSIX_Create(testFileName, param);
if (ftruncate(*fd, param->expectedAggFileSize) != 0)
ERR("ftruncate() failed");
ior_mmap_file(fd, param);
return ((void *)fd);
}
/*
* Open a file through the POSIX interface and setup mmap.
*/
static void *MMAP_Open(char *testFileName, IOR_param_t * param)
{
int *fd;
fd = POSIX_Open(testFileName, param);
ior_mmap_file(fd, param);
return ((void *)fd);
}
/*
* Write or read access to file using mmap
*/
static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer,
IOR_offset_t length, IOR_param_t * param)
{
if (access == WRITE) {
memcpy(param->mmap_ptr + param->offset, buffer, length);
} else {
memcpy(buffer, param->mmap_ptr + param->offset, length);
}
if (param->fsyncPerWrite == TRUE) {
if (msync(param->mmap_ptr + param->offset, length, MS_SYNC) != 0)
ERR("msync() failed");
if (posix_madvise(param->mmap_ptr + param->offset, length,
POSIX_MADV_DONTNEED) != 0)
ERR("madvise() failed");
}
return (length);
}
/*
* Perform msync().
*/
static void MMAP_Fsync(void *fd, IOR_param_t * param)
{
if (msync(param->mmap_ptr, param->expectedAggFileSize, MS_SYNC) != 0)
EWARN("msync() failed");
}
/*
* Close a file through the POSIX interface, after tear down the mmap.
*/
static void MMAP_Close(void *fd, IOR_param_t * param)
{
if (munmap(param->mmap_ptr, param->expectedAggFileSize) != 0)
ERR("munmap failed");
param->mmap_ptr = NULL;
POSIX_Close(fd, param);
}

View File

@ -65,15 +65,9 @@
#endif #endif
/**************************** P R O T O T Y P E S *****************************/ /**************************** P R O T O T Y P E S *****************************/
static void *POSIX_Create(char *, IOR_param_t *);
static void *POSIX_Open(char *, IOR_param_t *);
static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *, static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *); IOR_offset_t, IOR_param_t *);
static void POSIX_Close(void *, IOR_param_t *);
static void POSIX_Delete(char *, IOR_param_t *);
static void POSIX_SetVersion(IOR_param_t *);
static void POSIX_Fsync(void *, IOR_param_t *); static void POSIX_Fsync(void *, IOR_param_t *);
static IOR_offset_t POSIX_GetFileSize(IOR_param_t *, MPI_Comm, char *);
/************************** D E C L A R A T I O N S ***************************/ /************************** D E C L A R A T I O N S ***************************/
@ -262,7 +256,7 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun
/* /*
* Creat and open a file through the POSIX interface. * Creat and open a file through the POSIX interface.
*/ */
static void *POSIX_Create(char *testFileName, IOR_param_t * param) void *POSIX_Create(char *testFileName, IOR_param_t * param)
{ {
int fd_oflag = O_BINARY; int fd_oflag = O_BINARY;
int *fd; int *fd;
@ -366,7 +360,7 @@ static void *POSIX_Create(char *testFileName, IOR_param_t * param)
/* /*
* Open a file through the POSIX interface. * Open a file through the POSIX interface.
*/ */
static void *POSIX_Open(char *testFileName, IOR_param_t * param) void *POSIX_Open(char *testFileName, IOR_param_t * param)
{ {
int fd_oflag = O_BINARY; int fd_oflag = O_BINARY;
int *fd; int *fd;
@ -494,7 +488,7 @@ static void POSIX_Fsync(void *fd, IOR_param_t * param)
/* /*
* Close a file through the POSIX interface. * Close a file through the POSIX interface.
*/ */
static void POSIX_Close(void *fd, IOR_param_t * param) void POSIX_Close(void *fd, IOR_param_t * param)
{ {
if (close(*(int *)fd) != 0) if (close(*(int *)fd) != 0)
ERR("close() failed"); ERR("close() failed");
@ -504,7 +498,7 @@ static void POSIX_Close(void *fd, IOR_param_t * param)
/* /*
* Delete a file through the POSIX interface. * Delete a file through the POSIX interface.
*/ */
static void POSIX_Delete(char *testFileName, IOR_param_t * param) void POSIX_Delete(char *testFileName, IOR_param_t * param)
{ {
char errmsg[256]; char errmsg[256];
sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n", sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n",
@ -516,7 +510,7 @@ static void POSIX_Delete(char *testFileName, IOR_param_t * param)
/* /*
* Determine api version. * Determine api version.
*/ */
static void POSIX_SetVersion(IOR_param_t * test) void POSIX_SetVersion(IOR_param_t * test)
{ {
strcpy(test->apiVersion, test->api); strcpy(test->apiVersion, test->api);
} }
@ -524,7 +518,7 @@ static void POSIX_SetVersion(IOR_param_t * test)
/* /*
* Use POSIX stat() to return aggregate file size. * Use POSIX stat() to return aggregate file size.
*/ */
static IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm, IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName) char *testFileName)
{ {
struct stat stat_buf; struct stat stat_buf;

View File

@ -44,6 +44,9 @@ ior_aiori_t *available_aiori[] = {
#ifdef USE_POSIX_AIORI #ifdef USE_POSIX_AIORI
&posix_aiori, &posix_aiori,
#endif #endif
#ifdef USE_MMAP_AIORI
&mmap_aiori,
#endif
#ifdef USE_S3_AIORI #ifdef USE_S3_AIORI
&s3_aiori, &s3_aiori,
&s3_plus_aiori, &s3_plus_aiori,

View File

@ -84,6 +84,7 @@ extern ior_aiori_t hdfs_aiori;
extern ior_aiori_t mpiio_aiori; extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori; extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori; extern ior_aiori_t posix_aiori;
extern ior_aiori_t mmap_aiori;
extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_aiori;
extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_plus_aiori;
extern ior_aiori_t s3_emc_aiori; extern ior_aiori_t s3_emc_aiori;
@ -95,4 +96,12 @@ const char *aiori_default (void);
IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName); char *testFileName);
void *POSIX_Create(char *testFileName, IOR_param_t *test);
void *POSIX_Open(char *testFileName, IOR_param_t *test);
void POSIX_Close(void *fd, IOR_param_t *test);
void POSIX_Delete(char *testFileName, IOR_param_t *test);
void POSIX_SetVersion(IOR_param_t *test);
IOR_offset_t POSIX_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
char *testFileName);
#endif /* not _AIORI_H */ #endif /* not _AIORI_H */

View File

@ -228,6 +228,8 @@ void init_IOR_Param_t(IOR_param_t * p)
p->beegfs_numTargets = -1; p->beegfs_numTargets = -1;
p->beegfs_chunkSize = -1; p->beegfs_chunkSize = -1;
p->mmap_ptr = NULL;
} }
/** /**
@ -731,7 +733,7 @@ static void DisplayUsage(char **argv)
{ {
char *opts[] = { char *opts[] = {
"OPTIONS:", "OPTIONS:",
" -a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]", " -a S api -- API for I/O [POSIX|MMAP|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]",
" -A N refNum -- user supplied reference number to include in the summary", " -A N refNum -- user supplied reference number to include in the summary",
" -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)", " -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)",
" -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers", " -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers",
@ -741,7 +743,7 @@ static void DisplayUsage(char **argv)
" -D N deadlineForStonewalling -- seconds before stopping write or read phase", " -D N deadlineForStonewalling -- seconds before stopping write or read phase",
" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", " -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", " -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
" -e fsync -- perform fsync upon POSIX write close", " -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
" -E useExistingTestFile -- do not remove test file before write access", " -E useExistingTestFile -- do not remove test file before write access",
" -f S scriptFile -- test script name", " -f S scriptFile -- test script name",
" -F filePerProc -- file-per-process", " -F filePerProc -- file-per-process",
@ -780,7 +782,7 @@ static void DisplayUsage(char **argv)
" -W checkWrite -- check read after write", " -W checkWrite -- check read after write",
" -x singleXferAttempt -- do not retry transfer if incomplete", " -x singleXferAttempt -- do not retry transfer if incomplete",
" -X N reorderTasksRandomSeed -- random seed for -Z option", " -X N reorderTasksRandomSeed -- random seed for -Z option",
" -Y fsyncPerWrite -- perform fsync after each POSIX write", " -Y fsyncPerWrite -- perform fsync/msync after each POSIX/MMAP write",
" -z randomOffset -- access is to random, not sequential, offsets within a file", " -z randomOffset -- access is to random, not sequential, offsets within a file",
" -Z reorderTasksRandom -- changes task ordering to random ordering for readback", " -Z reorderTasksRandom -- changes task ordering to random ordering for readback",
" ", " ",
@ -2377,8 +2379,9 @@ static void ValidateTests(IOR_param_t * test)
if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt) if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt)
WARN_RESET("retry only available in POSIX", WARN_RESET("retry only available in POSIX",
test, &defaults, singleXferAttempt); test, &defaults, singleXferAttempt);
if ((strcasecmp(test->api, "POSIX") != 0) && test->fsync) if ((strcasecmp(test->api, "POSIX") != 0) && (strcasecmp(test->api, "MMAP") != 0)
WARN_RESET("fsync() only available in POSIX", && test->fsync)
WARN_RESET("fsync() only available in POSIX/MMAP",
test, &defaults, fsync); test, &defaults, fsync);
if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate) if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate)
WARN_RESET("preallocation only available in MPIIO", WARN_RESET("preallocation only available in MPIIO",
@ -2409,6 +2412,9 @@ static void ValidateTests(IOR_param_t * test)
if ((strcasecmp(test->api, "POSIX") == 0) && test->collective) if ((strcasecmp(test->api, "POSIX") == 0) && test->collective)
WARN_RESET("collective not available in POSIX", WARN_RESET("collective not available in POSIX",
test, &defaults, collective); test, &defaults, collective);
if ((strcasecmp(test->api, "MMAP") == 0) && test->fsyncPerWrite
&& (test->transferSize & (sysconf(_SC_PAGESIZE) - 1)))
ERR("transfer size must be aligned with PAGESIZE for MMAP with fsyncPerWrite");
/* parameter consitency */ /* parameter consitency */
if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE) if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE)

View File

@ -162,6 +162,8 @@ typedef struct
int fsyncPerWrite; /* fsync() after each write */ int fsyncPerWrite; /* fsync() after each write */
int fsync; /* fsync() after write */ int fsync; /* fsync() after write */
void* mmap_ptr;
/* MPI variables */ /* MPI variables */
MPI_Comm testComm; /* MPI communicator */ MPI_Comm testComm; /* MPI communicator */
MPI_Datatype transferType; /* datatype for transfer */ MPI_Datatype transferType; /* datatype for transfer */