Add mmap as a new AIORI backend

It shares the create/open/delete/set_version/get_file_size
functions with POSIX backend.
The mmap backend also supports fsync and fsyncPerWrite options,
and it will use msync() instead and fsync().

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
master
Li Dongyang 2018-05-08 21:08:29 +10:00
parent 7a371cfeda
commit 6ba22a19e4
8 changed files with 190 additions and 17 deletions

View File

@ -120,6 +120,17 @@ AM_COND_IF([USE_NCMPI_AIORI],[
AC_DEFINE([USE_NCMPI_AIORI], [], [Build NCMPI backend AIORI])
])
# MMAP IO support
AC_ARG_WITH([mmap],
[AS_HELP_STRING([--with-mmap],
[support IO with MMAP backend @<:@default=yes@:>@])],
[],
[with_mmap=yes])
AM_CONDITIONAL([USE_MMAP_AIORI], [test x$with_mmap = xyes])
AM_COND_IF([USE_MMAP_AIORI],[
AC_DEFINE([USE_MMAP_AIORI], [], [Build MMAP backend AIORI])
])
# POSIX IO support
AC_ARG_WITH([posix],
[AS_HELP_STRING([--with-posix],

View File

@ -45,6 +45,10 @@ extraSOURCES += aiori-NCMPI.c
extraLDADD += -lpnetcdf
endif
if USE_MMAP_AIORI
extraSOURCES += aiori-MMAP.c
endif
if USE_POSIX_AIORI
extraSOURCES += aiori-POSIX.c
endif

144
src/aiori-MMAP.c Normal file
View File

@ -0,0 +1,144 @@
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
/******************************************************************************\
*
* Implement of abstract I/O interface for MMAP.
*
\******************************************************************************/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <fcntl.h> /* IO operations */
#include <sys/stat.h>
#include <sys/mman.h>
#include <assert.h>
#include "ior.h"
#include "aiori.h"
#include "iordef.h"
#include "utilities.h"
/**************************** P R O T O T Y P E S *****************************/
static void *MMAP_Create(char *, IOR_param_t *);
static void *MMAP_Open(char *, IOR_param_t *);
static IOR_offset_t MMAP_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void MMAP_Close(void *, IOR_param_t *);
static void MMAP_Fsync(void *, IOR_param_t *);
/************************** D E C L A R A T I O N S ***************************/
ior_aiori_t mmap_aiori = {
.name = "MMAP",
.create = MMAP_Create,
.open = MMAP_Open,
.xfer = MMAP_Xfer,
.close = MMAP_Close,
.delete = POSIX_Delete,
.set_version = POSIX_SetVersion,
.fsync = MMAP_Fsync,
.get_file_size = POSIX_GetFileSize,
};
/***************************** F U N C T I O N S ******************************/
static void ior_mmap_file(int *file, IOR_param_t *param)
{
int flags = PROT_READ;
IOR_offset_t size = param->expectedAggFileSize;
if (param->open == WRITE)
flags |= PROT_WRITE;
param->mmap_ptr = mmap(NULL, size, flags, MAP_SHARED,
*file, 0);
if (param->mmap_ptr == MAP_FAILED)
ERR("mmap() failed");
if (param->randomOffset)
flags = POSIX_MADV_RANDOM;
else
flags = POSIX_MADV_SEQUENTIAL;
if (posix_madvise(param->mmap_ptr, size, flags) != 0)
ERR("madvise() failed");
if (posix_madvise(param->mmap_ptr, size, POSIX_MADV_DONTNEED) != 0)
ERR("madvise() failed");
return;
}
/*
* Creat and open a file through the POSIX interface, then setup mmap.
*/
static void *MMAP_Create(char *testFileName, IOR_param_t * param)
{
int *fd;
fd = POSIX_Create(testFileName, param);
if (ftruncate(*fd, param->expectedAggFileSize) != 0)
ERR("ftruncate() failed");
ior_mmap_file(fd, param);
return ((void *)fd);
}
/*
* Open a file through the POSIX interface and setup mmap.
*/
static void *MMAP_Open(char *testFileName, IOR_param_t * param)
{
int *fd;
fd = POSIX_Open(testFileName, param);
ior_mmap_file(fd, param);
return ((void *)fd);
}
/*
* Write or read access to file using mmap
*/
static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer,
IOR_offset_t length, IOR_param_t * param)
{
if (access == WRITE) {
memcpy(param->mmap_ptr + param->offset, buffer, length);
} else {
memcpy(buffer, param->mmap_ptr + param->offset, length);
}
if (param->fsyncPerWrite == TRUE) {
if (msync(param->mmap_ptr + param->offset, length, MS_SYNC) != 0)
ERR("msync() failed");
if (posix_madvise(param->mmap_ptr + param->offset, length,
POSIX_MADV_DONTNEED) != 0)
ERR("madvise() failed");
}
return (length);
}
/*
* Perform msync().
*/
static void MMAP_Fsync(void *fd, IOR_param_t * param)
{
if (msync(param->mmap_ptr, param->expectedAggFileSize, MS_SYNC) != 0)
EWARN("msync() failed");
}
/*
* Close a file through the POSIX interface, after tear down the mmap.
*/
static void MMAP_Close(void *fd, IOR_param_t * param)
{
if (munmap(param->mmap_ptr, param->expectedAggFileSize) != 0)
ERR("munmap failed");
param->mmap_ptr = NULL;
POSIX_Close(fd, param);
}

View File

@ -65,15 +65,9 @@
#endif
/**************************** P R O T O T Y P E S *****************************/
static void *POSIX_Create(char *, IOR_param_t *);
static void *POSIX_Open(char *, IOR_param_t *);
static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *,
IOR_offset_t, IOR_param_t *);
static void POSIX_Close(void *, IOR_param_t *);
static void POSIX_Delete(char *, IOR_param_t *);
static void POSIX_SetVersion(IOR_param_t *);
static void POSIX_Fsync(void *, IOR_param_t *);
static IOR_offset_t POSIX_GetFileSize(IOR_param_t *, MPI_Comm, char *);
/************************** D E C L A R A T I O N S ***************************/
@ -262,7 +256,7 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun
/*
* Creat and open a file through the POSIX interface.
*/
static void *POSIX_Create(char *testFileName, IOR_param_t * param)
void *POSIX_Create(char *testFileName, IOR_param_t * param)
{
int fd_oflag = O_BINARY;
int *fd;
@ -366,7 +360,7 @@ static void *POSIX_Create(char *testFileName, IOR_param_t * param)
/*
* Open a file through the POSIX interface.
*/
static void *POSIX_Open(char *testFileName, IOR_param_t * param)
void *POSIX_Open(char *testFileName, IOR_param_t * param)
{
int fd_oflag = O_BINARY;
int *fd;
@ -494,7 +488,7 @@ static void POSIX_Fsync(void *fd, IOR_param_t * param)
/*
* Close a file through the POSIX interface.
*/
static void POSIX_Close(void *fd, IOR_param_t * param)
void POSIX_Close(void *fd, IOR_param_t * param)
{
if (close(*(int *)fd) != 0)
ERR("close() failed");
@ -504,7 +498,7 @@ static void POSIX_Close(void *fd, IOR_param_t * param)
/*
* Delete a file through the POSIX interface.
*/
static void POSIX_Delete(char *testFileName, IOR_param_t * param)
void POSIX_Delete(char *testFileName, IOR_param_t * param)
{
char errmsg[256];
sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n",
@ -516,7 +510,7 @@ static void POSIX_Delete(char *testFileName, IOR_param_t * param)
/*
* Determine api version.
*/
static void POSIX_SetVersion(IOR_param_t * test)
void POSIX_SetVersion(IOR_param_t * test)
{
strcpy(test->apiVersion, test->api);
}
@ -524,7 +518,7 @@ static void POSIX_SetVersion(IOR_param_t * test)
/*
* Use POSIX stat() to return aggregate file size.
*/
static IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName)
{
struct stat stat_buf;

View File

@ -44,6 +44,9 @@ ior_aiori_t *available_aiori[] = {
#ifdef USE_POSIX_AIORI
&posix_aiori,
#endif
#ifdef USE_MMAP_AIORI
&mmap_aiori,
#endif
#ifdef USE_S3_AIORI
&s3_aiori,
&s3_plus_aiori,

View File

@ -84,6 +84,7 @@ extern ior_aiori_t hdfs_aiori;
extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori;
extern ior_aiori_t mmap_aiori;
extern ior_aiori_t s3_aiori;
extern ior_aiori_t s3_plus_aiori;
extern ior_aiori_t s3_emc_aiori;
@ -95,4 +96,12 @@ const char *aiori_default (void);
IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
char *testFileName);
void *POSIX_Create(char *testFileName, IOR_param_t *test);
void *POSIX_Open(char *testFileName, IOR_param_t *test);
void POSIX_Close(void *fd, IOR_param_t *test);
void POSIX_Delete(char *testFileName, IOR_param_t *test);
void POSIX_SetVersion(IOR_param_t *test);
IOR_offset_t POSIX_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
char *testFileName);
#endif /* not _AIORI_H */

View File

@ -228,6 +228,8 @@ void init_IOR_Param_t(IOR_param_t * p)
p->beegfs_numTargets = -1;
p->beegfs_chunkSize = -1;
p->mmap_ptr = NULL;
}
/**
@ -731,7 +733,7 @@ static void DisplayUsage(char **argv)
{
char *opts[] = {
"OPTIONS:",
" -a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]",
" -a S api -- API for I/O [POSIX|MMAP|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]",
" -A N refNum -- user supplied reference number to include in the summary",
" -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)",
" -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers",
@ -741,7 +743,7 @@ static void DisplayUsage(char **argv)
" -D N deadlineForStonewalling -- seconds before stopping write or read phase",
" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
" -e fsync -- perform fsync upon POSIX write close",
" -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
" -E useExistingTestFile -- do not remove test file before write access",
" -f S scriptFile -- test script name",
" -F filePerProc -- file-per-process",
@ -780,7 +782,7 @@ static void DisplayUsage(char **argv)
" -W checkWrite -- check read after write",
" -x singleXferAttempt -- do not retry transfer if incomplete",
" -X N reorderTasksRandomSeed -- random seed for -Z option",
" -Y fsyncPerWrite -- perform fsync after each POSIX write",
" -Y fsyncPerWrite -- perform fsync/msync after each POSIX/MMAP write",
" -z randomOffset -- access is to random, not sequential, offsets within a file",
" -Z reorderTasksRandom -- changes task ordering to random ordering for readback",
" ",
@ -2377,8 +2379,9 @@ static void ValidateTests(IOR_param_t * test)
if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt)
WARN_RESET("retry only available in POSIX",
test, &defaults, singleXferAttempt);
if ((strcasecmp(test->api, "POSIX") != 0) && test->fsync)
WARN_RESET("fsync() only available in POSIX",
if ((strcasecmp(test->api, "POSIX") != 0) && (strcasecmp(test->api, "MMAP") != 0)
&& test->fsync)
WARN_RESET("fsync() only available in POSIX/MMAP",
test, &defaults, fsync);
if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate)
WARN_RESET("preallocation only available in MPIIO",
@ -2409,6 +2412,9 @@ static void ValidateTests(IOR_param_t * test)
if ((strcasecmp(test->api, "POSIX") == 0) && test->collective)
WARN_RESET("collective not available in POSIX",
test, &defaults, collective);
if ((strcasecmp(test->api, "MMAP") == 0) && test->fsyncPerWrite
&& (test->transferSize & (sysconf(_SC_PAGESIZE) - 1)))
ERR("transfer size must be aligned with PAGESIZE for MMAP with fsyncPerWrite");
/* parameter consitency */
if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE)

View File

@ -162,6 +162,8 @@ typedef struct
int fsyncPerWrite; /* fsync() after each write */
int fsync; /* fsync() after write */
void* mmap_ptr;
/* MPI variables */
MPI_Comm testComm; /* MPI communicator */
MPI_Datatype transferType; /* datatype for transfer */