diff --git a/configure.ac b/configure.ac
index 3513c12..4f9461c 100755
--- a/configure.ac
+++ b/configure.ac
@@ -120,6 +120,17 @@ AM_COND_IF([USE_NCMPI_AIORI],[
         AC_DEFINE([USE_NCMPI_AIORI], [], [Build NCMPI backend AIORI])
 ])
 
+# MMAP IO support
+AC_ARG_WITH([mmap],
+        [AS_HELP_STRING([--with-mmap],
+                [support IO with MMAP backend @<:@default=yes@:>@])],
+        [],
+        [with_mmap=yes])
+AM_CONDITIONAL([USE_MMAP_AIORI], [test x$with_mmap = xyes])
+AM_COND_IF([USE_MMAP_AIORI],[
+        AC_DEFINE([USE_MMAP_AIORI], [], [Build MMAP backend AIORI])
+])
+
 # POSIX IO support
 AC_ARG_WITH([posix],
         [AS_HELP_STRING([--with-posix],
diff --git a/src/Makefile.am b/src/Makefile.am
index 36bbde7..7d2575b 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -45,6 +45,10 @@ extraSOURCES += aiori-NCMPI.c
 extraLDADD += -lpnetcdf
 endif
 
+if USE_MMAP_AIORI
+extraSOURCES += aiori-MMAP.c
+endif
+
 if USE_POSIX_AIORI
 extraSOURCES += aiori-POSIX.c
 endif
diff --git a/src/aiori-MMAP.c b/src/aiori-MMAP.c
new file mode 100644
index 0000000..7f34d51
--- /dev/null
+++ b/src/aiori-MMAP.c
@@ -0,0 +1,169 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+/******************************************************************************\
+*
+* Implementation of abstract I/O interface for MMAP.
+*
+\******************************************************************************/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* NOTE(review): header names restored from the symbols used -- confirm */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <errno.h>
+#include <fcntl.h>              /* IO operations */
+#include <string.h>             /* memcpy() */
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>             /* ftruncate() */
+#include <assert.h>
+
+#include "ior.h"
+#include "aiori.h"
+#include "iordef.h"
+#include "utilities.h"
+
+/**************************** P R O T O T Y P E S *****************************/
+static void *MMAP_Create(char *, IOR_param_t *);
+static void *MMAP_Open(char *, IOR_param_t *);
+static IOR_offset_t MMAP_Xfer(int, void *, IOR_size_t *,
+                               IOR_offset_t, IOR_param_t *);
+static void MMAP_Close(void *, IOR_param_t *);
+static void MMAP_Fsync(void *, IOR_param_t *);
+
+/************************** D E C L A R A T I O N S ***************************/
+
+ior_aiori_t mmap_aiori = {
+        .name = "MMAP",
+        .create = MMAP_Create,
+        .open = MMAP_Open,
+        .xfer = MMAP_Xfer,
+        .close = MMAP_Close,
+        .delete = POSIX_Delete,
+        .set_version = POSIX_SetVersion,
+        .fsync = MMAP_Fsync,
+        .get_file_size = POSIX_GetFileSize,
+};
+
+/***************************** F U N C T I O N S ******************************/
+
+/*
+ * Apply a posix_madvise() hint to the len bytes starting at addr.
+ * posix_madvise() returns the error number instead of setting errno, so the
+ * code is copied into errno before the failure is reported.
+ * NOTE(review): assumes ERR() reports errno -- confirm in iordef.h.
+ */
+static void ior_mmap_advise(void *addr, size_t len, int advice)
+{
+        int rc = posix_madvise(addr, len, advice);
+
+        if (rc != 0) {
+                errno = rc;
+                ERR("madvise() failed");
+        }
+}
+
+/*
+ * Map param->expectedAggFileSize bytes of the open descriptor *file, starting
+ * at file offset 0, as a shared mapping and store its address in
+ * param->mmap_ptr. The mapping is writable only when param->open is WRITE.
+ */
+static void ior_mmap_file(int *file, IOR_param_t *param)
+{
+        int prot = PROT_READ;
+        int advice;
+        IOR_offset_t size = param->expectedAggFileSize;
+
+        if (param->open == WRITE)
+                prot |= PROT_WRITE;
+
+        param->mmap_ptr = mmap(NULL, size, prot, MAP_SHARED, *file, 0);
+        if (param->mmap_ptr == MAP_FAILED)
+                ERR("mmap() failed");
+
+        /* hint the access pattern selected by the randomOffset option */
+        advice = param->randomOffset ? POSIX_MADV_RANDOM
+                                     : POSIX_MADV_SEQUENTIAL;
+        ior_mmap_advise(param->mmap_ptr, size, advice);
+
+        ior_mmap_advise(param->mmap_ptr, size, POSIX_MADV_DONTNEED);
+}
+
+/*
+ * Create and open a file through the POSIX interface, then setup mmap.
+ */
+static void *MMAP_Create(char *testFileName, IOR_param_t * param)
+{
+        int *fd;
+
+        fd = POSIX_Create(testFileName, param);
+        /* set the file length to the mapped length before mapping it */
+        if (ftruncate(*fd, param->expectedAggFileSize) != 0)
+                ERR("ftruncate() failed");
+        ior_mmap_file(fd, param);
+        return ((void *)fd);
+}
+
+/*
+ * Open a file through the POSIX interface and setup mmap.
+ */
+static void *MMAP_Open(char *testFileName, IOR_param_t * param)
+{
+        int *fd;
+
+        fd = POSIX_Open(testFileName, param);
+        ior_mmap_file(fd, param);
+        return ((void *)fd);
+}
+
+/*
+ * Write or read access to file using mmap.
+ *
+ * Copies length bytes between buffer and the mapping at param->offset and
+ * returns length. With fsyncPerWrite set, every write is followed by msync()
+ * and a POSIX_MADV_DONTNEED hint on the range just written; reads are not
+ * synced.
+ */
+static IOR_offset_t MMAP_Xfer(int access, void *file, IOR_size_t * buffer,
+                               IOR_offset_t length, IOR_param_t * param)
+{
+        /* arithmetic on void * is a GNU extension, so go through char * */
+        char *addr = (char *)param->mmap_ptr + param->offset;
+
+        if (access == WRITE) {
+                memcpy(addr, buffer, length);
+                if (param->fsyncPerWrite == TRUE) {
+                        if (msync(addr, length, MS_SYNC) != 0)
+                                ERR("msync() failed");
+                        ior_mmap_advise(addr, length, POSIX_MADV_DONTNEED);
+                }
+        } else {
+                memcpy(buffer, addr, length);
+        }
+        return (length);
+}
+
+/*
+ * Perform msync().
+ */
+static void MMAP_Fsync(void *fd, IOR_param_t * param)
+{
+        if (msync(param->mmap_ptr, param->expectedAggFileSize, MS_SYNC) != 0)
+                EWARN("msync() failed");
+}
+
+/*
+ * Close a file through the POSIX interface, after tearing down the mmap.
+ */
+static void MMAP_Close(void *fd, IOR_param_t * param)
+{
+        if (munmap(param->mmap_ptr, param->expectedAggFileSize) != 0)
+                ERR("munmap failed");
+        param->mmap_ptr = NULL;
+        POSIX_Close(fd, param);
+}
diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c
index 969f7a1..ff5ddc2 100755
--- a/src/aiori-POSIX.c
+++ b/src/aiori-POSIX.c
@@ -65,15 +65,9 @@
 #endif
 
 /**************************** P R O T O T Y P E S *****************************/
-static void *POSIX_Create(char *, IOR_param_t *);
-static void *POSIX_Open(char *, IOR_param_t *);
 static IOR_offset_t POSIX_Xfer(int, void *, IOR_size_t *,
                                IOR_offset_t, IOR_param_t *);
-static void POSIX_Close(void *, IOR_param_t *);
-static void POSIX_Delete(char *, IOR_param_t *);
-static void POSIX_SetVersion(IOR_param_t *);
 static void POSIX_Fsync(void *, IOR_param_t *);
-static IOR_offset_t POSIX_GetFileSize(IOR_param_t *, MPI_Comm, char *);
 
 /************************** D E C L A R A T I O N S ***************************/
 
@@ -262,7 +256,7 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun
 /*
  * Creat and open a file through the POSIX interface.
  */
-static void *POSIX_Create(char *testFileName, IOR_param_t * param)
+void *POSIX_Create(char *testFileName, IOR_param_t * param)
 {
         int fd_oflag = O_BINARY;
         int *fd;
@@ -366,7 +360,7 @@ static void *POSIX_Create(char *testFileName, IOR_param_t * param)
 /*
  * Open a file through the POSIX interface.
  */
-static void *POSIX_Open(char *testFileName, IOR_param_t * param)
+void *POSIX_Open(char *testFileName, IOR_param_t * param)
 {
         int fd_oflag = O_BINARY;
         int *fd;
@@ -494,7 +488,7 @@ static void POSIX_Fsync(void *fd, IOR_param_t * param)
 /*
  * Close a file through the POSIX interface.
  */
-static void POSIX_Close(void *fd, IOR_param_t * param)
+void POSIX_Close(void *fd, IOR_param_t * param)
 {
         if (close(*(int *)fd) != 0)
                 ERR("close() failed");
@@ -504,7 +498,7 @@ static void POSIX_Close(void *fd, IOR_param_t * param)
 /*
  * Delete a file through the POSIX interface.
  */
-static void POSIX_Delete(char *testFileName, IOR_param_t * param)
+void POSIX_Delete(char *testFileName, IOR_param_t * param)
 {
         char errmsg[256];
         sprintf(errmsg, "[RANK %03d]: unlink() of file \"%s\" failed\n",
@@ -516,7 +510,7 @@ static void POSIX_Delete(char *testFileName, IOR_param_t * param)
 /*
  * Determine api version.
  */
-static void POSIX_SetVersion(IOR_param_t * test)
+void POSIX_SetVersion(IOR_param_t * test)
 {
         strcpy(test->apiVersion, test->api);
 }
@@ -524,7 +518,7 @@ static void POSIX_SetVersion(IOR_param_t * test)
 /*
  * Use POSIX stat() to return aggregate file size.
  */
-static IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
+IOR_offset_t POSIX_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
                                       char *testFileName)
 {
         struct stat stat_buf;
diff --git a/src/aiori.c b/src/aiori.c
index 95d3a07..677c1ea 100644
--- a/src/aiori.c
+++ b/src/aiori.c
@@ -44,6 +44,9 @@ ior_aiori_t *available_aiori[] = {
 #ifdef USE_POSIX_AIORI
         &posix_aiori,
 #endif
+#ifdef USE_MMAP_AIORI
+        &mmap_aiori,
+#endif
 #ifdef USE_S3_AIORI
         &s3_aiori,
         &s3_plus_aiori,
diff --git a/src/aiori.h b/src/aiori.h
index 06f733c..4ee400a 100755
--- a/src/aiori.h
+++ b/src/aiori.h
@@ -84,6 +84,7 @@ extern ior_aiori_t hdfs_aiori;
 extern ior_aiori_t mpiio_aiori;
 extern ior_aiori_t ncmpi_aiori;
 extern ior_aiori_t posix_aiori;
+extern ior_aiori_t mmap_aiori;
 extern ior_aiori_t s3_aiori;
 extern ior_aiori_t s3_plus_aiori;
 extern ior_aiori_t s3_emc_aiori;
@@ -95,4 +96,12 @@ const char *aiori_default (void);
 IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm,
                                char *testFileName);
 
+void *POSIX_Create(char *testFileName, IOR_param_t *test);
+void *POSIX_Open(char *testFileName, IOR_param_t *test);
+void POSIX_Close(void *fd, IOR_param_t *test);
+void POSIX_Delete(char *testFileName, IOR_param_t *test);
+void POSIX_SetVersion(IOR_param_t *test);
+IOR_offset_t POSIX_GetFileSize(IOR_param_t *test, MPI_Comm testComm,
+                               char *testFileName);
+
 #endif /* not _AIORI_H */
diff --git a/src/ior.c b/src/ior.c
index a6029cd..b92b40d 100755
--- a/src/ior.c
+++ b/src/ior.c
@@ -228,6 +228,8 @@ void init_IOR_Param_t(IOR_param_t * p)
 
         p->beegfs_numTargets = -1;
         p->beegfs_chunkSize = -1;
+
+        p->mmap_ptr = NULL;
 }
 
 /**
@@ -731,7 +733,7 @@ static void DisplayUsage(char **argv)
 {
         char *opts[] = {
                 "OPTIONS:",
-                " -a S api -- API for I/O [POSIX|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]",
+                " -a S api -- API for I/O [POSIX|MMAP|MPIIO|HDF5|HDFS|S3|S3_EMC|NCMPI]",
                 " -A N refNum -- user supplied reference number to include in the summary",
                 " -b N blockSize -- contiguous bytes to write per task (e.g.: 8, 4k, 2m, 1g)",
                 " -B useO_DIRECT -- uses O_DIRECT for POSIX, bypassing I/O buffers",
@@ -741,7 +743,7 @@ static void DisplayUsage(char **argv)
                 " -D N deadlineForStonewalling -- seconds before stopping write or read phase",
                 " -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
                 " -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
-                " -e fsync -- perform fsync upon POSIX write close",
+                " -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
                 " -E useExistingTestFile -- do not remove test file before write access",
                 " -f S scriptFile -- test script name",
                 " -F filePerProc -- file-per-process",
@@ -780,7 +782,7 @@ static void DisplayUsage(char **argv)
                 " -W checkWrite -- check read after write",
                 " -x singleXferAttempt -- do not retry transfer if incomplete",
                 " -X N reorderTasksRandomSeed -- random seed for -Z option",
-                " -Y fsyncPerWrite -- perform fsync after each POSIX write",
+                " -Y fsyncPerWrite -- perform fsync/msync after each POSIX/MMAP write",
                 " -z randomOffset -- access is to random, not sequential, offsets within a file",
                 " -Z reorderTasksRandom -- changes task ordering to random ordering for readback",
                 " ",
@@ -2377,8 +2379,9 @@ static void ValidateTests(IOR_param_t * test)
         if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt)
                 WARN_RESET("retry only available in POSIX",
                            test, &defaults, singleXferAttempt);
-        if ((strcasecmp(test->api, "POSIX") != 0) && test->fsync)
-                WARN_RESET("fsync() only available in POSIX",
+        if ((strcasecmp(test->api, "POSIX") != 0) && (strcasecmp(test->api, "MMAP") != 0)
+            && test->fsync)
+                WARN_RESET("fsync() only available in POSIX/MMAP",
                            test, &defaults, fsync);
         if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate)
                 WARN_RESET("preallocation only available in MPIIO",
@@ -2409,6 +2412,9 @@ static void ValidateTests(IOR_param_t * test)
         if ((strcasecmp(test->api, "POSIX") == 0) && test->collective)
                 WARN_RESET("collective not available in POSIX",
                            test, &defaults, collective);
+        if ((strcasecmp(test->api, "MMAP") == 0) && test->fsyncPerWrite
+            && (test->transferSize & (sysconf(_SC_PAGESIZE) - 1)))
+                ERR("transfer size must be aligned with PAGESIZE for MMAP with fsyncPerWrite");
 
         /* parameter consitency */
         if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE)
diff --git a/src/ior.h b/src/ior.h
index fce447b..ce1b4ec 100755
--- a/src/ior.h
+++ b/src/ior.h
@@ -162,6 +162,8 @@ typedef struct
         int fsyncPerWrite; /* fsync() after each write */
         int fsync; /* fsync() after write */
 
+        void *mmap_ptr; /* address returned by mmap() for the MMAP backend */
+
         /* MPI variables */
         MPI_Comm testComm; /* MPI communicator */
         MPI_Datatype transferType; /* datatype for transfer */