From fa25c31994a3522031b99206ddef7dcd337c1d19 Mon Sep 17 00:00:00 2001 From: Afrian Jackson Date: Thu, 31 Oct 2019 15:29:09 +0000 Subject: [PATCH] Adding PMDK functionality --- configure.ac | 14 +++ src/Makefile.am | 5 + src/aiori-PMDK.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 1 + 5 files changed, 286 insertions(+) create mode 100644 src/aiori-PMDK.c diff --git a/configure.ac b/configure.ac index f6b958b..134fc2e 100755 --- a/configure.ac +++ b/configure.ac @@ -174,6 +174,20 @@ AM_COND_IF([USE_POSIX_AIORI],[ AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI]) ]) +# PMDK IO support +AC_ARG_WITH([pmdk], + [AS_HELP_STRING([--with-pmdk], + [support IO with PMDK backend @<:@default=no@:>@])], + [], + [with_pmdk=yes]) +AM_CONDITIONAL([USE_PMDK_AIORI], [test x$with_pmdk = xyes]) +AM_COND_IF([USE_PMDK_AIORI],[ + AC_DEFINE([USE_PMDK_AIORI], [], [Build PMDK backend AIORI]) + AC_SEARCH_LIBS([pmem_map_file], [pmdk], + [AC_MSG_ERROR([Library containing pmdk symbols not found])]) +]) + + # RADOS support AC_ARG_WITH([rados], [AS_HELP_STRING([--with-rados], diff --git a/src/Makefile.am b/src/Makefile.am index 0de3b4b..c4cd099 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -65,6 +65,11 @@ if USE_POSIX_AIORI extraSOURCES += aiori-POSIX.c endif +if USE_PMDK_AIORI +extraSOURCES += aiori-PMDK.c +extraLDADD += -lpmem +endif + if USE_RADOS_AIORI extraSOURCES += aiori-RADOS.c extraLDADD += -lrados diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c new file mode 100644 index 0000000..eed2230 --- /dev/null +++ b/src/aiori-PMDK.c @@ -0,0 +1,263 @@ +/******************************************************************************\ + * * + * Copyright (c) 2019 EPCC, The University of Edinburgh * + * * + ******************************************************************************* + * * + * * + * This file implements the abstract I/O interface for the low-level PMDK API * + * * +\******************************************************************************/ + + +#include "aiori.h" /* abstract IOR interface */ +#include /* sys_errlist */ +#include /* only for fprintf() */ +#include +#include +#include + + + +static option_help options [] = { + LAST_OPTION +}; + + +/**************************** P R O T O T Y P E S *****************************/ + +static option_help * PMDK_options(); +static void *PMDK_Create(char *, IOR_param_t *); +static void *PMDK_Open(char *, IOR_param_t *); +static IOR_offset_t PMDK_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); +static void PMDK_Fsync(void *, IOR_param_t *); +static void PMDK_Close(void *, IOR_param_t *); +static void PMDK_Delete(char *, IOR_param_t *); +static IOR_offset_t PMDK_GetFileSize(IOR_param_t *, MPI_Comm, char *); + + +/************************** D E C L A R A T I O N S ***************************/ + +extern int errno; +extern int rank; +extern int rankOffset; +extern int verbose; +extern MPI_Comm testComm; + +ior_aiori_t pmdk_aiori = { + .name = "PMDK", + .name_legacy = NULL, + .create = PMDK_Create, + .open = PMDK_Open, + .xfer = PMDK_Xfer, + .close = PMDK_Close, + .delete = PMDK_Delete, + .get_version = aiori_get_version, + .fsync = PMDK_Fsync, + .get_file_size = PMDK_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .access = aiori_posix_access, + .stat = aiori_posix_stat, + .get_options = PMDK_options, + .enable_mdtest = false, +}; + + +/***************************** F U N C T I O N S ******************************/ + +/******************************************************************************/ + +static option_help * PMDK_options(){ + return options; +} + + +/* + * Create and open a memory space through the PMDK interface. + */ +static void *PMDK_Create(char * testFileName, IOR_param_t * param){ + char *pmemaddr = NULL; + int is_pmem; + size_t mapped_len; + size_t open_length; + + if(!param->filePerProc){ + fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + open_length = param->blockSize * param->segmentCount; + + if((pmemaddr = pmem_map_file(testFileName, open_length, + PMEM_FILE_CREATE|PMEM_FILE_EXCL, + 0666, &mapped_len, &is_pmem)) == NULL) { + fprintf(stdout, "\nFailed to pmem_map_file for filename: %s in IOR_Create_PMDK\n", testFileName); + perror("pmem_map_file"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if(!is_pmem){ + fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + + return((void *)pmemaddr); +} /* PMDK_Create() */ + + +/******************************************************************************/ +/* + * Open a memory space through the PMDK interface. + */ + +static void *PMDK_Open(char * testFileName, IOR_param_t * param){ + + char *pmemaddr = NULL; + int is_pmem; + size_t mapped_len; + size_t open_length; + + if(!param->filePerProc){ + fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + open_length = param->blockSize * param->segmentCount; + + if((pmemaddr = pmem_map_file(testFileName, 0, + PMEM_FILE_EXCL, + 0666, &mapped_len, &is_pmem)) == NULL) { + fprintf(stdout, "\nFailed to pmem_map_file for filename: %s\n in IOR_Open_PMDK", testFileName); + perror("pmem_map_file"); + fprintf(stdout, "\n %ld %ld\n",open_length, mapped_len); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if(!is_pmem){ + fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + return((void *)pmemaddr); +} /* PMDK_Open() */ + + +/******************************************************************************/ +/* + * Write or read access to a memory space created with PMDK. Include drain/flush functionality. + */ + +static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, + IOR_offset_t length, IOR_param_t * param){ + int xferRetries = 0; + long long remaining = (long long)length; + char * ptr = (char *)buffer; + long long rc; + long long i; + long long offset_size; + + offset_size = param->offset; + + if(access == WRITE){ + if(param->fsync){ + pmem_memcpy_nodrain(&file[offset_size], ptr, length); + }else{ + pmem_memcpy_persist(&file[offset_size], ptr, length); + } + /* for(i=0; itransferSize; + pmem_persist(&fd, open_length); +} /* PMDK_Fsync() */ + + +/******************************************************************************/ +/* + * Stub for close functionality that is not required for PMDK + */ + +static void PMDK_Close(void *fd, IOR_param_t * param){ + size_t open_length; + open_length = param->transferSize; + pmem_unmap(fd, open_length); + +} /* PMDK_Close() */ + + +/******************************************************************************/ +/* + * Delete the file backing a memory space through PMDK + */ + +static void PMDK_Delete(char *testFileName, IOR_param_t * param) +{ + char errmsg[256]; + sprintf(errmsg,"[RANK %03d]:cannot delete file %s\n",rank,testFileName); + if (unlink(testFileName) != 0) WARN(errmsg); +} /* PMDK_Delete() */ + + +/******************************************************************************/ +/* + * Determine api version. + */ + +static void PMDK_SetVersion(IOR_param_t *test) +{ + strcpy(test->apiVersion, test->api); +} /* PMDK_SetVersion() */ + + +/******************************************************************************/ +/* + * Use POSIX stat() to return aggregate file size. + */ + +static IOR_offset_t PMDK_GetFileSize(IOR_param_t * test, + MPI_Comm testComm, + char * testFileName) +{ + struct stat stat_buf; + IOR_offset_t aggFileSizeFromStat, + tmpMin, tmpMax, tmpSum; + if (test->filePerProc == FALSE) { + fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if (stat(testFileName, &stat_buf) != 0) { + ERR("cannot get status of written file"); + } + aggFileSizeFromStat = stat_buf.st_size; + + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, testComm), + "cannot total data moved"); + aggFileSizeFromStat = tmpSum; + + return(aggFileSizeFromStat); +} /* PMDK_GetFileSize() */ diff --git a/src/aiori.c b/src/aiori.c index a72180d..db124d1 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -42,6 +42,9 @@ ior_aiori_t *available_aiori[] = { #ifdef USE_POSIX_AIORI &posix_aiori, #endif +#ifdef USE_PMDK_AIORI + &pmdk_aiori, +#endif #ifdef USE_DAOS_AIORI &daos_aiori, &dfs_aiori, diff --git a/src/aiori.h b/src/aiori.h index da93a1a..7f40156 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -104,6 +104,7 @@ extern ior_aiori_t ime_aiori; extern ior_aiori_t mpiio_aiori; extern ior_aiori_t ncmpi_aiori; extern ior_aiori_t posix_aiori; +extern ior_aiori_t pmdk_aiori; extern ior_aiori_t mmap_aiori; extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori;