From fa25c31994a3522031b99206ddef7dcd337c1d19 Mon Sep 17 00:00:00 2001 From: Afrian Jackson Date: Thu, 31 Oct 2019 15:29:09 +0000 Subject: [PATCH 1/6] Adding PMDK functionality --- configure.ac | 14 +++ src/Makefile.am | 5 + src/aiori-PMDK.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 1 + 5 files changed, 286 insertions(+) create mode 100644 src/aiori-PMDK.c diff --git a/configure.ac b/configure.ac index f6b958b..134fc2e 100755 --- a/configure.ac +++ b/configure.ac @@ -174,6 +174,20 @@ AM_COND_IF([USE_POSIX_AIORI],[ AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI]) ]) +# PMDK IO support +AC_ARG_WITH([pmdk], + [AS_HELP_STRING([--with-pmdk], + [support IO with PMDK backend @<:@default=no@:>@])], + [], + [with_pmdk=yes]) +AM_CONDITIONAL([USE_PMDK_AIORI], [test x$with_pmdk = xyes]) +AM_COND_IF([USE_PMDK_AIORI],[ + AC_DEFINE([USE_PMDK_AIORI], [], [Build PMDK backend AIORI]) + AC_SEARCH_LIBS([pmem_map_file], [pmdk], + [AC_MSG_ERROR([Library containing pmdk symbols not found])]) +]) + + # RADOS support AC_ARG_WITH([rados], [AS_HELP_STRING([--with-rados], diff --git a/src/Makefile.am b/src/Makefile.am index 0de3b4b..c4cd099 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -65,6 +65,11 @@ if USE_POSIX_AIORI extraSOURCES += aiori-POSIX.c endif +if USE_PMDK_AIORI +extraSOURCES += aiori-PMDK.c +extraLDADD += -lpmem +endif + if USE_RADOS_AIORI extraSOURCES += aiori-RADOS.c extraLDADD += -lrados diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c new file mode 100644 index 0000000..eed2230 --- /dev/null +++ b/src/aiori-PMDK.c @@ -0,0 +1,263 @@ +/******************************************************************************\ + * * + * Copyright (c) 2019 EPCC, The University of Edinburgh * + * * + ******************************************************************************* + * * + * * + * This file implements the abstract I/O interface for the low-level PMDK API * + * * +\******************************************************************************/ + + +#include "aiori.h" /* abstract IOR interface */ +#include /* sys_errlist */ +#include /* only for fprintf() */ +#include +#include +#include + + + +static option_help options [] = { + LAST_OPTION +}; + + +/**************************** P R O T O T Y P E S *****************************/ + +static option_help * PMDK_options(); +static void *PMDK_Create(char *, IOR_param_t *); +static void *PMDK_Open(char *, IOR_param_t *); +static IOR_offset_t PMDK_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); +static void PMDK_Fsync(void *, IOR_param_t *); +static void PMDK_Close(void *, IOR_param_t *); +static void PMDK_Delete(char *, IOR_param_t *); +static IOR_offset_t PMDK_GetFileSize(IOR_param_t *, MPI_Comm, char *); + + +/************************** D E C L A R A T I O N S ***************************/ + +extern int errno; +extern int rank; +extern int rankOffset; +extern int verbose; +extern MPI_Comm testComm; + +ior_aiori_t pmdk_aiori = { + .name = "PMDK", + .name_legacy = NULL, + .create = PMDK_Create, + .open = PMDK_Open, + .xfer = PMDK_Xfer, + .close = PMDK_Close, + .delete = PMDK_Delete, + .get_version = aiori_get_version, + .fsync = PMDK_Fsync, + .get_file_size = PMDK_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .access = aiori_posix_access, + .stat = aiori_posix_stat, + .get_options = PMDK_options, + .enable_mdtest = false, +}; + + +/***************************** F U N C T I O N S ******************************/ + +/******************************************************************************/ + +static option_help * PMDK_options(){ + return options; +} + + +/* + * Create and open a memory space through the PMDK interface. + */ +static void *PMDK_Create(char * testFileName, IOR_param_t * param){ + char *pmemaddr = NULL; + int is_pmem; + size_t mapped_len; + size_t open_length; + + if(!param->filePerProc){ + fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + open_length = param->blockSize * param->segmentCount; + + if((pmemaddr = pmem_map_file(testFileName, open_length, + PMEM_FILE_CREATE|PMEM_FILE_EXCL, + 0666, &mapped_len, &is_pmem)) == NULL) { + fprintf(stdout, "\nFailed to pmem_map_file for filename: %s in IOR_Create_PMDK\n", testFileName); + perror("pmem_map_file"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if(!is_pmem){ + fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + + return((void *)pmemaddr); +} /* PMDK_Create() */ + + +/******************************************************************************/ +/* + * Open a memory space through the PMDK interface. + */ + +static void *PMDK_Open(char * testFileName, IOR_param_t * param){ + + char *pmemaddr = NULL; + int is_pmem; + size_t mapped_len; + size_t open_length; + + if(!param->filePerProc){ + fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + open_length = param->blockSize * param->segmentCount; + + if((pmemaddr = pmem_map_file(testFileName, 0, + PMEM_FILE_EXCL, + 0666, &mapped_len, &is_pmem)) == NULL) { + fprintf(stdout, "\nFailed to pmem_map_file for filename: %s\n in IOR_Open_PMDK", testFileName); + perror("pmem_map_file"); + fprintf(stdout, "\n %ld %ld\n",open_length, mapped_len); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if(!is_pmem){ + fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + return((void *)pmemaddr); +} /* PMDK_Open() */ + + +/******************************************************************************/ +/* + * Write or read access to a memory space created with PMDK. Include drain/flush functionality. + */ + +static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, + IOR_offset_t length, IOR_param_t * param){ + int xferRetries = 0; + long long remaining = (long long)length; + char * ptr = (char *)buffer; + long long rc; + long long i; + long long offset_size; + + offset_size = param->offset; + + if(access == WRITE){ + if(param->fsync){ + pmem_memcpy_nodrain(&file[offset_size], ptr, length); + }else{ + pmem_memcpy_persist(&file[offset_size], ptr, length); + } + /* for(i=0; itransferSize; + pmem_persist(&fd, open_length); +} /* PMDK_Fsync() */ + + +/******************************************************************************/ +/* + * Stub for close functionality that is not required for PMDK + */ + +static void PMDK_Close(void *fd, IOR_param_t * param){ + size_t open_length; + open_length = param->transferSize; + pmem_unmap(fd, open_length); + +} /* PMDK_Close() */ + + +/******************************************************************************/ +/* + * Delete the file backing a memory space through PMDK + */ + +static void PMDK_Delete(char *testFileName, IOR_param_t * param) +{ + char errmsg[256]; + sprintf(errmsg,"[RANK %03d]:cannot delete file %s\n",rank,testFileName); + if (unlink(testFileName) != 0) WARN(errmsg); +} /* PMDK_Delete() */ + + +/******************************************************************************/ +/* + * Determine api version. + */ + +static void PMDK_SetVersion(IOR_param_t *test) +{ + strcpy(test->apiVersion, test->api); +} /* PMDK_SetVersion() */ + + +/******************************************************************************/ +/* + * Use POSIX stat() to return aggregate file size. + */ + +static IOR_offset_t PMDK_GetFileSize(IOR_param_t * test, + MPI_Comm testComm, + char * testFileName) +{ + struct stat stat_buf; + IOR_offset_t aggFileSizeFromStat, + tmpMin, tmpMax, tmpSum; + if (test->filePerProc == FALSE) { + fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); + } + + if (stat(testFileName, &stat_buf) != 0) { + ERR("cannot get status of written file"); + } + aggFileSizeFromStat = stat_buf.st_size; + + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, testComm), + "cannot total data moved"); + aggFileSizeFromStat = tmpSum; + + return(aggFileSizeFromStat); +} /* PMDK_GetFileSize() */ diff --git a/src/aiori.c b/src/aiori.c index a72180d..db124d1 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -42,6 +42,9 @@ ior_aiori_t *available_aiori[] = { #ifdef USE_POSIX_AIORI &posix_aiori, #endif +#ifdef USE_PMDK_AIORI + &pmdk_aiori, +#endif #ifdef USE_DAOS_AIORI &daos_aiori, &dfs_aiori, diff --git a/src/aiori.h b/src/aiori.h index da93a1a..7f40156 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -104,6 +104,7 @@ extern ior_aiori_t ime_aiori; extern ior_aiori_t mpiio_aiori; extern ior_aiori_t ncmpi_aiori; extern ior_aiori_t posix_aiori; +extern ior_aiori_t pmdk_aiori; extern ior_aiori_t mmap_aiori; extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori; From 817603f5d1b57dc7aca2a529c88039b200d0a061 Mon Sep 17 00:00:00 2001 From: Afrian Jackson Date: Thu, 31 Oct 2019 15:31:28 +0000 Subject: [PATCH 2/6] Updating copyright statement --- src/aiori-PMDK.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c index eed2230..b14978d 100644 --- a/src/aiori-PMDK.c +++ b/src/aiori-PMDK.c @@ -1,6 +1,7 @@ /******************************************************************************\ * * * Copyright (c) 2019 EPCC, The University of Edinburgh * + * Written by Adrian Jackson a.jackson@epcc.ed.ac.uk * * * ******************************************************************************* * * From 46c5d4e78dffbc2c72dcf7d008c15b78cbec00f5 Mon Sep 17 00:00:00 2001 From: Afrian Jackson Date: Tue, 5 Nov 2019 14:37:54 +0000 Subject: [PATCH 3/6] Adding in dual mount functionality for NVRAM within nodes --- src/aiori-PMDK.c | 10 ++++++---- src/ior.c | 9 ++++++++- src/ior.h | 1 + src/parse_options.c | 7 +++++++ src/utilities.c | 22 ++++++++++++++++++++++ src/utilities.h | 1 + 6 files changed, 45 insertions(+), 5 deletions(-) diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c index b14978d..225b427 100644 --- a/src/aiori-PMDK.c +++ b/src/aiori-PMDK.c @@ -99,10 +99,11 @@ static void *PMDK_Create(char * testFileName, IOR_param_t * param){ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - if(!is_pmem){ + /* if(!is_pmem){ + fprintf(stdout, "\n is_pmem is %d\n",is_pmem); fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - } + }*/ return((void *)pmemaddr); @@ -137,10 +138,11 @@ static void *PMDK_Open(char * testFileName, IOR_param_t * param){ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } + /* if(!is_pmem){ fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - } + }*/ return((void *)pmemaddr); } /* PMDK_Open() */ @@ -174,7 +176,7 @@ static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, pmem_persist(&file[offset_size],length*sizeof(char));*/ }else{ memcpy(ptr, &file[offset_size], length); - /*for(i=0; itestFileName); + if(test->dualMount){ + GetProcessorAndCore(&socket, &core); + sprintf(tmpString, "%s%d/%s",initialTestFileName, + socket, "data"); + strcpy(initialTestFileName, tmpString); + } fileNames = ParseFileName(initialTestFileName, &count); if (count > 1 && test->uniqueDir == TRUE) ERR("cannot use multiple file names with unique directories"); diff --git a/src/ior.h b/src/ior.h index ccf47fa..3e9e260 100755 --- a/src/ior.h +++ b/src/ior.h @@ -97,6 +97,7 @@ typedef struct char * options; /* options string */ // intermediate options int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ + int dualMount; /* dual mount points */ int numTasks; /* number of tasks for test */ int numNodes; /* number of nodes for test */ int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */ diff --git a/src/parse_options.c b/src/parse_options.c index 74a7b54..3f20ba1 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -80,6 +80,10 @@ static void CheckRunSettings(IOR_test_t *tests) else params->openFlags |= IOR_WRONLY; } + + if(params->dualMount && !params->filePerProc) { + MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "Dual Mount can only be used with File Per Process"); + } } } @@ -137,6 +141,8 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt params->platform = strdup(value); } else if (strcasecmp(option, "testfile") == 0) { params->testFileName = strdup(value); + } else if (strcasecmp(option, "dualmount") == 0){ + params->dualMount = atoi(value); } else if (strcasecmp(option, "hintsfilename") == 0) { params->hintsFileName = strdup(value); } else if (strcasecmp(option, "deadlineforstonewalling") == 0) { @@ -521,6 +527,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & params->checkWrite}, {'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & params->singleXferAttempt}, {'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & params->reorderTasksRandomSeed}, + {'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount}, {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, diff --git a/src/utilities.c b/src/utilities.c index a657d9f..e06bfdc 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -880,3 +880,25 @@ char *HumanReadable(IOR_offset_t value, int base) } return valueStr; } + +#if defined(__aarch64__) +// TODO: This might be general enough to provide the functionality for any system +// regardless of processor type given we aren't worried about thread/process migration. +// Test on Intel systems and see if we can get rid of the architecture specificity +// of the code. +unsigned long GetProcessorAndCore(int *chip, int *core){ + return syscall(SYS_getcpu, core, chip, NULL); +} +// TODO: Add in AMD function +#else +// If we're not on an ARM processor assume we're on an intel processor and use the +// rdtscp instruction. +unsigned long GetProcessorAndCore(int *chip, int *core){ + unsigned long a,d,c; + __asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); + *chip = (c & 0xFFF000)>>12; + *core = c & 0xFFF; + return ((unsigned long)a) | (((unsigned long)d) << 32);; +} +#endif + diff --git a/src/utilities.h b/src/utilities.h index 2a9abe3..600da18 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -68,6 +68,7 @@ void StoreStoneWallingIterations(char * const filename, int64_t count); void init_clock(void); double GetTimeStamp(void); char * PrintTimestamp(); // TODO remove this function +unsigned long GetProcessorAndCore(int *chip, int *core); extern double wall_clock_deviation; extern double wall_clock_delta; From b4c85798bd5be587f915d92e4cdd554890dcc68b Mon Sep 17 00:00:00 2001 From: Afrian Jackson Date: Tue, 5 Nov 2019 14:39:31 +0000 Subject: [PATCH 4/6] Tidying up PMDK backend --- src/aiori-PMDK.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c index 225b427..5bb7add 100644 --- a/src/aiori-PMDK.c +++ b/src/aiori-PMDK.c @@ -99,11 +99,11 @@ static void *PMDK_Create(char * testFileName, IOR_param_t * param){ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - /* if(!is_pmem){ + if(!is_pmem){ fprintf(stdout, "\n is_pmem is %d\n",is_pmem); fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - }*/ + } return((void *)pmemaddr); @@ -138,11 +138,10 @@ static void *PMDK_Open(char * testFileName, IOR_param_t * param){ MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - /* if(!is_pmem){ fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); - }*/ + } return((void *)pmemaddr); } /* PMDK_Open() */ @@ -170,15 +169,8 @@ static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, }else{ pmem_memcpy_persist(&file[offset_size], ptr, length); } - /* for(i=0; i Date: Tue, 5 Nov 2019 17:42:48 +0000 Subject: [PATCH 5/6] Fixing configure so pmdk backend not built without --with-pmdk being specified --- configure.ac | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 09c85e7..88522c9 100755 --- a/configure.ac +++ b/configure.ac @@ -191,10 +191,11 @@ AC_ARG_WITH([pmdk], [AS_HELP_STRING([--with-pmdk], [support IO with PMDK backend @<:@default=no@:>@])], [], - [with_pmdk=yes]) + [with_pmdk=no]) AM_CONDITIONAL([USE_PMDK_AIORI], [test x$with_pmdk = xyes]) -AM_COND_IF([USE_PMDK_AIORI],[ +AS_IF([test "x$with_pmdk" != xno], [ AC_DEFINE([USE_PMDK_AIORI], [], [Build PMDK backend AIORI]) + AC_CHECK_HEADERS(libpmem.h,, [unset PMDK]) AC_SEARCH_LIBS([pmem_map_file], [pmdk], [AC_MSG_ERROR([Library containing pmdk symbols not found])]) ]) From 9c0926ef2a0fd6182f737999125b3106abaef94b Mon Sep 17 00:00:00 2001 From: Afrian Jackson Date: Thu, 7 Nov 2019 20:17:55 +0000 Subject: [PATCH 6/6] Moving fsync pmdk functionalty from persist to drain on the advice of the PMDK developers --- src/aiori-PMDK.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c index 5bb7add..4a3953b 100644 --- a/src/aiori-PMDK.c +++ b/src/aiori-PMDK.c @@ -98,13 +98,14 @@ static void *PMDK_Create(char * testFileName, IOR_param_t * param){ perror("pmem_map_file"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - + if(!is_pmem){ fprintf(stdout, "\n is_pmem is %d\n",is_pmem); fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } + return((void *)pmemaddr); } /* PMDK_Create() */ @@ -137,12 +138,12 @@ static void *PMDK_Open(char * testFileName, IOR_param_t * param){ fprintf(stdout, "\n %ld %ld\n",open_length, mapped_len); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - + if(!is_pmem){ fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - + return((void *)pmemaddr); } /* PMDK_Open() */ @@ -184,9 +185,7 @@ static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, static void PMDK_Fsync(void *fd, IOR_param_t * param) { - size_t open_length; - open_length = param->transferSize; - pmem_persist(&fd, open_length); + pmem_drain(); } /* PMDK_Fsync() */