Merge pull request #199 from adrianjhpc/master

PMDK backend with dual mount options
master
Julian Kunkel 2020-06-24 09:50:12 +01:00 committed by GitHub
commit 71874f9b90
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 320 additions and 1 deletions

View File

@ -186,6 +186,21 @@ AM_COND_IF([USE_POSIX_AIORI],[
AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI])
])
# PMDK IO support
AC_ARG_WITH([pmdk],
[AS_HELP_STRING([--with-pmdk],
[support IO with PMDK backend @<:@default=no@:>@])],
[],
[with_pmdk=no])
AM_CONDITIONAL([USE_PMDK_AIORI], [test x$with_pmdk = xyes])
AS_IF([test "x$with_pmdk" != xno], [
AC_DEFINE([USE_PMDK_AIORI], [], [Build PMDK backend AIORI])
AC_CHECK_HEADERS(libpmem.h,, [unset PMDK])
AC_SEARCH_LIBS([pmem_map_file], [pmdk],
[AC_MSG_ERROR([Library containing pmdk symbols not found])])
])
# RADOS support
AC_ARG_WITH([rados],
[AS_HELP_STRING([--with-rados],

View File

@ -65,6 +65,11 @@ if USE_POSIX_AIORI
extraSOURCES += aiori-POSIX.c
endif
if USE_PMDK_AIORI
extraSOURCES += aiori-PMDK.c
extraLDADD += -lpmem
endif
if USE_RADOS_AIORI
extraSOURCES += aiori-RADOS.c
extraLDADD += -lrados

257
src/aiori-PMDK.c Normal file
View File

@ -0,0 +1,257 @@
/******************************************************************************\
* *
* Copyright (c) 2019 EPCC, The University of Edinburgh *
* Written by Adrian Jackson a.jackson@epcc.ed.ac.uk *
* *
*******************************************************************************
* *
* *
* This file implements the abstract I/O interface for the low-level PMDK API *
* *
\******************************************************************************/
#include "aiori.h" /* abstract IOR interface */
#include <errno.h> /* sys_errlist */
#include <stdio.h> /* only for fprintf() */
#include <stdlib.h>
#include <sys/stat.h>
#include <libpmem.h>
static option_help options [] = {
LAST_OPTION
};
/**************************** P R O T O T Y P E S *****************************/
static option_help * PMDK_options();
static void *PMDK_Create(char *, IOR_param_t *);
static void *PMDK_Open(char *, IOR_param_t *);
static IOR_offset_t PMDK_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *);
static void PMDK_Fsync(void *, IOR_param_t *);
static void PMDK_Close(void *, IOR_param_t *);
static void PMDK_Delete(char *, IOR_param_t *);
static IOR_offset_t PMDK_GetFileSize(IOR_param_t *, MPI_Comm, char *);
/************************** D E C L A R A T I O N S ***************************/
extern int errno;
extern int rank;
extern int rankOffset;
extern int verbose;
extern MPI_Comm testComm;
ior_aiori_t pmdk_aiori = {
.name = "PMDK",
.name_legacy = NULL,
.create = PMDK_Create,
.open = PMDK_Open,
.xfer = PMDK_Xfer,
.close = PMDK_Close,
.delete = PMDK_Delete,
.get_version = aiori_get_version,
.fsync = PMDK_Fsync,
.get_file_size = PMDK_GetFileSize,
.statfs = aiori_posix_statfs,
.mkdir = aiori_posix_mkdir,
.rmdir = aiori_posix_rmdir,
.access = aiori_posix_access,
.stat = aiori_posix_stat,
.get_options = PMDK_options,
.enable_mdtest = false,
};
/***************************** F U N C T I O N S ******************************/
/******************************************************************************/
static option_help * PMDK_options(){
return options;
}
/*
* Create and open a memory space through the PMDK interface.
*/
static void *PMDK_Create(char * testFileName, IOR_param_t * param){
char *pmemaddr = NULL;
int is_pmem;
size_t mapped_len;
size_t open_length;
if(!param->filePerProc){
fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n");
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
open_length = param->blockSize * param->segmentCount;
if((pmemaddr = pmem_map_file(testFileName, open_length,
PMEM_FILE_CREATE|PMEM_FILE_EXCL,
0666, &mapped_len, &is_pmem)) == NULL) {
fprintf(stdout, "\nFailed to pmem_map_file for filename: %s in IOR_Create_PMDK\n", testFileName);
perror("pmem_map_file");
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
if(!is_pmem){
fprintf(stdout, "\n is_pmem is %d\n",is_pmem);
fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n");
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
return((void *)pmemaddr);
} /* PMDK_Create() */
/******************************************************************************/
/*
* Open a memory space through the PMDK interface.
*/
static void *PMDK_Open(char * testFileName, IOR_param_t * param){
char *pmemaddr = NULL;
int is_pmem;
size_t mapped_len;
size_t open_length;
if(!param->filePerProc){
fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n");
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
open_length = param->blockSize * param->segmentCount;
if((pmemaddr = pmem_map_file(testFileName, 0,
PMEM_FILE_EXCL,
0666, &mapped_len, &is_pmem)) == NULL) {
fprintf(stdout, "\nFailed to pmem_map_file for filename: %s\n in IOR_Open_PMDK", testFileName);
perror("pmem_map_file");
fprintf(stdout, "\n %ld %ld\n",open_length, mapped_len);
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
if(!is_pmem){
fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n");
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
return((void *)pmemaddr);
} /* PMDK_Open() */
/******************************************************************************/
/*
* Write or read access to a memory space created with PMDK. Include drain/flush functionality.
*/
static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer,
IOR_offset_t length, IOR_param_t * param){
int xferRetries = 0;
long long remaining = (long long)length;
char * ptr = (char *)buffer;
long long rc;
long long i;
long long offset_size;
offset_size = param->offset;
if(access == WRITE){
if(param->fsync){
pmem_memcpy_nodrain(&file[offset_size], ptr, length);
}else{
pmem_memcpy_persist(&file[offset_size], ptr, length);
}
}else{
memcpy(ptr, &file[offset_size], length);
}
return(length);
} /* PMDK_Xfer() */
/******************************************************************************/
/*
* Perform fsync().
*/
static void PMDK_Fsync(void *fd, IOR_param_t * param)
{
pmem_drain();
} /* PMDK_Fsync() */
/******************************************************************************/
/*
* Stub for close functionality that is not required for PMDK
*/
static void PMDK_Close(void *fd, IOR_param_t * param){
size_t open_length;
open_length = param->transferSize;
pmem_unmap(fd, open_length);
} /* PMDK_Close() */
/******************************************************************************/
/*
* Delete the file backing a memory space through PMDK
*/
static void PMDK_Delete(char *testFileName, IOR_param_t * param)
{
char errmsg[256];
sprintf(errmsg,"[RANK %03d]:cannot delete file %s\n",rank,testFileName);
if (unlink(testFileName) != 0) WARN(errmsg);
} /* PMDK_Delete() */
/******************************************************************************/
/*
* Determine api version.
*/
static void PMDK_SetVersion(IOR_param_t *test)
{
strcpy(test->apiVersion, test->api);
} /* PMDK_SetVersion() */
/******************************************************************************/
/*
* Use POSIX stat() to return aggregate file size.
*/
static IOR_offset_t PMDK_GetFileSize(IOR_param_t * test,
MPI_Comm testComm,
char * testFileName)
{
struct stat stat_buf;
IOR_offset_t aggFileSizeFromStat,
tmpMin, tmpMax, tmpSum;
if (test->filePerProc == FALSE) {
fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n");
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error");
}
if (stat(testFileName, &stat_buf) != 0) {
ERR("cannot get status of written file");
}
aggFileSizeFromStat = stat_buf.st_size;
MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1,
MPI_LONG_LONG_INT, MPI_SUM, testComm),
"cannot total data moved");
aggFileSizeFromStat = tmpSum;
return(aggFileSizeFromStat);
} /* PMDK_GetFileSize() */

View File

@ -42,6 +42,9 @@ ior_aiori_t *available_aiori[] = {
#ifdef USE_POSIX_AIORI
&posix_aiori,
#endif
#ifdef USE_PMDK_AIORI
&pmdk_aiori,
#endif
#ifdef USE_DAOS_AIORI
&daos_aiori,
&dfs_aiori,

View File

@ -133,6 +133,7 @@ extern ior_aiori_t ime_aiori;
extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori;
extern ior_aiori_t pmdk_aiori;
extern ior_aiori_t mmap_aiori;
extern ior_aiori_t s3_aiori;
extern ior_aiori_t s3_plus_aiori;

View File

@ -771,10 +771,17 @@ void GetTestFileName(char *testFileName, IOR_param_t * test)
char initialTestFileName[MAX_PATHLEN];
char testFileNameRoot[MAX_STR];
char tmpString[MAX_STR];
int count;
int count;
int socket, core;
/* parse filename for multiple file systems */
strcpy(initialTestFileName, test->testFileName);
if(test->dualMount){
GetProcessorAndCore(&socket, &core);
sprintf(tmpString, "%s%d/%s",initialTestFileName,
socket, "data");
strcpy(initialTestFileName, tmpString);
}
fileNames = ParseFileName(initialTestFileName, &count);
if (count > 1 && test->uniqueDir == TRUE)
ERR("cannot use multiple file names with unique directories");

View File

@ -94,6 +94,7 @@ typedef struct
int collective; /* collective I/O */
MPI_Comm testComm; /* MPI communicator */
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
int dualMount; /* dual mount points */
int numTasks; /* number of tasks for test */
int numNodes; /* number of nodes for test */
int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */

View File

@ -60,6 +60,10 @@ static void CheckRunSettings(IOR_test_t *tests)
params->readFile = TRUE;
params->writeFile = TRUE;
}
if(params->dualMount && !params->filePerProc) {
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "Dual Mount can only be used with File Per Process");
}
}
}
@ -117,6 +121,8 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
params->platform = strdup(value);
} else if (strcasecmp(option, "testfile") == 0) {
params->testFileName = strdup(value);
} else if (strcasecmp(option, "dualmount") == 0){
params->dualMount = atoi(value);
} else if (strcasecmp(option, "deadlineforstonewalling") == 0) {
params->deadlineForStonewalling = atoi(value);
} else if (strcasecmp(option, "stoneWallingWearOut") == 0) {
@ -425,6 +431,7 @@ option_help * createGlobalOptions(IOR_param_t * params){
{'W', NULL, "checkWrite -- check read after write", OPTION_FLAG, 'd', & params->checkWrite},
{'x', NULL, "singleXferAttempt -- do not retry transfer if incomplete", OPTION_FLAG, 'd', & params->singleXferAttempt},
{'X', NULL, "reorderTasksRandomSeed -- random seed for -Z option", OPTION_OPTIONAL_ARGUMENT, 'd', & params->reorderTasksRandomSeed},
{'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount},
{'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite},
{'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset},
{'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom},

View File

@ -868,3 +868,25 @@ char *HumanReadable(IOR_offset_t value, int base)
}
return valueStr;
}
#if defined(__aarch64__)
// TODO: This might be general enough to provide the functionality for any system
// regardless of processor type given we aren't worried about thread/process migration.
// Test on Intel systems and see if we can get rid of the architecture specificity
// of the code.
unsigned long GetProcessorAndCore(int *chip, int *core){
return syscall(SYS_getcpu, core, chip, NULL);
}
// TODO: Add in AMD function
#else
// If we're not on an ARM processor assume we're on an intel processor and use the
// rdtscp instruction.
unsigned long GetProcessorAndCore(int *chip, int *core){
unsigned long a,d,c;
__asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c));
*chip = (c & 0xFFF000)>>12;
*core = c & 0xFFF;
return ((unsigned long)a) | (((unsigned long)d) << 32);;
}
#endif

View File

@ -68,6 +68,7 @@ void StoreStoneWallingIterations(char * const filename, int64_t count);
void init_clock(void);
double GetTimeStamp(void);
char * PrintTimestamp(); // TODO remove this function
unsigned long GetProcessorAndCore(int *chip, int *core);
extern double wall_clock_deviation;
extern double wall_clock_delta;