From d7fc07163e3f29ca19227344fb03ce5d4be36e7d Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Wed, 25 Apr 2018 17:21:48 -0500 Subject: [PATCH 1/7] expose generic aiori_ calls for access, etc. --- src/aiori.c | 36 ++++++++++++++++++++++++++---------- src/aiori.h | 7 +++++++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/aiori.c b/src/aiori.c index 95d3a07..ccabcb8 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -61,7 +61,7 @@ ior_aiori_t *available_aiori[] = { * This function provides a AIORI statfs for POSIX-compliant filesystems. It * uses statvfs is available and falls back on statfs. */ -static int aiori_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param) +int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param) { int ret; #if defined(HAVE_STATVFS) @@ -86,44 +86,60 @@ static int aiori_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_par return 0; } -static int aiori_mkdir (const char *path, mode_t mode, IOR_param_t * param) +int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param) { return mkdir (path, mode); } -static int aiori_rmdir (const char *path, IOR_param_t * param) +int aiori_posix_rmdir (const char *path, IOR_param_t * param) { return rmdir (path); } -static int aiori_access (const char *path, int mode, IOR_param_t * param) +int aiori_posix_access (const char *path, int mode, IOR_param_t * param) { return access (path, mode); } -static int aiori_stat (const char *path, struct stat *buf, IOR_param_t * param) +int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param) { return stat (path, buf); } const ior_aiori_t *aiori_select (const char *api) { + char warn_str[256] = {0}; for (ior_aiori_t **tmp = available_aiori ; *tmp != NULL; ++tmp) { if (NULL == api || strcasecmp(api, (*tmp)->name) == 0) { if (NULL == (*tmp)->statfs) { - (*tmp)->statfs = aiori_statfs; + (*tmp)->statfs = aiori_posix_statfs; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s statfs call", api); + WARN(warn_str); } if (NULL == (*tmp)->mkdir) { - (*tmp)->mkdir = aiori_mkdir; + (*tmp)->mkdir = aiori_posix_mkdir; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s mkdir call", api); + WARN(warn_str); } if (NULL == (*tmp)->rmdir) { - (*tmp)->rmdir = aiori_rmdir; + (*tmp)->rmdir = aiori_posix_rmdir; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s rmdir call", api); + WARN(warn_str); } if (NULL == (*tmp)->access) { - (*tmp)->access = aiori_access; + (*tmp)->access = aiori_posix_access; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s access call", api); + WARN(warn_str); } if (NULL == (*tmp)->stat) { - (*tmp)->stat = aiori_stat; + (*tmp)->stat = aiori_posix_stat; + snprintf(warn_str, 256, "assuming POSIX-based backend for" + " %s stat call", api); + WARN(warn_str); } return *tmp; } diff --git a/src/aiori.h b/src/aiori.h index 06f733c..49b8c62 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -92,6 +92,13 @@ const ior_aiori_t *aiori_select (const char *api); int aiori_count (void); const char *aiori_default (void); +/* some generic POSIX-based backend calls */ +int aiori_posix_statfs (const char *path, ior_aiori_statfs_t *stat_buf, IOR_param_t * param); +int aiori_posix_mkdir (const char *path, mode_t mode, IOR_param_t * param); +int aiori_posix_rmdir (const char *path, IOR_param_t * param); +int aiori_posix_access (const char *path, int mode, IOR_param_t * param); +int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param); + IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName); From 60243e02778a95bab65ef1ab318ee2ed3f096882 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Wed, 25 Apr 2018 17:32:16 -0500 Subject: [PATCH 2/7] expose MPIIO access and delete calls --- src/aiori-MPIIO.c | 52 +++++++++++++++++++++++------------------------ src/aiori.h | 3 +++ 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 2fec74f..ac766ff 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -38,10 +38,8 @@ static void *MPIIO_Open(char *, IOR_param_t *); static IOR_offset_t MPIIO_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); static void MPIIO_Close(void *, IOR_param_t *); -static void MPIIO_Delete(char *, IOR_param_t *); static void MPIIO_SetVersion(IOR_param_t *); static void MPIIO_Fsync(void *, IOR_param_t *); -static int MPIIO_Access(const char *, int, IOR_param_t *); /************************** D E C L A R A T I O N S ***************************/ @@ -60,30 +58,6 @@ ior_aiori_t mpiio_aiori = { /***************************** F U N C T I O N S ******************************/ -/* - * Try to access a file through the MPIIO interface. - */ -static int MPIIO_Access(const char *path, int mode, IOR_param_t *param) -{ - MPI_File fd; - int mpi_mode = MPI_MODE_UNIQUE_OPEN; - - if ((mode & W_OK) && (mode & R_OK)) - mpi_mode |= MPI_MODE_RDWR; - else if (mode & W_OK) - mpi_mode |= MPI_MODE_WRONLY; - else - mpi_mode |= MPI_MODE_RDONLY; - - int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, - MPI_INFO_NULL, &fd); - - if (!ret) - MPI_File_close(&fd); - - return ret; -} - /* * Create and open a file through the MPIIO interface. */ @@ -396,7 +370,7 @@ static void MPIIO_Close(void *fd, IOR_param_t * param) /* * Delete a file through the MPIIO interface. */ -static void MPIIO_Delete(char *testFileName, IOR_param_t * param) +void MPIIO_Delete(char *testFileName, IOR_param_t * param) { MPI_CHECK(MPI_File_delete(testFileName, (MPI_Info) MPI_INFO_NULL), "cannot delete file"); @@ -495,3 +469,27 @@ IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, return (aggFileSizeFromStat); } + +/* + * Try to access a file through the MPIIO interface. + */ +int MPIIO_Access(const char *path, int mode, IOR_param_t *param) +{ + MPI_File fd; + int mpi_mode = MPI_MODE_UNIQUE_OPEN; + + if ((mode & W_OK) && (mode & R_OK)) + mpi_mode |= MPI_MODE_RDWR; + else if (mode & W_OK) + mpi_mode |= MPI_MODE_WRONLY; + else + mpi_mode |= MPI_MODE_RDONLY; + + int ret = MPI_File_open(MPI_COMM_SELF, path, mpi_mode, + MPI_INFO_NULL, &fd); + + if (!ret) + MPI_File_close(&fd); + + return ret; +} diff --git a/src/aiori.h b/src/aiori.h index 49b8c62..a54bd59 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -99,7 +99,10 @@ int aiori_posix_rmdir (const char *path, IOR_param_t * param); int aiori_posix_access (const char *path, int mode, IOR_param_t * param); int aiori_posix_stat (const char *path, struct stat *buf, IOR_param_t * param); +/* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ +void MPIIO_Delete(char *testFileName, IOR_param_t * param); IOR_offset_t MPIIO_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName); +int MPIIO_Access(const char *, int, IOR_param_t *); #endif /* not _AIORI_H */ From 7233bee95364e021c3528f80ba38e9ef1a7f418c Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Wed, 25 Apr 2018 17:34:25 -0500 Subject: [PATCH 3/7] make ncmpi/hdf5 use mpiio access/delete --- src/aiori-HDF5.c | 19 ++++++++++++++++--- src/aiori-MPIIO.c | 4 ++++ src/aiori-NCMPI.c | 19 ++++++++++++++++--- src/aiori-POSIX.c | 5 +++++ 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/aiori-HDF5.c b/src/aiori-HDF5.c index e957605..5d629e1 100755 --- a/src/aiori-HDF5.c +++ b/src/aiori-HDF5.c @@ -92,6 +92,7 @@ static void HDF5_Delete(char *, IOR_param_t *); static void HDF5_SetVersion(IOR_param_t *); static void HDF5_Fsync(void *, IOR_param_t *); static IOR_offset_t HDF5_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int HDF5_Access(const char *, int, IOR_param_t *); /************************** D E C L A R A T I O N S ***************************/ @@ -105,6 +106,11 @@ ior_aiori_t hdf5_aiori = { .set_version = HDF5_SetVersion, .fsync = HDF5_Fsync, .get_file_size = HDF5_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .access = HDF5_Access, + .stat = aiori_posix_stat, }; static hid_t xferPropList; /* xfer property list */ @@ -435,8 +441,7 @@ static void HDF5_Close(void *fd, IOR_param_t * param) */ static void HDF5_Delete(char *testFileName, IOR_param_t * param) { - if (unlink(testFileName) != 0) - WARN("cannot delete file"); + return(MPIIO_Delete(testFileName, param)); } /* @@ -565,5 +570,13 @@ static void SetupDataSet(void *fd, IOR_param_t * param) static IOR_offset_t HDF5_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) { - return (MPIIO_GetFileSize(test, testComm, testFileName)); + return(MPIIO_GetFileSize(test, testComm, testFileName)); +} + +/* + * Use MPIIO call to check for access. + */ +static int HDF5_Access(const char *path, int mode, IOR_param_t *param) +{ + return(MPIIO_Access(path, mode, param)); } diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index ac766ff..8f04310 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -53,7 +53,11 @@ ior_aiori_t mpiio_aiori = { .set_version = MPIIO_SetVersion, .fsync = MPIIO_Fsync, .get_file_size = MPIIO_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, .access = MPIIO_Access, + .stat = aiori_posix_stat, }; /***************************** F U N C T I O N S ******************************/ diff --git a/src/aiori-NCMPI.c b/src/aiori-NCMPI.c index 98fd6bd..d990dfd 100755 --- a/src/aiori-NCMPI.c +++ b/src/aiori-NCMPI.c @@ -56,6 +56,7 @@ static void NCMPI_Delete(char *, IOR_param_t *); static void NCMPI_SetVersion(IOR_param_t *); static void NCMPI_Fsync(void *, IOR_param_t *); static IOR_offset_t NCMPI_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int NCMPI_Access(const char *, int, IOR_param_t *); /************************** D E C L A R A T I O N S ***************************/ @@ -69,6 +70,11 @@ ior_aiori_t ncmpi_aiori = { .set_version = NCMPI_SetVersion, .fsync = NCMPI_Fsync, .get_file_size = NCMPI_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .access = NCMPI_Access, + .stat = aiori_posix_stat, }; /***************************** F U N C T I O N S ******************************/ @@ -329,8 +335,7 @@ static void NCMPI_Close(void *fd, IOR_param_t * param) */ static void NCMPI_Delete(char *testFileName, IOR_param_t * param) { - if (unlink(testFileName) != 0) - WARN("unlink() failed"); + return(MPIIO_Delete(testFileName, param)); } /* @@ -387,5 +392,13 @@ static int GetFileMode(IOR_param_t * param) static IOR_offset_t NCMPI_GetFileSize(IOR_param_t * test, MPI_Comm testComm, char *testFileName) { - return (MPIIO_GetFileSize(test, testComm, testFileName)); + return(MPIIO_GetFileSize(test, testComm, testFileName)); +} + +/* + * Use MPIIO call to check for access. + */ +static int NCMPI_Access(const char *path, int mode, IOR_param_t *param) +{ + return(MPIIO_Access(path, mode, param)); } diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 969f7a1..d562ee2 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -87,6 +87,11 @@ ior_aiori_t posix_aiori = { .set_version = POSIX_SetVersion, .fsync = POSIX_Fsync, .get_file_size = POSIX_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .access = aiori_posix_access, + .stat = aiori_posix_stat, }; /***************************** F U N C T I O N S ******************************/ From 9b75f071f47c969562d7455375b546b33146b36d Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Wed, 25 Apr 2018 17:35:10 -0500 Subject: [PATCH 4/7] use backend->access in IOR source --- src/ior.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/ior.c b/src/ior.c index a6029cd..c025e95 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1272,7 +1272,7 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) rankOffset = 0; GetTestFileName(testFileName, test); } - if (access(testFileName, F_OK) == 0) { + if (backend->access(testFileName, F_OK, test) == 0) { backend->delete(testFileName, test); } if (test->reorderTasksRandom == TRUE) { @@ -1280,13 +1280,7 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) GetTestFileName(testFileName, test); } } else { - // BUG: "access()" assumes a POSIX filesystem. Maybe use - // backend->get_file_size(), instead, (and catch - // errors), or extend the aiori struct to include - // something to safely check for existence of the - // "file". - // - if ((rank == 0) && (access(testFileName, F_OK) == 0)) { + if ((rank == 0) && (backend->access(testFileName, F_OK, test) == 0)) { backend->delete(testFileName, test); } } From 76f3d341b99a82663779d4c290a79af1d6001fa8 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Fri, 27 Apr 2018 11:33:52 -0500 Subject: [PATCH 5/7] implement MPIIO sync --- src/aiori-MPIIO.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 8f04310..af8094b 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -342,6 +342,8 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, } } } + if((access == WRITE) && (param->fsyncPerWrite == TRUE)) + MPIIO_Fsync(fd, param); return (length); } @@ -350,7 +352,8 @@ static IOR_offset_t MPIIO_Xfer(int access, void *fd, IOR_size_t * buffer, */ static void MPIIO_Fsync(void *fd, IOR_param_t * param) { - ; + if (MPI_File_sync(*(MPI_File *)fd) != MPI_SUCCESS) + EWARN("fsync() failed"); } /* From 92dfb67e5cbe9ee7ef54b77a6a7cab5e2270a6a9 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Fri, 27 Apr 2018 12:44:20 -0500 Subject: [PATCH 6/7] allow fsync in POSIX, MPIIO, and HDFS --- src/ior.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ior.c b/src/ior.c index c025e95..3d8c3dc 100755 --- a/src/ior.c +++ b/src/ior.c @@ -2371,8 +2371,10 @@ static void ValidateTests(IOR_param_t * test) if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt) WARN_RESET("retry only available in POSIX", test, &defaults, singleXferAttempt); - if ((strcasecmp(test->api, "POSIX") != 0) && test->fsync) - WARN_RESET("fsync() only available in POSIX", + if (((strcasecmp(test->api, "POSIX") != 0) + && (strcasecmp(test->api, "MPIIO") != 0) + && (strcasecmp(test->api, "HDFS") != 0)) && test->fsync) + WARN_RESET("fsync() not supported in selected backend", test, &defaults, fsync); if ((strcasecmp(test->api, "MPIIO") != 0) && test->preallocate) WARN_RESET("preallocation only available in MPIIO", From 7981691a63b3ac348c6222538076e653963abfa9 Mon Sep 17 00:00:00 2001 From: Shane Snyder Date: Fri, 23 Feb 2018 10:38:24 -0600 Subject: [PATCH 7/7] add a new AIORI backend for RADOS inform aiori interface about RADOS backend stubbed out aiori backend for rados additions to get RADOS backend compiling/linking first cut at rados create/open patha make sure to return RADOS oid on open/create implement rados xfer path for WRITE refactor + implement getfilesize and close remember to use read_op interface for stat implement RADOS delete function don't error in RADOS_Delete for now implement RADOS set_version handle open/create flags appropriately cleanup RADOS error handling implement read/readcheck/writecheck for RADOS rados doesn't support directio implement unsupported aiori ops for RADOS implement RADOS access call define rados types if no rados support --- configure.ac | 10 ++ src/Makefile.am | 4 + src/aiori-RADOS.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 1 + src/ior.h | 11 +- 6 files changed, 375 insertions(+), 1 deletion(-) create mode 100755 src/aiori-RADOS.c diff --git a/configure.ac b/configure.ac index 3513c12..730d23b 100755 --- a/configure.ac +++ b/configure.ac @@ -131,6 +131,16 @@ AM_COND_IF([USE_POSIX_AIORI],[ AC_DEFINE([USE_POSIX_AIORI], [], [Build POSIX backend AIORI]) ]) +# RADOS support +AC_ARG_WITH([rados], + [AS_HELP_STRING([--with-rados], + [support IO with librados backend @<:@default=no@:>@])], + [], + [with_rados=no]) +AM_CONDITIONAL([USE_RADOS_AIORI], [test x$with_rados = xyes]) +AM_COND_IF([USE_RADOS_AIORI],[ + AC_DEFINE([USE_RADOS_AIORI], [], [Build RADOS backend AIORI]) +]) diff --git a/src/Makefile.am b/src/Makefile.am index 36bbde7..92011e3 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -49,6 +49,10 @@ if USE_POSIX_AIORI extraSOURCES += aiori-POSIX.c endif +if USE_RADOS_AIORI +extraSOURCES += aiori-RADOS.c +extraLDADD += -lrados +endif if USE_S3_AIORI extraSOURCES += aiori-S3.c diff --git a/src/aiori-RADOS.c b/src/aiori-RADOS.c new file mode 100755 index 0000000..6d88a7c --- /dev/null +++ b/src/aiori-RADOS.c @@ -0,0 +1,347 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +/******************************************************************************\ +* * +* (C) 2015 The University of Chicago * +* * +* See COPYRIGHT in top-level directory. * +* * +******************************************************************************** +* +* Implement abstract I/O interface for RADOS. +* +\******************************************************************************/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include "ior.h" +#include "iordef.h" +#include "aiori.h" +#include "utilities.h" + +/**************************** P R O T O T Y P E S *****************************/ +static void *RADOS_Create(char *, IOR_param_t *); +static void *RADOS_Open(char *, IOR_param_t *); +static IOR_offset_t RADOS_Xfer(int, void *, IOR_size_t *, + IOR_offset_t, IOR_param_t *); +static void RADOS_Close(void *, IOR_param_t *); +static void RADOS_Delete(char *, IOR_param_t *); +static void RADOS_SetVersion(IOR_param_t *); +static void RADOS_Fsync(void *, IOR_param_t *); +static IOR_offset_t RADOS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static int RADOS_StatFS(const char *, ior_aiori_statfs_t *, IOR_param_t *); +static int RADOS_MkDir(const char *, mode_t, IOR_param_t *); +static int RADOS_RmDir(const char *, IOR_param_t *); +static int RADOS_Access(const char *, int, IOR_param_t *); +static int RADOS_Stat(const char *, struct stat *, IOR_param_t *); + +/************************** D E C L A R A T I O N S ***************************/ + +ior_aiori_t rados_aiori = { + .name = "RADOS", + .create = RADOS_Create, + .open = RADOS_Open, + .xfer = RADOS_Xfer, + .close = RADOS_Close, + .delete = RADOS_Delete, + .set_version = RADOS_SetVersion, + .fsync = RADOS_Fsync, + .get_file_size = RADOS_GetFileSize, + .statfs = RADOS_StatFS, + .mkdir = RADOS_MkDir, + .rmdir = RADOS_RmDir, + .access = RADOS_Access, + .stat = RADOS_Stat, +}; + +#define RADOS_ERR(__err_str, __ret) do { \ + errno = -__ret; \ + ERR(__err_str); \ +} while(0) + +/***************************** F U N C T I O N S ******************************/ + +static void RADOS_Cluster_Init(IOR_param_t * param) +{ + int ret; + + /* create RADOS cluster handle */ + /* XXX: HARDCODED RADOS USER NAME */ + ret = rados_create(¶m->rados_cluster, "admin"); + if (ret) + RADOS_ERR("unable to create RADOS cluster handle", ret); + + /* set the handle using the Ceph config */ + /* XXX: HARDCODED RADOS CONF PATH */ + ret = rados_conf_read_file(param->rados_cluster, "/etc/ceph/ceph.conf"); + if (ret) + RADOS_ERR("unable to read RADOS config file", ret); + + /* connect to the RADOS cluster */ + ret = rados_connect(param->rados_cluster); + if (ret) + RADOS_ERR("unable to connect to the RADOS cluster", ret); + + /* create an io context for the pool we are operating on */ + /* XXX: HARDCODED RADOS POOL NAME */ + ret = rados_ioctx_create(param->rados_cluster, "cephfs_data", + ¶m->rados_ioctx); + if (ret) + RADOS_ERR("unable to create an I/O context for the RADOS cluster", ret); + + return; +} + +static void RADOS_Cluster_Finalize(IOR_param_t * param) +{ + /* ioctx destroy */ + rados_ioctx_destroy(param->rados_ioctx); + + /* shutdown */ + rados_shutdown(param->rados_cluster); +} + +static void *RADOS_Create_Or_Open(char *testFileName, IOR_param_t * param, int create_flag) +{ + int ret; + char *oid; + + RADOS_Cluster_Init(param); + + if (param->useO_DIRECT == TRUE) + WARN("direct I/O mode is not implemented in RADOS\n"); + + oid = strdup(testFileName); + if (!oid) + ERR("unable to allocate RADOS oid"); + + if (create_flag) + { + rados_write_op_t create_op; + int rados_create_flag; + + if (param->openFlags & IOR_EXCL) + rados_create_flag = LIBRADOS_CREATE_EXCLUSIVE; + else + rados_create_flag = LIBRADOS_CREATE_IDEMPOTENT; + + /* create a RADOS "write op" for creating the object */ + create_op = rados_create_write_op(); + rados_write_op_create(create_op, rados_create_flag, NULL); + ret = rados_write_op_operate(create_op, param->rados_ioctx, oid, + NULL, 0); + rados_release_write_op(create_op); + if (ret) + RADOS_ERR("unable to create RADOS object", ret); + } + else + { + /* XXX actually, we should probably assert oid existence here? */ + } + + return (void *)oid; +} + +static void *RADOS_Create(char *testFileName, IOR_param_t * param) +{ + return RADOS_Create_Or_Open(testFileName, param, TRUE); +} + +static void *RADOS_Open(char *testFileName, IOR_param_t * param) +{ + if (param->openFlags & IOR_CREAT) + return RADOS_Create_Or_Open(testFileName, param, TRUE); + else + return RADOS_Create_Or_Open(testFileName, param, FALSE); +} + +static IOR_offset_t RADOS_Xfer(int access, void *fd, IOR_size_t * buffer, + IOR_offset_t length, IOR_param_t * param) +{ + int ret; + char *oid = (char *)fd; + + if (access == WRITE) + { + rados_write_op_t write_op; + + write_op = rados_create_write_op(); + rados_write_op_write(write_op, (const char *)buffer, + length, param->offset); + ret = rados_write_op_operate(write_op, param->rados_ioctx, + oid, NULL, 0); + rados_release_write_op(write_op); + if (ret) + RADOS_ERR("unable to write RADOS object", ret); + } + else /* READ */ + { + int read_ret; + size_t bytes_read; + rados_read_op_t read_op; + + read_op = rados_create_read_op(); + rados_read_op_read(read_op, param->offset, length, (char *)buffer, + &bytes_read, &read_ret); + ret = rados_read_op_operate(read_op, param->rados_ioctx, oid, 0); + rados_release_read_op(read_op); + if (ret || read_ret || ((IOR_offset_t)bytes_read != length)) + RADOS_ERR("unable to read RADOS object", ret); + } + + return length; +} + +static void RADOS_Fsync(void *fd, IOR_param_t * param) +{ + return; +} + +static void RADOS_Close(void *fd, IOR_param_t * param) +{ + char *oid = (char *)fd; + + /* object does not need to be "closed", but we should tear the cluster down */ + RADOS_Cluster_Finalize(param); + free(oid); + + return; +} + +static void RADOS_Delete(char *testFileName, IOR_param_t * param) +{ + int ret; + char *oid = testFileName; + rados_write_op_t remove_op; + + /* we have to reestablish cluster connection here... */ + RADOS_Cluster_Init(param); + + /* remove the object */ + remove_op = rados_create_write_op(); + rados_write_op_remove(remove_op); + ret = rados_write_op_operate(remove_op, param->rados_ioctx, + oid, NULL, 0); + rados_release_write_op(remove_op); + if (ret) + RADOS_ERR("unable to remove RADOS object", ret); + + RADOS_Cluster_Finalize(param); + + return; +} + +static void RADOS_SetVersion(IOR_param_t * test) +{ + strcpy(test->apiVersion, test->api); + return; +} + +static IOR_offset_t RADOS_GetFileSize(IOR_param_t * test, MPI_Comm testComm, + char *testFileName) +{ + int ret; + char *oid = testFileName; + rados_read_op_t stat_op; + uint64_t oid_size; + int stat_ret; + IOR_offset_t aggSizeFromStat, tmpMin, tmpMax, tmpSum; + + /* we have to reestablish cluster connection here... */ + RADOS_Cluster_Init(test); + + /* stat the object */ + stat_op = rados_create_read_op(); + rados_read_op_stat(stat_op, &oid_size, NULL, &stat_ret); + ret = rados_read_op_operate(stat_op, test->rados_ioctx, oid, 0); + rados_release_read_op(stat_op); + if (ret || stat_ret) + RADOS_ERR("unable to stat RADOS object", stat_ret); + aggSizeFromStat = oid_size; + + if (test->filePerProc == TRUE) + { + MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, testComm), + "cannot total data moved"); + aggSizeFromStat = tmpSum; + } + else + { + MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpMin, 1, + MPI_LONG_LONG_INT, MPI_MIN, testComm), + "cannot total data moved"); + MPI_CHECK(MPI_Allreduce(&aggSizeFromStat, &tmpMax, 1, + MPI_LONG_LONG_INT, MPI_MAX, testComm), + "cannot total data moved"); + if (tmpMin != tmpMax) + { + if (rank == 0) + WARN("inconsistent file size by different tasks"); + + /* incorrect, but now consistent across tasks */ + aggSizeFromStat = tmpMin; + } + } + + RADOS_Cluster_Finalize(test); + + return aggSizeFromStat; +} + +static int RADOS_StatFS(const char *oid, ior_aiori_statfs_t *stat_buf, + IOR_param_t *param) +{ + WARN("statfs not supported in RADOS backend!"); + return -1; +} + +static int RADOS_MkDir(const char *oid, mode_t mode, IOR_param_t *param) +{ + WARN("mkdir not supported in RADOS backend!"); + return -1; +} + +static int RADOS_RmDir(const char *oid, IOR_param_t *param) +{ + WARN("rmdir not supported in RADOS backend!"); + return -1; +} + +static int RADOS_Access(const char *oid, int mode, IOR_param_t *param) +{ + rados_read_op_t read_op; + int ret; + int prval; + uint64_t oid_size; + + /* we have to reestablish cluster connection here... */ + RADOS_Cluster_Init(param); + + /* use read_op stat to check for oid existence */ + read_op = rados_create_read_op(); + rados_read_op_stat(read_op, &oid_size, NULL, &prval); + ret = rados_read_op_operate(read_op, param->rados_ioctx, oid, 0); + rados_release_read_op(read_op); + + RADOS_Cluster_Finalize(param); + + if (ret | prval) + return -1; + else + return 0; +} + +static int RADOS_Stat(const char *oid, struct stat *buf, IOR_param_t *param) +{ + WARN("stat not supported in RADOS backend!"); + return -1; +} diff --git a/src/aiori.c b/src/aiori.c index ccabcb8..50d5951 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -48,6 +48,9 @@ ior_aiori_t *available_aiori[] = { &s3_aiori, &s3_plus_aiori, &s3_emc_aiori, +#endif +#ifdef USE_RADOS_AIORI + &rados_aiori, #endif NULL }; diff --git a/src/aiori.h b/src/aiori.h index a54bd59..9f9a0d2 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -87,6 +87,7 @@ extern ior_aiori_t posix_aiori; extern ior_aiori_t s3_aiori; extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_emc_aiori; +extern ior_aiori_t rados_aiori; const ior_aiori_t *aiori_select (const char *api); int aiori_count (void); diff --git a/src/ior.h b/src/ior.h index fce447b..3d6314d 100755 --- a/src/ior.h +++ b/src/ior.h @@ -37,7 +37,12 @@ typedef void IOBuf; /* unused, but needs a type */ #endif - +#ifdef USE_RADOS_AIORI +# include +#else + typedef void *rados_t; + typedef void *rados_ioctx_t; +#endif #include "iordef.h" @@ -193,6 +198,10 @@ typedef struct # define MAX_UPLOAD_ID_SIZE 256 /* seems to be 32, actually */ char UploadId[MAX_UPLOAD_ID_SIZE +1]; /* key for multi-part-uploads */ + /* RADOS variables */ + rados_t rados_cluster; /* RADOS cluster handle */ + rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */ + /* NCMPI variables */ int var_id; /* variable id handle for data set */