* Basic support for memory allocation on GPU using CUDA unified memory. Partially addressing #284. IOR support completed. * Support for GPU alloc in MDTest and MD-Workbench * Option: support repeated parsing of same option (allows option sharing across modules). * Checks for gpuDirect * Integrate gpuDirect options and basic hooks, more testing to be done. * POSIX: basic gpuDirect implementation working with fake-gpudirect library. * CUDA allow setting of DeviceID for IOR (not yet MDTest). * CUDA/GPUDirect Support --with-X=<path> * Bugfix in option parser for flags that are part of an argument for an option, e.g., -O=1, if 1 is a flag it is wrongly assumed to be a flag.master
parent
e78613d62d
commit
74df77430f
46
configure.ac
46
configure.ac
|
@ -73,6 +73,52 @@ AS_IF([test "$ac_cv_header_gpfs_h" = "yes" -o "$ac_cv_header_gpfs_fcntl_h" = "ye
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
|
# Check for CUDA
|
||||||
|
AC_ARG_WITH([cuda],
|
||||||
|
[AS_HELP_STRING([--with-cuda],
|
||||||
|
[support configurable CUDA @<:@default=check@:>@])],
|
||||||
|
[], [with_cuda=check])
|
||||||
|
|
||||||
|
AS_IF([test "x$with_cuda" != xno], [
|
||||||
|
LDFLAGS="$LDFLAGS -L$with_cuda/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_cuda/lib64"
|
||||||
|
CPPFLAGS="$CPPFLAGS -I$with_cuda/include"
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS([cuda_runtime.h], [AC_DEFINE([HAVE_CUDA], [], [CUDA GPU API found])], [
|
||||||
|
if test "x$with_cuda" != xcheck; then
|
||||||
|
AC_MSG_FAILURE([--with-cuda was given, <cuda_runtime.h> not found])
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
AS_IF([test "$ac_cv_header_cuda_runtime_h" = "yes"], [
|
||||||
|
AC_SEARCH_LIBS([cudaMalloc], [cudart cudart_static], [],
|
||||||
|
[AC_MSG_ERROR([Library containing cudaMalloc symbol not found])])
|
||||||
|
])
|
||||||
|
])
|
||||||
|
AM_CONDITIONAL([USE_CUDA], [test x$with_cuda = xyes])
|
||||||
|
|
||||||
|
# Check for GPUDirect
|
||||||
|
AC_ARG_WITH([gpuDirect],
|
||||||
|
[AS_HELP_STRING([--with-gpuDirect],
|
||||||
|
[support configurable GPUDirect @<:@default=check@:>@])],
|
||||||
|
[], [with_gpuDirect=check])
|
||||||
|
|
||||||
|
AS_IF([test "x$with_gpuDirect" != xno], [
|
||||||
|
LDFLAGS="$LDFLAGS -L$with_gpuDirect/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_gpuDirect/lib64"
|
||||||
|
CPPFLAGS="$CPPFLAGS -I$with_gpuDirect/include"
|
||||||
|
|
||||||
|
AC_CHECK_HEADERS([cufile.h], [AC_DEFINE([HAVE_GPU_DIRECT], [], [GPUDirect API found])], [
|
||||||
|
if test "x$with_gpuDirect" != xcheck; then
|
||||||
|
AC_MSG_FAILURE([--with-gpuDirect was given, <cufile.h> not found])
|
||||||
|
fi
|
||||||
|
])
|
||||||
|
AS_IF([test "$ac_cv_header_cufile_h" = "yes"], [
|
||||||
|
AC_SEARCH_LIBS([cuFileDriverOpen], [cufile], [],
|
||||||
|
[AC_MSG_ERROR([Library containing cuFileDriverOpen symbol not found])])
|
||||||
|
])
|
||||||
|
])
|
||||||
|
AM_CONDITIONAL([HAVE_GPU_DIRECT], [test x$with_gpuDirect = xyes])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Check for system capabilities
|
# Check for system capabilities
|
||||||
AC_SYS_LARGEFILE
|
AC_SYS_LARGEFILE
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,10 @@ extraLDFLAGS += -L/opt/hadoop-2.2.0/lib/native
|
||||||
extraLDADD += -lhdfs
|
extraLDADD += -lhdfs
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
if USE_CUDA
|
||||||
|
extraLDADD += -lcudart
|
||||||
|
endif
|
||||||
|
|
||||||
if USE_HDF5_AIORI
|
if USE_HDF5_AIORI
|
||||||
extraSOURCES += aiori-HDF5.c
|
extraSOURCES += aiori-HDF5.c
|
||||||
extraLDADD += -lhdf5 -lz
|
extraLDADD += -lhdf5 -lz
|
||||||
|
|
|
@ -57,6 +57,20 @@
|
||||||
|
|
||||||
#include "aiori-POSIX.h"
|
#include "aiori-POSIX.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
typedef long long loff_t;
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <cufile.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
int fd;
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
CUfileHandle_t cf_handle;
|
||||||
|
#endif
|
||||||
|
} posix_fd;
|
||||||
|
|
||||||
|
|
||||||
#ifndef open64 /* necessary for TRU64 -- */
|
#ifndef open64 /* necessary for TRU64 -- */
|
||||||
# define open64 open /* unlikely, but may pose */
|
# define open64 open /* unlikely, but may pose */
|
||||||
#endif /* not open64 */ /* conflicting prototypes */
|
#endif /* not open64 */ /* conflicting prototypes */
|
||||||
|
@ -69,7 +83,30 @@
|
||||||
# define O_BINARY 0
|
# define O_BINARY 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
static const char* cuFileGetErrorString(CUfileError_t status){
|
||||||
|
if(IS_CUDA_ERR(status)){
|
||||||
|
return cudaGetErrorString(status.err);
|
||||||
|
}
|
||||||
|
return strerror(status.err);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void init_cufile(posix_fd * pfd){
|
||||||
|
CUfileDescr_t cf_descr = (CUfileDescr_t){
|
||||||
|
.handle.fd = pfd->fd,
|
||||||
|
.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD
|
||||||
|
};
|
||||||
|
CUfileError_t status = cuFileHandleRegister(& pfd->cf_handle, & cf_descr);
|
||||||
|
if(status.err != CU_FILE_SUCCESS){
|
||||||
|
EWARNF("Could not register handle %s", cuFileGetErrorString(status));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**************************** P R O T O T Y P E S *****************************/
|
/**************************** P R O T O T Y P E S *****************************/
|
||||||
|
static void POSIX_Initialize(aiori_mod_opt_t * options);
|
||||||
|
static void POSIX_Finalize(aiori_mod_opt_t * options);
|
||||||
|
|
||||||
static IOR_offset_t POSIX_Xfer(int, aiori_fd_t *, IOR_size_t *,
|
static IOR_offset_t POSIX_Xfer(int, aiori_fd_t *, IOR_size_t *,
|
||||||
IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *);
|
IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *);
|
||||||
|
|
||||||
|
@ -105,6 +142,9 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o
|
||||||
{0, "posix.lustre.stripesize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size},
|
{0, "posix.lustre.stripesize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size},
|
||||||
{0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost},
|
{0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost},
|
||||||
{0, "posix.lustre.ignorelocks", "", OPTION_FLAG, 'd', & o->lustre_ignore_locks},
|
{0, "posix.lustre.ignorelocks", "", OPTION_FLAG, 'd', & o->lustre_ignore_locks},
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
{0, "gpuDirect", "allocate I/O buffers on the GPU", OPTION_FLAG, 'd', & o->gpuDirect},
|
||||||
#endif
|
#endif
|
||||||
LAST_OPTION
|
LAST_OPTION
|
||||||
};
|
};
|
||||||
|
@ -120,6 +160,8 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o
|
||||||
ior_aiori_t posix_aiori = {
|
ior_aiori_t posix_aiori = {
|
||||||
.name = "POSIX",
|
.name = "POSIX",
|
||||||
.name_legacy = NULL,
|
.name_legacy = NULL,
|
||||||
|
.initialize = POSIX_Initialize,
|
||||||
|
.finalize = POSIX_Finalize,
|
||||||
.create = POSIX_Create,
|
.create = POSIX_Create,
|
||||||
.mknod = POSIX_Mknod,
|
.mknod = POSIX_Mknod,
|
||||||
.open = POSIX_Open,
|
.open = POSIX_Open,
|
||||||
|
@ -156,6 +198,14 @@ int POSIX_check_params(aiori_mod_opt_t * param){
|
||||||
ERR("beegfsChunkSize must be a power of two and >64k");
|
ERR("beegfsChunkSize must be a power of two and >64k");
|
||||||
if(o->lustre_stripe_count != -1 || o->lustre_stripe_size != 0)
|
if(o->lustre_stripe_count != -1 || o->lustre_stripe_size != 0)
|
||||||
o->lustre_set_striping = 1;
|
o->lustre_set_striping = 1;
|
||||||
|
if(o->gpuDirect && ! o->direct_io){
|
||||||
|
ERR("GPUDirect required direct I/O to be used!");
|
||||||
|
}
|
||||||
|
#ifndef HAVE_GPU_DIRECT
|
||||||
|
if(o->gpuDirect){
|
||||||
|
ERR("GPUDirect support is not compiled");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -352,14 +402,10 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
{
|
{
|
||||||
int fd_oflag = O_BINARY;
|
int fd_oflag = O_BINARY;
|
||||||
int mode = 0664;
|
int mode = 0664;
|
||||||
int *fd;
|
posix_fd * pfd = safeMalloc(sizeof(posix_fd));
|
||||||
|
|
||||||
fd = (int *)malloc(sizeof(int));
|
|
||||||
if (fd == NULL)
|
|
||||||
ERR("Unable to malloc file descriptor");
|
|
||||||
posix_options_t * o = (posix_options_t*) param;
|
posix_options_t * o = (posix_options_t*) param;
|
||||||
if (o->direct_io == TRUE){
|
if (o->direct_io == TRUE){
|
||||||
set_o_direct_flag(&fd_oflag);
|
set_o_direct_flag(& fd_oflag);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(hints->dryRun)
|
if(hints->dryRun)
|
||||||
|
@ -378,8 +424,8 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
if (!hints->filePerProc && rank != 0) {
|
if (!hints->filePerProc && rank != 0) {
|
||||||
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
|
MPI_CHECK(MPI_Barrier(testComm), "barrier error");
|
||||||
fd_oflag |= O_RDWR;
|
fd_oflag |= O_RDWR;
|
||||||
*fd = open64(testFileName, fd_oflag, mode);
|
pfd->fd = open64(testFileName, fd_oflag, mode);
|
||||||
if (*fd < 0){
|
if (pfd->fd < 0){
|
||||||
ERRF("open64(\"%s\", %d, %#o) failed. Error: %s",
|
ERRF("open64(\"%s\", %d, %#o) failed. Error: %s",
|
||||||
testFileName, fd_oflag, mode, strerror(errno));
|
testFileName, fd_oflag, mode, strerror(errno));
|
||||||
}
|
}
|
||||||
|
@ -396,16 +442,16 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
* Lustre striping information on a pre-existing file.*/
|
* Lustre striping information on a pre-existing file.*/
|
||||||
|
|
||||||
fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE;
|
fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE;
|
||||||
*fd = open64(testFileName, fd_oflag, mode);
|
pfd->fd = open64(testFileName, fd_oflag, mode);
|
||||||
if (*fd < 0) {
|
if (pfd->fd < 0) {
|
||||||
ERRF("Unable to open '%s': %s\n",
|
ERRF("Unable to open '%s': %s\n",
|
||||||
testFileName, strerror(errno));
|
testFileName, strerror(errno));
|
||||||
} else if (ioctl(*fd, LL_IOC_LOV_SETSTRIPE, &opts)) {
|
} else if (ioctl(pfd->fd, LL_IOC_LOV_SETSTRIPE, &opts)) {
|
||||||
char *errmsg = "stripe already set";
|
char *errmsg = "stripe already set";
|
||||||
if (errno != EEXIST && errno != EALREADY)
|
if (errno != EEXIST && errno != EALREADY)
|
||||||
errmsg = strerror(errno);
|
errmsg = strerror(errno);
|
||||||
ERRF("Error on ioctl for '%s' (%d): %s\n",
|
ERRF("Error on ioctl for '%s' (%d): %s\n",
|
||||||
testFileName, *fd, errmsg);
|
testFileName, pfd->fd, errmsg);
|
||||||
}
|
}
|
||||||
if (!hints->filePerProc)
|
if (!hints->filePerProc)
|
||||||
MPI_CHECK(MPI_Barrier(testComm),
|
MPI_CHECK(MPI_Barrier(testComm),
|
||||||
|
@ -431,8 +477,8 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
}
|
}
|
||||||
#endif /* HAVE_BEEGFS_BEEGFS_H */
|
#endif /* HAVE_BEEGFS_BEEGFS_H */
|
||||||
|
|
||||||
*fd = open64(testFileName, fd_oflag, mode);
|
pfd->fd = open64(testFileName, fd_oflag, mode);
|
||||||
if (*fd < 0){
|
if (pfd->fd < 0){
|
||||||
ERRF("open64(\"%s\", %d, %#o) failed. Error: %s",
|
ERRF("open64(\"%s\", %d, %#o) failed. Error: %s",
|
||||||
testFileName, fd_oflag, mode, strerror(errno));
|
testFileName, fd_oflag, mode, strerror(errno));
|
||||||
}
|
}
|
||||||
|
@ -442,8 +488,8 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
|
|
||||||
if (o->lustre_ignore_locks) {
|
if (o->lustre_ignore_locks) {
|
||||||
int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK;
|
int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK;
|
||||||
if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1)
|
if (ioctl(pfd->fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1)
|
||||||
ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd);
|
ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", pfd->fd);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_LUSTRE_USER */
|
#endif /* HAVE_LUSTRE_USER */
|
||||||
|
|
||||||
|
@ -452,10 +498,15 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
* the intent that we can avoid some byte range lock revocation:
|
* the intent that we can avoid some byte range lock revocation:
|
||||||
* everyone will be writing/reading from individual regions */
|
* everyone will be writing/reading from individual regions */
|
||||||
if (o->gpfs_release_token ) {
|
if (o->gpfs_release_token ) {
|
||||||
gpfs_free_all_locks(*fd);
|
gpfs_free_all_locks(pfd->fd);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return (aiori_fd_t*) fd;
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
if(o->gpuDirect){
|
||||||
|
init_cufile(pfd);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return (aiori_fd_t*) pfd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -477,24 +528,18 @@ int POSIX_Mknod(char *testFileName)
|
||||||
*/
|
*/
|
||||||
aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param)
|
aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
{
|
{
|
||||||
int fd_oflag = O_BINARY;
|
int fd_oflag = O_BINARY | O_RDWR;
|
||||||
int *fd;
|
posix_fd * pfd = safeMalloc(sizeof(posix_fd));
|
||||||
|
|
||||||
fd = (int *)malloc(sizeof(int));
|
|
||||||
if (fd == NULL)
|
|
||||||
ERR("Unable to malloc file descriptor");
|
|
||||||
|
|
||||||
posix_options_t * o = (posix_options_t*) param;
|
posix_options_t * o = (posix_options_t*) param;
|
||||||
if (o->direct_io == TRUE)
|
if (o->direct_io == TRUE){
|
||||||
set_o_direct_flag(&fd_oflag);
|
set_o_direct_flag(&fd_oflag);
|
||||||
|
}
|
||||||
fd_oflag |= O_RDWR;
|
|
||||||
|
|
||||||
if(hints->dryRun)
|
if(hints->dryRun)
|
||||||
return (aiori_fd_t*) 0;
|
return (aiori_fd_t*) 0;
|
||||||
|
|
||||||
*fd = open64(testFileName, fd_oflag);
|
pfd->fd = open64(testFileName, fd_oflag);
|
||||||
if (*fd < 0)
|
if (pfd->fd < 0)
|
||||||
ERRF("open64(\"%s\", %d) failed: %s", testFileName, fd_oflag, strerror(errno));
|
ERRF("open64(\"%s\", %d) failed: %s", testFileName, fd_oflag, strerror(errno));
|
||||||
|
|
||||||
#ifdef HAVE_LUSTRE_USER
|
#ifdef HAVE_LUSTRE_USER
|
||||||
|
@ -503,17 +548,22 @@ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param)
|
||||||
if (verbose >= VERBOSE_1) {
|
if (verbose >= VERBOSE_1) {
|
||||||
EINFO("** Disabling lustre range locking **\n");
|
EINFO("** Disabling lustre range locking **\n");
|
||||||
}
|
}
|
||||||
if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1)
|
if (ioctl(pfd->fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1)
|
||||||
ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd);
|
ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", pfd->fd);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_LUSTRE_USER */
|
#endif /* HAVE_LUSTRE_USER */
|
||||||
|
|
||||||
#ifdef HAVE_GPFS_FCNTL_H
|
#ifdef HAVE_GPFS_FCNTL_H
|
||||||
if(o->gpfs_release_token) {
|
if(o->gpfs_release_token) {
|
||||||
gpfs_free_all_locks(*fd);
|
gpfs_free_all_locks(pfd->fd);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return (aiori_fd_t*) fd;
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
if(o->gpuDirect){
|
||||||
|
init_cufile(pfd);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return (aiori_fd_t*) pfd;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -532,7 +582,8 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
|
||||||
if(hints->dryRun)
|
if(hints->dryRun)
|
||||||
return length;
|
return length;
|
||||||
|
|
||||||
fd = *(int *)file;
|
posix_fd * pfd = (posix_fd *) file;
|
||||||
|
fd = pfd->fd;
|
||||||
|
|
||||||
#ifdef HAVE_GPFS_FCNTL_H
|
#ifdef HAVE_GPFS_FCNTL_H
|
||||||
if (o->gpfs_hint_access) {
|
if (o->gpfs_hint_access) {
|
||||||
|
@ -544,7 +595,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
|
||||||
/* seek to offset */
|
/* seek to offset */
|
||||||
if (lseek64(fd, offset, SEEK_SET) == -1)
|
if (lseek64(fd, offset, SEEK_SET) == -1)
|
||||||
ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, offset);
|
ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, offset);
|
||||||
|
off_t mem_offset = 0;
|
||||||
while (remaining > 0) {
|
while (remaining > 0) {
|
||||||
/* write/read file */
|
/* write/read file */
|
||||||
if (access == WRITE) { /* WRITE */
|
if (access == WRITE) { /* WRITE */
|
||||||
|
@ -553,7 +604,15 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
|
||||||
rank,
|
rank,
|
||||||
offset + length - remaining);
|
offset + length - remaining);
|
||||||
}
|
}
|
||||||
rc = write(fd, ptr, remaining);
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
if(o->gpuDirect){
|
||||||
|
rc = cuFileWrite(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset);
|
||||||
|
}else{
|
||||||
|
#endif
|
||||||
|
rc = write(fd, ptr, remaining);
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (rc == -1)
|
if (rc == -1)
|
||||||
ERRF("write(%d, %p, %lld) failed",
|
ERRF("write(%d, %p, %lld) failed",
|
||||||
fd, (void*)ptr, remaining);
|
fd, (void*)ptr, remaining);
|
||||||
|
@ -566,7 +625,15 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
|
||||||
rank,
|
rank,
|
||||||
offset + length - remaining);
|
offset + length - remaining);
|
||||||
}
|
}
|
||||||
rc = read(fd, ptr, remaining);
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
if(o->gpuDirect){
|
||||||
|
rc = cuFileRead(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset);
|
||||||
|
}else{
|
||||||
|
#endif
|
||||||
|
rc = read(fd, ptr, remaining);
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
}
|
||||||
|
#endif
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
ERRF("read(%d, %p, %lld) returned EOF prematurely",
|
ERRF("read(%d, %p, %lld) returned EOF prematurely",
|
||||||
fd, (void*)ptr, remaining);
|
fd, (void*)ptr, remaining);
|
||||||
|
@ -587,6 +654,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
|
||||||
assert(rc <= remaining);
|
assert(rc <= remaining);
|
||||||
remaining -= rc;
|
remaining -= rc;
|
||||||
ptr += rc;
|
ptr += rc;
|
||||||
|
mem_offset += rc;
|
||||||
xferRetries++;
|
xferRetries++;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_GPFS_FCNTL_H
|
#ifdef HAVE_GPFS_FCNTL_H
|
||||||
|
@ -597,10 +665,11 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer
|
||||||
return (length);
|
return (length);
|
||||||
}
|
}
|
||||||
|
|
||||||
void POSIX_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param)
|
void POSIX_Fsync(aiori_fd_t *afd, aiori_mod_opt_t * param)
|
||||||
{
|
{
|
||||||
if (fsync(*(int *)fd) != 0)
|
int fd = ((posix_fd*) afd)->fd;
|
||||||
EWARNF("fsync(%d) failed", *(int *)fd);
|
if (fsync(fd) != 0)
|
||||||
|
EWARNF("fsync(%d) failed", fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -616,13 +685,21 @@ void POSIX_Sync(aiori_mod_opt_t * param)
|
||||||
/*
|
/*
|
||||||
* Close a file through the POSIX interface.
|
* Close a file through the POSIX interface.
|
||||||
*/
|
*/
|
||||||
void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * param)
|
void POSIX_Close(aiori_fd_t *afd, aiori_mod_opt_t * param)
|
||||||
{
|
{
|
||||||
if(hints->dryRun)
|
if(hints->dryRun)
|
||||||
return;
|
return;
|
||||||
if (close(*(int *)fd) != 0)
|
posix_options_t * o = (posix_options_t*) param;
|
||||||
ERRF("close(%d) failed", *(int *)fd);
|
int fd = ((posix_fd*) afd)->fd;
|
||||||
free(fd);
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
if(o->gpuDirect){
|
||||||
|
cuFileHandleDeregister(((posix_fd*) afd)->cf_handle);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if (close(fd) != 0){
|
||||||
|
ERRF("close(%d) failed", fd);
|
||||||
|
}
|
||||||
|
free(afd);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -665,3 +742,15 @@ IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName)
|
||||||
|
|
||||||
return (aggFileSizeFromStat);
|
return (aggFileSizeFromStat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void POSIX_Initialize(aiori_mod_opt_t * options){
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
CUfileError_t err = cuFileDriverOpen();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void POSIX_Finalize(aiori_mod_opt_t * options){
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
CUfileError_t err = cuFileDriverClose();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ typedef struct{
|
||||||
/* beegfs variables */
|
/* beegfs variables */
|
||||||
int beegfs_numTargets; /* number storage targets to use */
|
int beegfs_numTargets; /* number storage targets to use */
|
||||||
int beegfs_chunkSize; /* srtipe pattern for new files */
|
int beegfs_chunkSize; /* srtipe pattern for new files */
|
||||||
|
int gpuDirect;
|
||||||
} posix_options_t;
|
} posix_options_t;
|
||||||
|
|
||||||
void POSIX_Sync(aiori_mod_opt_t * param);
|
void POSIX_Sync(aiori_mod_opt_t * param);
|
||||||
|
|
57
src/ior.c
57
src/ior.c
|
@ -33,6 +33,10 @@
|
||||||
# include <sys/utsname.h> /* uname() */
|
# include <sys/utsname.h> /* uname() */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#include "ior.h"
|
#include "ior.h"
|
||||||
|
@ -113,6 +117,13 @@ static int test_initialize(IOR_test_t * test){
|
||||||
verbose = test->params.verbose;
|
verbose = test->params.verbose;
|
||||||
backend = test->params.backend;
|
backend = test->params.backend;
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
cudaError_t cret = cudaSetDevice(test->params.gpuID);
|
||||||
|
if(cret != cudaSuccess){
|
||||||
|
EWARNF("cudaSetDevice(%d) error: %s", test->params.gpuID, cudaGetErrorString(cret));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if(backend->initialize){
|
if(backend->initialize){
|
||||||
backend->initialize(test->params.backend_options);
|
backend->initialize(test->params.backend_options);
|
||||||
}
|
}
|
||||||
|
@ -507,44 +518,6 @@ static int CountErrors(IOR_param_t * test, int access, int errors)
|
||||||
return (allErrors);
|
return (allErrors);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Allocate a page-aligned (required by O_DIRECT) buffer.
|
|
||||||
*/
|
|
||||||
static void *aligned_buffer_alloc(size_t size)
|
|
||||||
{
|
|
||||||
size_t pageMask;
|
|
||||||
char *buf, *tmp;
|
|
||||||
char *aligned;
|
|
||||||
|
|
||||||
#ifdef HAVE_SYSCONF
|
|
||||||
long pageSize = sysconf(_SC_PAGESIZE);
|
|
||||||
#else
|
|
||||||
size_t pageSize = getpagesize();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
pageMask = pageSize - 1;
|
|
||||||
buf = malloc(size + pageSize + sizeof(void *));
|
|
||||||
if (buf == NULL)
|
|
||||||
ERR("out of memory");
|
|
||||||
/* find the alinged buffer */
|
|
||||||
tmp = buf + sizeof(char *);
|
|
||||||
aligned = tmp + pageSize - ((size_t) tmp & pageMask);
|
|
||||||
/* write a pointer to the original malloc()ed buffer into the bytes
|
|
||||||
preceding "aligned", so that the aligned buffer can later be free()ed */
|
|
||||||
tmp = aligned - sizeof(void *);
|
|
||||||
*(void **)tmp = buf;
|
|
||||||
|
|
||||||
return (void *)aligned;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Free a buffer allocated by aligned_buffer_alloc().
|
|
||||||
*/
|
|
||||||
static void aligned_buffer_free(void *buf)
|
|
||||||
{
|
|
||||||
free(*(void **)((char *)buf - sizeof(char *)));
|
|
||||||
}
|
|
||||||
|
|
||||||
void AllocResults(IOR_test_t *test)
|
void AllocResults(IOR_test_t *test)
|
||||||
{
|
{
|
||||||
int reps;
|
int reps;
|
||||||
|
@ -1053,7 +1026,7 @@ static void InitTests(IOR_test_t *tests)
|
||||||
static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test,
|
static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test,
|
||||||
int pretendRank)
|
int pretendRank)
|
||||||
{
|
{
|
||||||
ioBuffers->buffer = aligned_buffer_alloc(test->transferSize);
|
ioBuffers->buffer = aligned_buffer_alloc(test->transferSize, test->gpuMemoryFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1062,7 +1035,7 @@ static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test,
|
||||||
static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test)
|
static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test)
|
||||||
|
|
||||||
{
|
{
|
||||||
aligned_buffer_free(ioBuffers->buffer);
|
aligned_buffer_free(ioBuffers->buffer, test->gpuMemoryFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1878,7 +1851,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
|
||||||
|
|
||||||
void * randomPrefillBuffer = NULL;
|
void * randomPrefillBuffer = NULL;
|
||||||
if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){
|
if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){
|
||||||
randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize);
|
randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize, test->gpuMemoryFlags);
|
||||||
// store invalid data into the buffer
|
// store invalid data into the buffer
|
||||||
memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize);
|
memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize);
|
||||||
}
|
}
|
||||||
|
@ -2000,7 +1973,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results,
|
||||||
backend->fsync(fd, test->backend_options); /*fsync after all accesses */
|
backend->fsync(fd, test->backend_options); /*fsync after all accesses */
|
||||||
}
|
}
|
||||||
if(randomPrefillBuffer){
|
if(randomPrefillBuffer){
|
||||||
aligned_buffer_free(randomPrefillBuffer);
|
aligned_buffer_free(randomPrefillBuffer, test->gpuMemoryFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
return (dataMoved);
|
return (dataMoved);
|
||||||
|
|
10
src/ior.h
10
src/ior.h
|
@ -58,6 +58,11 @@ enum PACKET_TYPE
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
typedef enum{
|
||||||
|
IOR_MEMORY_TYPE_CPU = 0,
|
||||||
|
IOR_MEMORY_TYPE_GPU_MANAGED = 1,
|
||||||
|
IOR_MEMORY_TYPE_GPU_DEVICE_ONLY = 2,
|
||||||
|
} ior_memory_flags;
|
||||||
|
|
||||||
|
|
||||||
/***************** IOR_BUFFERS *************************************************/
|
/***************** IOR_BUFFERS *************************************************/
|
||||||
|
@ -101,7 +106,10 @@ typedef struct
|
||||||
MPI_Comm testComm; /* Current MPI communicator */
|
MPI_Comm testComm; /* Current MPI communicator */
|
||||||
MPI_Comm mpi_comm_world; /* The global MPI communicator */
|
MPI_Comm mpi_comm_world; /* The global MPI communicator */
|
||||||
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
|
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
|
||||||
int dualMount; /* dual mount points */
|
int dualMount; /* dual mount points */
|
||||||
|
ior_memory_flags gpuMemoryFlags; /* use the GPU to store the data */
|
||||||
|
int gpuDirect; /* use gpuDirect, this influences gpuMemoryFlags as well */
|
||||||
|
int gpuID; /* the GPU to use for gpuDirect or memory options */
|
||||||
int numTasks; /* number of tasks for test */
|
int numTasks; /* number of tasks for test */
|
||||||
int numNodes; /* number of nodes for test */
|
int numNodes; /* number of nodes for test */
|
||||||
int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */
|
int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */
|
||||||
|
|
|
@ -92,6 +92,7 @@ struct benchmark_options{
|
||||||
int read_only;
|
int read_only;
|
||||||
int stonewall_timer;
|
int stonewall_timer;
|
||||||
int stonewall_timer_wear_out;
|
int stonewall_timer_wear_out;
|
||||||
|
int gpu_memory_flags; /* use the GPU to store the data */
|
||||||
|
|
||||||
char * latency_file_prefix;
|
char * latency_file_prefix;
|
||||||
int latency_keep_all;
|
int latency_keep_all;
|
||||||
|
@ -381,7 +382,7 @@ static void compute_histogram(const char * name, time_result_t * times, time_sta
|
||||||
sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name);
|
sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name);
|
||||||
FILE * f = fopen(file, "w+");
|
FILE * f = fopen(file, "w+");
|
||||||
if(f == NULL){
|
if(f == NULL){
|
||||||
ERRF("%d: Error writing to latency file: %s\n", o.rank, file);
|
ERRF("%d: Error writing to latency file: %s", o.rank, file);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fprintf(f, "time,runtime\n");
|
fprintf(f, "time,runtime\n");
|
||||||
|
@ -546,12 +547,12 @@ void run_precreate(phase_stat_t * s, int current_index){
|
||||||
}else{
|
}else{
|
||||||
s->dset_create.err++;
|
s->dset_create.err++;
|
||||||
if (! o.ignore_precreate_errors){
|
if (! o.ignore_precreate_errors){
|
||||||
ERRF("%d: Error while creating the dset: %s\n", o.rank, dset);
|
ERRF("%d: Error while creating the dset: %s", o.rank, dset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
char * buf = malloc(o.file_size);
|
char * buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags);
|
||||||
generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, o.rank);
|
generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, o.rank);
|
||||||
double op_timer; // timer for individual operations
|
double op_timer; // timer for individual operations
|
||||||
size_t pos = -1; // position inside the individual measurement array
|
size_t pos = -1; // position inside the individual measurement array
|
||||||
|
@ -574,7 +575,7 @@ void run_precreate(phase_stat_t * s, int current_index){
|
||||||
}else{
|
}else{
|
||||||
s->obj_create.err++;
|
s->obj_create.err++;
|
||||||
if (! o.ignore_precreate_errors){
|
if (! o.ignore_precreate_errors){
|
||||||
ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name);
|
ERRF("%d: Error while creating the obj: %s", o.rank, obj_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
o.backend->close(aiori_fh, o.backend_options);
|
o.backend->close(aiori_fh, o.backend_options);
|
||||||
|
@ -586,14 +587,14 @@ void run_precreate(phase_stat_t * s, int current_index){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(buf);
|
aligned_buffer_free(buf, o.gpu_memory_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FIFO: create a new file, write to it. Then read from the first created file, delete it... */
|
/* FIFO: create a new file, write to it. Then read from the first created file, delete it... */
|
||||||
void run_benchmark(phase_stat_t * s, int * current_index_p){
|
void run_benchmark(phase_stat_t * s, int * current_index_p){
|
||||||
char obj_name[MAX_PATHLEN];
|
char obj_name[MAX_PATHLEN];
|
||||||
int ret;
|
int ret;
|
||||||
char * buf = malloc(o.file_size);
|
char * buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags);
|
||||||
memset(buf, o.rank % 256, o.file_size);
|
memset(buf, o.rank % 256, o.file_size);
|
||||||
double op_timer; // timer for individual operations
|
double op_timer; // timer for individual operations
|
||||||
size_t pos = -1; // position inside the individual measurement array
|
size_t pos = -1; // position inside the individual measurement array
|
||||||
|
@ -632,7 +633,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
|
||||||
|
|
||||||
if(ret != 0){
|
if(ret != 0){
|
||||||
if (o.verbosity)
|
if (o.verbosity)
|
||||||
ERRF("%d: Error while stating the obj: %s\n", o.rank, obj_name);
|
ERRF("%d: Error while stating the obj: %s", o.rank, obj_name);
|
||||||
s->obj_stat.err++;
|
s->obj_stat.err++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -704,7 +705,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
|
||||||
o.backend->close(aiori_fh, o.backend_options);
|
o.backend->close(aiori_fh, o.backend_options);
|
||||||
}else{
|
}else{
|
||||||
if (! o.ignore_precreate_errors){
|
if (! o.ignore_precreate_errors){
|
||||||
ERRF("Unable to open file %s", obj_name);
|
ERRF("%d: Error while creating the obj: %s", o.rank, obj_name);
|
||||||
}
|
}
|
||||||
EWARNF("Unable to open file %s", obj_name);
|
EWARNF("Unable to open file %s", obj_name);
|
||||||
s->obj_create.err++;
|
s->obj_create.err++;
|
||||||
|
@ -761,7 +762,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
|
||||||
*current_index_p += f;
|
*current_index_p += f;
|
||||||
}
|
}
|
||||||
s->repeats = pos + 1;
|
s->repeats = pos + 1;
|
||||||
free(buf);
|
aligned_buffer_free(buf, o.gpu_memory_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
void run_cleanup(phase_stat_t * s, int start_index){
|
void run_cleanup(phase_stat_t * s, int start_index){
|
||||||
|
@ -822,6 +823,7 @@ static option_help options [] = {
|
||||||
{'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer},
|
{'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer},
|
||||||
{'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out},
|
{'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out},
|
||||||
{'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read},
|
{'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read},
|
||||||
|
{0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags},
|
||||||
{0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number},
|
{0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number},
|
||||||
{0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats},
|
{0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats},
|
||||||
{0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only},
|
{0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only},
|
||||||
|
@ -844,12 +846,12 @@ static int return_position(){
|
||||||
if( o.rank == 0){
|
if( o.rank == 0){
|
||||||
FILE * f = fopen(o.run_info_file, "r");
|
FILE * f = fopen(o.run_info_file, "r");
|
||||||
if(! f){
|
if(! f){
|
||||||
ERRF("[ERROR] Could not open %s for restart\n", o.run_info_file);
|
ERRF("[ERROR] Could not open %s for restart", o.run_info_file);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
ret = fscanf(f, "pos: %d", & position);
|
ret = fscanf(f, "pos: %d", & position);
|
||||||
if (ret != 1){
|
if (ret != 1){
|
||||||
ERRF("Could not read from %s for restart\n", o.run_info_file);
|
ERRF("Could not read from %s for restart", o.run_info_file);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
@ -864,7 +866,7 @@ static void store_position(int position){
|
||||||
}
|
}
|
||||||
FILE * f = fopen(o.run_info_file, "w");
|
FILE * f = fopen(o.run_info_file, "w");
|
||||||
if(! f){
|
if(! f){
|
||||||
ERRF("[ERROR] Could not open %s for saving data\n", o.run_info_file);
|
ERRF("[ERROR] Could not open %s for saving data", o.run_info_file);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
fprintf(f, "pos: %d\n", position);
|
fprintf(f, "pos: %d\n", position);
|
||||||
|
|
16
src/mdtest.c
16
src/mdtest.c
|
@ -110,6 +110,7 @@ typedef struct {
|
||||||
char unique_rm_uni_dir[MAX_PATHLEN];
|
char unique_rm_uni_dir[MAX_PATHLEN];
|
||||||
char *write_buffer;
|
char *write_buffer;
|
||||||
char *stoneWallingStatusFile;
|
char *stoneWallingStatusFile;
|
||||||
|
int gpu_memory_flags;
|
||||||
|
|
||||||
|
|
||||||
int barriers;
|
int barriers;
|
||||||
|
@ -641,10 +642,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
|
||||||
|
|
||||||
/* allocate read buffer */
|
/* allocate read buffer */
|
||||||
if (o.read_bytes > 0) {
|
if (o.read_bytes > 0) {
|
||||||
int alloc_res = posix_memalign((void**)&read_buffer, sysconf(_SC_PAGESIZE), o.read_bytes);
|
read_buffer = aligned_buffer_alloc(o.read_bytes, o.gpu_memory_flags);
|
||||||
if (alloc_res) {
|
|
||||||
FAIL("out of memory");
|
|
||||||
}
|
|
||||||
memset(read_buffer, -1, o.read_bytes);
|
memset(read_buffer, -1, o.read_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -743,7 +741,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
|
||||||
o.backend->close (aiori_fh, o.backend_options);
|
o.backend->close (aiori_fh, o.backend_options);
|
||||||
}
|
}
|
||||||
if(o.read_bytes){
|
if(o.read_bytes){
|
||||||
free(read_buffer);
|
aligned_buffer_free(read_buffer, o.gpu_memory_flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2236,6 +2234,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
{'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync},
|
{'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync},
|
||||||
{'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth},
|
{'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth},
|
||||||
{'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time},
|
{'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time},
|
||||||
|
{0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags},
|
||||||
{0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors},
|
{0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors},
|
||||||
{0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV},
|
{0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV},
|
||||||
LAST_OPTION
|
LAST_OPTION
|
||||||
|
@ -2420,10 +2419,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
|
|
||||||
/* allocate and initialize write buffer with # */
|
/* allocate and initialize write buffer with # */
|
||||||
if (o.write_bytes > 0) {
|
if (o.write_bytes > 0) {
|
||||||
int alloc_res = posix_memalign((void**)& o.write_buffer, sysconf(_SC_PAGESIZE), o.write_bytes);
|
o.write_buffer = aligned_buffer_alloc(o.write_bytes, o.gpu_memory_flags);
|
||||||
if (alloc_res) {
|
|
||||||
FAIL("out of memory");
|
|
||||||
}
|
|
||||||
generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
|
generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2560,7 +2556,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
}
|
}
|
||||||
|
|
||||||
if (o.write_bytes > 0) {
|
if (o.write_bytes > 0) {
|
||||||
free(o.write_buffer);
|
aligned_buffer_free(o.write_buffer, o.gpu_memory_flags);
|
||||||
}
|
}
|
||||||
free(o.summary_table);
|
free(o.summary_table);
|
||||||
|
|
||||||
|
|
|
@ -264,11 +264,13 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
txt++;
|
txt++;
|
||||||
|
int parsed = 0;
|
||||||
|
|
||||||
|
// printf("Parsing: %s : %s\n", txt, arg);
|
||||||
// support groups of multiple flags like -vvv or -vq
|
// support groups of multiple flags like -vvv or -vq
|
||||||
for(int flag_index = 0; flag_index < strlen(txt); ++flag_index){
|
for(int flag_index = 0; flag_index < strlen(txt); ++flag_index){
|
||||||
// don't loop looking for multiple flags if we already processed a long option
|
// don't loop looking for multiple flags if we already processed a long option
|
||||||
if(txt[0] == '-' && flag_index > 0)
|
if(txt[flag_index] == '=' || (txt[0] == '-' && flag_index > 0))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
for(int m = 0; m < opt_all->module_count; m++ ){
|
for(int m = 0; m < opt_all->module_count; m++ ){
|
||||||
|
@ -281,6 +283,7 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ( (o->shortVar == txt[flag_index]) || (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, o->longVar) == 0)){
|
if ( (o->shortVar == txt[flag_index]) || (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, o->longVar) == 0)){
|
||||||
|
// printf("Found %s %c=%c? %d %d\n", o->help, o->shortVar, txt[flag_index], (o->shortVar == txt[flag_index]), (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, o->longVar) == 0));
|
||||||
// now process the option.
|
// now process the option.
|
||||||
switch(o->arg){
|
switch(o->arg){
|
||||||
case (OPTION_FLAG):{
|
case (OPTION_FLAG):{
|
||||||
|
@ -370,11 +373,12 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi
|
||||||
(*requiredArgsSeen)++;
|
(*requiredArgsSeen)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
parsed = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(parsed) return;
|
||||||
|
|
||||||
if(strcmp(txt, "h") == 0 || strcmp(txt, "-help") == 0){
|
if(strcmp(txt, "h") == 0 || strcmp(txt, "-help") == 0){
|
||||||
*print_help = 1;
|
*print_help = 1;
|
||||||
|
|
|
@ -62,7 +62,17 @@ static void CheckRunSettings(IOR_test_t *tests)
|
||||||
}
|
}
|
||||||
|
|
||||||
if(params->dualMount && !params->filePerProc) {
|
if(params->dualMount && !params->filePerProc) {
|
||||||
MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "Dual Mount can only be used with File Per Process");
|
ERR("Dual Mount can only be used with File Per Process");
|
||||||
|
}
|
||||||
|
|
||||||
|
if(params->gpuDirect){
|
||||||
|
if(params->gpuMemoryFlags == IOR_MEMORY_TYPE_GPU_MANAGED){
|
||||||
|
ERR("GPUDirect cannot be used with managed memory");
|
||||||
|
}
|
||||||
|
params->gpuMemoryFlags = IOR_MEMORY_TYPE_GPU_DEVICE_ONLY;
|
||||||
|
if(params->checkRead || params->checkWrite){
|
||||||
|
ERR("GPUDirect data cannot yet be checked");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -138,6 +148,12 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt
|
||||||
params->testFileName = strdup(value);
|
params->testFileName = strdup(value);
|
||||||
} else if (strcasecmp(option, "dualmount") == 0){
|
} else if (strcasecmp(option, "dualmount") == 0){
|
||||||
params->dualMount = atoi(value);
|
params->dualMount = atoi(value);
|
||||||
|
} else if (strcasecmp(option, "allocateBufferOnGPU") == 0) {
|
||||||
|
params->gpuMemoryFlags = atoi(value);
|
||||||
|
} else if (strcasecmp(option, "GPUid") == 0) {
|
||||||
|
params->gpuID = atoi(value);
|
||||||
|
} else if (strcasecmp(option, "GPUDirect") == 0) {
|
||||||
|
params->gpuDirect = atoi(value);
|
||||||
} else if (strcasecmp(option, "deadlineforstonewalling") == 0) {
|
} else if (strcasecmp(option, "deadlineforstonewalling") == 0) {
|
||||||
params->deadlineForStonewalling = atoi(value);
|
params->deadlineForStonewalling = atoi(value);
|
||||||
} else if (strcasecmp(option, "stoneWallingWearOut") == 0) {
|
} else if (strcasecmp(option, "stoneWallingWearOut") == 0) {
|
||||||
|
@ -413,6 +429,13 @@ option_help * createGlobalOptions(IOR_param_t * params){
|
||||||
{.help=" -O stoneWallingWearOut=1 -- once the stonewalling timeout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT},
|
{.help=" -O stoneWallingWearOut=1 -- once the stonewalling timeout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT},
|
||||||
{.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT},
|
{.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT},
|
||||||
{.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT},
|
{.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT},
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
{.help=" -O allocateBufferOnGPU=X -- allocate I/O buffers on the GPU: X=1 uses managed memory, X=2 device memory.", .arg = OPTION_OPTIONAL_ARGUMENT},
|
||||||
|
{.help=" -O GPUid=X -- select the GPU to use.", .arg = OPTION_OPTIONAL_ARGUMENT},
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
{0, "gpuDirect", "allocate I/O buffers on the GPU and use gpuDirect to store data; this option is incompatible with any option requiring CPU access to data.", OPTION_FLAG, 'd', & params->gpuDirect},
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
{'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync},
|
{'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync},
|
||||||
{'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & params->useExistingTestFile},
|
{'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & params->useExistingTestFile},
|
||||||
{'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts},
|
{'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts},
|
||||||
|
|
|
@ -37,6 +37,10 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef _WIN32
|
#ifndef _WIN32
|
||||||
# include <regex.h>
|
# include <regex.h>
|
||||||
# ifdef __sun /* SunOS does not support statfs(), instead uses statvfs() */
|
# ifdef __sun /* SunOS does not support statfs(), instead uses statvfs() */
|
||||||
|
@ -210,7 +214,7 @@ void updateParsedOptions(IOR_param_t * options, options_all_t * global_options){
|
||||||
/* Used in aiori-POSIX.c and aiori-PLFS.c
|
/* Used in aiori-POSIX.c and aiori-PLFS.c
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void set_o_direct_flag(int *fd)
|
void set_o_direct_flag(int *flag)
|
||||||
{
|
{
|
||||||
/* note that TRU64 needs O_DIRECTIO, SunOS uses directio(),
|
/* note that TRU64 needs O_DIRECTIO, SunOS uses directio(),
|
||||||
and everyone else needs O_DIRECT */
|
and everyone else needs O_DIRECT */
|
||||||
|
@ -223,7 +227,7 @@ void set_o_direct_flag(int *fd)
|
||||||
# endif /* not O_DIRECTIO */
|
# endif /* not O_DIRECTIO */
|
||||||
#endif /* not O_DIRECT */
|
#endif /* not O_DIRECT */
|
||||||
|
|
||||||
*fd |= O_DIRECT;
|
*flag |= O_DIRECT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -911,3 +915,72 @@ unsigned long GetProcessorAndCore(int *chip, int *core){
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a page-aligned (required by O_DIRECT) buffer.
|
||||||
|
*/
|
||||||
|
void *aligned_buffer_alloc(size_t size, ior_memory_flags type)
|
||||||
|
{
|
||||||
|
size_t pageMask;
|
||||||
|
char *buf, *tmp;
|
||||||
|
char *aligned;
|
||||||
|
|
||||||
|
if(type == IOR_MEMORY_TYPE_GPU_MANAGED){
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
// use unified memory here to allow drop-in-replacement
|
||||||
|
if (cudaMallocManaged((void**) & buf, size, cudaMemAttachGlobal) != cudaSuccess){
|
||||||
|
ERR("Cannot allocate buffer on GPU");
|
||||||
|
}
|
||||||
|
return buf;
|
||||||
|
#else
|
||||||
|
ERR("No CUDA supported, cannot allocate on the GPU");
|
||||||
|
#endif
|
||||||
|
}else if(type == IOR_MEMORY_TYPE_GPU_DEVICE_ONLY){
|
||||||
|
#ifdef HAVE_GPU_DIRECT
|
||||||
|
if (cudaMalloc((void**) & buf, size) != cudaSuccess){
|
||||||
|
ERR("Cannot allocate buffer on GPU");
|
||||||
|
}
|
||||||
|
return buf;
|
||||||
|
#else
|
||||||
|
ERR("No GPUDirect supported, cannot allocate on the GPU");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef HAVE_SYSCONF
|
||||||
|
long pageSize = sysconf(_SC_PAGESIZE);
|
||||||
|
#else
|
||||||
|
size_t pageSize = getpagesize();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
pageMask = pageSize - 1;
|
||||||
|
buf = safeMalloc(size + pageSize + sizeof(void *));
|
||||||
|
/* find the alinged buffer */
|
||||||
|
tmp = buf + sizeof(char *);
|
||||||
|
aligned = tmp + pageSize - ((size_t) tmp & pageMask);
|
||||||
|
/* write a pointer to the original malloc()ed buffer into the bytes
|
||||||
|
preceding "aligned", so that the aligned buffer can later be free()ed */
|
||||||
|
tmp = aligned - sizeof(void *);
|
||||||
|
*(void **)tmp = buf;
|
||||||
|
|
||||||
|
return (void *)aligned;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Free a buffer allocated by aligned_buffer_alloc().
|
||||||
|
*/
|
||||||
|
void aligned_buffer_free(void *buf, ior_memory_flags gpu)
|
||||||
|
{
|
||||||
|
if(gpu){
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
if (cudaFree(buf) != cudaSuccess){
|
||||||
|
WARN("Cannot free buffer on GPU");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
#else
|
||||||
|
ERR("No CUDA supported, cannot free on the GPU");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
free(*(void **)((char *)buf - sizeof(char *)));
|
||||||
|
}
|
||||||
|
|
|
@ -63,5 +63,6 @@ void init_clock(MPI_Comm com);
|
||||||
double GetTimeStamp(void);
|
double GetTimeStamp(void);
|
||||||
char * PrintTimestamp(); // TODO remove this function
|
char * PrintTimestamp(); // TODO remove this function
|
||||||
unsigned long GetProcessorAndCore(int *chip, int *core);
|
unsigned long GetProcessorAndCore(int *chip, int *core);
|
||||||
|
void *aligned_buffer_alloc(size_t size, ior_memory_flags type);
|
||||||
|
void aligned_buffer_free(void *buf, ior_memory_flags type);
|
||||||
#endif /* !_UTILITIES_H */
|
#endif /* !_UTILITIES_H */
|
||||||
|
|
Loading…
Reference in New Issue