2012-01-09 00:51:04 +04:00
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim : expandtab : shiftwidth = 8 : tabstop = 8 :
*/
2011-06-17 23:20:43 +04:00
/******************************************************************************\
* *
* Copyright ( c ) 2003 , The Regents of the University of California *
* See the file COPYRIGHT for a complete copyright notice and license . *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*
2011-10-28 01:12:26 +04:00
* Definitions and prototypes of abstract I / O interface
2011-06-17 23:20:43 +04:00
*
\ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# ifndef _AIORI_H
# define _AIORI_H
2017-10-20 00:26:52 +03:00
# include <sys/stat.h>
2019-02-11 16:49:14 +03:00
# include <stdbool.h>
2017-10-20 00:26:52 +03:00
2011-11-12 04:40:45 +04:00
# include "iordef.h" /* IOR Definitions */
2020-06-28 19:16:35 +03:00
# include "aiori-debug.h"
2018-07-12 20:09:13 +03:00
# include "option.h"
2011-06-17 23:20:43 +04:00
/*************************** D E F I N I T I O N S ****************************/
2014-08-29 01:35:51 +04:00
/* -- file open flags --
 * Every flag below is a distinct single bit (bits 0 through 7).
 */
#define IOR_RDONLY (1 << 0) /* 0x01: read only */
#define IOR_WRONLY (1 << 1) /* 0x02: write only */
#define IOR_RDWR   (1 << 2) /* 0x04: read/write */
#define IOR_APPEND (1 << 3) /* 0x08: append */
#define IOR_CREAT  (1 << 4) /* 0x10: create */
#define IOR_TRUNC  (1 << 5) /* 0x20: truncate */
#define IOR_EXCL   (1 << 6) /* 0x40: exclusive */
#define IOR_DIRECT (1 << 7) /* 0x80: bypass I/O buffers */
/* -- file mode flags --
 * Every flag below is a distinct single bit (bits 0 through 11). Note that
 * the IOR_IRWX* values are bits of their own; they are NOT the OR of the
 * matching read/write/execute bits.
 */
#define IOR_IRWXU (1 << 0)  /* 0x0001: read, write, execute perm: owner */
#define IOR_IRUSR (1 << 1)  /* 0x0002: read permission: owner */
#define IOR_IWUSR (1 << 2)  /* 0x0004: write permission: owner */
#define IOR_IXUSR (1 << 3)  /* 0x0008: execute permission: owner */
#define IOR_IRWXG (1 << 4)  /* 0x0010: read, write, execute perm: group */
#define IOR_IRGRP (1 << 5)  /* 0x0020: read permission: group */
#define IOR_IWGRP (1 << 6)  /* 0x0040: write permission: group */
#define IOR_IXGRP (1 << 7)  /* 0x0080: execute permission: group */
#define IOR_IRWXO (1 << 8)  /* 0x0100: read, write, execute perm: other */
#define IOR_IROTH (1 << 9)  /* 0x0200: read permission: other */
#define IOR_IWOTH (1 << 10) /* 0x0400: write permission: other */
#define IOR_IXOTH (1 << 11) /* 0x0800: execute permission: other */
2011-06-17 23:20:43 +04:00
2017-10-20 00:26:52 +03:00
/*
 * File-system statistics as reported through the backend statfs() entry
 * point. All six counters are 64-bit unsigned values.
 *
 * NOTE(review): the member names mirror those of POSIX struct statvfs;
 * presumably the meanings match (block size, block counts, inode counts) --
 * confirm against the backends that fill this in.
 */
typedef struct ior_aiori_statfs {
        uint64_t f_bsize;
        uint64_t f_blocks;
        uint64_t f_bfree;
        uint64_t f_bavail;
        uint64_t f_files;
        uint64_t f_ffree;
} ior_aiori_statfs_t;
2020-05-31 14:50:03 +03:00
/*
This structure contains information about the expected IO pattern that may be used to optimize data access . Optimally , it should be stored for each file descriptor , at the moment it can only be set globally per aiori backend module .
*/
2020-05-31 13:58:34 +03:00
typedef struct aiori_xfer_hint_t {
2020-05-31 14:50:03 +03:00
int dryRun ; /* do not perform any I/Os just run evtl. inputs print dummy output */
int filePerProc ; /* single file or file-per-process */
int collective ; /* collective I/O */
int numTasks ; /* number of tasks for test */
int numNodes ; /* number of nodes for test */
int randomOffset ; /* access is to random offsets */
int fsyncPerWrite ; /* fsync() after each write */
IOR_offset_t segmentCount ; /* number of segments (or HDF5 datasets) */
IOR_offset_t blockSize ; /* contiguous bytes to write per task */
IOR_offset_t transferSize ; /* size of transfer in bytes */
IOR_offset_t expectedAggFileSize ; /* calculated aggregate file size */
int singleXferAttempt ; /* do not retry transfer if incomplete */
2020-05-31 13:58:34 +03:00
} aiori_xfer_hint_t ;
2020-05-31 13:50:15 +03:00
/*
 * This is a dummy structure to create some type safety: backend option
 * blocks are passed around as pointers to it instead of as plain void *.
 *
 * NOTE(review): only the struct tag is defined here; the typedef name
 * aiori_mod_opt_t used by the declarations in this header is presumably
 * provided by one of the included project headers -- confirm.
 */
struct aiori_mod_opt_t {
        void *dummy;
};
2018-07-12 20:09:13 +03:00
2020-05-31 14:11:00 +03:00
/*
 * Placeholder type for the file handles returned by the create/open entry
 * points and consumed by xfer/close/fsync. It carries a single untyped
 * pointer.
 */
typedef struct aiori_fd_t {
        void *dummy;
} aiori_fd_t;
2018-07-12 20:09:13 +03:00
2011-10-28 03:50:05 +04:00
typedef struct ior_aiori {
char * name ;
2018-09-10 19:43:12 +03:00
char * name_legacy ;
2020-05-31 14:11:00 +03:00
aiori_fd_t * ( * create ) ( char * , int iorflags , aiori_mod_opt_t * ) ;
2019-11-04 19:10:36 +03:00
int ( * mknod ) ( char * ) ;
2020-05-31 14:11:00 +03:00
aiori_fd_t * ( * open ) ( char * , int iorflags , aiori_mod_opt_t * ) ;
2020-05-30 20:19:48 +03:00
/*
Allow to set generic transfer options that shall be applied to any subsequent IO call .
*/
2020-05-31 14:50:03 +03:00
void ( * xfer_hints ) ( aiori_xfer_hint_t * params ) ;
2020-06-10 19:47:07 +03:00
IOR_offset_t ( * xfer ) ( int access , aiori_fd_t * , IOR_size_t * ,
2020-06-30 14:33:56 +03:00
IOR_offset_t size , IOR_offset_t offset , aiori_mod_opt_t * module_options ) ;
void ( * close ) ( aiori_fd_t * , aiori_mod_opt_t * module_options ) ;
void ( * delete ) ( char * , aiori_mod_opt_t * module_options ) ;
2019-12-22 14:21:40 +03:00
char * ( * get_version ) ( void ) ;
2020-06-30 14:33:56 +03:00
void ( * fsync ) ( aiori_fd_t * , aiori_mod_opt_t * module_options ) ;
IOR_offset_t ( * get_file_size ) ( aiori_mod_opt_t * module_options , char * filename ) ;
2020-05-31 13:58:34 +03:00
int ( * statfs ) ( const char * , ior_aiori_statfs_t * , aiori_mod_opt_t * module_options ) ;
int ( * mkdir ) ( const char * path , mode_t mode , aiori_mod_opt_t * module_options ) ;
int ( * rmdir ) ( const char * path , aiori_mod_opt_t * module_options ) ;
int ( * access ) ( const char * path , int mode , aiori_mod_opt_t * module_options ) ;
int ( * stat ) ( const char * path , struct stat * buf , aiori_mod_opt_t * module_options ) ;
void ( * initialize ) ( aiori_mod_opt_t * options ) ; /* called once per program before MPI is started */
void ( * finalize ) ( aiori_mod_opt_t * options ) ; /* called once per program after MPI is shutdown */
2021-01-21 17:10:23 +03:00
int ( * rename ) ( const char * oldpath , const char * newpath , aiori_mod_opt_t * module_options ) ;
2020-05-31 13:58:34 +03:00
option_help * ( * get_options ) ( aiori_mod_opt_t * * init_backend_options , aiori_mod_opt_t * init_values ) ; /* initializes the backend options as well and returns the pointer to the option help structure */
int ( * check_params ) ( aiori_mod_opt_t * ) ; /* check if the provided module_optionseters for the given test and the module options are correct, if they aren't print a message and exit(1) or return 1*/
void ( * sync ) ( aiori_mod_opt_t * ) ; /* synchronize every pending operation for this storage */
2019-02-11 16:49:14 +03:00
bool enable_mdtest ;
2011-10-28 03:50:05 +04:00
} ior_aiori_t ;
2011-06-17 23:20:43 +04:00
2019-02-11 16:49:14 +03:00
/*
 * Identifies a benchmark; passed as the `type` argument of
 * aiori_supported_apis().
 */
enum bench_type {
        IOR = 0,
        MDTEST = 1
};
2018-07-07 15:24:59 +03:00
/* Backend interface tables; only declared here, the objects are defined elsewhere. */
extern ior_aiori_t dummy_aiori;
extern ior_aiori_t aio_aiori;
extern ior_aiori_t daos_aiori;
extern ior_aiori_t dfs_aiori;
Algorithms 'S3', 'S3_plus', and 'S3_EMC' all available.
These are variants on S3. S3 uses the "pure" S3 interface, e.g. using
Multi-Part-Upload. The "plus" variant enables EMC-extensions in the aws4c
library. This allows the N:N case to use "append", in the case where
"transfer_size" != "block_size" for IOR. In pure S3, the N:N case will
fail, because the EMC-extensions won't be enabled, and appending (which
attempts to use the EMC byte-range tricks to do this) will throw an error.
In the S3_EMC alg, N:1 uses EMC's other byte-range tricks to write different
parts of an N:1 file, and also uses append to write the parts of an N:N
file. Preliminary tests show these EMC extensions look to improve BW by
~20%.
I put all three algs in aiori-S3.c, because it seemed some code was getting
reused. Not sure if that's still going to make sense after the TBD, below.
TBD: Recently realized that the "pure" S3 shouldn't be trying to use
appends for anything. In the N:N case, it should just use MPU, within each
file. Then, there's no need for S3_plus. We just have S3, which does MPU
for all writes where transfer_size != block_size, and uses (standard)
byte-range reads for reading. Then S3_EMC uses "appends" for N:N writes,
and byte-range writes for N:1 writes. This separates the code for the two
algs a little more, but we might still want them in the same file.
2014-10-30 01:04:30 +03:00
/* Backend interface tables; only declared here, the objects are defined elsewhere. */
extern ior_aiori_t hdf5_aiori;
extern ior_aiori_t hdfs_aiori;
extern ior_aiori_t ime_aiori;
Algorithms 'S3', 'S3_plus', and 'S3_EMC' all available.
These are variants on S3. S3 uses the "pure" S3 interface, e.g. using
Multi-Part-Upload. The "plus" variant enables EMC-extensions in the aws4c
library. This allows the N:N case to use "append", in the case where
"transfer_size" != "block_size" for IOR. In pure S3, the N:N case will
fail, because the EMC-extensions won't be enabled, and appending (which
attempts to use the EMC byte-range tricks to do this) will throw an error.
In the S3_EMC alg, N:1 uses EMC's other byte-range tricks to write different
parts of an N:1 file, and also uses append to write the parts of an N:N
file. Preliminary tests show these EMC extensions look to improve BW by
~20%.
I put all three algs in aiori-S3.c, because it seemed some code was getting
reused. Not sure if that's still going to make sense after the TBD, below.
TBD: Recently realized that the "pure" S3 shouldn't be trying to use
appends for anything. In the N:N case, it should just use MPU, within each
file. Then, there's no need for S3_plus. We just have S3, which does MPU
for all writes where transfer_size != block_size, and uses (standard)
byte-range reads for reading. Then S3_EMC uses "appends" for N:N writes,
and byte-range writes for N:1 writes. This separates the code for the two
algs a little more, but we might still want them in the same file.
2014-10-30 01:04:30 +03:00
/* Backend interface tables; only declared here, the objects are defined elsewhere. */
extern ior_aiori_t mpiio_aiori;
extern ior_aiori_t ncmpi_aiori;
extern ior_aiori_t posix_aiori;
extern ior_aiori_t pmdk_aiori;
extern ior_aiori_t mmap_aiori;
extern ior_aiori_t S3_libS3_aiori;
extern ior_aiori_t s3_4c_aiori;
Algorithms 'S3', 'S3_plus', and 'S3_EMC' all available.
These are variants on S3. S3 uses the "pure" S3 interface, e.g. using
Multi-Part-Upload. The "plus" variant enables EMC-extensions in the aws4c
library. This allows the N:N case to use "append", in the case where
"transfer_size" != "block_size" for IOR. In pure S3, the N:N case will
fail, because the EMC-extensions won't be enabled, and appending (which
attempts to use the EMC byte-range tricks to do this) will throw an error.
In the S3_EMC alg, N:1 uses EMC's other byte-range tricks to write different
parts of an N:1 file, and also uses append to write the parts of an N:N
file. Preliminary tests show these EMC extensions look to improve BW by
~20%.
I put all three algs in aiori-S3.c, because it seemed some code was getting
reused. Not sure if that's still going to make sense after the TBD, below.
TBD: Recently realized that the "pure" S3 shouldn't be trying to use
appends for anything. In the N:N case, it should just use MPU, within each
file. Then, there's no need for S3_plus. We just have S3, which does MPU
for all writes where transfer_size != block_size, and uses (standard)
byte-range reads for reading. Then S3_EMC uses "appends" for N:N writes,
and byte-range writes for N:1 writes. This separates the code for the two
algs a little more, but we might still want them in the same file.
2014-10-30 01:04:30 +03:00
/* Backend interface tables; only declared here, the objects are defined elsewhere. */
extern ior_aiori_t s3_plus_aiori;
extern ior_aiori_t s3_emc_aiori;
extern ior_aiori_t rados_aiori;
extern ior_aiori_t cephfs_aiori;
extern ior_aiori_t gfarm_aiori;
S3 with Multi-Part Upload for N:1 is working.
Testing on our EMC ViPR installation. Therefore, we also have available
some EMC extensions. For example, EMC supports a special "byte-range"
header-option ("Range: bytes=-1-") which allows appending to an object.
This is not needed for N:1 (where every write creates an independent part),
but is vital for N:N (where every write is considered an append, unless
"transfer-size" is the same as "block-size").
We also use a LANL-extended implementation of aws4c 0.5, which provides
some special features, and allows greater efficiency. That is included in
this commit as a tarball. Untar it somewhere else and build it, to produce
a library, which is linked with IOR. (configure with --with-S3).
TBD: EMC also supports a simpler alternative to Multi-Part Upload, which
appears to have several advantages. We'll add that in next, but wanted to
capture this as is, before I break it.
2014-10-27 22:16:20 +03:00
2017-10-20 00:26:52 +03:00
const ior_aiori_t * aiori_select ( const char * api ) ;
int aiori_count ( void ) ;
2019-02-11 16:49:14 +03:00
void aiori_supported_apis ( char * APIs , char * APIs_legacy , enum bench_type type ) ;
2019-03-27 23:04:48 +03:00
options_all_t * airoi_create_all_module_options ( option_help * global_options ) ;
void * airoi_update_module_options ( const ior_aiori_t * backend , options_all_t * module_defaults ) ;
2017-10-20 00:26:52 +03:00
const char * aiori_default ( void ) ;
2011-11-12 04:40:45 +04:00
2018-04-26 01:21:48 +03:00
/* some generic POSIX-based backend calls */
2019-12-22 14:21:40 +03:00
char * aiori_get_version ( void ) ;
2020-05-31 13:58:34 +03:00
int aiori_posix_statfs ( const char * path , ior_aiori_statfs_t * stat_buf , aiori_mod_opt_t * module_options ) ;
int aiori_posix_mkdir ( const char * path , mode_t mode , aiori_mod_opt_t * module_options ) ;
int aiori_posix_rmdir ( const char * path , aiori_mod_opt_t * module_options ) ;
int aiori_posix_access ( const char * path , int mode , aiori_mod_opt_t * module_options ) ;
int aiori_posix_stat ( const char * path , struct stat * buf , aiori_mod_opt_t * module_options ) ;
2019-03-28 01:32:59 +03:00
2018-07-12 18:13:40 +03:00
2020-09-02 20:12:17 +03:00
/* NOTE: these 4 MPI-IO functions are exported for reuse by HDF5/PNetCDF */
2020-05-31 13:58:34 +03:00
void MPIIO_Delete ( char * testFileName , aiori_mod_opt_t * module_options ) ;
2020-06-30 14:33:56 +03:00
IOR_offset_t MPIIO_GetFileSize ( aiori_mod_opt_t * options , char * testFileName ) ;
int MPIIO_Access ( const char * , int , aiori_mod_opt_t * module_options ) ;
2020-09-02 20:12:17 +03:00
void MPIIO_xfer_hints ( aiori_xfer_hint_t * params ) ;
2018-05-08 14:08:29 +03:00
2011-06-17 23:20:43 +04:00
# endif /* not _AIORI_H */