S3 with Multi-Part Upload for N:1 is working.
Testing on our EMC ViPR installation. Therefore, we also have available
some EMC extensions. For example, EMC supports a special "byte-range"
header-option ("Range: bytes=-1-") which allows appending to an object.
This is not needed for N:1 (where every write creates an independent part),
but is vital for N:N (where every write is considered an append, unless
"transfer-size" is the same as "block-size").
We also use a LANL-extended implementation of aws4c 0.5, which provides
some special features, and allows greater efficiency. That is included in
this commit as a tarball. Untar it somewhere else and build it, to produce
a library, which is linked with IOR. (configure with --with-S3).
TBD: EMC also supports a simpler alternative to Multi-Part Upload, which
appears to have several advantages. We'll add that in next, but wanted to
capture this as is, before I break it.
2014-10-27 22:16:20 +03:00
|
|
|
/* -*- mode: c; indent-tabs-mode: nil; -*-
|
|
|
|
* vim:expandtab:
|
|
|
|
*
|
|
|
|
* NOTE: Someone was setting indent-sizes in the mode-line. Don't do that.
|
|
|
|
* 8-chars of indenting is ridiculous. If you really want 8-spaces,
|
|
|
|
* then change the mode-line to use tabs, and configure your personal
|
|
|
|
* editor environment to use 8-space tab-stops.
|
2017-10-20 19:02:24 +03:00
|
|
|
*
|
2012-01-09 00:51:04 +04:00
|
|
|
*/
|
2011-06-17 23:20:43 +04:00
|
|
|
/******************************************************************************\
|
|
|
|
* *
|
|
|
|
* Copyright (c) 2003, The Regents of the University of California *
|
|
|
|
* See the file COPYRIGHT for a complete copyright notice and license. *
|
|
|
|
* *
|
|
|
|
\******************************************************************************/
|
|
|
|
|
|
|
|
#ifndef _IOR_H
|
|
|
|
#define _IOR_H
|
|
|
|
|
2011-10-28 03:50:05 +04:00
|
|
|
#ifdef HAVE_CONFIG_H
|
2014-08-14 02:53:24 +04:00
|
|
|
# include "config.h"
|
|
|
|
#endif
|
2014-09-18 21:20:37 +04:00
|
|
|
|
2014-08-14 02:53:24 +04:00
|
|
|
/*
 * HDFS types.
 *
 * With USE_HDFS_AIORI the real declarations come from <hdfs.h>.  Otherwise
 * placeholder typedefs are supplied: they are unused, but the names still
 * need a type so that code mentioning them compiles.
 */
#ifdef USE_HDFS_AIORI
#  include <hdfs.h>             /* hdfsFS */
#else
#  include <stdint.h>
typedef uint16_t tPort;         /* unused, but needs a type */
typedef void *hdfsFS;           /* unused, but needs a type */
#endif
|
|
|
|
|
2018-02-23 19:38:24 +03:00
|
|
|
/*
 * RADOS types.
 *
 * With USE_RADOS_AIORI the real declarations come from <rados/librados.h>.
 * Otherwise both handle types fall back to plain void pointers so that
 * IOR_param_t (which stores one of each) still compiles.
 */
#ifdef USE_RADOS_AIORI
#  include <rados/librados.h>
#else
typedef void *rados_t;
typedef void *rados_ioctx_t;
#endif
|
2019-03-27 23:04:48 +03:00
|
|
|
#include "option.h"
|
2011-11-12 04:40:45 +04:00
|
|
|
#include "iordef.h"
|
2020-05-31 14:50:03 +03:00
|
|
|
#include "aiori.h"
|
2020-05-30 22:09:37 +03:00
|
|
|
|
2020-06-30 14:33:56 +03:00
|
|
|
#include <mpi.h>
|
|
|
|
|
|
|
|
#ifndef MPI_FILE_NULL
|
|
|
|
# include <mpio.h>
|
|
|
|
#endif /* not MPI_FILE_NULL */
|
|
|
|
|
2020-05-30 22:09:37 +03:00
|
|
|
/*
 * ISPOWEROFTWO(x): non-zero when x is non-zero and has exactly one bit set.
 *
 * Every use of the argument is parenthesized so that compound arguments
 * expand with the intended precedence, e.g. ISPOWEROFTWO(flags & mask).
 * NOTE: x is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ISPOWEROFTWO(x) (((x) != 0) && !((x) & ((x) - 1)))
|
2015-05-21 21:05:56 +03:00
|
|
|
/******************** DATA Packet Type ***************************************/
/* Holds the types of data packets: generic, timestamp, offset, incompressible.
 * The numeric values are spelled out explicitly; keep them stable.
 */

enum PACKET_TYPE
{
    generic = 0,                /* No packet type specified */
    timestamp = 1,              /* Timestamp packet set with -l */
    offset = 2,                 /* Offset packet set with -l */
    incompressible = 3          /* Incompressible packet set with -l */
};
|
|
|
|
|
2021-02-18 13:40:42 +03:00
|
|
|
/*
 * Memory type selector; IOR_param_t stores one of these in gpuMemoryFlags.
 * NOTE(review): the exact allocation behaviour behind each value is defined
 * by the code that consumes it, not by this header.
 */
typedef enum {
    IOR_MEMORY_TYPE_CPU = 0,
    IOR_MEMORY_TYPE_GPU_MANAGED = 1,
    IOR_MEMORY_TYPE_GPU_DEVICE_ONLY = 2,
} ior_memory_flags;
|
2015-05-27 19:24:52 +03:00
|
|
|
|
|
|
|
|
|
|
|
/***************** IOR_BUFFERS *************************************************/
/* A struct to hold the buffers so we can pass 1 pointer around instead of 3.
 * The struct only carries the three pointers; nothing in this header says
 * who allocates or frees the memory they refer to.
 */

typedef struct IO_BUFFERS
{
    void *buffer;
    void *checkBuffer;
    void *readCheckBuffer;
} IOR_io_buffers;
|
|
|
|
|
2011-11-12 04:40:45 +04:00
|
|
|
/******************************************************************************/
|
|
|
|
/*
|
|
|
|
* The parameter struct holds all of the "global" data to be passed,
|
|
|
|
* as well as results to be parsed.
|
|
|
|
*
|
|
|
|
* NOTE: If IOR_Param_t is changed, also change:
|
|
|
|
* init_IOR_Param_t() [ior.c]
|
|
|
|
* DisplayUsage() [ior.c]
|
|
|
|
* ShowTest() [ior.c]
|
|
|
|
* DecodeDirective() [parse_options.c]
|
|
|
|
* ParseCommandLine() [parse_options.c]
|
|
|
|
* USER_GUIDE
|
|
|
|
*/
|
|
|
|
|
|
|
|
typedef struct
|
|
|
|
{
|
2018-09-17 12:38:57 +03:00
|
|
|
const struct ior_aiori * backend;
|
2018-07-14 10:41:35 +03:00
|
|
|
char * debug; /* debug info string */
|
2012-01-14 02:05:13 +04:00
|
|
|
int referenceNumber; /* user supplied reference number */
|
2018-07-14 10:41:35 +03:00
|
|
|
char * api; /* API for I/O */
|
|
|
|
char * apiVersion; /* API version */
|
|
|
|
char * platform; /* platform type */
|
|
|
|
char * testFileName; /* full name for test */
|
|
|
|
char * options; /* options string */
|
2019-03-27 23:04:48 +03:00
|
|
|
// intermediate options
|
2020-05-30 20:19:48 +03:00
|
|
|
int collective; /* collective I/O */
|
2021-01-20 17:57:21 +03:00
|
|
|
MPI_Comm testComm; /* Current MPI communicator */
|
|
|
|
MPI_Comm mpi_comm_world; /* The global MPI communicator */
|
2018-10-11 21:58:30 +03:00
|
|
|
int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */
|
2021-02-18 13:40:42 +03:00
|
|
|
int dualMount; /* dual mount points */
|
|
|
|
ior_memory_flags gpuMemoryFlags; /* use the GPU to store the data */
|
|
|
|
int gpuDirect; /* use gpuDirect, this influences gpuMemoryFlags as well */
|
|
|
|
int gpuID; /* the GPU to use for gpuDirect or memory options */
|
2011-11-12 04:40:45 +04:00
|
|
|
int numTasks; /* number of tasks for test */
|
2019-08-31 01:45:03 +03:00
|
|
|
int numNodes; /* number of nodes for test */
|
|
|
|
int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */
|
2019-12-21 17:14:27 +03:00
|
|
|
int tasksBlockMapping; /* are the tasks in contiguous blocks across nodes or round-robin */
|
2011-11-12 04:40:45 +04:00
|
|
|
int repetitions; /* number of repetitions of test */
|
|
|
|
int repCounter; /* rep counter */
|
|
|
|
int multiFile; /* multiple files */
|
|
|
|
int interTestDelay; /* delay between reps in seconds */
|
2018-09-30 12:01:21 +03:00
|
|
|
int interIODelay; /* delay after each I/O in us */
|
2011-11-12 04:40:45 +04:00
|
|
|
int open; /* flag for writing or reading */
|
|
|
|
int readFile; /* read of existing file */
|
|
|
|
int writeFile; /* write of file */
|
|
|
|
int filePerProc; /* single file or file-per-process */
|
|
|
|
int reorderTasks; /* reorder tasks for read back and check */
|
|
|
|
int taskPerNodeOffset; /* task node offset for reading files */
|
|
|
|
int reorderTasksRandom; /* reorder tasks for random file read back */
|
|
|
|
int reorderTasksRandomSeed; /* reorder tasks for random file read seed */
|
|
|
|
int checkWrite; /* check read after write */
|
|
|
|
int checkRead; /* check read after read */
|
|
|
|
int keepFile; /* don't delete the testfile on exit */
|
|
|
|
int keepFileWithError; /* don't delete the testfile with errors */
|
|
|
|
int errorFound; /* error found in data check */
|
|
|
|
IOR_offset_t segmentCount; /* number of segments (or HDF5 datasets) */
|
|
|
|
IOR_offset_t blockSize; /* contiguous bytes to write per task */
|
|
|
|
IOR_offset_t transferSize; /* size of transfer in bytes */
|
2011-12-13 09:00:18 +04:00
|
|
|
IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */
|
2020-12-01 16:52:29 +03:00
|
|
|
IOR_offset_t randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */
|
2020-05-30 20:19:48 +03:00
|
|
|
|
2021-02-09 20:54:14 +03:00
|
|
|
char * saveRankDetailsCSV; /* save the details about the performance to a file */
|
2012-01-14 01:27:55 +04:00
|
|
|
int summary_every_test; /* flag to print summary every test, not just at end */
|
2011-11-12 04:40:45 +04:00
|
|
|
int uniqueDir; /* use unique directory for each fpp */
|
|
|
|
int useExistingTestFile; /* do not delete test file before access */
|
|
|
|
int storeFileOffset; /* use file offset as stored signature */
|
S3 with Multi-Part Upload for N:1 is working.
Testing on our EMC ViPR installation. Therefore, we also have available
some EMC extensions. For example, EMC supports a special "byte-range"
header-option ("Range: bytes=-1-") which allows appending to an object.
This is not needed for N:1 (where every write creates an independent part),
but is vital for N:N (where every write is considered an append, unless
"transfer-size" is the same as "block-size").
We also use a LANL-extended implementation of aws4c 0.5, which provides
some special features, and allows greater efficiency. That is included in
this commit as a tarball. Untar it somewhere else and build it, to produce
a library, which is linked with IOR. (configure with --with-S3).
TBD: EMC also supports a simpler alternative to Multi-Part Upload, which
appears to have several advantages. We'll add that in next, but wanted to
capture this as is, before I break it.
2014-10-27 22:16:20 +03:00
|
|
|
int deadlineForStonewalling; /* max time in seconds to run any test phase */
|
2020-07-03 10:09:40 +03:00
|
|
|
int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */
|
2018-07-07 12:29:27 +03:00
|
|
|
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
|
2018-07-14 10:41:35 +03:00
|
|
|
char * stoneWallingStatusFile;
|
2018-07-07 16:01:11 +03:00
|
|
|
|
2012-01-09 00:30:05 +04:00
|
|
|
int maxTimeDuration; /* max time in minutes to run each test */
|
2011-11-12 04:40:45 +04:00
|
|
|
int outlierThreshold; /* warn on outlier N seconds from mean */
|
|
|
|
int verbose; /* verbosity */
|
|
|
|
int setTimeStampSignature; /* set time stamp signature */
|
|
|
|
unsigned int timeStampSignatureValue; /* value for time stamp signature */
|
|
|
|
int randomSeed; /* random seed for write/read check */
|
2018-08-08 19:18:54 +03:00
|
|
|
unsigned int incompressibleSeed; /* random seed for incompressible file creation */
|
2011-11-12 04:40:45 +04:00
|
|
|
int randomOffset; /* access is to random offsets */
|
2012-01-09 06:41:30 +04:00
|
|
|
size_t memoryPerTask; /* additional memory used per task */
|
|
|
|
size_t memoryPerNode; /* additional memory used per node */
|
2019-03-27 23:04:48 +03:00
|
|
|
char * memoryPerNodeStr; /* for parsing */
|
|
|
|
char * testscripts; /* for parsing */
|
|
|
|
char * buffer_type; /* for parsing */
|
|
|
|
enum PACKET_TYPE dataPacketType; /* The type of data packet. */
|
2015-05-21 21:05:56 +03:00
|
|
|
|
2019-03-27 23:04:48 +03:00
|
|
|
void * backend_options; /* Backend-specific options */
|
2011-11-12 04:40:45 +04:00
|
|
|
|
|
|
|
/* POSIX variables */
|
|
|
|
int singleXferAttempt; /* do not retry transfer if incomplete */
|
|
|
|
int fsyncPerWrite; /* fsync() after each write */
|
|
|
|
int fsync; /* fsync() after write */
|
|
|
|
|
S3 with Multi-Part Upload for N:1 is working.
Testing on our EMC ViPR installation. Therefore, we also have available
some EMC extensions. For example, EMC supports a special "byte-range"
header-option ("Range: bytes=-1-") which allows appending to an object.
This is not needed for N:1 (where every write creates an independent part),
but is vital for N:N (where every write is considered an append, unless
"transfer-size" is the same as "block-size").
We also use a LANL-extended implementation of aws4c 0.5, which provides
some special features, and allows greater efficiency. That is included in
this commit as a tarball. Untar it somewhere else and build it, to produce
a library, which is linked with IOR. (configure with --with-S3).
TBD: EMC also supports a simpler alternative to Multi-Part Upload, which
appears to have several advantages. We'll add that in next, but wanted to
capture this as is, before I break it.
2014-10-27 22:16:20 +03:00
|
|
|
char* URI; /* "path" to target object */
|
2020-12-01 16:52:29 +03:00
|
|
|
|
2018-02-23 19:38:24 +03:00
|
|
|
/* RADOS variables */
|
|
|
|
rados_t rados_cluster; /* RADOS cluster handle */
|
|
|
|
rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */
|
|
|
|
|
2011-11-12 04:40:45 +04:00
|
|
|
int id; /* test's unique ID */
|
|
|
|
int intraTestBarriers; /* barriers between open/op and op/close */
|
2020-06-24 13:10:42 +03:00
|
|
|
int warningAsErrors; /* treat any warning as an error */
|
2020-05-31 14:50:03 +03:00
|
|
|
|
|
|
|
aiori_xfer_hint_t hints;
|
2011-11-12 04:40:45 +04:00
|
|
|
} IOR_param_t;
|
|
|
|
|
2018-09-20 00:39:25 +03:00
|
|
|
/* each pointer for a single test */
|
2011-12-13 09:00:18 +04:00
|
|
|
typedef struct {
|
2018-09-20 00:39:25 +03:00
|
|
|
double time;
|
2017-10-20 19:02:24 +03:00
|
|
|
size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling
|
2018-07-07 12:29:27 +03:00
|
|
|
|
2018-07-15 21:38:17 +03:00
|
|
|
double stonewall_time;
|
2020-12-04 00:07:45 +03:00
|
|
|
long long stonewall_min_data_accessed; // of all processes
|
|
|
|
long long stonewall_avg_data_accessed; // across all processes
|
|
|
|
long long stonewall_total_data_accessed; // sum accross all processes
|
2018-07-07 12:29:27 +03:00
|
|
|
|
2018-07-15 21:38:17 +03:00
|
|
|
IOR_offset_t aggFileSizeFromStat;
|
|
|
|
IOR_offset_t aggFileSizeFromXfer;
|
|
|
|
IOR_offset_t aggFileSizeForBW;
|
2018-09-20 00:39:25 +03:00
|
|
|
} IOR_point_t;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
int errors;
|
|
|
|
IOR_point_t write;
|
|
|
|
IOR_point_t read;
|
2011-12-13 09:00:18 +04:00
|
|
|
} IOR_results_t;
|
2011-11-12 04:40:45 +04:00
|
|
|
|
2011-12-13 09:00:18 +04:00
|
|
|
/* define the queuing structure for the test parameters */
|
|
|
|
typedef struct IOR_test_t {
|
S3 with Multi-Part Upload for N:1 is working.
Testing on our EMC ViPR installation. Therefore, we also have available
some EMC extensions. For example, EMC supports a special "byte-range"
header-option ("Range: bytes=-1-") which allows appending to an object.
This is not needed for N:1 (where every write creates an independent part),
but is vital for N:N (where every write is considered an append, unless
"transfer-size" is the same as "block-size").
We also use a LANL-extended implementation of aws4c 0.5, which provides
some special features, and allows greater efficiency. That is included in
this commit as a tarball. Untar it somewhere else and build it, to produce
a library, which is linked with IOR. (configure with --with-S3).
TBD: EMC also supports a simpler alternative to Multi-Part Upload, which
appears to have several advantages. We'll add that in next, but wanted to
capture this as is, before I break it.
2014-10-27 22:16:20 +03:00
|
|
|
IOR_param_t params;
|
2018-09-20 00:39:25 +03:00
|
|
|
IOR_results_t *results;
|
S3 with Multi-Part Upload for N:1 is working.
Testing on our EMC ViPR installation. Therefore, we also have available
some EMC extensions. For example, EMC supports a special "byte-range"
header-option ("Range: bytes=-1-") which allows appending to an object.
This is not needed for N:1 (where every write creates an independent part),
but is vital for N:N (where every write is considered an append, unless
"transfer-size" is the same as "block-size").
We also use a LANL-extended implementation of aws4c 0.5, which provides
some special features, and allows greater efficiency. That is included in
this commit as a tarball. Untar it somewhere else and build it, to produce
a library, which is linked with IOR. (configure with --with-S3).
TBD: EMC also supports a simpler alternative to Multi-Part Upload, which
appears to have several advantages. We'll add that in next, but wanted to
capture this as is, before I break it.
2014-10-27 22:16:20 +03:00
|
|
|
struct IOR_test_t *next;
|
2011-12-13 09:00:18 +04:00
|
|
|
} IOR_test_t;
|
|
|
|
|
|
|
|
IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num);
|
2018-08-29 19:49:41 +03:00
|
|
|
void AllocResults(IOR_test_t *test);
|
|
|
|
|
2019-12-21 14:19:02 +03:00
|
|
|
char * GetPlatformName(void);
|
2021-01-20 17:57:21 +03:00
|
|
|
void init_IOR_Param_t(IOR_param_t *p, MPI_Comm global_com);
|
2011-06-17 23:20:43 +04:00
|
|
|
|
2018-07-07 12:29:27 +03:00
|
|
|
/*
|
|
|
|
* This function runs IOR given by command line, useful for testing
|
|
|
|
*/
|
|
|
|
IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile);
|
2011-06-17 23:20:43 +04:00
|
|
|
|
2018-07-07 13:42:21 +03:00
|
|
|
/* Actual IOR Main function, renamed to allow library usage */
|
|
|
|
int ior_main(int argc, char **argv);
|
|
|
|
|
2011-11-12 04:40:45 +04:00
|
|
|
#endif /* !_IOR_H */
|