mdtest/src/mdtest.c

/*
* Copyright (C) 2003, The Regents of the University of California.
* Produced at the Lawrence Livermore National Laboratory.
* Written by Christopher J. Morrone <morrone@llnl.gov>,
* Bill Loewe <loewe@loewe.net>, Tyce McLarty <mclarty@llnl.gov>,
* and Ryan Kroiss <rrkroiss@lanl.gov>.
* All rights reserved.
* UCRL-CODE-155800
*
* Please read the COPYRIGHT file.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (as published by
* the Free Software Foundation) version 2, dated June 1991.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* terms and conditions of the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* CVS info:
* $RCSfile: mdtest.c,v $
* $Revision: 1.4 $
* $Date: 2013/11/27 17:05:31 $
* $Author: brettkettering $
*/
#include "mpi.h"
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#ifdef Darwin
#include <sys/param.h>
#include <sys/mount.h>
#else
#include <sys/statfs.h>
#endif
#ifdef _HAS_PLFS
#include <plfs.h>
#include <plfs_error.h>
#include <sys/statvfs.h>
#endif
#ifdef _HAS_HDFS
#include <hdfs.h>
#endif
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#ifdef _HAS_S3
#include "aws4c.h"
#include "aws4c_extra.h"
#include <curl/curl.h>
#endif
#define FILEMODE (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH)
#define DIRMODE (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH)
/*
* Try using the system's PATH_MAX, which is what realpath and such use.
*/
#define MAX_LEN PATH_MAX
/*
#define MAX_LEN 1024
*/
#define RELEASE_VERS "1.9.4-rc1"
#define ITEM_COUNT 25000
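/* At verbosity >= 3, progress is reported every ITEM_COUNT items created/removed/stat'ed. */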
#define MDTEST_SUCCESS 0
#define MDTEST_FAILURE -1
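/*
 * Per-iteration summary rates, in ops/sec:
 * entry[0..3] directory create/stat/read/remove,
 * entry[4..7] file create/stat/read/remove,
 * entry[8..9] tree create/remove.
 */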
typedef struct
{
double entry[10];
} table_t;
int rank;
int size;
unsigned long long* rand_array;
char testdir[MAX_LEN];
char testdirpath[MAX_LEN];
char top_dir[MAX_LEN];
char base_tree_name[MAX_LEN];
char ** filenames = NULL;
char hostname[MAX_LEN];
char unique_dir[MAX_LEN];
char mk_name[MAX_LEN];
char stat_name[MAX_LEN];
char read_name[MAX_LEN];
char rm_name[MAX_LEN];
char unique_mk_dir[MAX_LEN];
char unique_chdir_dir[MAX_LEN];
char unique_stat_dir[MAX_LEN];
char unique_read_dir[MAX_LEN];
char unique_rm_dir[MAX_LEN];
char unique_rm_uni_dir[MAX_LEN];
char * write_buffer = NULL;
char * read_buffer = NULL;
int barriers = 1;
int create_only = 0;
int stat_only = 0;
int read_only = 0;
int remove_only = 0;
int leaf_only = 0;
int branch_factor = 1;
int depth = 0;
/*
* This is likely a small value, but it's sometimes computed by
* branch_factor^(depth+1), so we'll make it a larger variable,
* just in case.
*/
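/*
 * For a complete tree this total is the geometric series
 * 1 + b + b^2 + ... + b^depth = (b^(depth+1) - 1) / (b - 1)
 * for branch factor b > 1 (or depth + 1 when b == 1).
 */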
unsigned long long num_dirs_in_tree = 0;
/*
* As we start moving towards Exascale, we could have billions
* of files in a directory. Make room for that possibility with
* a larger variable.
*/
unsigned long long items_per_dir = 0;
int random_seed = 0;
int shared_file = 0;
int files_only = 0;
int dirs_only = 0;
int pre_delay = 0;
int unique_dir_per_task = 0;
int time_unique_dir_overhead = 0;
int verbose = 0;
int throttle = 1;
unsigned long long items = 0;
int collective_creates = 0;
size_t write_bytes = 0;
size_t read_bytes = 0;
int sync_file = 0;
int path_count = 0;
int nstride = 0; /* neighbor stride */
MPI_Comm testcomm;
table_t * summary_table;
/***** LUSTRE ***********/
/* Try to split workload across all Lustre Meta Data Servers */
struct MDTS { /* info about Meta data target servers for split */
unsigned int* indexes;
unsigned int num; /* Guessing 4 billion meta data servers will do for now */
unsigned int max;
} *mdts = NULL;
int MDTS_stripe_on = 0;
/*************** PLFS ************/
#ifdef _HAS_PLFS
char using_plfs_path = 0;
pid_t pid;
uid_t uid;
Plfs_fd *wpfd = NULL;
Plfs_fd *rpfd = NULL;
Plfs_fd *cpfd = NULL;
#endif
/************ HDFS **************/
#ifdef _HAS_HDFS
hdfsFS hd_fs;
hdfsFile hd_file;
int hdfs_ret;
#endif
/************* S3 ****************/
#ifdef _HAS_S3
IOBuf *bf;
IOBuf *buffer_iter;
enum{S3_CREATE, S3_STAT, S3_DELETE};
#define HTTP_OK 200
#define HTTP_NO_CONTENT 204
#define TEST_DIR "test-dir"
char *dir_slash = "--";
char *file_dot = "-";
int bucket_created = 0;
char * s3_host_ip = NULL;
char * s3_host_id = NULL;
int ident = -1;
#else
char *dir_slash = "/";
char *file_dot = ".";
#define TEST_DIR "#test-dir"
#endif
/* for making/removing unique directory && stating/deleting subdirectory */
enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
#ifdef __linux__
#define FAIL(msg) do { \
fprintf(stdout, "%s: Process %d(%s): FAILED in %s, %s: %s\n", \
timestamp(), rank, hostname, __func__, \
msg, strerror(errno)); \
fflush(stdout); \
MPI_Abort(MPI_COMM_WORLD, 1); \
} while(0)
#else
#define FAIL(msg) do { \
fprintf(stdout, "%s: Process %d(%s): FAILED at %d, %s: %s\n", \
timestamp(), rank, hostname, __LINE__, \
msg, strerror(errno)); \
fflush(stdout); \
MPI_Abort(MPI_COMM_WORLD, 1); \
} while(0)
#endif
/**
* A directory making wrapper for the various types
* of filesystems. Makes for one place to change directory
* creation, instead of 6.
*/
int mdtest_mkdir(const char* path, mode_t mode) {
#ifdef _HAS_PLFS
plfs_error_t plfs_ret;
if ( using_plfs_path ) {
plfs_ret = plfs_mkdir( path, mode );
if ( plfs_ret != PLFS_SUCCESS ) {
fprintf(stderr,"PLFS mkdir unable to make directory");
return MDTEST_FAILURE;
}
} else {
if ( mkdir( path , mode ) == -1 ) {
fprintf(stderr,"mkdir unable to make directory");
return MDTEST_FAILURE;
}
}
#else
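/*
 * With -M, spread directory creation across Lustre metadata targets:
 * "lfs mkdir -i <index>" places the new directory on the MDT with that
 * index, so successive ranks round-robin over the discovered MDTs.
 */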
if(mdts != NULL && MDTS_stripe_on) {
char buf[MAX_LEN + 32] = {0};
snprintf(buf, sizeof(buf), "lfs mkdir -i %d %s", mdts->indexes[rank % mdts->num], path);
if(system(buf) != 0) {
fprintf(stderr,"LFS mkdir unable to make directory");
return MDTEST_FAILURE;
}
}
else if (mkdir(path, mode) == -1) {
fprintf(stderr,"mkdir unable to make directory");
return MDTEST_FAILURE;
}
#endif
return MDTEST_SUCCESS;
}
/**
* An access wrapper for the various types of filesystems.
*/
int mdtest_access(const char* path, int mode) {
#ifdef _HAS_PLFS
plfs_error_t plfs_ret;
if ( using_plfs_path ) {
plfs_ret = plfs_access( path, mode );
if ( plfs_ret == PLFS_SUCCESS )
return MDTEST_SUCCESS;
} else if (access(path, mode) == 0) {
return MDTEST_SUCCESS;
}
return MDTEST_FAILURE;
#else
if(access(path,mode) == 0) {
return MDTEST_SUCCESS;
}
return MDTEST_FAILURE;
#endif
}
char *timestamp() {
static char datestring[80];
time_t timestamp;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering timestamp...\n" );
}
fflush(stdout);
timestamp = time(NULL);
strftime(datestring, 80, "%m/%d/%Y %T", localtime(&timestamp));
return datestring;
}
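/*
 * Counts how many MPI ranks share rank 0's hostname and broadcasts the
 * result; using this as "tasks per node" assumes ranks are distributed
 * evenly across nodes.
 */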
int count_tasks_per_node(void) {
char localhost[MAX_LEN];
int count = 1,
i;
char *hosts;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering count_tasks_per_node...\n" );
fflush( stdout );
}
if (gethostname(localhost, MAX_LEN) != 0) {
FAIL("gethostname()");
}
/* MPI_gather all hostnames, and compare to local hostname */
hosts = (char *) malloc(size * MAX_LEN);
if (hosts == NULL) {
FAIL("out of memory");
}
MPI_Gather(localhost, MAX_LEN, MPI_CHAR, hosts, MAX_LEN, MPI_CHAR, 0, MPI_COMM_WORLD);
if (rank == 0) {
for (i = 1; i < size; i++) {
if (strcmp(&(hosts[i*MAX_LEN]), localhost) == 0) {
count++;
}
}
}
free(hosts);
MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD);
return(count);
}
void delay_secs(int delay) {
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering delay_secs...\n" );
fflush( stdout );
}
if (rank == 0 && delay > 0) {
if (verbose >= 1) {
fprintf(stdout, "delaying %d seconds . . .\n", delay);
fflush(stdout);
}
sleep(delay);
}
MPI_Barrier(testcomm);
}
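/*
 * Shifts timers t[0..tcount] forward by the time elapsed since t[tcount]
 * was recorded, so the overhead of switching into per-task unique
 * directories is excluded from the phase timings (unless -t is given).
 */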
void offset_timers(double * t, int tcount) {
double toffset;
int i;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering offset_timers...\n" );
fflush( stdout );
}
toffset = MPI_Wtime() - t[tcount];
for (i = 0; i < tcount+1; i++) {
t[i] += toffset;
}
}
void parse_dirpath(char *dirpath_arg) {
char * tmp, * token;
char delimiter_string[3] = { '@', '\n', '\0' };
int i = 0;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering parse_dirpath...\n" );
fflush( stdout );
}
tmp = dirpath_arg;
if (* tmp != '\0') path_count++;
while (* tmp != '\0') {
if (* tmp == '@') {
path_count++;
}
tmp++;
}
filenames = (char **)malloc(path_count * sizeof(char *));
if (filenames == NULL) {
FAIL("out of memory");
}
token = strtok(dirpath_arg, delimiter_string);
while (token != NULL) {
filenames[i] = token;
token = strtok(NULL, delimiter_string);
i++;
}
}
/*
* Copies the unique directory name for the given operation into the "to"
* parameter. The caller must supply a buffer of at least MAX_LEN bytes.
*/
void unique_dir_access(int opt, char *to) {
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering unique_dir_access...\n" );
fflush( stdout );
}
if (opt == MK_UNI_DIR) {
MPI_Barrier(testcomm);
strcpy( to, unique_chdir_dir );
} else if (opt == STAT_SUB_DIR) {
strcpy( to, unique_stat_dir );
} else if (opt == READ_SUB_DIR) {
strcpy( to, unique_read_dir );
} else if (opt == RM_SUB_DIR) {
strcpy( to, unique_rm_dir );
} else if (opt == RM_UNI_DIR) {
strcpy( to, unique_rm_uni_dir );
}
}
#ifdef _HAS_S3
/*
* This function is used to check S3 error codes including Curl return codes
* and HTTP codes.
*
*/
void check_S3_error( CURLcode curl_return, IOBuf *s3_buf, int action )
{
if ( curl_return == CURLE_OK ) {
if (action == S3_CREATE || action == S3_STAT ) {
if (s3_buf->code != HTTP_OK) {
printf("HTTP Code: %d\n", s3_buf->code);
FAIL("CURL OK but problem with HTTP return");
}
}
else {// action == S3_DELETE
if (s3_buf->code != HTTP_NO_CONTENT) {
printf("HTTP Code: %d\n", s3_buf->code);
FAIL("CURL OK but problem with HTTP return when deleting");
}
}
}
else {
printf("Curl Return Code: %d\n", curl_return);
FAIL("Bad return for Curl call");
}
}
#endif
/* helper for creating/removing items */
void create_remove_items_helper(int dirs,
int create, char* path, unsigned long long itemNum) {
unsigned long long i;
char curr_item[MAX_LEN];
#ifdef _HAS_PLFS
int open_flags;
plfs_error_t plfs_ret;
ssize_t bytes_written;
int num_ref;
#endif
#ifdef _HAS_HDFS
int open_flags;
#endif
#ifdef _HAS_S3
CURLcode rv;
char bucket[MAX_LEN];
// char object[MAX_LEN];
#endif
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering create_remove_items_helper...\n" );
fflush( stdout );
}
for (i=0; i<items_per_dir; i++) {
if (dirs) {
if (create) {
if (( rank == 0 ) &&
( verbose >= 3 ) &&
((itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0))) {
printf("V-3: create dir: %llu\n", itemNum+i);
fflush(stdout);
}
//create dirs
// printf("%llu %d %s\n", itemNum, i, mk_name);
sprintf(curr_item, "%s%sdir%s%s%llu", path,dir_slash,file_dot,mk_name, itemNum+i);
if (rank == 0 && verbose >= 3) {
printf("V-3: create_remove_items_helper (dirs create): curr_item is \"%s\"\n", curr_item);
fflush(stdout);
printf("%llu %llu\n", itemNum, i);
}
# ifdef _HAS_HDFS
if ( hdfsCreateDirectory(hd_fs, curr_item) == -1 ) {
FAIL("Unable to create test directory path");
}
#elif defined _HAS_S3
rv = s3_create_bucket ( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset(bf);
#else
if(mdtest_mkdir(curr_item, DIRMODE) != MDTEST_SUCCESS) {
FAIL("Unable to make directory");
}
#endif
/*
* !create
*/
} else {
if (( rank == 0 ) &&
( verbose >= 3 ) &&
((itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0))) {
printf("V-3: remove dir: %llu\n", itemNum+i);
fflush(stdout);
}
//remove dirs
sprintf(curr_item, "%s%sdir%s%s%llu", path,dir_slash,file_dot,rm_name, itemNum+i);
if (rank == 0 && verbose >= 3) {
printf("V-3: create_remove_items_helper (dirs remove): curr_item is \"%s\"\n", curr_item);
fflush(stdout);
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_rmdir( curr_item );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL("Unable to plfs_rmdir directory");
}
} else {
if (rmdir(curr_item) == -1) {
FAIL("unable to remove directory");
}
}
#elif defined _HAS_HDFS
if ( hdfsDelete(hd_fs, curr_item, 1) == -1 ) {
FAIL("unable to remove directory");
}
#elif defined _HAS_S3
rv = s3_delete_bucket(bf, curr_item);
check_S3_error(rv, bf, S3_DELETE);
aws_iobuf_reset(bf);
#else
if (rmdir(curr_item) == -1) {
FAIL("unable to remove directory");
}
#endif
}
/*
* !dirs
*/
} else {
__attribute__ ((unused)) int fd;
#ifdef _HAS_S3
strcpy(bucket, path);
#endif
if (create) {
if (( rank == 0 ) &&
( verbose >= 3 ) &&
((itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0))) {
printf("V-3: create file: %llu\n", itemNum+i);
fflush(stdout);
}
#ifdef _HAS_S3
if (unique_dir_per_task && !bucket_created ) {
rv = s3_create_bucket ( bf, path );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset(bf);
bucket_created = 1;
}
else if ( rank == 0 ) {
if (!bucket_created) {
rv = s3_create_bucket ( bf, path );
check_S3_error(rv, bf, S3_CREATE);
bucket_created = 1;
}
aws_iobuf_reset(bf);
}
MPI_Barrier(testcomm);
s3_set_bucket(path);
#endif
//create files
#ifdef _HAS_S3
sprintf(curr_item, "file-%s%llu", mk_name, itemNum+i);
#else
sprintf(curr_item, "%s/file.%s%llu", path, mk_name, itemNum+i);
#endif
if (rank == 0 && verbose >= 3) {
printf("V-3: create_remove_items_helper (non-dirs create): curr_item is \"%s\"\n", curr_item);
fflush(stdout);
}
if (collective_creates) {
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (collective): plfs_open...\n" );
fflush( stdout );
}
/*
* If PLFS opens a file as O_RDWR, it suffers a bad performance hit. Looking through the
* code that follows up to the close, this file only gets one write, so we'll open it as
* write-only.
*/
open_flags = O_WRONLY;
wpfd = NULL;
plfs_ret = plfs_open( &wpfd, curr_item, open_flags, rank, FILEMODE, NULL );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_open file" );
}
} else {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (collective): open...\n" );
fflush( stdout );
}
if ((fd = open(curr_item, O_RDWR)) == -1) {
FAIL("unable to open file");
}
}
#elif defined _HAS_HDFS
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (collective): hdfsOpenFile...\n" );
fflush( stdout );
}
open_flags = O_WRONLY;
if ( (hd_file = hdfsOpenFile( hd_fs, curr_item, open_flags, 0, 0, 0)) == NULL) {
FAIL( "Unable to hdfsOpenFile" );
}
#elif defined _HAS_S3
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (collective): S3 create object...\n" );
fflush( stdout );
}
rv = s3_put( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset ( bf );
#else
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (collective): open...\n" );
fflush( stdout );
}
if ((fd = open(curr_item, O_RDWR)) == -1) {
FAIL("unable to open file");
}
#endif
/*
* !collective_creates
*/
} else {
if (shared_file) {
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, shared): plfs_open...\n" );
fflush( stdout );
}
/*
* If PLFS opens a file as O_RDWR, it suffers a bad performance hit. Looking through the
* code that follows up to the close, this file only gets one write, so we'll open it as
* write-only.
*/
open_flags = O_CREAT | O_WRONLY;
wpfd = NULL;
plfs_ret = plfs_open( &wpfd, curr_item, open_flags, rank, FILEMODE, NULL );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_open for create file" );
}
} else {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, shared): open...\n" );
fflush( stdout );
}
if ((fd = open(curr_item, O_CREAT|O_RDWR, FILEMODE)) == -1) {
FAIL("unable to create file");
}
}
#elif defined _HAS_HDFS
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, shared): hdfsOpenFile...\n" );
fflush( stdout );
}
open_flags = O_CREAT | O_WRONLY;
if ( (hd_file = hdfsOpenFile( hd_fs, curr_item, open_flags, 0, 0, 0)) == NULL) {
FAIL( "Unable to hdfsOpenFile" );
}
#elif defined _HAS_S3
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, shared): S3 create object...\n" );
fflush( stdout );
}
rv = s3_put( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset ( bf );
#else
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, shared): open...\n" );
printf( "V-3: %s\n", curr_item );
fflush( stdout );
}
if ((fd = open(curr_item, O_CREAT|O_RDWR, FILEMODE)) == -1) {
FAIL("unable to create file");
}
#endif
/*
* !shared_file
*/
} else {
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, non-shared): plfs_open...\n" );
fflush( stdout );
}
/*
* If PLFS opens a file as O_RDWR, it suffers a bad performance hit. Looking through the
* code that follows up to the close, this file only gets one write, so we'll open it as
* write-only.
*/
open_flags = O_CREAT | O_WRONLY;
wpfd = NULL;
plfs_ret = plfs_open( &wpfd, curr_item, open_flags, rank, FILEMODE, NULL );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_open for create file" );
}
} else {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, non-shared): open...\n" );
fflush( stdout );
}
if ((fd = creat(curr_item, FILEMODE)) == -1) {
FAIL("unable to create file");
}
}
#elif defined _HAS_HDFS
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, non-shared): hdfsOpenFilen...\n" );
fflush( stdout );
}
open_flags = O_CREAT | O_WRONLY;
if ( (hd_file = hdfsOpenFile( hd_fs, curr_item, open_flags, 0, 0, 0)) == NULL) {
FAIL( "Unable to hdfsOpenFile" );
}
#elif defined _HAS_S3
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, non-shared): S3 create object...\n" );
fflush( stdout );
}
rv = s3_put( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset ( bf );
#else
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper (non-collective, non-shared): open...\n" );
fflush( stdout );
}
if ((fd = creat(curr_item, FILEMODE)) == -1) {
FAIL("unable to create file");
}
#endif
}
}
if (write_bytes > 0) {
#ifdef _HAS_PLFS
/*
* According to Bill Loewe, writes are only done one time, so they are always at
* offset 0 (zero).
*/
if ( using_plfs_path ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: plfs_write...\n" );
fflush( stdout );
}
plfs_ret = plfs_write( wpfd, write_buffer, write_bytes, 0, pid, &bytes_written );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_write file" );
}
if ( bytes_written != write_bytes ) {
FAIL( "Did not plfs_write the correct number of bytes to the file" );
}
} else {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: write...\n" );
fflush( stdout );
}
if (write(fd, write_buffer, write_bytes) != write_bytes) {
FAIL("unable to write file");
}
}
#elif defined _HAS_HDFS
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: hdfsWrite:w...\n" );
fflush( stdout );
}
hdfs_ret = hdfsWrite ( hd_fs, hd_file, write_buffer, write_bytes);
if ( hdfs_ret == -1 ) {
FAIL( "Unable to hdfsWrite file" );
}
if ( hdfs_ret != write_bytes ) {
FAIL( "Did not plfs_write the correct number of bytes to the file" );
}
#elif defined _HAS_S3
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: S3 write to object..\n" );
fflush( stdout );
}
//aws_iobuf_append_dynamic(buffer_iter, write_buffer, write_bytes);
aws_iobuf_reset ( bf );
aws_iobuf_append_static(bf, write_buffer, write_bytes);
rv = s3_put( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
#else
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: write...\n" );
fflush( stdout );
}
if (write(fd, write_buffer, write_bytes) != write_bytes) {
FAIL("unable to write file");
}
#endif
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
if ( sync_file ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: plfs_sync...\n" );
fflush( stdout );
}
plfs_ret = plfs_sync( wpfd );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_sync file" );
}
}
} else {
if ( sync_file ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: fsync...\n" );
fflush( stdout );
}
if ( fsync(fd) == -1 ) {
FAIL("unable to sync file");
}
}
}
#elif defined _HAS_HDFS
if ( sync_file ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: plfs_sync...\n" );
fflush( stdout );
}
if ( hdfsFlush(hd_fs, hd_file) == -1) {
FAIL( "Unable to hdfsFlush file" );
}
}
#elif defined _HAS_S3
#else
if ( sync_file ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: fsync...\n" );
fflush( stdout );
}
if ( fsync(fd) == -1 ) {
FAIL("unable to sync file");
}
}
#endif
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: plfs_close...\n" );
fflush( stdout );
}
plfs_ret = plfs_close( wpfd, rank, uid, open_flags, NULL, &num_ref );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_close file" );
}
} else {
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: close...\n" );
fflush( stdout );
}
if (close(fd) == -1) {
FAIL("unable to close file");
}
}
#elif defined _HAS_HDFS
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: plfs_close...\n" );
fflush( stdout );
}
if (hdfsCloseFile(hd_fs, hd_file) == -1) {
FAIL( "Unable to hdfsCloseFilee" );
}
#elif defined _HAS_S3
#else
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items_helper: close...\n" );
fflush( stdout );
}
if (close(fd) == -1) {
FAIL("unable to close file");
}
#endif
/*
* !create
*/
} else {
if (( rank == 0 ) &&
( verbose >= 3 ) &&
((itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0))) {
printf("V-3: remove file: %llu\n", itemNum+i);
fflush(stdout);
}
//remove files
#ifdef _HAS_S3
sprintf(curr_item, "file-%s%llu", rm_name, itemNum+i);
#else
sprintf(curr_item, "%s/file.%s%llu", path, rm_name, itemNum+i);
#endif
if (rank == 0 && verbose >= 3) {
printf("V-3: create_remove_items_helper (non-dirs remove): curr_item is \"%s\"\n", curr_item);
printf("V-3: create_remove_items_helper (non-dirs remove): rm_name is \"%s\"\n", rm_name);
fflush(stdout);
}
if (!(shared_file && rank != 0)) {
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_unlink( curr_item );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_unlink file" );
}
} else {
if (unlink(curr_item) == -1) {
FAIL("unable to unlink file");
}
}
#elif defined _HAS_HDFS
if (hdfsDelete(hd_fs, curr_item, 1) == -1 ) {
FAIL( "Unable to hdfsDelete file" );
}
#elif defined _HAS_S3
s3_set_bucket(bucket);
rv = s3_delete(bf, curr_item);
if (( rank == 0 ) && ( verbose >= 1 )) {
printf("V-3: create_remove_items_helper (bucket remove): curr_item is \"%s\"\n", curr_item);
printf("V-3: create_remove_items_helper (bucket remove): bucket is \"%s\"\n", bucket);
}
aws_iobuf_reset(bf);
#else
if (unlink(curr_item) == -1) {
FAIL("unable to unlink file");
}
#endif
}
}
}
}
}
/* helper function to do collective operations */
void collective_helper(int dirs, int create, char* path, unsigned long long itemNum) {
unsigned long long i;
char curr_item[MAX_LEN];
#ifdef _HAS_PLFS
int open_flags;
plfs_error_t plfs_ret;
int num_ref;
#endif
#ifdef _HAS_HDFS
int open_flags;
#endif
#ifdef _HAS_S3
char bucket[MAX_LEN];
// char object[MAX_LEN];
int rv;
// int bucket_created = 0;
strcpy(bucket, path);
#endif
//MPI_Barrier(testcomm);
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering collective_helper...\n" );
fprintf( stdout, "V-1: Entering collective_helper %s\n", path );
fflush( stdout );
}
for (i=0; i<items_per_dir; i++) {
if (dirs) {
if (create) {
//create dirs
sprintf(curr_item, "%s%sdir%s%s%llu", path, dir_slash,file_dot,mk_name, itemNum+i);
if (rank == 0 && verbose >= 3) {
printf("V-3: create dir : %s\n", curr_item);
fflush(stdout);
}
#ifdef _HAS_HDFS
if ( hdfsCreateDirectory(hd_fs, curr_item) == -1 ) {
FAIL("Unable to create test directory path");
}
#elif defined _HAS_S3
rv = s3_create_bucket ( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset(bf);
#else
if(mdtest_mkdir(curr_item, DIRMODE) != MDTEST_SUCCESS) {
FAIL("Unable to make directory");
}
#endif
} else {
/* remove dirs */
sprintf(curr_item, "%s%sdir%s%s%llu", path, dir_slash,file_dot,rm_name, itemNum+i);
if (rank == 0 && verbose >= 3) {
printf("V-3: remove dir : %s\n", curr_item);
fflush(stdout);
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_rmdir( curr_item );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL("Unable to plfs_rmdir directory");
}
} else {
if (rmdir(curr_item) == -1) {
FAIL("unable to remove directory");
}
}
#elif defined _HAS_HDFS
if ( hdfsDelete(hd_fs, curr_item, 1) == -1 ) {
FAIL("unable to remove directory");
}
#elif defined _HAS_S3
rv = s3_delete_bucket(bf, curr_item);
check_S3_error(rv, bf, S3_DELETE);
aws_iobuf_reset(bf);
#else
if (rmdir(curr_item) == -1) {
FAIL("unable to remove directory");
}
#endif
}
} else { //!dirs
__attribute__ ((unused)) int fd;
if (create) {
#ifdef _HAS_S3
// This code is necessary in order to create buckets prior to creating objects.
// If not unique dir per task, rank 0 creates the bucket and a flag is set
// to record that the bucket has been created.
if (!unique_dir_per_task) {
if (!bucket_created) {
rv = s3_create_bucket ( bf, path );
check_S3_error(rv, bf, S3_CREATE);
bucket_created = 1;
aws_iobuf_reset(bf);
}
}
// else, if unique_dir_per_task, each task creates its own bucket, but only for the first item (i==0)
else if (i==0) {
rv = s3_create_bucket ( bf, path );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset(bf);
}
// else set the bucket based on input parameters
else {
sprintf(curr_item, "%s%sdir%s%s0", path, dir_slash,file_dot,mk_name);
}
s3_set_bucket(path);
//s3_set_bucket(curr_item);
sprintf(curr_item, "file-%s%llu", mk_name, itemNum+i);
#else
sprintf(curr_item, "%s/file.%s%llu", path, mk_name, itemNum+i);
#endif
if (rank == 0 && verbose >= 3) {
printf("V-3: create file: %s\n", curr_item);
fflush(stdout);
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
open_flags = O_CREAT | O_WRONLY;
cpfd = NULL;
plfs_ret = plfs_open( &cpfd, curr_item, open_flags, rank, FILEMODE, NULL );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_open for create file" );
}
} else {
if ((fd = creat(curr_item, FILEMODE)) == -1) {
FAIL("unable to create file");
}
}
#elif defined _HAS_HDFS
open_flags = O_CREAT | O_WRONLY;
if ( (hd_file = hdfsOpenFile( hd_fs, curr_item, open_flags, 0, 0, 0)) == NULL) {
FAIL( "Unable to hdfsOpenFile" );
}
#elif defined _HAS_S3
rv = s3_put( bf, curr_item );
check_S3_error(rv, bf, S3_CREATE);
aws_iobuf_reset ( bf );
#else
if ((fd = creat(curr_item, FILEMODE)) == -1) {
FAIL("unable to create file");
}
#endif
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_close( cpfd, rank, uid, open_flags, NULL, &num_ref );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_close file" );
}
} else {
if (close(fd) == -1) {
FAIL("unable to close file");
}
}
#elif defined _HAS_HDFS
if (hdfsCloseFile(hd_fs, hd_file) == -1) {
FAIL( "Unable to hdfsCloseFilee" );
}
#elif defined _HAS_S3
/* No meaning to this operation on S3 */
#else
if (close(fd) == -1) {
FAIL("unable to close file");
}
#endif
} else { //remove files
sprintf(curr_item, "%s%sfile%s%s%llu", path,dir_slash,file_dot, rm_name, itemNum+i);
if (rank == 0 && verbose >= 3) {
printf("V-3: remove file: curr_item is \"%s\"\n", curr_item);
fflush(stdout);
}
if (!(shared_file && rank != 0)) {
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_unlink( curr_item );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_unlink file" );
}
} else {
if (unlink(curr_item) == -1) {
FAIL("unable to unlink file");
}
}
#elif defined _HAS_HDFS
if (hdfsDelete(hd_fs, curr_item, 1) == -1 ) {
FAIL( "Unable to hdfsDelete file" );
}
#elif defined _HAS_S3
sprintf(curr_item, "file-%s%llu", mk_name, itemNum+i);
s3_set_bucket(bucket);
rv = s3_delete(bf, curr_item);
aws_iobuf_reset(bf);
#else
if (unlink(curr_item) == -1) {
FAIL("unable to unlink file");
}
#endif
}
}
}
}
}
/* recursive function to create and remove files/directories from the
directory tree */
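/*
 * Directories in the tree are numbered so that the children of directory
 * d are d*branch_factor+1 .. d*branch_factor+branch_factor (and the
 * parent of d is (d-1)/branch_factor). For example, with branch_factor=2
 * and depth=2: root 0; level 1 holds 1 and 2; level 2 holds 3,4 (under 1)
 * and 5,6 (under 2).
 */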
void create_remove_items(int currDepth, int dirs, int create, int collective,
char *path, unsigned long long dirNum) {
int i;
char dir[MAX_LEN];
char temp_path[MAX_LEN];
unsigned long long currDir = dirNum;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering create_remove_items, currDepth = %d...\n", currDepth );
fflush( stdout );
}
memset(dir, 0, MAX_LEN);
strcpy(temp_path, path);
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items (start): temp_path is \"%s\"\n", temp_path );
fflush(stdout);
}
if (currDepth == 0) {
/* create items at this depth */
if (!leaf_only || (depth == 0 && leaf_only)) {
if (collective) {
collective_helper(dirs, create, temp_path, 0);
} else {
create_remove_items_helper(dirs, create, temp_path, 0);
}
}
if (depth > 0) {
create_remove_items(++currDepth, dirs, create,
collective, temp_path, ++dirNum);
}
} else if (currDepth <= depth) {
/* iterate through the branches */
for (i=0; i<branch_factor; i++) {
/* determine the current branch and append it to the path */
sprintf(dir, "%s%s%llu/", base_tree_name, file_dot, currDir);
strcat(temp_path, "/");
strcat(temp_path, dir);
if (rank == 0 && verbose >= 3) {
printf( "V-3: create_remove_items (for loop): temp_path is \"%s\"\n", temp_path );
fflush(stdout);
}
/* create the items in this branch */
if (!leaf_only || (leaf_only && currDepth == depth)) {
if (collective) {
collective_helper(dirs, create, temp_path, currDir*items_per_dir);
} else {
create_remove_items_helper(dirs, create, temp_path, currDir*items_per_dir);
}
}
/* make the recursive call for the next level below this branch */
create_remove_items(
++currDepth,
dirs,
create,
collective,
temp_path,
( currDir * ( unsigned long long )branch_factor ) + 1 );
currDepth--;
/* reset the path */
strcpy(temp_path, path);
currDir++;
}
}
}
/* stats all of the items created as specified by the input parameters */
void mdtest_stat(int random, int dirs, char *path) {
__attribute__ ((unused)) struct stat buf;
unsigned long long i, parent_dir, item_num = 0;
char item[MAX_LEN], temp[MAX_LEN];
#ifdef _HAS_PLFS
plfs_error_t plfs_ret;
#endif
#ifdef _HAS_S3
int rv;
char bucket[MAX_LEN];
char object[MAX_LEN];
#endif
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering mdtest_stat...\n" );
fflush( stdout );
}
/* determine the number of items to stat*/
unsigned long long stop = 0;
if (leaf_only) {
stop = items_per_dir * ( unsigned long long )pow( branch_factor, depth );
} else {
stop = items;
}
/* iterate over all of the item IDs */
for (i = 0; i < stop; i++) {
/*
* It doesn't make sense to pass the address of the array because that would
* be like passing char **. Tested it on a Cray and it seems to work either
* way, but it seems that it is correct without the "&".
*
memset(&item, 0, MAX_LEN);
*/
memset(item, 0, MAX_LEN);
memset(temp, 0, MAX_LEN);
/* determine the item number to stat */
if (random) {
item_num = rand_array[i];
} else {
item_num = i;
}
/* make adjustments if in leaf only mode*/
if (leaf_only) {
item_num += items_per_dir *
(num_dirs_in_tree - ( unsigned long long )pow( branch_factor, depth ));
}
/* create name of file/dir to stat */
if (dirs) {
if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) {
printf("V-3: stat dir: %llu\n", i);
fflush(stdout);
}
sprintf(item, "dir%s%s%llu", file_dot, stat_name, item_num);
} else {
if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) {
printf("V-3: stat file: %llu\n", i);
fflush(stdout);
}
sprintf(item, "file%s%s%llu", file_dot, stat_name, item_num);
}
/* determine the path to the file/dir to be stat'ed */
parent_dir = item_num / items_per_dir;
if (parent_dir > 0) { //item is not in tree's root directory
/* prepend parent directory to item's path */
sprintf(temp, "%s%s%llu%s%s", base_tree_name, file_dot, parent_dir, dir_slash, item);
strcpy(item, temp);
//still not at the tree's root dir
while (parent_dir > branch_factor) {
parent_dir = (unsigned long long) ((parent_dir-1) / branch_factor);
sprintf(temp, "%s%s%llu%s%s", base_tree_name, file_dot, parent_dir, dir_slash, item);
strcpy(item, temp);
}
}
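/*
 * Example (with the default "." separator): items_per_dir=10 and
 * branch_factor=2 place item 57 in directory 5, whose parent is
 * (5-1)/2 = 2, itself a child of the root, yielding
 * mdtest_tree.2/mdtest_tree.5/file.<name>57 under "path".
 */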
/* Now get item to have the full path */
sprintf( temp, "%s%s%s", path, dir_slash, item );
#ifdef _HAS_S3
if (!dirs) {
strcpy( bucket, path);
strcpy( object, item);
}
#endif
strcpy( item, temp );
/* below temp used to be hiername */
if (rank == 0 && verbose >= 3) {
if (dirs) {
printf("V-3: mdtest_stat dir : %s\n", item);
} else {
printf("V-3: mdtest_stat file: %s\n", item);
}
fflush(stdout);
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_getattr( NULL, item, &buf, 0 );
if ( plfs_ret != PLFS_SUCCESS ) {
if (dirs) {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing directory \"%s\"\n", item );
fflush( stdout );
}
FAIL( "Unable to plfs_getattr directory" );
} else {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing file \"%s\"\n", item );
fflush( stdout );
}
FAIL( "Unable to plfs_getattr file" );
}
}
} else {
if (stat(item, &buf) == -1) {
if (dirs) {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing directory \"%s\"\n", item );
fflush( stdout );
}
FAIL("unable to stat directory");
} else {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing file \"%s\"\n", item );
fflush( stdout );
}
FAIL("unable to stat file");
}
}
}
#elif defined _HAS_HDFS
hdfsFileInfo *file_info;
file_info = hdfsGetPathInfo(hd_fs, item);
if ( file_info == NULL ) {
if (dirs) {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing directory \"%s\"\n", item );
fflush( stdout );
}
FAIL( "Unable to hdfsGetPathInfo for directory" );
} else {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing file \"%s\"\n", item );
fflush( stdout );
}
FAIL( "Unable to hdfsGetPathInfo for file" );
}
}
#elif defined _HAS_S3
if (dirs) {
rv = s3_stat_bucket( bf, item);
check_S3_error(rv, bf, S3_STAT);
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing bucket \"%s\"\n", item );
fflush( stdout );
}
}
else {
rv = s3_stat_object (bf, bucket, object);
check_S3_error(rv, bf, S3_STAT);
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing file %s in bucket %s\n", object, bucket );
fflush( stdout );
}
}
aws_iobuf_reset(bf);
#else
if (stat(item, &buf) == -1) {
if (dirs) {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing directory \"%s\"\n", item );
fflush( stdout );
}
FAIL("unable to stat directory");
} else {
if ( verbose >= 3 ) {
fprintf( stdout, "V-3: Stat'ing file \"%s\"\n", item );
fflush( stdout );
}
FAIL("unable to stat file");
}
}
#endif
}
}
/* reads all of the items created as specified by the input parameters */
void mdtest_read(int random, int dirs, char *path) {
unsigned long long i, parent_dir, item_num = 0;
__attribute__ ((unused)) int fd;
char item[MAX_LEN], temp[MAX_LEN];
#ifdef _HAS_PLFS
plfs_error_t plfs_ret;
ssize_t bytes_read;
int num_ref;
#endif
#ifdef _HAS_S3
int rv;
s3_set_bucket(path);
char object[MAX_LEN];
#endif
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering mdtest_read...\n" );
fprintf( stdout, "V-1: mdtest_read path = %s\n", path );
fflush( stdout );
}
/* allocate read buffer */
if (read_bytes > 0) {
read_buffer = (char *)malloc(read_bytes);
if (read_buffer == NULL) {
FAIL("out of memory");
}
}
/* determine the number of items to read */
unsigned long long stop = 0;
if (leaf_only) {
stop = items_per_dir * ( unsigned long long )pow( branch_factor, depth );
} else {
stop = items;
}
/* iterate over all of the item IDs */
for (i = 0; i < stop; i++) {
/*
* It doesn't make sense to pass the address of the array because that would
* be like passing char **. Tested it on a Cray and it seems to work either
* way, but it seems that it is correct without the "&".
*
memset(&item, 0, MAX_LEN);
*/
memset(item, 0, MAX_LEN);
memset(temp, 0, MAX_LEN);
/* determine the item number to read */
if (random) {
item_num = rand_array[i];
} else {
item_num = i;
}
/* make adjustments if in leaf only mode*/
if (leaf_only) {
item_num += items_per_dir *
(num_dirs_in_tree - ( unsigned long long )pow( branch_factor, depth ));
}
/* create name of file to read */
if (dirs) {
; /* N/A */
} else {
if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) {
printf("V-3: read file: %llu\n", i);
fflush(stdout);
}
sprintf(item, "file%s%s%llu", file_dot, read_name, item_num);
}
/* determine the path to the file/dir to be read */
parent_dir = item_num / items_per_dir;
if (parent_dir > 0) { //item is not in tree's root directory
/* prepend parent directory to item's path */
sprintf(temp, "%s%s%llu%s%s", base_tree_name, file_dot, parent_dir, dir_slash, item);
strcpy(item, temp);
/* still not at the tree's root dir */
while (parent_dir > branch_factor) {
parent_dir = (unsigned long long) ((parent_dir-1) / branch_factor);
sprintf(temp, "%s%s%llu%s%s", base_tree_name, file_dot, parent_dir, dir_slash, item);
strcpy(item, temp);
}
}
/* Now get item to have the full path */
#ifdef _HAS_S3
strcpy(object, item);
#endif
sprintf( temp, "%s%s%s", path, dir_slash, item );
strcpy( item, temp );
/* below temp used to be hiername */
if (rank == 0 && verbose >= 3) {
if (dirs) {
;
} else {
printf("V-3: mdtest_read file: %s\n", item);
}
fflush(stdout);
}
/* open file for reading */
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
/*
* If PLFS opens a file as O_RDWR, it suffers a bad performance hit. Looking through the
* code that follows up to the close, this file only gets one read, so we'll open it as
* read-only.
*/
rpfd = NULL;
plfs_ret = plfs_open( &rpfd, item, O_RDONLY, rank, FILEMODE, NULL );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_open for read file" );
}
} else {
if ((fd = open(item, O_RDWR, FILEMODE)) == -1) {
FAIL("unable to open file");
}
}
#elif defined _HAS_HDFS
if ( (hd_file = hdfsOpenFile( hd_fs, item, O_RDONLY, 0, 0, 0)) == NULL) {
FAIL( "Unable to hdfsOpenFile" );
}
#elif defined _HAS_S3
/* Do Nothing */
#else
if ((fd = open(item, O_RDWR, FILEMODE)) == -1) {
FAIL("unable to open file");
}
#endif
/* read file */
if (read_bytes > 0) {
#ifdef _HAS_PLFS
/*
* According to Bill Loewe, reads are only done one time, so they are always at
* offset 0 (zero).
*/
if ( using_plfs_path ) {
plfs_ret = plfs_read( rpfd, read_buffer, read_bytes, 0, &bytes_read );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_read file" );
}
if ( bytes_read != read_bytes ) {
FAIL( "Did not plfs_read the correct number of bytes from the file" );
}
} else {
if (read(fd, read_buffer, read_bytes) != read_bytes) {
FAIL("unable to read file");
}
}
#elif defined _HAS_HDFS
hdfs_ret = hdfsRead( hd_fs, hd_file, read_buffer, read_bytes);
if ( hdfs_ret == -1 ) {
FAIL( "Unable to hdfsRead file" );
}
if ( hdfs_ret != read_bytes ) {
FAIL( "Did not plfs_read the correct number of bytes from the file" );
}
#elif defined _HAS_S3
aws_iobuf_reset(bf);
aws_iobuf_extend_dynamic(bf, read_buffer, read_bytes);
rv = s3_get(bf, object);
check_S3_error(rv, bf, S3_STAT);
aws_iobuf_reset(bf);
#else
if (read(fd, read_buffer, read_bytes) != read_bytes) {
FAIL("unable to read file");
}
#endif
}
/* close file */
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_close( rpfd, rank, uid, O_RDONLY, NULL, &num_ref );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "Unable to plfs_close file" );
}
} else {
if (close(fd) == -1) {
FAIL("unable to close file");
}
}
#elif defined _HAS_HDFS
if (hdfsCloseFile(hd_fs, hd_file) == -1) {
FAIL( "Unable to hdfsCloseFilee" );
}
#elif defined _HAS_S3
/* Do Nothing */
#else
if (close(fd) == -1) {
FAIL("unable to close file");
}
#endif
}
}
/* This function is called by rank 0 only; it then performs all of
the creates and removes on behalf of the other ranks */
void collective_create_remove(int create, int dirs, int ntasks, char *path) {
int i;
char temp[MAX_LEN];
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering collective_create_remove...\n" );
fflush( stdout );
}
/* rank 0 does all of the creates and removes for all of the ranks */
for (i=0; i<ntasks; i++) {
memset(temp, 0, MAX_LEN);
strcpy(temp, testdir);
//strcat(temp, "/");
strcat(temp, dir_slash);
/* set the base tree name appropriately */
if (unique_dir_per_task) {
sprintf(base_tree_name, "mdtest_tree%s%d", file_dot, i);
} else {
sprintf(base_tree_name, "mdtest_tree");
}
/* Setup to do I/O to the appropriate test dir */
strcat(temp, base_tree_name);
#ifdef _HAS_S3
strcat(temp, "-0");
#else
strcat(temp, ".0");
#endif
/* set all item names appropriately */
if (!shared_file) {
sprintf(mk_name, "mdtest%s%d%s", file_dot, (i+(0*nstride))%ntasks, file_dot);
sprintf(stat_name, "mdtest%s%d%s", file_dot, (i+(1*nstride))%ntasks, file_dot);
sprintf(read_name, "mdtest%s%d%s", file_dot, (i+(2*nstride))%ntasks, file_dot);
sprintf(rm_name, "mdtest%s%d%s", file_dot, (i+(3*nstride))%ntasks, file_dot);
}
if (unique_dir_per_task) {
sprintf(unique_mk_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(i+(0*nstride))%ntasks, file_dot);
sprintf(unique_chdir_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(i+(1*nstride))%ntasks, file_dot);
sprintf(unique_stat_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(i+(2*nstride))%ntasks, file_dot);
sprintf(unique_read_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(i+(3*nstride))%ntasks, file_dot);
sprintf(unique_rm_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(i+(4*nstride))%ntasks, file_dot );
sprintf(unique_rm_uni_dir, "%s", testdir);
}
/* Now that everything is set up as it should be, do the create or remove */
if (rank == 0 && verbose >= 3) {
printf("V-3: collective_create_remove (create_remove_items): temp is \"%s\"\n", temp);
fflush( stdout );
}
create_remove_items(0, dirs, create, 1, temp, 0);
}
/* reset all of the item names */
if (unique_dir_per_task) {
sprintf(base_tree_name, "mdtest_tree.0");
} else {
sprintf(base_tree_name, "mdtest_tree");
}
if (!shared_file) {
sprintf(mk_name, "mdtest%s%d%s", file_dot, (0+(0*nstride))%ntasks, file_dot);
sprintf(stat_name, "mdtest%s%d%s", file_dot, (0+(1*nstride))%ntasks, file_dot);
sprintf(read_name, "mdtest%s%d%s", file_dot, (0+(2*nstride))%ntasks, file_dot);
sprintf(rm_name, "mdtest%s%d%s", file_dot, (0+(3*nstride))%ntasks, file_dot);
}
if (unique_dir_per_task) {
sprintf(unique_mk_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(0+(0*nstride))%ntasks, file_dot);
sprintf(unique_chdir_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(0+(1*nstride))%ntasks, file_dot);
sprintf(unique_stat_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(0+(2*nstride))%ntasks, file_dot);
sprintf(unique_read_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(0+(3*nstride))%ntasks, file_dot);
sprintf(unique_rm_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash, file_dot,
(0+(4*nstride))%ntasks, file_dot);
sprintf(unique_rm_uni_dir, "%s", testdir);
}
}
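/*
 * In both directory_test and file_test, t[0]..t[4] bracket the four
 * phases (create, stat, read, remove); each phase's rate is computed as
 * items*size / (t[n+1] - t[n]).
 */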
void directory_test(int iteration, int ntasks, char *path) {
int size;
double t[5] = {0};
char temp_path[MAX_LEN];
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering directory_test...\n" );
fflush( stdout );
}
MPI_Barrier(testcomm);
t[0] = MPI_Wtime();
/* create phase */
if(create_only) {
if (unique_dir_per_task) {
unique_dir_access(MK_UNI_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 0);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: directory_test: create path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* "touch" the files */
if (collective_creates) {
if (rank == 0) {
collective_create_remove(1, 1, ntasks, temp_path);
}
} else {
/* create directories */
create_remove_items(0, 1, 1, 0, temp_path, 0);
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[1] = MPI_Wtime();
/* stat phase */
if (stat_only) {
if (unique_dir_per_task) {
unique_dir_access(STAT_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 1);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: directory_test: stat path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* stat directories */
if (random_seed > 0) {
mdtest_stat(1, 1, temp_path);
} else {
mdtest_stat(0, 1, temp_path);
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[2] = MPI_Wtime();
/* read phase */
if (read_only) {
if (unique_dir_per_task) {
unique_dir_access(READ_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 2);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: directory_test: read path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* read directories */
if (random_seed > 0) {
; /* N/A */
} else {
; /* N/A */
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[3] = MPI_Wtime();
if (remove_only) {
if (unique_dir_per_task) {
unique_dir_access(RM_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 3);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: directory_test: remove directories path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* remove directories */
if (collective_creates) {
if (rank == 0) {
collective_create_remove(0, 1, ntasks, temp_path);
}
} else {
create_remove_items(0, 1, 0, 0, temp_path, 0);
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[4] = MPI_Wtime();
if (remove_only) {
if (unique_dir_per_task) {
unique_dir_access(RM_UNI_DIR, temp_path);
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: directory_test: remove unique directories path is \"%s\"\n", temp_path );
fflush( stdout );
}
}
if (unique_dir_per_task && !time_unique_dir_overhead) {
offset_timers(t, 4);
}
MPI_Comm_size(testcomm, &size);
/* calculate times */
if (create_only) {
summary_table[iteration].entry[0] = items*size/(t[1] - t[0]);
} else {
summary_table[iteration].entry[0] = 0;
}
if (stat_only) {
summary_table[iteration].entry[1] = items*size/(t[2] - t[1]);
} else {
summary_table[iteration].entry[1] = 0;
}
if (read_only) {
summary_table[iteration].entry[2] = items*size/(t[3] - t[2]);
} else {
summary_table[iteration].entry[2] = 0;
}
if (remove_only) {
summary_table[iteration].entry[3] = items*size/(t[4] - t[3]);
} else {
summary_table[iteration].entry[3] = 0;
}
if (verbose >= 1 && rank == 0) {
printf("V-1: Directory creation: %14.3f sec, %14.3f ops/sec\n",
t[1] - t[0], summary_table[iteration].entry[0]);
printf("V-1: Directory stat : %14.3f sec, %14.3f ops/sec\n",
t[2] - t[1], summary_table[iteration].entry[1]);
/* N/A
printf("V-1: Directory read : %14.3f sec, %14.3f ops/sec\n",
t[3] - t[2], summary_table[iteration].entry[2]);
*/
printf("V-1: Directory removal : %14.3f sec, %14.3f ops/sec\n",
t[4] - t[3], summary_table[iteration].entry[3]);
fflush(stdout);
}
}
void file_test(int iteration, int ntasks, char *path) {
int size;
double t[5] = {0};
char temp_path[MAX_LEN];
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering file_test...\n" );
fflush( stdout );
}
MPI_Barrier(testcomm);
t[0] = MPI_Wtime();
/* create phase */
if (create_only) {
if (unique_dir_per_task) {
unique_dir_access(MK_UNI_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 0);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: file_test: create path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* "touch" the files */
if (collective_creates) {
if (rank == 0) {
collective_create_remove(1, 0, ntasks, temp_path);
}
MPI_Barrier(testcomm);
}
// xxxatxxx: I think the following line is a bug. If collective, it should not call this after calling
// collective_create_remove. Making this an else statement for now, but it needs further testing.
else {
/* create files */
create_remove_items(0, 0, 1, 0, temp_path, 0);
}
}
//printf("XXX rank %d after create and write\n", rank);
fflush( stdout );
if (barriers) {
MPI_Barrier(testcomm);
}
t[1] = MPI_Wtime();
/* stat phase */
if (stat_only) {
if (unique_dir_per_task) {
unique_dir_access(STAT_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 1);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: file_test: stat path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* stat files */
if (random_seed > 0) {
mdtest_stat(1,0,temp_path);
} else {
mdtest_stat(0,0,temp_path);
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[2] = MPI_Wtime();
/* read phase */
if (read_only) {
if (unique_dir_per_task) {
unique_dir_access(READ_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 2);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: file_test: read path is \"%s\"\n", temp_path );
fflush( stdout );
}
/* read files */
if (random_seed > 0) {
mdtest_read(1,0,temp_path);
} else {
mdtest_read(0,0,temp_path);
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[3] = MPI_Wtime();
if (remove_only) {
if (unique_dir_per_task) {
unique_dir_access(RM_SUB_DIR, temp_path);
if (!time_unique_dir_overhead) {
offset_timers(t, 3);
}
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: file_test: rm directories path is \"%s\"\n", temp_path );
fflush( stdout );
}
if (collective_creates) {
if (rank == 0) {
collective_create_remove(0, 0, ntasks, temp_path);
}
} else {
create_remove_items(0, 0, 0, 0, temp_path, 0);
}
}
if (barriers) {
MPI_Barrier(testcomm);
}
t[4] = MPI_Wtime();
if (remove_only) {
if (unique_dir_per_task) {
unique_dir_access(RM_UNI_DIR, temp_path);
} else {
strcpy( temp_path, path );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: file_test: rm unique directories path is \"%s\"\n", temp_path );
fflush( stdout );
}
}
if (unique_dir_per_task && !time_unique_dir_overhead) {
offset_timers(t, 4);
}
MPI_Comm_size(testcomm, &size);
/* calculate times */
if (create_only) {
summary_table[iteration].entry[4] = items*size/(t[1] - t[0]);
} else {
summary_table[iteration].entry[4] = 0;
}
if (stat_only) {
summary_table[iteration].entry[5] = items*size/(t[2] - t[1]);
} else {
summary_table[iteration].entry[5] = 0;
}
if (read_only) {
summary_table[iteration].entry[6] = items*size/(t[3] - t[2]);
} else {
summary_table[iteration].entry[6] = 0;
}
if (remove_only) {
summary_table[iteration].entry[7] = items*size/(t[4] - t[3]);
} else {
summary_table[iteration].entry[7] = 0;
}
if (verbose >= 1 && rank == 0) {
printf("V-1: File creation : %14.3f sec, %14.3f ops/sec\n",
t[1] - t[0], summary_table[iteration].entry[4]);
printf("V-1: File stat : %14.3f sec, %14.3f ops/sec\n",
t[2] - t[1], summary_table[iteration].entry[5]);
printf("V-1: File read : %14.3f sec, %14.3f ops/sec\n",
t[3] - t[2], summary_table[iteration].entry[6]);
printf("V-1: File removal : %14.3f sec, %14.3f ops/sec\n",
t[4] - t[3], summary_table[iteration].entry[7]);
fflush(stdout);
}
}
void print_help() {
char * opts[] = {
"Usage: mdtest [-a S3_userid] [-A S3 IP/Hostname] [-b branching factor]",
" [-B] [-c] [-C] [-d testdir] [-D] [-e number_of_bytes_to_read]",
" [-E] [-f first] [-F] [-g S3 bucket identifier] [-h] [-i iterations]",
" [-I items_per_dir] [-l last] [-L] [-M] [-n number_of_items] [-N stride_length]",
" [-p seconds] [-r] [-R[seed]] [-s stride] [-S] [-t] [-T] [-u] [-v]",
" [-V verbosity_value] [-w number_of_bytes_to_write] [-y] [-z depth]",
"\t-a: userid for S3 target device",
"\t-A: IP or hostname for S3 target device",
"\t-b: branching factor of hierarchical directory structure",
"\t-B: no barriers between phases",
"\t-c: collective creates: task 0 does all creates",
"\t-C: only create files/dirs",
"\t-d: the directory in which the tests will run",
"\t-D: perform test on directories only (no files)",
"\t-e: bytes to read from each file",
"\t-E: only read files/dir",
"\t-f: first number of tasks on which the test will run",
"\t-F: perform test on files only (no directories)",
"\t-g: integer identifier added to bucket name for uniqueness",
"\t-h: prints this help message",
"\t-i: number of iterations the test will run",
"\t-I: number of items per directory in tree",
"\t-l: last number of tasks on which the test will run",
"\t-L: files only at leaf level of tree",
"\t-M: every process will stripe directory creation across LUSTRE MDTS",
"\t-n: every process will creat/stat/read/remove # directories and files",
"\t-N: stride # between neighbor tasks for file/dir operation (local=0)",
"\t-p: pre-iteration delay (in seconds)",
"\t-r: only remove files or directories left behind by previous runs",
"\t-R: randomly stat files (optional argument for random seed)",
"\t-s: stride between the number of tasks for each test",
"\t-S: shared file access (file only, no directories)",
"\t-t: time unique working directory overhead",
"\t-T: only stat files/dirs",
"\t-u: unique working directory for each task",
"\t-v: verbosity (each instance of option increments by one)",
"\t-V: verbosity value",
"\t-w: bytes to write to each file after it is created",
"\t-y: sync file after writing",
"\t-z: depth of hierarchical directory structure",
""
};
int i, j;
for (i = 0; strlen(opts[i]) > 0; i++)
printf("%s\n", opts[i]);
fflush(stdout);
MPI_Initialized(&j);
if (j) {
MPI_Finalize();
}
exit(0);
}
void summarize_results(int iterations) {
char access[MAX_LEN];
int i, j, k;
int start, stop, tableSize = 10;
double min, max, mean, sd, sum = 0, var = 0, curr = 0;
double all[iterations * size * tableSize];
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering summarize_results...\n" );
fflush( stdout );
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Gather(&summary_table->entry[0], tableSize*iterations,
MPI_DOUBLE, all, tableSize*iterations, MPI_DOUBLE,
0, MPI_COMM_WORLD);
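/*
 * Layout of "all" after the gather:
 * all[(k*iterations + j)*tableSize + i] holds rank k's rate for
 * iteration j, table entry i.
 */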
if (rank == 0) {
printf("\nSUMMARY: (of %d iterations)\n", iterations);
printf(
" Operation Max Min Mean Std Dev\n");
printf(
" --------- --- --- ---- -------\n");
fflush(stdout);
/* if files only access, skip entries 0-3 (the dir tests) */
if (files_only && !dirs_only) {
start = 4;
} else {
start = 0;
}
/* if directories only access, skip entries 4-7 (the file tests) */
if (dirs_only && !files_only) {
stop = 4;
} else {
stop = 8;
}
/* special case: if no directory or file tests, skip all */
if (!dirs_only && !files_only) {
start = stop = 0;
}
/* calculate aggregates */
if (barriers) {
double maxes[iterations];
/* Because each proc times itself, in the case of barriers we
* have to backwards calculate the time to simulate the use
* of barriers.
*/
for (i = start; i < stop; i++) {
for (j=0; j<iterations; j++) {
maxes[j] = all[j*tableSize + i];
for (k=0; k<size; k++) {
curr = all[(k*tableSize*iterations)
+ (j*tableSize) + i];
if (maxes[j] < curr) {
maxes[j] = curr;
}
}
}
min = max = maxes[0];
for (j=0; j<iterations; j++) {
if (min > maxes[j]) {
min = maxes[j];
}
if (max < maxes[j]) {
max = maxes[j];
}
sum += maxes[j];
}
mean = sum / iterations;
for (j=0; j<iterations; j++) {
var += pow((mean - maxes[j]), 2);
}
var = var / iterations;
sd = sqrt(var);
switch (i) {
case 0: strcpy(access, "Directory creation:"); break;
case 1: strcpy(access, "Directory stat :"); break;
/* case 2: strcpy(access, "Directory read :"); break; */
case 2: ; break; /* N/A */
case 3: strcpy(access, "Directory removal :"); break;
case 4: strcpy(access, "File creation :"); break;
case 5: strcpy(access, "File stat :"); break;
case 6: strcpy(access, "File read :"); break;
case 7: strcpy(access, "File removal :"); break;
default: strcpy(access, "ERR"); break;
}
if (i != 2) {
printf(" %s ", access);
printf("%14.3f ", max);
printf("%14.3f ", min);
printf("%14.3f ", mean);
printf("%14.3f\n", sd);
fflush(stdout);
}
sum = var = 0;
}
} else {
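/*
 * Without barriers (-B), each rank's rate is an independent sample, so
 * the statistics are taken over all size*iterations individual rates
 * rather than over per-iteration maxima.
 */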
for (i = start; i < stop; i++) {
min = max = all[i];
for (k=0; k < size; k++) {
for (j = 0; j < iterations; j++) {
curr = all[(k*tableSize*iterations)
+ (j*tableSize) + i];
if (min > curr) {
min = curr;
}
if (max < curr) {
max = curr;
}
sum += curr;
}
}
mean = sum / (iterations * size);
for (k=0; k<size; k++) {
for (j = 0; j < iterations; j++) {
var += pow((mean - all[(k*tableSize*iterations)
+ (j*tableSize) + i]), 2);
}
}
var = var / (iterations * size);
sd = sqrt(var);
switch (i) {
case 0: strcpy(access, "Directory creation:"); break;
case 1: strcpy(access, "Directory stat :"); break;
/* case 2: strcpy(access, "Directory read :"); break; */
case 2: ; break; /* N/A */
case 3: strcpy(access, "Directory removal :"); break;
case 4: strcpy(access, "File creation :"); break;
case 5: strcpy(access, "File stat :"); break;
case 6: strcpy(access, "File read :"); break;
case 7: strcpy(access, "File removal :"); break;
default: strcpy(access, "ERR"); break;
}
if (i != 2) {
printf(" %s ", access);
printf("%14.3f ", max);
printf("%14.3f ", min);
printf("%14.3f ", mean);
printf("%14.3f\n", sd);
fflush(stdout);
}
sum = var = 0;
}
}
/* calculate tree create/remove rates */
for (i = 8; i < tableSize; i++) {
min = max = all[i];
for (j = 0; j < iterations; j++) {
curr = summary_table[j].entry[i];
if (min > curr) {
min = curr;
}
if (max < curr) {
max = curr;
}
sum += curr;
}
mean = sum / (iterations);
for (j = 0; j < iterations; j++) {
var += pow((mean - summary_table[j].entry[i]), 2);
}
var = var / (iterations);
sd = sqrt(var);
switch (i) {
case 8: strcpy(access, "Tree creation :"); break;
case 9: strcpy(access, "Tree removal :"); break;
default: strcpy(access, "ERR"); break;
}
printf(" %s ", access);
printf("%14.3f ", max);
printf("%14.3f ", min);
printf("%14.3f ", mean);
printf("%14.3f\n", sd);
fflush(stdout);
sum = var = 0;
}
}
}
/* Checks to see if the test setup is valid. If it isn't, fail. */
void valid_tests() {
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering valid_tests...\n" );
fflush( stdout );
}
/* if dirs_only and files_only were both left unset, set both now */
if (!dirs_only && !files_only) {
dirs_only = files_only = 1;
}
/* if shared file 'S' access, no directory tests */
if (shared_file) {
dirs_only = 0;
}
/* check for no barriers with shifting processes for different phases.
that is, one may not specify both -B and -N as it will introduce
race conditions that may cause errors stat'ing or deleting after
creates.
*/
if (( barriers == 0 ) && ( nstride != 0 ) && ( rank == 0 )) {
FAIL( "Possible race conditions will occur: -B not compatible with -N");
}
/* check for collective_creates incompatibilities */
if (shared_file && collective_creates && rank == 0) {
FAIL("-c not compatible with -S");
}
if (path_count > 1 && collective_creates && rank == 0) {
FAIL("-c not compatible with multiple test directories");
}
if (collective_creates && !barriers) {
FAIL("-c not compatible with -B");
}
/* check for shared file incompatibilities */
if (unique_dir_per_task && shared_file && rank == 0) {
FAIL("-u not compatible with -S");
}
/* check multiple directory paths and strided option */
if (path_count > 1 && nstride > 0) {
FAIL("cannot have multiple directory paths with -N strides between neighbor tasks");
}
/* check for shared directory and multiple directories incompatibility */
if (path_count > 1 && unique_dir_per_task != 1) {
FAIL("shared directory mode is not compatible with multiple directory paths");
}
/* check if more directory paths than ranks */
if (path_count > size) {
FAIL("cannot have more directory paths than MPI tasks");
}
/* check depth */
if (depth < 0) {
FAIL("depth must be greater than or equal to zero");
}
/* check branch_factor */
if (branch_factor < 1 && depth > 0) {
FAIL("branch factor must be greater than or equal to zero");
}
/* check for valid number of items */
if ((items > 0) && (items_per_dir > 0)) {
FAIL("only specify the number of items or the number of items per directory");
}
#ifdef _HAS_S3
if (branch_factor > 1 || depth > 0) {
FAIL("Cannot specify branch factor or depth when using S3 interface");
}
if (dirs_only && files_only) {
FAIL("Must specify files (objects) only (-F) or dirs (buckets) only (-D) when using S3 interface");
}
if (s3_host_ip == NULL || s3_host_id == NULL) {
FAIL("Must specify s3 host ip (-A) and s3 host userid (-a)");
}
#endif
}
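/*
 * Query the file system holding "file_system" and print its total size,
 * used-space percentage, and inode usage in human-readable units.
 */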
void show_file_system_size(char *file_system) {
char real_path[MAX_LEN];
char file_system_unit_str[MAX_LEN] = "GiB";
char inode_unit_str[MAX_LEN] = "Mi";
long long int file_system_unit_val = 1024 * 1024 * 1024;
long long int inode_unit_val = 1024 * 1024;
long long int total_file_system_size = 0;
long long int free_file_system_size = 0;
long long int total_inodes = 0;
long long int free_inodes = 0;
double total_file_system_size_hr,
used_file_system_percentage,
used_inode_percentage;
__attribute__ ((unused)) struct statfs status_buffer;
#ifdef _HAS_PLFS
struct statvfs stbuf;
plfs_error_t plfs_ret;
#endif
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering show_file_system_size...\n" );
fflush( stdout );
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
/*
printf( "Detected that file system, \"%s\" is a PLFS file system.\n", file_system );
*/
plfs_ret = plfs_statvfs( file_system, &stbuf );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL( "unable to plfs_statvfs() file system" );
}
} else {
/*
printf( "Detected that file system, \"%s\" is a regular file system.\n", file_system );
*/
if ( statfs( file_system, &status_buffer ) != 0 ) {
FAIL("unable to statfs() file system");
}
}
#elif _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
/* Do Nothing */
#else
if (statfs(file_system, &status_buffer) != 0) {
FAIL("unable to statfs() file system");
}
#endif
/* data blocks */
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
total_file_system_size = stbuf.f_blocks * stbuf.f_bsize;
free_file_system_size = stbuf.f_bfree * stbuf.f_bsize;
} else {
total_file_system_size = status_buffer.f_blocks * status_buffer.f_bsize;
free_file_system_size = status_buffer.f_bfree * status_buffer.f_bsize;
}
#elif _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
/* Do Nothing */
#else
total_file_system_size = status_buffer.f_blocks * status_buffer.f_bsize;
free_file_system_size = status_buffer.f_bfree * status_buffer.f_bsize;
#endif
used_file_system_percentage = (1 - ((double)free_file_system_size
/ (double)total_file_system_size)) * 100;
total_file_system_size_hr = (double)total_file_system_size
/ (double)file_system_unit_val;
if (total_file_system_size_hr > 1024) {
total_file_system_size_hr = total_file_system_size_hr / 1024;
strcpy(file_system_unit_str, "TiB");
}
/* inodes */
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
total_inodes = stbuf.f_files;
free_inodes = stbuf.f_ffree;
} else {
total_inodes = status_buffer.f_files;
free_inodes = status_buffer.f_ffree;
}
#elif _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
/* Do Nothing */
#else
total_inodes = status_buffer.f_files;
free_inodes = status_buffer.f_ffree;
#endif
used_inode_percentage = (1 - ((double)free_inodes/(double)total_inodes))
* 100;
/* show results */
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
strcpy( real_path, file_system );
} else {
if (realpath(file_system, real_path) == NULL) {
FAIL("unable to use realpath()");
}
}
#elif _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
/* Do Nothing */
#else
if (realpath(file_system, real_path) == NULL) {
FAIL("unable to use realpath()");
}
#endif
fprintf(stdout, "Path: %s\n", real_path);
fprintf(stdout, "FS: %.1f %s Used FS: %2.1f%% ",
total_file_system_size_hr, file_system_unit_str,
used_file_system_percentage);
fprintf(stdout, "Inodes: %.1f %s Used Inodes: %2.1f%%\n",
(double)total_inodes / (double)inode_unit_val,
inode_unit_str, used_inode_percentage);
fflush(stdout);
return;
}
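/*
 * Strip the last path component from testdirpath and report the free
 * space of the file system containing that directory.
 */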
void display_freespace(char *testdirpath)
{
char dirpath[MAX_LEN] = {0};
int i;
int directoryFound = 0;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering display_freespace...\n" );
fflush( stdout );
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: testdirpath is \"%s\"\n", testdirpath );
fflush( stdout );
}
strcpy(dirpath, testdirpath);
/* get directory for outfile */
i = strlen(dirpath);
while (i-- > 0) {
if (dirpath[i] == '/') {
dirpath[i] = '\0';
directoryFound = 1;
break;
}
}
/* if no directory/, use '.' */
if (directoryFound == 0) {
strcpy(dirpath, ".");
}
if (verbose >= 3 && rank == 0) {
printf( "V-3: Before show_file_system_size, dirpath is \"%s\"\n", dirpath );
fflush( stdout );
}
show_file_system_size(dirpath);
if (verbose >= 3 && rank == 0) {
printf( "V-3: After show_file_system_size, dirpath is \"%s\"\n", dirpath );
fflush( stdout );
}
return;
}
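/*
 * Recursively create (create != 0) or remove (create == 0) the directory
 * tree rooted at "path". Each directory below the root gets branch_factor
 * children; the children of directory N are numbered (branch_factor * N) + 1
 * through (branch_factor * N) + branch_factor. Directories are created on
 * the way down the recursion and removed on the way back up, so they are
 * empty when rmdir is called.
 */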
void create_remove_directory_tree(int create,
int currDepth, char* path, int dirNum) {
int i;
char dir[MAX_LEN];
#ifdef _HAS_PLFS
plfs_error_t plfs_ret;
#endif
#ifdef _HAS_S3
int rv;
#endif
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: Entering create_remove_directory_tree, currDepth = %d...\n", currDepth );
fflush( stdout );
}
// fprintf( stdout, "Rank: %d Current Depth: %d\n", rank, currDepth);
if (currDepth == 0) {
#ifdef _HAS_S3
sprintf(dir, "%s%s%s%s%d", path, dir_slash, base_tree_name, file_dot, dirNum);
#else
sprintf(dir, "%s%s%s%s%d%s", path, dir_slash, base_tree_name, file_dot, dirNum, dir_slash);
#endif
if (create) {
if (rank == 0 && verbose >= 2) {
printf("V-2: Making directory \"%s\"\n", dir);
fflush(stdout);
}
#ifdef _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
/* Do Nothing */
#else
MDTS_stripe_on = 1; /* Only stripe the top level directory */
if(mdtest_mkdir(dir, DIRMODE) != MDTEST_SUCCESS) {
FAIL("Unable to make directory");
}
MDTS_stripe_on = 0; /* Stop so we only stripe the top level */
#endif
// NOTE: it appears Blair moved this recursive call here, where it used to
// be after the "if (create)" block. Not sure why, but the MDT changes will
// be tested to see if that is the reason. A.T. 6/25
// create_remove_directory_tree(create, ++currDepth, dir, ++dirNum);
}
create_remove_directory_tree(create, ++currDepth, dir, ++dirNum);
if (!create) {
if (rank == 0 && verbose >= 2) {
printf("V-2: Remove directory \"%s\"\n", dir);
fflush(stdout);
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_rmdir( dir );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL("Unable to plfs_rmdir directory");
}
} else {
if (rmdir(dir) == -1) {
FAIL("Unable to remove directory");
}
}
#elif _HAS_HDFS
/* Do nothing */
#elif _HAS_S3
if (files_only) { // check this because S3 does not have
// levels of directories (buckets)
s3_set_bucket(NULL);
rv = s3_delete_bucket(bf, dir);
check_S3_error(rv, bf, S3_DELETE); /* check the result, as the removal path below does */
aws_iobuf_reset(bf);
}
#else
if (rmdir(dir) == -1) {
FAIL("Unable to remove directory");
}
#endif
}
} else if (currDepth <= depth) {
char temp_path[MAX_LEN];
strcpy(temp_path, path);
int currDir = dirNum;
for (i=0; i<branch_factor; i++) {
#ifdef _HAS_S3
sprintf(dir, "%s%s%d", base_tree_name, file_dot, currDir);
#else
/* the unconditional duplicate of this sprintf that followed the #endif
 * defeated the S3 branch above; it has been removed */
sprintf(dir, "%s%s%d%s", base_tree_name, file_dot, currDir, dir_slash);
#endif
strcat(temp_path, dir);
if (create) {
if (rank == 0 && verbose >= 2) {
printf("V-2: Making directory \"%s\"\n", temp_path);
fflush(stdout);
}
#ifdef _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
/* Do Nothing */
#else
if(mdtest_mkdir(temp_path, DIRMODE) != MDTEST_SUCCESS) {
FAIL("Unable to make directory");
}
#endif
}
create_remove_directory_tree(create, ++currDepth,
temp_path, (branch_factor*currDir)+1);
currDepth--;
if (!create) {
if (rank == 0 && verbose >= 2) {
printf("V-2: Remove directory \"%s\"\n", temp_path);
fflush(stdout);
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
plfs_ret = plfs_rmdir( temp_path );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL("Unable to plfs_rmdir directory");
}
} else {
if (rmdir(temp_path) == -1) {
FAIL("Unable to remove directory");
}
}
#elif _HAS_HDFS
/* Do Nothing */
#elif _HAS_S3
rv = s3_delete_bucket(bf, temp_path);
check_S3_error(rv, bf, S3_DELETE);
aws_iobuf_reset(bf);
#else
if (rmdir(temp_path) == -1) {
FAIL("Unable to remove directory");
}
#endif
}
strcpy(temp_path, path);
currDir++;
}
}
}
int main(int argc, char **argv) {
int i, j, k, c;
int nodeCount;
MPI_Group worldgroup, testgroup;
struct {
int first;
int last;
int stride;
} range = {0, 0, 1};
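/* Triplet handed to MPI_Group_range_incl: ranks first..last by stride. */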
int first = 1;
int last = 0;
int stride = 1;
int iterations = 1;
/* --- initialize a connection-builder, holding parameters for hdfsBuilderConnect() */
#ifdef _HAS_HDFS
struct hdfsBuilder* builder = hdfsNewBuilder();
if ( ! builder ) {
fprintf(stderr, "couldn't create an hdsfsBuilder");
exit (1);
}
/* see dfs.ha.namenodes.glome in /etc/hdfs-site.xml */
// hdfsBuilderSetNameNode ( builder, "gl-io02-ib0" );
hdfsBuilderSetNameNode ( builder, "default" );
//
// hdfsBuilderSetNameNodePort( builder, 50070 );
// hdfsBuilderSetNameNodePort( builder, 9000 );
// hdfsBuilderSetNameNodePort( builder, 8020 );
//
hdfsBuilderSetUserName ( builder, "hadoop" ); // "jti" also works
//
#endif
#ifdef _HAS_S3
aws_init();
aws_set_debug( 0 );
#endif
/* Check for -h parameter before MPI_Init so the mdtest binary can be
called directly, without, for instance, mpirun. */
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) {
print_help();
}
}
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
#ifdef _HAS_PLFS
pid = getpid();
uid = getuid();
plfs_error_t plfs_ret;
#endif
#ifdef _HAS_HDFS
hd_fs = hdfsBuilderConnect(builder);
if ( !hd_fs ) {
FAIL("Unable to to peform HDFS connect");
exit(1);
}
#endif
nodeCount = size / count_tasks_per_node();
if (rank == 0) {
printf("-- started at %s --\n\n", timestamp());
printf("mdtest-%s was launched with %d total task(s) on %d node(s)\n",
RELEASE_VERS, size, nodeCount);
fflush(stdout);
}
if (rank == 0) {
fprintf(stdout, "Command line used:");
for (i = 0; i < argc; i++) {
fprintf(stdout, " %s", argv[i]);
}
fprintf(stdout, "\n");
fflush(stdout);
}
/* Parse command line options */
while (1) {
#ifdef _HAS_S3
c = getopt(argc, argv, "a:A:b:BcCd:De:Ef:Fg:hi:I:l:Ln:N:p:rR::s:StTuvV:w:yz:");
#else
c = getopt(argc, argv, "b:BcCd:De:Ef:Fhi:I:l:LMn:N:p:rR::s:StTuvV:w:yz:");
#endif
if (c == -1) {
break;
}
switch (c) {
#ifdef _HAS_S3
case 'a':
s3_host_id = strdup(optarg);
break;
case 'A':
s3_host_ip = strdup(optarg);
break;
case 'g':
ident = atoi(optarg);
break;
#endif
case 'b':
branch_factor = atoi(optarg); break;
case 'B':
barriers = 0; break;
case 'c':
collective_creates = 1; break;
case 'C':
create_only = 1; break;
case 'd':
parse_dirpath(optarg); break;
case 'D':
dirs_only = 1; break;
case 'e':
read_bytes = ( size_t )strtoul( optarg, ( char ** )NULL, 10 ); break;
//read_bytes = atoi(optarg); break;
case 'E':
read_only = 1; break;
case 'f':
first = atoi(optarg); break;
case 'F':
files_only = 1; break;
case 'h':
print_help(); break;
case 'i':
iterations = atoi(optarg); break;
case 'I':
items_per_dir = ( unsigned long long )strtoul( optarg, ( char ** )NULL, 10 ); break;
//items_per_dir = atoi(optarg); break;
case 'l':
last = atoi(optarg); break;
case 'L':
leaf_only = 1; break;
case 'M': { /* Auto fill the meta data target indexes */
/* Need all of the nodes to succeed at this one otherwise fail */
mdts = malloc(sizeof(struct MDTS));
if(!mdts) {
FAIL("No memory for MDTS struct ");
}
mdts->indexes = NULL;
mdts->num = 0;
mdts->max = 0;
/* Have rank 0 figure out what MDTS are available */
if(rank == 0) {
char buf[1024];
fflush(stdout);
FILE* mdsList = popen("lfs mdts | grep ACTIVE |cut -d : -f 1", "r");
if(mdsList == NULL) {
fprintf(stderr,"lfs mdts failed, ignoring -M flag\n");
/* MPI BCAST NULL RESULT */
mdts->num = 0;
MPI_Bcast((void*) &mdts->num, 1, MPI_INT, 0, MPI_COMM_WORLD);
free(mdts);
mdts = NULL;
break;
}
/* Some starting space; assumes a small number of MDTS. */
mdts->indexes = malloc(sizeof(unsigned int) * 10); /* Generic starting size of 10 */
if(!mdts->indexes) {
fprintf(stderr,"Out of memory for MetaData target indexes, ignoring -M flag\n");
/* MPI BCAST NULL RESULT */
mdts->num = 0;
MPI_Bcast((void*) &mdts->num, 1, MPI_INT, 0, MPI_COMM_WORLD);
free(mdts);
mdts = NULL;
pclose(mdsList);
break;
}
mdts->max = 10;
unsigned int* temp = NULL;
while(fgets(buf, sizeof(buf),mdsList)) {
if(mdts->max == mdts->num) {
temp = realloc(mdts->indexes, mdts->max * 2);
if(!temp) {
fprintf(stderr,"Ran out of memory for MetaData targets, ignoring -M flag");
/* realloc failure leaves the block, so we need to free it */
free(mdts->indexes);
free(mdts);
mdts = NULL;
/* MPI BCAST NULL RESULT */
mdts->num = 0;
MPI_Bcast((void*) &mdts->num, 1 , MPI_INTEGER, 0, MPI_COMM_WORLD);
}
/* Realloc Moved the block */
if(temp != mdts->indexes) {
/* Realloc will free the old memory, but we have to change the pointer */
mdts->indexes = temp;
}
mdts->max = mdts->max * 2;
}
sscanf(buf, "%d", (mdts->indexes + mdts->num));
/* Because of the weirdness of buffers with popen, the output of the command
* is actually read twice. I guess due to the buffering change of stdout when it is
* directed to a file.(totally a guess!), but the output is in acending order of index
* so we just need to check if the MDTS index we just read is < the number of indexes
*/
if(mdts->indexes[mdts->num] < mdts->num)
break;
++mdts->num;
}
pclose(mdsList);
/* MPI BCAST NUMBER OF MDTS RESULT (zero when anything above failed) */
int found = mdts ? (int)mdts->num : 0;
MPI_Bcast((void*) &found, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* Check and see if we actually sent anything */
if(found == 0) {
fprintf(stderr,"No MetaData Targets found, ignoring -M flag\n");
if(mdts) {
free(mdts->indexes);
free(mdts);
mdts = NULL;
}
break; /* Exit before we broadcast again, no one is listening. */
}
else {
/* We have results to share, so let's share them. */
MPI_Bcast((void*) mdts->indexes, mdts->num, MPI_INT, 0, MPI_COMM_WORLD);
}
}
/* The not rank zero nodes */
else {
/* See if there are any records to get */
MPI_Bcast((void*) &mdts->num, 1 , MPI_INT , 0, MPI_COMM_WORLD);
if(mdts->num == 0) { /* Failure case, but Ignore the flag, don't FAIL */
free(mdts);
mdts = NULL;
break;
}
else {
mdts->max = mdts->num;
mdts->indexes = malloc(sizeof(int) * mdts->num);
if(!mdts->indexes) { /* FAIL because all nodes need to succeed at this */
FAIL("Unable to allocate memory for MDTS indexes ");
}
/* Collect the indexes of the available MDTS */
MPI_Bcast((void*) mdts->indexes, mdts->num, MPI_INT, 0, MPI_COMM_WORLD);
}
}
unique_dir_per_task = 1; /* Unique dirs so we don't bottleneck on one MDTS */
break;
}
case 'n':
items = ( unsigned long long )strtoul( optarg, ( char ** )NULL, 10 ); break;
//items = atoi(optarg); break;
case 'N':
nstride = atoi(optarg); break;
case 'p':
pre_delay = atoi(optarg); break;
case 'r':
remove_only = 1; break;
case 'R':
if (optarg == NULL) {
random_seed = time(NULL);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Bcast(&random_seed, 1, MPI_INT, 0, MPI_COMM_WORLD);
random_seed += rank;
} else {
random_seed = atoi(optarg)+rank;
}
break;
case 's':
stride = atoi(optarg); break;
case 'S':
shared_file = 1; break;
case 't':
time_unique_dir_overhead = 1; break;
case 'T':
stat_only = 1; break;
case 'u':
unique_dir_per_task = 1; break;
case 'v':
verbose += 1; break;
case 'V':
verbose = atoi(optarg); break;
case 'w':
write_bytes = ( size_t )strtoul( optarg, ( char ** )NULL, 10 ); break;
//write_bytes = atoi(optarg); break;
case 'y':
sync_file = 1; break;
case 'z':
depth = atoi(optarg); break;
}
}
if (!create_only && !stat_only && !read_only && !remove_only) {
create_only = stat_only = read_only = remove_only = 1;
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "V-1: main: Setting create/stat/read/remove_only to True\n" );
fflush( stdout );
}
}
valid_tests();
#ifdef _HAS_S3
// aws_init();
// aws_set_debug( 0 );
int rc = aws_read_config (s3_host_id);
if ( rc ) {
fprintf(stderr, "Unable to read aws config file\n");
exit (1);
}
s3_set_host ( s3_host_ip );
aws_reuse_connections(1);
bf = aws_iobuf_new();
#endif
if (( rank == 0 ) && ( verbose >= 1 )) {
fprintf( stdout, "barriers : %s\n", ( barriers ? "True" : "False" ));
fprintf( stdout, "collective_creates : %s\n", ( collective_creates ? "True" : "False" ));
fprintf( stdout, "create_only : %s\n", ( create_only ? "True" : "False" ));
fprintf( stdout, "dirpath(s):\n" );
for ( i = 0; i < path_count; i++ ) {
fprintf( stdout, "\t%s\n", filenames[i] );
}
fprintf( stdout, "dirs_only : %s\n", ( dirs_only ? "True" : "False" ));
fprintf( stdout, "read_bytes : %zu\n", read_bytes );
fprintf( stdout, "read_only : %s\n", ( read_only ? "True" : "False" ));
fprintf( stdout, "first : %d\n", first );
fprintf( stdout, "files_only : %s\n", ( files_only ? "True" : "False" ));
fprintf( stdout, "iterations : %d\n", iterations );
fprintf( stdout, "items_per_dir : %llu\n", items_per_dir );
fprintf( stdout, "last : %d\n", last );
fprintf( stdout, "leaf_only : %s\n", ( leaf_only ? "True" : "False" ));
fprintf( stdout, "items : %llu\n", items );
fprintf( stdout, "nstride : %d\n", nstride );
fprintf( stdout, "pre_delay : %d\n", pre_delay );
fprintf( stdout, "remove_only : %s\n", ( leaf_only ? "True" : "False" ));
fprintf( stdout, "random_seed : %d\n", random_seed );
fprintf( stdout, "stride : %d\n", stride );
fprintf( stdout, "shared_file : %s\n", ( shared_file ? "True" : "False" ));
fprintf( stdout, "time_unique_dir_overhead: %s\n", ( time_unique_dir_overhead ? "True" : "False" ));
fprintf( stdout, "stat_only : %s\n", ( stat_only ? "True" : "False" ));
fprintf( stdout, "unique_dir_per_task : %s\n", ( unique_dir_per_task ? "True" : "False" ));
fprintf( stdout, "write_bytes : %zu\n", write_bytes );
fprintf( stdout, "sync_file : %s\n", ( sync_file ? "True" : "False" ));
fprintf( stdout, "depth : %d\n", depth );
fflush( stdout );
}
/* setup total number of items and number of items per dir */
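/*
 * num_dirs_in_tree is the geometric series 1 + b + b^2 + ... + b^depth
 * = (1 - b^(depth+1)) / (1 - b) for branch factor b > 1; for example,
 * branch_factor = 2 and depth = 2 give 7 directories.
 */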
if (depth <= 0) {
num_dirs_in_tree = 1;
} else {
if (branch_factor < 1) {
num_dirs_in_tree = 1;
} else if (branch_factor == 1) {
num_dirs_in_tree = depth + 1;
} else {
num_dirs_in_tree =
(1 - pow(branch_factor, depth+1)) / (1 - branch_factor);
}
}
if (items_per_dir > 0) {
items = items_per_dir * num_dirs_in_tree;
} else {
if (leaf_only) {
if (branch_factor <= 1) {
items_per_dir = items;
} else {
items_per_dir = items / pow(branch_factor, depth);
items = items_per_dir * pow(branch_factor, depth);
}
} else {
items_per_dir = items / num_dirs_in_tree;
items = items_per_dir * num_dirs_in_tree;
}
}
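/*
 * Note: when -n is given, the integer division above rounds items down
 * so that it is an exact multiple of items_per_dir.
 */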
/* initialize rand_array */
if (random_seed > 0) {
srand(random_seed);
unsigned long long stop = 0;
unsigned long long s;
if (leaf_only) {
stop = items_per_dir * ( unsigned long long )pow(branch_factor, depth);
} else {
stop = items;
}
rand_array = (unsigned long long *) malloc( stop * sizeof( unsigned long long ));
if (rand_array == NULL) {
FAIL("out of memory");
}
for (s=0; s<stop; s++) {
rand_array[s] = s;
}
/* shuffle list randomly */
unsigned long long n = stop;
while (n>1) {
n--;
/*
* Generate a random number in the range 0 .. n
*
* rand() returns a number from 0 .. RAND_MAX. Divide that
* by RAND_MAX and you get a floating point number in the
* range 0 .. 1. Multiply that by n and you get a number in
* the range 0 .. n.
*/
unsigned long long k =
( unsigned long long ) ((( double )rand() / ( double )RAND_MAX ) * ( double )n );
/*
* Now move the nth element to the kth (randomly chosen)
* element, and the kth element to the nth element.
*/
unsigned long long tmp = rand_array[k];
rand_array[k] = rand_array[n];
rand_array[n] = tmp;
}
}
/* allocate and initialize write buffer with # */
if (write_bytes > 0) {
write_buffer = (char *)malloc(write_bytes);
if (write_buffer == NULL) {
FAIL("out of memory");
}
memset(write_buffer, 0x23, write_bytes);
}
#ifdef _HAS_S3
// fixed name for now - bucket will be comprised of this + testdir
// Check if user specified identifier (-g) in arguments so that bucket can
// be uniquely named
if (ident == -1) {
sprintf(testdirpath, "%s", "mdtest-S3");
}
else {
sprintf(testdirpath, "%s%d", "mdtest-S3",ident);
}
#else
/* setup directory path to work in */
if (path_count == 0) { /* special case where no directory path provided with '-d' option */
if (getcwd(testdirpath, MAX_LEN) == NULL) {
FAIL("unable to get current working directory");
}
path_count = 1;
} else {
strcpy(testdirpath, filenames[rank%path_count]);
}
#endif
#ifdef _HAS_PLFS
using_plfs_path = is_plfs_path( testdirpath );
#endif
/* if directory does not exist, create it */
if(rank < path_count) {
#ifdef _HAS_HDFS
if ( hdfsExists(hd_fs, testdirpath) == -1 ){
if ( hdfsCreateDirectory(hd_fs, testdirpath) == -1 ) {
FAIL("Unable to create test directory path");
}
}
#elif _HAS_S3
/* Do Nothing */
#else
if( mdtest_access(testdirpath, F_OK) != MDTEST_SUCCESS) {
if(mdtest_mkdir(testdirpath, DIRMODE) != MDTEST_SUCCESS) {
FAIL("Unable to make test directory path");
}
}
#endif
}
/* display disk usage */
if (verbose >= 3 && rank == 0) {
printf( "V-3: main (before display_freespace): testdirpath is \"%s\"\n", testdirpath );
fflush( stdout );
}
if (rank == 0) display_freespace(testdirpath);
if (verbose >= 3 && rank == 0) {
printf( "V-3: main (after display_freespace): testdirpath is \"%s\"\n", testdirpath );
fflush( stdout );
}
if (rank == 0) {
if (random_seed > 0) {
printf("random seed: %d\n", random_seed);
}
}
if (gethostname(hostname, MAX_LEN) == -1) {
perror("gethostname");
MPI_Abort(MPI_COMM_WORLD, 2);
}
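/* If no last task count was given with -l, run a single pass using all tasks. */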
if (last == 0) {
first = size;
last = size;
}
/* setup summary table for recording results */
summary_table = (table_t *)malloc(iterations * sizeof(table_t));
if (summary_table == NULL) {
FAIL("out of memory");
}
if (unique_dir_per_task) {
sprintf(base_tree_name, "mdtest_tree%s%d", file_dot, rank);
} else {
sprintf(base_tree_name, "mdtest_tree");
}
/* start and end times of directory tree create/remove */
double startCreate, endCreate;
/* default use shared directory */
#ifdef _HAS_S3
strcpy(mk_name, "mdtest-shared-");
strcpy(stat_name, "mdtest-shared-");
strcpy(read_name, "mdtest-shared-");
strcpy(rm_name, "mdtest-shared-");
#else
strcpy(mk_name, "mdtest.shared.");
strcpy(stat_name, "mdtest.shared.");
strcpy(read_name, "mdtest.shared.");
strcpy(rm_name, "mdtest.shared.");
#endif
MPI_Comm_group(MPI_COMM_WORLD, &worldgroup);
/* Run the tests */
for (i = first; i <= last && i <= size; i += stride) {
range.last = i - 1;
MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup);
MPI_Comm_create(MPI_COMM_WORLD, testgroup, &testcomm);
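/* testcomm contains ranks 0..i-1; ranks outside the range get MPI_COMM_NULL. */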
if (rank == 0) {
if (files_only && dirs_only) {
printf("\n%d tasks, %llu files/directories\n", i, i * items);
} else if (files_only) {
if (!shared_file) {
printf("\n%d tasks, %llu files\n", i, i * items);
}
else {
printf("\n%d tasks, 1 file\n", i);
}
} else if (dirs_only) {
printf("\n%d tasks, %llu directories\n", i, i * items);
}
}
if (rank == 0 && verbose >= 1) {
printf("\n");
printf(" Operation Duration Rate\n");
printf(" --------- -------- ----\n");
}
for (j = 0; j < iterations; j++) {
if (rank == 0 && verbose >= 1) {
printf("V-1: main: * iteration %d %s *\n", j+1, timestamp());
fflush(stdout);
}
strcpy(testdir, testdirpath);
if ( testdir[strlen( testdir ) - 1] != '/' ) {
strcat(testdir, dir_slash); // S3 does not allow "/" in bucket names
}
strcat(testdir, TEST_DIR);
sprintf(testdir, "%s%s%d", testdir, file_dot, j); // S3 does not allow "." in bucket names
if (verbose >= 2 && rank == 0) {
printf( "V-2: main (for j loop): making testdir, \"%s\"\n", testdir );
fflush( stdout );
}
if(rank < path_count) {
#ifdef _HAS_HDFS
if ( hdfsExists(hd_fs, testdir) == -1 ){
if ( hdfsCreateDirectory(hd_fs, testdir) == -1 ) {
FAIL("Unable to create test directory path");
}
}
#elif _HAS_S3 // Do nothing if S3 because bucket will be created at lower level
#else
if(mdtest_access(testdir, F_OK) != MDTEST_SUCCESS) {
if(mdtest_mkdir(testdir,DIRMODE) != MDTEST_SUCCESS) {
FAIL("Unable to make test directory");
}
}
#endif
}
MPI_Barrier(MPI_COMM_WORLD);
/* create hierarchical directory structure */
MPI_Barrier(MPI_COMM_WORLD);
if (create_only) {
startCreate = MPI_Wtime();
if (unique_dir_per_task) {
if (collective_creates && (rank == 0)) {
/*
* This is inside two loops, one of which already uses "i" and the other uses "j".
* I don't know how this ever worked. I'm changing this loop to use "k".
*/
for (k=0; k<size; k++) {
sprintf(base_tree_name, "mdtest_tree%s%d", file_dot,k);
if (verbose >= 3 && rank == 0) {
printf(
"V-3: main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with \"%s\"\n",
testdir );
fflush( stdout );
}
/*
* Let's pass in the path to the directory we most recently made so that we can use
* full paths in the other calls.
*/
create_remove_directory_tree(1, 0, testdir, 0);
}
} else if (!collective_creates) {
if (verbose >= 3 && rank == 0) {
printf(
"V-3: main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with \"%s\"\n",
testdir );
fflush( stdout );
}
/*
* Let's pass in the path to the directory we most recently made so that we can use
* full paths in the other calls.
*/
create_remove_directory_tree(1, 0, testdir, 0);
}
} else {
if (rank == 0) {
if (verbose >= 3 && rank == 0) {
printf(
"V-3: main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with \"%s\"\n",
testdir );
fflush( stdout );
}
/*
* Let's pass in the path to the directory we most recently made so that we can use
* full paths in the other calls.
*/
create_remove_directory_tree(1, 0 , testdir, 0);
}
}
MPI_Barrier(MPI_COMM_WORLD);
endCreate = MPI_Wtime();
summary_table[j].entry[8] =
num_dirs_in_tree / (endCreate - startCreate);
if (verbose >= 1 && rank == 0) {
printf("V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec\n",
(endCreate - startCreate), summary_table[j].entry[8]);
fflush(stdout);
}
} else {
summary_table[j].entry[8] = 0;
}
sprintf(unique_mk_dir, "%s%s%s%s0", testdir, dir_slash, base_tree_name, file_dot);
sprintf(unique_chdir_dir, "%s%s%s%s0", testdir, dir_slash, base_tree_name, file_dot);
sprintf(unique_stat_dir, "%s%s%s%s0", testdir, dir_slash, base_tree_name, file_dot);
sprintf(unique_read_dir, "%s%s%s%s0", testdir, dir_slash, base_tree_name, file_dot);
sprintf(unique_rm_dir, "%s%s%s%s0", testdir, dir_slash, base_tree_name, file_dot);
sprintf(unique_rm_uni_dir, "%s", testdir);
if (!unique_dir_per_task) {
if (verbose >= 3 && rank == 0) {
printf( "V-3: main: Using unique_mk_dir, \"%s\"\n", unique_mk_dir );
fflush( stdout );
}
}
if (rank < i) {
if (!shared_file) {
sprintf(mk_name, "mdtest%s%d%s", file_dot, (rank+(0*nstride))%i, file_dot);
sprintf(stat_name, "mdtest%s%d%s", file_dot, (rank+(1*nstride))%i, file_dot);
sprintf(read_name, "mdtest%s%d%s", file_dot, (rank+(2*nstride))%i, file_dot);
sprintf(rm_name, "mdtest%s%d%s", file_dot, (rank+(3*nstride))%i, file_dot);
}
if (unique_dir_per_task) {
sprintf(unique_mk_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash,
file_dot, (rank+(0*nstride))%i, file_dot);
sprintf(unique_chdir_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash,
file_dot, (rank+(1*nstride))%i, file_dot);
sprintf(unique_stat_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash,
file_dot, (rank+(2*nstride))%i, file_dot);
sprintf(unique_read_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash,
file_dot, (rank+(3*nstride))%i, file_dot);
sprintf(unique_rm_dir, "%s%smdtest_tree%s%d%s0", testdir, dir_slash,
file_dot, (rank+(4*nstride))%i, file_dot);
sprintf(unique_rm_uni_dir, "%s", testdir);
}
strcpy(top_dir, unique_mk_dir);
if (verbose >= 3 && rank == 0) {
printf( "V-3: main: Copied unique_mk_dir, \"%s\", to topdir\n", unique_mk_dir );
fflush( stdout );
}
if (dirs_only && !shared_file) {
if (pre_delay) {
delay_secs(pre_delay);
}
directory_test(j, i, unique_mk_dir);
}
if (files_only) {
if (pre_delay) {
delay_secs(pre_delay);
}
file_test(j, i, unique_mk_dir);
}
}
/* remove directory structure */
if (!unique_dir_per_task) {
if (verbose >= 3 && rank == 0) {
printf( "V-3: main: Using testdir, \"%s\"\n", testdir );
fflush( stdout );
}
}
MPI_Barrier(MPI_COMM_WORLD);
if (remove_only) {
startCreate = MPI_Wtime();
if (unique_dir_per_task) {
if (collective_creates && (rank == 0)) {
/*
* This is inside two loops, one of which already uses "i" and the other uses "j".
* I don't know how this ever worked. I'm changing this loop to use "k".
*/
for (k=0; k<size; k++) {
sprintf(base_tree_name, "mdtest_tree%s%d", file_dot,k);
if (verbose >= 3 && rank == 0) {
printf(
"V-3: main (remove hierarchical directory loop-collective): Calling create_remove_directory_tree with \"%s\"\n",
testdir );
fflush( stdout );
}
/*
* Let's pass in the path to the directory we most recently made so that we can use
* full paths in the other calls.
*/
create_remove_directory_tree(0, 0, testdir, 0);
}
} else if (!collective_creates) {
if (verbose >= 3 && rank == 0) {
printf(
"V-3: main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with \"%s\"\n",
testdir );
fflush( stdout );
}
/*
* Let's pass in the path to the directory we most recently made so that we can use
* full paths in the other calls.
*/
create_remove_directory_tree(0, 0, testdir, 0);
}
} else {
if (rank == 0) {
if (verbose >= 3 && rank == 0) {
printf(
"V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with \"%s\"\n",
testdir );
fflush( stdout );
}
/*
* Let's pass in the path to the directory we most recently made so that we can use
* full paths in the other calls.
*/
create_remove_directory_tree(0, 0 , testdir, 0);
}
}
MPI_Barrier(MPI_COMM_WORLD);
endCreate = MPI_Wtime();
summary_table[j].entry[9] = num_dirs_in_tree
/ (endCreate - startCreate);
if (verbose >= 1 && rank == 0) {
printf("V-1: main Tree removal : %14.3f sec, %14.3f ops/sec\n",
(endCreate - startCreate), summary_table[j].entry[9]);
fflush(stdout);
}
if (( rank == 0 ) && ( verbose >=2 )) {
fprintf( stdout, "V-2: main (at end of for j loop): Removing testdir of \"%s\"\n", testdir );
fflush( stdout );
}
#ifdef _HAS_PLFS
if ( using_plfs_path ) {
if ( rank < path_count ) {
plfs_ret = plfs_access( testdir, F_OK );
if ( plfs_ret == PLFS_SUCCESS ) {
plfs_ret = plfs_rmdir( testdir );
if ( plfs_ret != PLFS_SUCCESS ) {
FAIL("Unable to plfs_rmdir directory");
}
}
}
} else {
if ((rank < path_count) && access(testdir, F_OK) == 0) {
//if (( rank == 0 ) && access(testdir, F_OK) == 0) {
if (rmdir(testdir) == -1) {
FAIL("unable to remove directory");
}
}
}
#elif _HAS_HDFS
if (rank < path_count) {
if ( hdfsExists(hd_fs, testdir) == 0 ){
if ( hdfsDelete(hd_fs, testdir, 1) == -1 ) {
FAIL("Unable to remove directory path");
}
}
}
#elif _HAS_S3
/* Do Nothing */
#else
if ((rank < path_count) && access(testdir, F_OK) == 0) {
//if (( rank == 0 ) && access(testdir, F_OK) == 0) {
if (rmdir(testdir) == -1) {
FAIL("unable to remove directory");
}
}
#endif
} else {
summary_table[j].entry[9] = 0;
}
#ifdef _HAS_S3
bucket_created = 0;
#endif
}
summarize_results(iterations);
if (i == 1 && stride > 1) {
i = 0;
}
}
if (rank == 0) {
printf("\n-- finished at %s --\n", timestamp());
fflush(stdout);
}
#ifdef _HAS_HDFS
if (hdfsDisconnect(hd_fs) == -1 ) {
FAIL("Unable to disconnect from hdfs");
}
#endif
if (random_seed > 0) {
free(rand_array);
}
#ifdef _HAS_S3
aws_iobuf_free(bf);
#endif
/* Free up the last of the memory used */
if(mdts) {
if(mdts->indexes) {
free(mdts->indexes);
}
free(mdts);
}
MPI_Finalize();
exit(0);
}