From c55848f14d2f14ba0cfcd3990793032692e62227 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 24 Jun 2020 10:15:31 +0100 Subject: [PATCH 001/154] Bugfix based on the pull request for fix-189. Should fix #189. --- configure.ac | 2 +- src/aiori-HDFS.c | 2 +- src/aiori-POSIX.c | 16 ++++++++-------- src/aiori-S3.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/configure.ac b/configure.ac index d9f302d..e1b1932 100755 --- a/configure.ac +++ b/configure.ac @@ -84,7 +84,7 @@ AC_ARG_WITH([lustre], [support configurable Lustre striping values @<:@default=check@:>@])], [], [with_lustre=check]) AS_IF([test "x$with_lustre" = xyes ], [ - AC_CHECK_HEADERS([linux/lustre/lustre_user.h lustre/lustre_user.h], break, [ + AC_CHECK_HEADERS([linux/lustre/lustre_user.h lustre/lustre_user.h], [AC_DEFINE([HAVE_LUSTRE_USER], [], [Lustre user API available in some shape or form])], [ if test "x$with_lustre" != xcheck -a \ "x$ac_cv_header_linux_lustre_lustre_user_h" = "xno" -a \ "x$ac_cv_header_lustre_lustre_user_h" = "xno" ; then diff --git a/src/aiori-HDFS.c b/src/aiori-HDFS.c index 2d4dcb1..118de15 100755 --- a/src/aiori-HDFS.c +++ b/src/aiori-HDFS.c @@ -77,7 +77,7 @@ #include #include /* -#ifdef HAVE_LUSTRE_LUSTRE_USER_H +#ifdef HAVE_LUSTRE_USER #include #endif */ diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 283a6ec..615cd9f 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -34,7 +34,7 @@ #ifdef HAVE_LINUX_LUSTRE_LUSTRE_USER_H # include -#elif defined(HAVE_LUSTRE_LUSTRE_USER_H) +#elif defined(HAVE_LUSTRE_USER) # include #endif #ifdef HAVE_GPFS_H @@ -123,7 +123,7 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o {0, "posix.gpfs.releasetoken", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->gpfs_release_token}, #endif -#ifdef HAVE_LUSTRE_LUSTRE_USER_H +#ifdef HAVE_LUSTRE_USER {0, "posix.lustre.stripecount", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_count}, {0, "posix.lustre.stripesize", "", 
OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size}, {0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost}, @@ -387,7 +387,7 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) if(hints->dryRun) return (aiori_fd_t*) 0; -#ifdef HAVE_LUSTRE_LUSTRE_USER_H +#ifdef HAVE_LUSTRE_USER /* Add a #define for FASYNC if not available, as it forms part of * the Lustre O_LOV_DELAY_CREATE definition. */ #ifndef FASYNC @@ -439,7 +439,7 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) "barrier error"); } } else { -#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ +#endif /* HAVE_LUSTRE_USER */ fd_oflag |= O_CREAT | O_RDWR; @@ -463,7 +463,7 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) ERRF("open64(\"%s\", %d, %#o) failed", testFileName, fd_oflag, mode); -#ifdef HAVE_LUSTRE_LUSTRE_USER_H +#ifdef HAVE_LUSTRE_USER } if (o->lustre_ignore_locks) { @@ -471,7 +471,7 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); } -#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ +#endif /* HAVE_LUSTRE_USER */ #ifdef HAVE_GPFS_FCNTL_H /* in the single shared file case, immediately release all locks, with @@ -523,7 +523,7 @@ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) if (*fd < 0) ERRF("open64(\"%s\", %d) failed", testFileName, fd_oflag); -#ifdef HAVE_LUSTRE_LUSTRE_USER_H +#ifdef HAVE_LUSTRE_USER if (o->lustre_ignore_locks) { int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; if (verbose >= VERBOSE_1) { @@ -533,7 +533,7 @@ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) 
failed", *fd); } -#endif /* HAVE_LUSTRE_LUSTRE_USER_H */ +#endif /* HAVE_LUSTRE_USER */ #ifdef HAVE_GPFS_FCNTL_H if(o->gpfs_release_token) { diff --git a/src/aiori-S3.c b/src/aiori-S3.c index a060646..3999739 100755 --- a/src/aiori-S3.c +++ b/src/aiori-S3.c @@ -92,7 +92,7 @@ #include #include /* -#ifdef HAVE_LUSTRE_LUSTRE_USER_H +#ifdef HAVE_LUSTRE_USER #include #endif */ From 0bffd14de78a43c45e88e5e79e7b4d1ddde2ce5d Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 24 Jun 2020 11:10:42 +0100 Subject: [PATCH 002/154] Added --warningAsErrors option to IOR and MDTest and refactored WARNINGs in IOR. #174 --- src/aiori-POSIX.c | 3 +-- src/ior-output.c | 2 +- src/ior.c | 49 +++++++++++++++++---------------------------- src/ior.h | 2 +- src/iordef.h | 42 +++++++++++++++++++------------------- src/mdtest.c | 3 ++- src/parse_options.c | 6 +++--- 7 files changed, 47 insertions(+), 60 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 615cd9f..648b7c1 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -669,8 +669,7 @@ void POSIX_Delete(char *testFileName, aiori_mod_opt_t * param) if(hints->dryRun) return; if (unlink(testFileName) != 0){ - EWARNF("[RANK %03d]: unlink() of file \"%s\" failed\n", - rank, testFileName); + EWARNF("[RANK %03d]: unlink() of file \"%s\" failed", rank, testFileName); } } diff --git a/src/ior-output.c b/src/ior-output.c index b890cd9..25366eb 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -365,7 +365,7 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("storeFileOffset", test->storeFileOffset); PrintKeyValInt("keepFile", test->keepFile); PrintKeyValInt("keepFileWithError", test->keepFileWithError); - PrintKeyValInt("quitOnError", test->quitOnError); + PrintKeyValInt("warningAsErrors", test->warningAsErrors); PrintKeyValInt("verbose", verbose); PrintKeyVal("data packet type", data_packets[test->dataPacketType]); PrintKeyValInt("setTimeStampSignature/incompressibleSeed", test->setTimeStampSignature); /* 
Seed value was copied into setTimeStampSignature as well */ diff --git a/src/ior.c b/src/ior.c index 08f95ef..5d1632c 100755 --- a/src/ior.c +++ b/src/ior.c @@ -78,6 +78,8 @@ static void ior_set_xfer_hints(IOR_param_t * p){ } } +int aiori_warning_as_errors = 0; + static void test_initialize(IOR_test_t * test){ verbose = test->params.verbose; backend = test->params.backend; @@ -85,6 +87,7 @@ static void test_initialize(IOR_test_t * test){ backend->initialize(test->params.backend_options); } ior_set_xfer_hints(& test->params); + aiori_warning_as_errors = test->params.warningAsErrors; if (rank == 0 && verbose >= VERBOSE_0) { ShowTestStart(& test->params); @@ -111,7 +114,6 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out /* setup tests, and validate parameters */ tests_head = ParseCommandLine(argc, argv); InitTests(tests_head, world_com); - verbose = tests_head->params.verbose; PrintHeader(argc, argv); @@ -159,7 +161,6 @@ int ior_main(int argc, char **argv) /* setup tests, and validate parameters */ InitTests(tests_head, mpi_comm_world); - verbose = tests_head->params.verbose; PrintHeader(argc, argv); @@ -281,10 +282,8 @@ DisplayOutliers(int numTasks, if (ret != 0) strcpy(hostname, "unknown"); - fprintf(out_logfile, "WARNING: for %s, task %d, %s %s is %f\n", - hostname, rank, accessString, timeString, timerVal); - fprintf(out_logfile, " (mean=%f, stddev=%f)\n", mean, sd); - fflush(out_logfile); + EWARNF("for %s, task %d, %s %s is %f (mean=%f, stddev=%f)\n", + hostname, rank, accessString, timeString, timerVal, mean, sd); } } @@ -333,18 +332,11 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, != point->aggFileSizeFromXfer) || (point->aggFileSizeFromStat != point->aggFileSizeFromXfer)) { - fprintf(out_logfile, - "WARNING: Expected aggregate file size = %lld.\n", - (long long) params->expectedAggFileSize); - fprintf(out_logfile, - "WARNING: Stat() of aggregate file size = %lld.\n", - (long long) 
point->aggFileSizeFromStat); - fprintf(out_logfile, - "WARNING: Using actual aggregate bytes moved = %lld.\n", - (long long) point->aggFileSizeFromXfer); + EWARNF("Expected aggregate file size = %lld", (long long) params->expectedAggFileSize); + EWARNF("Stat() of aggregate file size = %lld", (long long) point->aggFileSizeFromStat); + EWARNF("Using actual aggregate bytes moved = %lld", (long long) point->aggFileSizeFromXfer); if(params->deadlineForStonewalling){ - fprintf(out_logfile, - "WARNING: maybe caused by deadlineForStonewalling\n"); + EWARN("Maybe caused by deadlineForStonewalling"); } } } @@ -425,8 +417,7 @@ CompareBuffers(void *expectedBuffer, if (inError) { inError = 0; GetTestFileName(testFileName, test); - fprintf(out_logfile, - "[%d] FAILED comparison of buffer containing %d-byte ints:\n", + EWARNF("[%d] FAILED comparison of buffer containing %d-byte ints:\n", rank, (int)sizeof(unsigned long long int)); fprintf(out_logfile, "[%d] File name = %s\n", rank, testFileName); fprintf(out_logfile, "[%d] In transfer %lld, ", rank, @@ -449,8 +440,6 @@ CompareBuffers(void *expectedBuffer, if (j == length) fprintf(out_logfile, "[end of buffer]"); fprintf(out_logfile, "\n"); - if (test->quitOnError == TRUE) - ERR("data check error, aborting execution"); } return (errorCount); } @@ -476,7 +465,7 @@ static int CountErrors(IOR_param_t * test, int access, int errors) WARN("overflow in errors counted"); allErrors = -1; } - fprintf(out_logfile, "WARNING: incorrect data on %s (%d errors found).\n", + EWARNF("Incorrect data on %s (%d errors found).\n", access == WRITECHECK ? 
"write" : "read", allErrors); fprintf(out_logfile, "Used Time Stamp %u (0x%x) for Data Signature\n", @@ -778,7 +767,7 @@ void GetTestFileName(char *testFileName, IOR_param_t * test) strcpy(initialTestFileName, test->testFileName); if(test->dualMount){ GetProcessorAndCore(&socket, &core); - sprintf(tmpString, "%s%d/%s",initialTestFileName, + sprintf(tmpString, "%s%d/%s",initialTestFileName, socket, "data"); strcpy(initialTestFileName, tmpString); } @@ -977,6 +966,9 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) int mpiNumTasks = 0; int mpiNumTasksOnNode0 = 0; + verbose = tests->params.verbose; + aiori_warning_as_errors = tests->params.warningAsErrors; + /* * These default values are the same for every test and expensive to * retrieve so just do it once. @@ -1005,11 +997,9 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) params->numTasks = mpiNumTasks; } else if (params->numTasks > mpiNumTasks) { if (rank == 0) { - fprintf(out_logfile, - "WARNING: More tasks requested (%d) than available (%d),", + EWARNF("More tasks requested (%d) than available (%d),", params->numTasks, mpiNumTasks); - fprintf(out_logfile, " running with %d tasks.\n", - mpiNumTasks); + EWARNF(" running with %d tasks.\n", mpiNumTasks); } params->numTasks = mpiNumTasks; } @@ -1451,7 +1441,7 @@ static void TestIoSys(IOR_test_t *test) if(params->stoneWallingStatusFile){ params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile); if(params->stoneWallingWearOutIterations == -1 && rank == 0){ - fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n"); + WARN("Could not read back the stonewalling status from the file!"); params->stoneWallingWearOutIterations = 0; } } @@ -1637,9 +1627,6 @@ static void ValidateTests(IOR_param_t * test) && (test->blockSize < sizeof(IOR_size_t) || test->transferSize < sizeof(IOR_size_t))) ERR("block/transfer size may not be smaller than IOR_size_t for NCMPI"); - if 
((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt) - WARN_RESET("retry only available in POSIX", - test, &defaults, singleXferAttempt); if (((strcasecmp(test->api, "POSIX") != 0) && (strcasecmp(test->api, "MPIIO") != 0) && (strcasecmp(test->api, "MMAP") != 0) diff --git a/src/ior.h b/src/ior.h index c3d9ad4..a5c34b9 100755 --- a/src/ior.h +++ b/src/ior.h @@ -117,7 +117,6 @@ typedef struct int keepFile; /* don't delete the testfile on exit */ int keepFileWithError; /* don't delete the testfile with errors */ int errorFound; /* error found in data check */ - int quitOnError; /* quit code when error in check */ IOR_offset_t segmentCount; /* number of segments (or HDF5 datasets) */ IOR_offset_t blockSize; /* contiguous bytes to write per task */ IOR_offset_t transferSize; /* size of transfer in bytes */ @@ -175,6 +174,7 @@ typedef struct int id; /* test's unique ID */ int intraTestBarriers; /* barriers between open/op and op/close */ + int warningAsErrors; /* treat any warning as an error */ aiori_xfer_hint_t hints; } IOR_param_t; diff --git a/src/iordef.h b/src/iordef.h index 4c46b29..0805208 100755 --- a/src/iordef.h +++ b/src/iordef.h @@ -115,15 +115,12 @@ enum OutputFormat_t{ #define DELIMITERS " \t\r\n=" /* ReadScript() */ #define FILENAME_DELIMITER '@' /* ParseFileName() */ -/* MACROs for debugging */ -#define HERE fprintf(stdout, "** LINE %d (TASK=%d) **\n", \ - __LINE__, rank); - typedef long long int IOR_offset_t; typedef long long int IOR_size_t; #define IOR_format "%016llx" +extern FILE * out_logfile; /******************************** M A C R O S *********************************/ @@ -134,34 +131,37 @@ typedef long long int IOR_size_t; #define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER) do { \ (TO_STRUCT_PTR)->MEMBER = (FROM_STRUCT_PTR)->MEMBER; \ if (rank == 0) { \ - fprintf(stdout, "ior WARNING: %s. Using value of %d.\n", \ + fprintf(out_logfile, "ior WARNING: %s. 
Using value of %d.\n", \ MSG, (TO_STRUCT_PTR)->MEMBER); \ } \ - fflush(stdout); \ + fflush(out_logfile); \ } while (0) +extern int aiori_warning_as_errors; #define WARN(MSG) do { \ + if(aiori_warning_as_errors){ ERR(MSG); } \ if (verbose > VERBOSE_2) { \ - fprintf(stdout, "ior WARNING: %s, (%s:%d).\n", \ + fprintf(out_logfile, "ior WARNING: %s, (%s:%d).\n", \ MSG, __FILE__, __LINE__); \ } else { \ - fprintf(stdout, "ior WARNING: %s.\n", MSG); \ + fprintf(out_logfile, "ior WARNING: %s.\n", MSG); \ } \ - fflush(stdout); \ + fflush(out_logfile); \ } while (0) /* warning with format string and errno printed */ #define EWARNF(FORMAT, ...) do { \ + if(aiori_warning_as_errors){ ERRF(FORMAT, __VA_ARGS__); } \ if (verbose > VERBOSE_2) { \ - fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s (%s:%d).\n", \ - __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ + fprintf(out_logfile, "ior WARNING: " FORMAT ", (%s:%d).\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ } else { \ - fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s \n", \ - __VA_ARGS__, errno, strerror(errno)); \ + fprintf(out_logfile, "ior WARNING: " FORMAT "\n", \ + __VA_ARGS__); \ } \ - fflush(stdout); \ + fflush(out_logfile); \ } while (0) @@ -173,9 +173,9 @@ typedef long long int IOR_size_t; /* display error message with format string and terminate execution */ #define ERRF(FORMAT, ...) do { \ - fprintf(stdout, "ior ERROR: " FORMAT ", errno %d, %s (%s:%d)\n", \ - __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ - fflush(stdout); \ + fprintf(out_logfile, "ior ERROR: " FORMAT ", (%s:%d)\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } while (0) @@ -188,9 +188,9 @@ typedef long long int IOR_size_t; /* display a simple error message (i.e. 
errno is not set) and terminate execution */ #define ERR(MSG) do { \ - fprintf(stdout, "ior ERROR: %s, (%s:%d)\n", \ + fprintf(out_logfile, "ior ERROR: %s, (%s:%d)\n", \ MSG, __FILE__, __LINE__); \ - fflush(stdout); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } while (0) @@ -207,9 +207,9 @@ typedef long long int IOR_size_t; \ if (MPI_STATUS != MPI_SUCCESS) { \ MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ - fprintf(stdout, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ + fprintf(out_logfile, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ __VA_ARGS__, resultString, __FILE__, __LINE__); \ - fflush(stdout); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } \ } while(0) diff --git a/src/mdtest.c b/src/mdtest.c index 5488834..3eef40c 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1079,7 +1079,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } if (rank == 0) { if(expected_items == -1){ - fprintf(out_logfile, "WARNING: could not read stonewall status file\n"); + WARN("Could not read stonewall status file"); }else { VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", items); } @@ -1949,6 +1949,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, + {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, LAST_OPTION }; options_all_t * global_options = airoi_create_all_module_options(options); diff --git a/src/parse_options.c b/src/parse_options.c index ce5421c..31fac13 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -175,8 +175,8 @@ void DecodeDirective(char *line, 
IOR_param_t *params, options_all_t * module_opt params->keepFileWithError = atoi(value); } else if (strcasecmp(option, "multiFile") == 0) { params->multiFile = atoi(value); - } else if (strcasecmp(option, "quitonerror") == 0) { - params->quitOnError = atoi(value); + } else if (strcasecmp(option, "warningAsErrors") == 0) { + params->warningAsErrors = atoi(value); } else if (strcasecmp(option, "segmentcount") == 0) { params->segmentCount = string_to_bytes(value); } else if (strcasecmp(option, "blocksize") == 0) { @@ -418,7 +418,6 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, {'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName}, {'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, - {'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & params->quitOnError}, {'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->taskPerNodeOffset}, {'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & params->readFile}, {'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & params->checkRead}, @@ -435,6 +434,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, + {0, "warningAsErrors", 
"Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, {0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun}, From 38b1752bdae736b2b0861dd4387b0b8069697c55 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 25 Jun 2020 14:37:52 +0100 Subject: [PATCH 003/154] JSON issue with results. Fix for #233. --- src/ior.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index 5d1632c..e70bf1d 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1303,7 +1303,6 @@ static void TestIoSys(IOR_test_t *test) /* loop over test iterations */ uint64_t params_saved_wearout = params->stoneWallingWearOutIterations; for (rep = 0; rep < params->repetitions; rep++) { - PrintRepeatStart(); /* Get iteration start time in seconds in task 0 and broadcast to all tasks */ if (rank == 0) { From 3cb144e7f317ad62cea15bf10b864c2a3dee7bf5 Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Thu, 25 Jun 2020 14:44:55 +0100 Subject: [PATCH 004/154] Added regression test for JSON #202 --- testing/basic-tests.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index 91dba4b..1a0841e 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -26,7 +26,10 @@ IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k - IOR 2 -f "$ROOT/test_comments.ior" +# Test for JSON output +IOR 2 -a DUMMY -e -F -t 1m -b 1m -A 328883 -O summaryFormat=JSON -O summaryFile=OUT.json +python -mjson.tool OUT.json >/dev/null && echo "JSON OK" + END From 7edd559c016159a5832a9bfcb83f39a1ef014075 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 28 Jun 2020 16:49:57 +0100 Subject: [PATCH 005/154] Added missing news. --- NEWS | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 9367112..4349765 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Version 3.3.0+dev +Version 3.4.0+dev -------------------------------------------------------------------------------- New major features: @@ -7,6 +7,48 @@ New minor features: Bugfixes: + +Version 3.3.0+dev +-------------------------------------------------------------------------------- + +New major features: +- Add CephFS AIORI (Mark Nelson) +- Add Gfarm AIORI (Osamu Tatebe) +- Add DAOS AIORI (Mohamad Chaarawi) +- Add DAOS DFS AIORI (Mohamad Chaarawi) + +New minor features: +- Display outlier host names (Jean-Yves Vet) +- Enable global default dir layout for subdirs in Lustre (Petros Koutoupis) +- Removed pound signs (#) from mdtest output file names (Julian Kunkel) +- Print I/O hints from NCMPI (Wei-keng Liao) +- Add mknod support to mdtest (Gu Zheng) +- Refactor AIORI-specific options (Julian Kunkel) +- Enable IME native backend for 
mdtest (Jean-Yves Vet) +- Enable mkdir/rmdir to IME AIORI (Jean-Yves Vet) +- Add HDF5 collective metadata option (Rob Latham) +- Add support for sync to AIORIs (Julian Kunkel) + +General user improvements and bug fixes: +- Allocate aligned buffers to support DirectIO for BeeGFS (Sven Breuner) +- Added IOPS and latency results to json output (Robert LeBlanc) +- Fixed case where numTasks is not evenly divisible by tasksPerNode (J. Schwartz) +- Fix several memory leaks and buffer alignment problems (J. Schwartz, Axel Huebl, Sylvain Didelot) +- Add mdtest data verification (Julian Kunkel) +- Clean up functionality of stonewall (Julian Kunkel) +- Fix checks for lustre_user.h (Andreas Dilger) +- Make write verification work without read test (Jean-Yves Vet) +- Documentation updates (Vaclav Hapla, Glenn Lockwood) +- Add more debugging support (J. Schwartz) +- +General developer improvements: +- Fix type casting errors (Vaclav Hapla) +- Add basic test infrastructure (Julian Kunkel, Glenn Lockwood) +- Conform to strict C99 (Glenn Lockwood) + +Known issues: +- S3 AIORI may not compile with new versions of aws4c + Version 3.2.1 -------------------------------------------------------------------------------- @@ -63,7 +105,7 @@ Known issues: because `-u`/`-c`/`-p` cannot be specified (issue #98) - `writeCheck` cannot be enabled for write-only tests using some AIORIs such as MPI-IO (pull request #89) - + Version 3.0.2 -------------------------------------------------------------------------------- From 145c71f7c394bde881114f946fb450bc64f60780 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 28 Jun 2020 17:16:35 +0100 Subject: [PATCH 006/154] Trivial cleanup: Extracted debug-related stuff into new header. 
--- src/aiori-debug.h | 117 ++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.h | 1 + src/iordef.h | 115 --------------------------------------------- src/utilities.c | 5 +- src/utilities.h | 3 -- 5 files changed, 120 insertions(+), 121 deletions(-) create mode 100644 src/aiori-debug.h diff --git a/src/aiori-debug.h b/src/aiori-debug.h new file mode 100644 index 0000000..0fa20d6 --- /dev/null +++ b/src/aiori-debug.h @@ -0,0 +1,117 @@ +#ifndef _AIORI_UTIL_H +#define _AIORI_UTIL_H + +/* This file contains only debug relevant helpers */ + +#include + +extern FILE * out_logfile; +extern int verbose; /* verbose output */ + +#define FAIL(...) FailMessage(rank, ERROR_LOCATION, __VA_ARGS__) +void FailMessage(int rank, const char *location, char *format, ...); + +/******************************** M A C R O S *********************************/ + +/******************************************************************************/ +/* + * WARN_RESET will display a custom error message and set value to default + */ +#define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER) do { \ + (TO_STRUCT_PTR)->MEMBER = (FROM_STRUCT_PTR)->MEMBER; \ + if (rank == 0) { \ + fprintf(out_logfile, "WARNING: %s. Using value of %d.\n", \ + MSG, (TO_STRUCT_PTR)->MEMBER); \ + } \ + fflush(out_logfile); \ +} while (0) + +extern int aiori_warning_as_errors; + +#define WARN(MSG) do { \ + if(aiori_warning_as_errors){ ERR(MSG); } \ + if (verbose > VERBOSE_2) { \ + fprintf(out_logfile, "WARNING: %s, (%s:%d).\n", \ + MSG, __FILE__, __LINE__); \ + } else { \ + fprintf(out_logfile, "WARNING: %s.\n", MSG); \ + } \ + fflush(out_logfile); \ +} while (0) + + +/* warning with format string and errno printed */ +#define EWARNF(FORMAT, ...) 
do { \ + if(aiori_warning_as_errors){ ERRF(FORMAT, __VA_ARGS__); } \ + if (verbose > VERBOSE_2) { \ + fprintf(out_logfile, "WARNING: " FORMAT ", (%s:%d).\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ + } else { \ + fprintf(out_logfile, "WARNING: " FORMAT "\n", \ + __VA_ARGS__); \ + } \ + fflush(out_logfile); \ +} while (0) + + +/* warning with errno printed */ +#define EWARN(MSG) do { \ + EWARNF("%s", MSG); \ +} while (0) + + +/* display error message with format string and terminate execution */ +#define ERRF(FORMAT, ...) do { \ + fprintf(out_logfile, "ERROR: " FORMAT ", (%s:%d)\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ + fflush(out_logfile); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ +} while (0) + + +/* display error message and terminate execution */ +#define ERR_ERRNO(MSG) do { \ + ERRF("%s", MSG); \ +} while (0) + + +/* display a simple error message (i.e. errno is not set) and terminate execution */ +#define ERR(MSG) do { \ + fprintf(out_logfile, "ERROR: %s, (%s:%d)\n", \ + MSG, __FILE__, __LINE__); \ + fflush(out_logfile); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ +} while (0) + + +/******************************************************************************/ +/* + * MPI_CHECKF will display a custom format string as well as an error string + * from the MPI_STATUS and then exit the program + */ + +#define MPI_CHECKF(MPI_STATUS, FORMAT, ...) 
do { \ + char resultString[MPI_MAX_ERROR_STRING]; \ + int resultLength; \ + \ + if (MPI_STATUS != MPI_SUCCESS) { \ + MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ + fprintf(out_logfile, "ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ + __VA_ARGS__, resultString, __FILE__, __LINE__); \ + fflush(out_logfile); \ + MPI_Abort(MPI_COMM_WORLD, -1); \ + } \ +} while(0) + + +/******************************************************************************/ +/* + * MPI_CHECK will display a custom error message as well as an error string + * from the MPI_STATUS and then exit the program + */ + +#define MPI_CHECK(MPI_STATUS, MSG) do { \ + MPI_CHECKF(MPI_STATUS, "%s", MSG); \ +} while(0) + +#endif diff --git a/src/aiori.h b/src/aiori.h index ad10e4d..e5f0e5e 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -25,6 +25,7 @@ #include #include "iordef.h" /* IOR Definitions */ +#include "aiori-debug.h" #include "option.h" /*************************** D E F I N I T I O N S ****************************/ diff --git a/src/iordef.h b/src/iordef.h index 0805208..eb10306 100755 --- a/src/iordef.h +++ b/src/iordef.h @@ -18,8 +18,6 @@ #include #include #include -#include -#include #ifdef _WIN32 # define _CRT_SECURE_NO_WARNINGS @@ -52,13 +50,6 @@ # include #endif -/************************** D E C L A R A T I O N S ***************************/ - -extern int numTasks; /* MPI variables */ -extern int rank; -extern int rankOffset; -extern int verbose; /* verbose output */ - /*************************** D E F I N I T I O N S ****************************/ enum OutputFormat_t{ @@ -120,112 +111,6 @@ typedef long long int IOR_size_t; #define IOR_format "%016llx" -extern FILE * out_logfile; - -/******************************** M A C R O S *********************************/ - -/******************************************************************************/ -/* - * WARN_RESET will display a custom error message and set value to default - */ -#define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, 
MEMBER) do { \ - (TO_STRUCT_PTR)->MEMBER = (FROM_STRUCT_PTR)->MEMBER; \ - if (rank == 0) { \ - fprintf(out_logfile, "ior WARNING: %s. Using value of %d.\n", \ - MSG, (TO_STRUCT_PTR)->MEMBER); \ - } \ - fflush(out_logfile); \ -} while (0) - -extern int aiori_warning_as_errors; - -#define WARN(MSG) do { \ - if(aiori_warning_as_errors){ ERR(MSG); } \ - if (verbose > VERBOSE_2) { \ - fprintf(out_logfile, "ior WARNING: %s, (%s:%d).\n", \ - MSG, __FILE__, __LINE__); \ - } else { \ - fprintf(out_logfile, "ior WARNING: %s.\n", MSG); \ - } \ - fflush(out_logfile); \ -} while (0) - - -/* warning with format string and errno printed */ -#define EWARNF(FORMAT, ...) do { \ - if(aiori_warning_as_errors){ ERRF(FORMAT, __VA_ARGS__); } \ - if (verbose > VERBOSE_2) { \ - fprintf(out_logfile, "ior WARNING: " FORMAT ", (%s:%d).\n", \ - __VA_ARGS__, __FILE__, __LINE__); \ - } else { \ - fprintf(out_logfile, "ior WARNING: " FORMAT "\n", \ - __VA_ARGS__); \ - } \ - fflush(out_logfile); \ -} while (0) - - -/* warning with errno printed */ -#define EWARN(MSG) do { \ - EWARNF("%s", MSG); \ -} while (0) - - -/* display error message with format string and terminate execution */ -#define ERRF(FORMAT, ...) do { \ - fprintf(out_logfile, "ior ERROR: " FORMAT ", (%s:%d)\n", \ - __VA_ARGS__, __FILE__, __LINE__); \ - fflush(out_logfile); \ - MPI_Abort(MPI_COMM_WORLD, -1); \ -} while (0) - - -/* display error message and terminate execution */ -#define ERR_ERRNO(MSG) do { \ - ERRF("%s", MSG); \ -} while (0) - - -/* display a simple error message (i.e. 
errno is not set) and terminate execution */ -#define ERR(MSG) do { \ - fprintf(out_logfile, "ior ERROR: %s, (%s:%d)\n", \ - MSG, __FILE__, __LINE__); \ - fflush(out_logfile); \ - MPI_Abort(MPI_COMM_WORLD, -1); \ -} while (0) - - -/******************************************************************************/ -/* - * MPI_CHECKF will display a custom format string as well as an error string - * from the MPI_STATUS and then exit the program - */ - -#define MPI_CHECKF(MPI_STATUS, FORMAT, ...) do { \ - char resultString[MPI_MAX_ERROR_STRING]; \ - int resultLength; \ - \ - if (MPI_STATUS != MPI_SUCCESS) { \ - MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ - fprintf(out_logfile, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ - __VA_ARGS__, resultString, __FILE__, __LINE__); \ - fflush(out_logfile); \ - MPI_Abort(MPI_COMM_WORLD, -1); \ - } \ -} while(0) - - -/******************************************************************************/ -/* - * MPI_CHECK will display a custom error message as well as an error string - * from the MPI_STATUS and then exit the program - */ - -#define MPI_CHECK(MPI_STATUS, MSG) do { \ - MPI_CHECKF(MPI_STATUS, "%s", MSG); \ -} while(0) - - /******************************************************************************/ /* * System info for Windows. diff --git a/src/utilities.c b/src/utilities.c index 715e30d..19ef0d6 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -81,8 +81,8 @@ void FailMessage(int rank, const char *location, char *format, ...) 
{ va_start(args, format); vsnprintf(msg, 4096, format, args); va_end(args); - fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", - PrintTimestamp(), rank, location, msg, strerror(errno)); + fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s\n", + PrintTimestamp(), rank, location, msg); fflush(out_logfile); MPI_Abort(testComm, 1); } @@ -889,4 +889,3 @@ unsigned long GetProcessorAndCore(int *chip, int *core){ return ((unsigned long)a) | (((unsigned long)d) << 32);; } #endif - diff --git a/src/utilities.h b/src/utilities.h index 32292a4..020f27b 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -23,7 +23,6 @@ extern int rankOffset; extern int verbose; extern MPI_Comm testComm; extern MPI_Comm mpi_comm_world; -extern FILE * out_logfile; extern FILE * out_resultfile; extern enum OutputFormat_t outputFormat; /* format of the output */ @@ -39,8 +38,6 @@ extern enum OutputFormat_t outputFormat; /* format of the output */ #define ERROR_LOCATION __LINE__ #endif -#define FAIL(...) FailMessage(rank, ERROR_LOCATION, __VA_ARGS__) -void FailMessage(int rank, const char *location, char *format, ...); void* safeMalloc(uint64_t size); void set_o_direct_flag(int *fd); From df3f7082d8ef7137341f810aebff0f5e36823d90 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 28 Jun 2020 17:21:24 +0100 Subject: [PATCH 007/154] MDTest: Added warnings/errors for erroneous read pattern #206 --- src/mdtest.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 3eef40c..0e407a6 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1377,7 +1377,7 @@ void summarize_results(int iterations, int print_time) { } /* Checks to see if the test setup is valid. If it isn't, fail. */ -void valid_tests() { +void md_validate_tests() { if (((stone_wall_timer_seconds > 0) && (branch_factor > 1)) || !
barriers) { FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", branch_factor); @@ -1388,7 +1388,7 @@ void valid_tests() { VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" ); } - VERBOSE(1,-1,"Entering valid_tests..." ); + VERBOSE(1,-1,"Entering md_validate_tests..." ); /* if dirs_only and files_only were both left unset, set both now */ if (!dirs_only && !files_only) { @@ -1462,6 +1462,15 @@ void valid_tests() { if (write_bytes > 0 && make_node) { FAIL("-k not compatible with -w"); } + + if(verify_read && ! read_only) + FAIL("Verify read requires that the read test is used"); + + if(verify_read && read_bytes <= 0) + FAIL("Verify read requires that read bytes is > 0"); + + if(read_only && read_bytes <= 0) + WARN("Read bytes is 0, thus, a read test will actually just open/close"); } void show_file_system_size(char *file_system) { @@ -2010,7 +2019,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * }else{ directory_loops = 1; } - valid_tests(); + md_validate_tests(); // option_print_current(options); VERBOSE(1,-1, "api : %s", api); From 03dbb20594acbb611ffb4124ac4e496c0fe6c3bb Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 29 Jun 2020 20:15:14 +0100 Subject: [PATCH 008/154] MDTest changed verification pattern. Read now always checks the first byte/8 bytes for the signature (item number). Added also --verify-write option which performs a read immediately after a write. 
Supports #206 --- src/mdtest.c | 70 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 0e407a6..85e081f 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -106,8 +106,6 @@ static char unique_read_dir[MAX_PATHLEN]; static char unique_rm_dir[MAX_PATHLEN]; static char unique_rm_uni_dir[MAX_PATHLEN]; static char *write_buffer; -static char *read_buffer; -static char *verify_read_buffer; static char *stoneWallingStatusFile; @@ -116,6 +114,7 @@ static int create_only; static int stat_only; static int read_only; static int verify_read; +static int verify_write; static int verification_error; static int remove_only; static int leaf_only; @@ -213,7 +212,8 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ... } void generate_memory_pattern(char * buffer, size_t bytes){ - for(int i=0; i < bytes; i++){ + // the first byte is set to the item number + for(int i=1; i < bytes; i++){ buffer[i] = i + 1; } } @@ -344,6 +344,22 @@ static void remove_file (const char *path, uint64_t itemNum) { } } +void mdtest_verify_data(int item, char * buffer, size_t bytes){ + if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){ + VERBOSE(2, -1, "Error verifying first element for item: %d", item); + verification_error++; + } + + size_t i = bytes < 8 ? 1 : 8; // the first byte + + for( ; i < bytes; i++){ + if(buffer[i] != (char) (i + 1)){ + VERBOSE(0, -1, "Error verifying byte %zu for item %d", i, item); + verification_error++; + } + } +} + static void create_file (const char *path, uint64_t itemNum) { char curr_item[MAX_PATHLEN]; aiori_fd_t *aiori_fh = NULL; @@ -392,9 +408,22 @@ static void create_file (const char *path, uint64_t itemNum) { * offset 0 (zero). 
*/ hints.fsyncPerWrite = sync_file; - if ( write_bytes != (size_t) backend->xfer (WRITE, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, 0, backend_options)) { + if(write_bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible + ((uint64_t*) write_buffer)[0] = itemNum; + }else{ + write_buffer[0] = (char) itemNum; + } + if ( write_bytes != (size_t) backend->xfer(WRITE, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, 0, backend_options)) { FAIL("unable to write file %s", curr_item); } + + if (verify_write) { + write_buffer[0] = 42; + if (write_bytes != (size_t) backend->xfer(READ, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, 0, backend_options)) { + FAIL("unable to verify write (read/back) file %s", curr_item); + } + mdtest_verify_data(itemNum, write_buffer, write_bytes); + } } VERBOSE(3,5,"create_remove_items_helper: close..." ); @@ -616,7 +645,6 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch } } - /* reads all of the items created as specified by the input parameters */ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { uint64_t parent_dir, item_num = 0; @@ -624,6 +652,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { aiori_fd_t *aiori_fh; VERBOSE(1,-1,"Entering mdtest_read on %s", path ); + char *read_buffer; /* allocate read buffer */ if (read_bytes > 0) { @@ -631,14 +660,6 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { if (alloc_res) { FAIL("out of memory"); } - - if (verify_read > 0) { - verify_read_buffer = (char *)malloc(read_bytes); - if (verify_read_buffer == NULL) { - FAIL("out of memory"); - } - generate_memory_pattern(verify_read_buffer, read_bytes); - } } uint64_t stop_items = items; @@ -714,21 +735,24 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { /* read file */ if (read_bytes > 0) { - read_buffer[0] = 42; /* use a random value to ensure that the 
read_buffer is now different from the expected buffer and read isn't sometimes NOOP */ - if (read_bytes != (size_t) backend->xfer (READ, aiori_fh, (IOR_size_t *) read_buffer, read_bytes, 0, backend_options)) { + read_buffer[0] = 42; + if (read_bytes != (size_t) backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, read_bytes, 0, backend_options)) { FAIL("unable to read file %s", item); } if(verify_read){ - if (memcmp(read_buffer, verify_read_buffer, read_bytes) != 0){ - VERBOSE(2, -1, "Error verifying %s", item); - verification_error++; - } + mdtest_verify_data(item_num, read_buffer, read_bytes); + }else if((read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (read_bytes < 8 && read_buffer[0] != (char) item_num)){ + // do a lightweight check, which cost is neglectable + verification_error++; } } /* close file */ backend->close (aiori_fh, backend_options); } + if(read_bytes){ + free(read_buffer); + } } /* This method should be called by rank 0. It subsequently does all of @@ -1471,6 +1495,9 @@ void md_validate_tests() { if(read_only && read_bytes <= 0) WARN("Read bytes is 0, thus, a read test will actually just open/close"); + + if(create_only && read_only && read_bytes > write_bytes) + FAIL("When writing and reading files, read bytes must be smaller than write bytes"); } void show_file_system_size(char *file_system) { @@ -1954,6 +1981,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase)", OPTION_OPTIONAL_ARGUMENT, 'd', & stone_wall_timer_seconds}, {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & verify_read}, + {0, "verify-write", "Verify the 
data after a write by reading it back immediately", OPTION_FLAG, 'd', & verify_write}, {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & sync_file}, {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, @@ -2277,5 +2305,9 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * backend->finalize(backend_options); } + if (write_bytes > 0) { + free(write_buffer); + } + return summary_table; } From 3e6bfd2db76011d029d76893a6f6a331c0361609 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 29 Jun 2020 20:58:45 +0100 Subject: [PATCH 009/154] Added missing header info. --- src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index 567d9ce..7cbd448 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -5,7 +5,7 @@ if USE_CAPS bin_PROGRAMS += IOR MDTEST endif -noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h +noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h aiori-debug.h lib_LIBRARIES = libaiori.a libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c From 5a5b90cdaa8cb47b55f02ad65fd556480dbec2b6 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 11:17:46 +0100 Subject: [PATCH 010/154] Run existing check to allow only supported backends in mdtest. --- src/mdtest.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mdtest.c b/src/mdtest.c index 85e081f..ce07981 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1994,6 +1994,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * backend = aiori_select(api); if (backend == NULL) ERR("Unrecognized I/O API"); + if (! 
backend->enable_mdtest) + ERR("Backend doesn't support MDTest"); backend_options = airoi_update_module_options(backend, global_options); free(global_options->modules); From 3395fc621cf73bf57c8ba25a33be268d2d4b6aad Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 12:03:05 +0100 Subject: [PATCH 011/154] Moved GetFileSize into CheckFileSize, starting to simplify get_file_size (too much replication in modules). --- src/ior.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/ior.c b/src/ior.c index e70bf1d..411204d 100755 --- a/src/ior.c +++ b/src/ior.c @@ -313,7 +313,7 @@ CheckForOutliers(IOR_param_t *test, const double *timer, const int access) * Check if actual file size equals expected size; if not use actual for * calculating performance rate. */ -static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, +static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t dataMoved, int rep, const int access) { IOR_param_t *params = &test->params; @@ -321,6 +321,32 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, IOR_point_t *point = (access == WRITE) ? 
&results[rep].write : &results[rep].read; + /* get the size of the file */ + IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; + aggFileSizeFromStat = backend->get_file_size(params->backend_options, testComm, testFilename); + + if (params->hints.filePerProc == TRUE) { + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, + MPI_LONG_LONG_INT, MPI_SUM, testComm), + "cannot reduce total data moved"); + aggFileSizeFromStat = tmpSum; + } else { + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, + MPI_LONG_LONG_INT, MPI_MIN, testComm), + "cannot reduce total data moved"); + MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, + MPI_LONG_LONG_INT, MPI_MAX, testComm), + "cannot reduce total data moved"); + if (tmpMin != tmpMax) { + if (rank == 0) { + WARN("inconsistent file size by different tasks"); + } + /* incorrect, but now consistent across tasks */ + aggFileSizeFromStat = tmpMin; + } + } + point->aggFileSizeFromStat = aggFileSizeFromStat; + MPI_CHECK(MPI_Allreduce(&dataMoved, &point->aggFileSizeFromXfer, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); @@ -1379,13 +1405,9 @@ static void TestIoSys(IOR_test_t *test) timer[5] = GetTimeStamp(); MPI_CHECK(MPI_Barrier(testComm), "barrier error"); - /* get the size of the file just written */ - results[rep].write.aggFileSizeFromStat = - backend->get_file_size(params->backend_options, testComm, testFileName); - /* check if stat() of file doesn't equal expected file size, use actual amount of byte moved */ - CheckFileSize(test, dataMoved, rep, WRITE); + CheckFileSize(test, testFileName, dataMoved, rep, WRITE); if (verbose >= VERBOSE_3) WriteTimes(params, timer, rep, WRITE); @@ -1519,14 +1541,9 @@ static void TestIoSys(IOR_test_t *test) backend->close(fd, params->backend_options); timer[5] = GetTimeStamp(); - /* get the size of the file just read */ - results[rep].read.aggFileSizeFromStat = - backend->get_file_size(params->backend_options, testComm, - testFileName); - /* check 
if stat() of file doesn't equal expected file size, use actual amount of byte moved */ - CheckFileSize(test, dataMoved, rep, READ); + CheckFileSize(test, testFileName, dataMoved, rep, READ); if (verbose >= VERBOSE_3) WriteTimes(params, timer, rep, READ); From 82417128cdfb4d187b96db8367b0ca105b8afdda Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 12:33:56 +0100 Subject: [PATCH 012/154] Extracted file size routine that is replicated in each module into IOR removing the MPI dependency from AIORI. --- src/aiori-DFS.c | 25 ++----------------------- src/aiori-DUMMY.c | 2 +- src/aiori-HDF5.c | 6 +++--- src/aiori-MPIIO.c | 24 +----------------------- src/aiori-POSIX.c | 24 +----------------------- src/aiori-debug.h | 1 + src/aiori.h | 22 ++++++++-------------- src/ior.c | 5 ++--- src/ior.h | 6 ++++++ 9 files changed, 25 insertions(+), 90 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 86f014c..82f7672 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -114,7 +114,7 @@ static void DFS_Delete(char *, aiori_mod_opt_t *); static char* DFS_GetVersion(); static void DFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); static void DFS_Sync(aiori_mod_opt_t *); -static IOR_offset_t DFS_GetFileSize(aiori_mod_opt_t *, MPI_Comm, char *); +static IOR_offset_t DFS_GetFileSize(aiori_mod_opt_t *, char *); static int DFS_Statfs (const char *, ior_aiori_statfs_t *, aiori_mod_opt_t *); static int DFS_Stat (const char *, struct stat *, aiori_mod_opt_t *); static int DFS_Mkdir (const char *, mode_t, aiori_mod_opt_t *); @@ -774,7 +774,7 @@ static char* DFS_GetVersion() * Use DFS stat() to return aggregate file size. 
*/ static IOR_offset_t -DFS_GetFileSize(aiori_mod_opt_t * test, MPI_Comm comm, char *testFileName) +DFS_GetFileSize(aiori_mod_opt_t * test, char *testFileName) { dfs_obj_t *obj; daos_size_t fsize, tmpMin, tmpMax, tmpSum; @@ -792,27 +792,6 @@ DFS_GetFileSize(aiori_mod_opt_t * test, MPI_Comm comm, char *testFileName) dfs_release(obj); - if (hints->filePerProc == TRUE) { - MPI_CHECK(MPI_Allreduce(&fsize, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, comm), - "cannot total data moved"); - fsize = tmpSum; - } else { - MPI_CHECK(MPI_Allreduce(&fsize, &tmpMin, 1, - MPI_LONG_LONG_INT, MPI_MIN, comm), - "cannot total data moved"); - MPI_CHECK(MPI_Allreduce(&fsize, &tmpMax, 1, - MPI_LONG_LONG_INT, MPI_MAX, comm), - "cannot total data moved"); - if (tmpMin != tmpMax) { - if (rank == 0) { - WARN("inconsistent file size by different tasks"); - } - /* incorrect, but now consistent across tasks */ - fsize = tmpMin; - } - } - return (fsize); } diff --git a/src/aiori-DUMMY.c b/src/aiori-DUMMY.c index 034fc98..17656bb 100755 --- a/src/aiori-DUMMY.c +++ b/src/aiori-DUMMY.c @@ -108,7 +108,7 @@ static char * DUMMY_getVersion() return "0.5"; } -static IOR_offset_t DUMMY_GetFileSize(aiori_mod_opt_t * options, MPI_Comm testComm, char *testFileName) +static IOR_offset_t DUMMY_GetFileSize(aiori_mod_opt_t * options, char *testFileName) { if(verbose > 4){ fprintf(out_logfile, "DUMMY getFileSize: %s\n", testFileName); diff --git a/src/aiori-HDF5.c b/src/aiori-HDF5.c index 1e7f2bf..560dfbb 100755 --- a/src/aiori-HDF5.c +++ b/src/aiori-HDF5.c @@ -91,7 +91,7 @@ static void HDF5_Close(aiori_fd_t *, aiori_mod_opt_t *); static void HDF5_Delete(char *, aiori_mod_opt_t *); static char* HDF5_GetVersion(); static void HDF5_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -static IOR_offset_t HDF5_GetFileSize(aiori_mod_opt_t *, MPI_Comm, char *); +static IOR_offset_t HDF5_GetFileSize(aiori_mod_opt_t *, char *); static int HDF5_Access(const char *, int, aiori_mod_opt_t *); static void 
HDF5_init_xfer_options(aiori_xfer_hint_t * params); static int HDF5_check_params(aiori_mod_opt_t * options); @@ -660,11 +660,11 @@ static void SetupDataSet(void *fd, int flags, aiori_mod_opt_t * param) * Use MPIIO call to get file size. */ static IOR_offset_t -HDF5_GetFileSize(aiori_mod_opt_t * test, MPI_Comm testComm, char *testFileName) +HDF5_GetFileSize(aiori_mod_opt_t * test, char *testFileName) { if(hints->dryRun) return 0; - return(MPIIO_GetFileSize(test, testComm, testFileName)); + return(MPIIO_GetFileSize(test, testFileName)); } /* diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 53eaad0..8462248 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -562,8 +562,7 @@ static IOR_offset_t SeekOffset(MPI_File fd, IOR_offset_t offset, * Use MPI_File_get_size() to return aggregate file size. * NOTE: This function is used by the HDF5 and NCMPI backends. */ -IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * module_options, MPI_Comm testComm, - char *testFileName) +IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * module_options, char *testFileName) { mpiio_options_t * test = (mpiio_options_t*) module_options; if(hints->dryRun) @@ -589,26 +588,5 @@ IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * module_options, MPI_Comm testCo if (mpiHints != MPI_INFO_NULL) MPI_CHECK(MPI_Info_free(&mpiHints), "MPI_Info_free failed"); - if (hints->filePerProc == TRUE) { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, testComm), - "cannot total data moved"); - aggFileSizeFromStat = tmpSum; - } else { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, - MPI_LONG_LONG_INT, MPI_MIN, testComm), - "cannot total data moved"); - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, - MPI_LONG_LONG_INT, MPI_MAX, testComm), - "cannot total data moved"); - if (tmpMin != tmpMax) { - if (rank == 0) { - WARN("inconsistent file size by different tasks"); - } - /* incorrect, but now consistent across tasks */ - aggFileSizeFromStat = 
tmpMin; - } - } - return (aggFileSizeFromStat); } diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 648b7c1..f3cf319 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -676,8 +676,7 @@ void POSIX_Delete(char *testFileName, aiori_mod_opt_t * param) /* * Use POSIX stat() to return aggregate file size. */ -IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, MPI_Comm testComm, - char *testFileName) +IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName) { if(hints->dryRun) return 0; @@ -689,26 +688,5 @@ IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, MPI_Comm testComm, } aggFileSizeFromStat = stat_buf.st_size; - if (hints->filePerProc == TRUE) { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, testComm), - "cannot total data moved"); - aggFileSizeFromStat = tmpSum; - } else { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, - MPI_LONG_LONG_INT, MPI_MIN, testComm), - "cannot total data moved"); - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, - MPI_LONG_LONG_INT, MPI_MAX, testComm), - "cannot total data moved"); - if (tmpMin != tmpMax) { - if (rank == 0) { - WARN("inconsistent file size by different tasks"); - } - /* incorrect, but now consistent across tasks */ - aggFileSizeFromStat = tmpMin; - } - } - return (aggFileSizeFromStat); } diff --git a/src/aiori-debug.h b/src/aiori-debug.h index 0fa20d6..be289e9 100644 --- a/src/aiori-debug.h +++ b/src/aiori-debug.h @@ -4,6 +4,7 @@ /* This file contains only debug relevant helpers */ #include +#include extern FILE * out_logfile; extern int verbose; /* verbose output */ diff --git a/src/aiori.h b/src/aiori.h index e5f0e5e..f7205e0 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -15,12 +15,6 @@ #ifndef _AIORI_H #define _AIORI_H -#include - -#ifndef MPI_FILE_NULL -# include -#endif /* not MPI_FILE_NULL */ - #include #include @@ -101,12 +95,12 @@ typedef struct ior_aiori { */ void (*xfer_hints)(aiori_xfer_hint_t * params); 
IOR_offset_t (*xfer)(int access, aiori_fd_t *, IOR_size_t *, - IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t *); - void (*close)(aiori_fd_t *, aiori_mod_opt_t *); - void (*delete)(char *, aiori_mod_opt_t *); + IOR_offset_t size, IOR_offset_t offset, aiori_mod_opt_t * module_options); + void (*close)(aiori_fd_t *, aiori_mod_opt_t * module_options); + void (*delete)(char *, aiori_mod_opt_t * module_options); char* (*get_version)(void); - void (*fsync)(aiori_fd_t *, aiori_mod_opt_t *); - IOR_offset_t (*get_file_size)(aiori_mod_opt_t * module_options, MPI_Comm, char *); + void (*fsync)(aiori_fd_t *, aiori_mod_opt_t * module_options); + IOR_offset_t (*get_file_size)(aiori_mod_opt_t * module_options, char * filename); int (*statfs) (const char *, ior_aiori_statfs_t *, aiori_mod_opt_t * module_options); int (*mkdir) (const char *path, mode_t mode, aiori_mod_opt_t * module_options); int (*rmdir) (const char *path, aiori_mod_opt_t * module_options); @@ -164,7 +158,7 @@ void aiori_posix_xfer_hints(aiori_xfer_hint_t * params); aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * module_options); int POSIX_Mknod(char *testFileName); aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * module_options); -IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, MPI_Comm testComm, char *testFileName); +IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName); void POSIX_Delete(char *testFileName, aiori_mod_opt_t * module_options); void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * module_options); option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); @@ -172,7 +166,7 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o /* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ void MPIIO_Delete(char *testFileName, aiori_mod_opt_t * module_options); -IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * options, MPI_Comm 
testComm, char *testFileName); -int MPIIO_Access(const char *, int, aiori_mod_opt_t *); +IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * options, char *testFileName); +int MPIIO_Access(const char *, int, aiori_mod_opt_t * module_options); #endif /* not _AIORI_H */ diff --git a/src/ior.c b/src/ior.c index 411204d..6263a04 100755 --- a/src/ior.c +++ b/src/ior.c @@ -313,8 +313,7 @@ CheckForOutliers(IOR_param_t *test, const double *timer, const int access) * Check if actual file size equals expected size; if not use actual for * calculating performance rate. */ -static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t dataMoved, int rep, - const int access) +static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t dataMoved, int rep, const int access) { IOR_param_t *params = &test->params; IOR_results_t *results = test->results; @@ -323,7 +322,7 @@ static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t da /* get the size of the file */ IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; - aggFileSizeFromStat = backend->get_file_size(params->backend_options, testComm, testFilename); + aggFileSizeFromStat = backend->get_file_size(params->backend_options, testFilename); if (params->hints.filePerProc == TRUE) { MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, diff --git a/src/ior.h b/src/ior.h index a5c34b9..a1b5f8f 100755 --- a/src/ior.h +++ b/src/ior.h @@ -39,6 +39,12 @@ #include "iordef.h" #include "aiori.h" +#include + +#ifndef MPI_FILE_NULL +# include +#endif /* not MPI_FILE_NULL */ + #define ISPOWEROFTWO(x) ((x != 0) && !(x & (x - 1))) /******************** DATA Packet Type ***************************************/ /* Holds the types of data packets: generic, offset, timestamp, incompressible */ From 3a9bd7828df7bcc25aa2c3b17015b83f8ba8d76d Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 13:12:44 +0100 Subject: [PATCH 013/154] Ported PMDK API. 
--- src/aiori-PMDK.c | 82 ++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/src/aiori-PMDK.c b/src/aiori-PMDK.c index 4a3953b..79b41b4 100644 --- a/src/aiori-PMDK.c +++ b/src/aiori-PMDK.c @@ -28,14 +28,19 @@ static option_help options [] = { /**************************** P R O T O T Y P E S *****************************/ static option_help * PMDK_options(); -static void *PMDK_Create(char *, IOR_param_t *); -static void *PMDK_Open(char *, IOR_param_t *); -static IOR_offset_t PMDK_Xfer(int, void *, IOR_size_t *, IOR_offset_t, IOR_param_t *); -static void PMDK_Fsync(void *, IOR_param_t *); -static void PMDK_Close(void *, IOR_param_t *); -static void PMDK_Delete(char *, IOR_param_t *); -static IOR_offset_t PMDK_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static aiori_fd_t *PMDK_Create(char *,int iorflags, aiori_mod_opt_t *); +static aiori_fd_t *PMDK_Open(char *, int iorflags, aiori_mod_opt_t *); +static IOR_offset_t PMDK_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); +static void PMDK_Fsync(aiori_fd_t *, aiori_mod_opt_t *); +static void PMDK_Close(aiori_fd_t *, aiori_mod_opt_t *); +static void PMDK_Delete(char *, aiori_mod_opt_t *); +static IOR_offset_t PMDK_GetFileSize(aiori_mod_opt_t *, char *); +static aiori_xfer_hint_t * hints = NULL; + +static void PMDK_xfer_hints(aiori_xfer_hint_t * params){ + hints = params; +} /************************** D E C L A R A T I O N S ***************************/ @@ -55,6 +60,7 @@ ior_aiori_t pmdk_aiori = { .delete = PMDK_Delete, .get_version = aiori_get_version, .fsync = PMDK_Fsync, + .xfer_hints = PMDK_xfer_hints, .get_file_size = PMDK_GetFileSize, .statfs = aiori_posix_statfs, .mkdir = aiori_posix_mkdir, @@ -78,18 +84,18 @@ static option_help * PMDK_options(){ /* * Create and open a memory space through the PMDK interface. 
*/ -static void *PMDK_Create(char * testFileName, IOR_param_t * param){ +static aiori_fd_t *PMDK_Create(char * testFileName, int iorflags, aiori_mod_opt_t * param){ char *pmemaddr = NULL; int is_pmem; size_t mapped_len; size_t open_length; - if(!param->filePerProc){ + if(! hints->filePerProc){ fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - open_length = param->blockSize * param->segmentCount; + open_length = hints->blockSize * hints->segmentCount; if((pmemaddr = pmem_map_file(testFileName, open_length, PMEM_FILE_CREATE|PMEM_FILE_EXCL, @@ -98,7 +104,7 @@ static void *PMDK_Create(char * testFileName, IOR_param_t * param){ perror("pmem_map_file"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - + if(!is_pmem){ fprintf(stdout, "\n is_pmem is %d\n",is_pmem); fprintf(stdout, "\npmem_map_file thinks the hardware being used is not pmem\n"); @@ -106,7 +112,7 @@ static void *PMDK_Create(char * testFileName, IOR_param_t * param){ } - + return((void *)pmemaddr); } /* PMDK_Create() */ @@ -115,20 +121,19 @@ static void *PMDK_Create(char * testFileName, IOR_param_t * param){ /* * Open a memory space through the PMDK interface. 
*/ - -static void *PMDK_Open(char * testFileName, IOR_param_t * param){ +static aiori_fd_t *PMDK_Open(char * testFileName,int iorflags, aiori_mod_opt_t * param){ char *pmemaddr = NULL; int is_pmem; size_t mapped_len; size_t open_length; - if(!param->filePerProc){ + if(!hints->filePerProc){ fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - open_length = param->blockSize * param->segmentCount; + open_length = hints->blockSize * hints->segmentCount; if((pmemaddr = pmem_map_file(testFileName, 0, PMEM_FILE_EXCL, @@ -138,12 +143,12 @@ static void *PMDK_Open(char * testFileName, IOR_param_t * param){ fprintf(stdout, "\n %ld %ld\n",open_length, mapped_len); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - + if(!is_pmem){ fprintf(stdout, "pmem_map_file thinks the hardware being used is not pmem\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } - + return((void *)pmemaddr); } /* PMDK_Open() */ @@ -153,8 +158,8 @@ static void *PMDK_Open(char * testFileName, IOR_param_t * param){ * Write or read access to a memory space created with PMDK. Include drain/flush functionality. 
*/ -static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, - IOR_offset_t length, IOR_param_t * param){ +static IOR_offset_t PMDK_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, + IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param){ int xferRetries = 0; long long remaining = (long long)length; char * ptr = (char *)buffer; @@ -162,11 +167,11 @@ static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, long long i; long long offset_size; - offset_size = param->offset; + offset_size = offset; if(access == WRITE){ - if(param->fsync){ - pmem_memcpy_nodrain(&file[offset_size], ptr, length); + if(hints->fsyncPerWrite){ + pmem_memcpy_nodrain(&file[offset_size], ptr, length); }else{ pmem_memcpy_persist(&file[offset_size], ptr, length); } @@ -183,7 +188,7 @@ static IOR_offset_t PMDK_Xfer(int access, void *file, IOR_size_t * buffer, * Perform fsync(). */ -static void PMDK_Fsync(void *fd, IOR_param_t * param) +static void PMDK_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) { pmem_drain(); } /* PMDK_Fsync() */ @@ -194,11 +199,10 @@ static void PMDK_Fsync(void *fd, IOR_param_t * param) * Stub for close functionality that is not required for PMDK */ -static void PMDK_Close(void *fd, IOR_param_t * param){ +static void PMDK_Close(aiori_fd_t *fd, aiori_mod_opt_t * param){ size_t open_length; - open_length = param->transferSize; + open_length = hints->transferSize; pmem_unmap(fd, open_length); - } /* PMDK_Close() */ @@ -207,38 +211,25 @@ static void PMDK_Close(void *fd, IOR_param_t * param){ * Delete the file backing a memory space through PMDK */ -static void PMDK_Delete(char *testFileName, IOR_param_t * param) +static void PMDK_Delete(char *testFileName, aiori_mod_opt_t * param) { char errmsg[256]; sprintf(errmsg,"[RANK %03d]:cannot delete file %s\n",rank,testFileName); if (unlink(testFileName) != 0) WARN(errmsg); } /* PMDK_Delete() */ - -/******************************************************************************/ -/* 
- * Determine api version. - */ - -static void PMDK_SetVersion(IOR_param_t *test) -{ - strcpy(test->apiVersion, test->api); -} /* PMDK_SetVersion() */ - - /******************************************************************************/ /* * Use POSIX stat() to return aggregate file size. */ -static IOR_offset_t PMDK_GetFileSize(IOR_param_t * test, - MPI_Comm testComm, +static IOR_offset_t PMDK_GetFileSize(aiori_mod_opt_t * test, char * testFileName) { struct stat stat_buf; IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; - if (test->filePerProc == FALSE) { + if (hints->filePerProc == FALSE) { fprintf(stdout, "\nPMDK functionality can only be used with filePerProc functionality\n"); MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); } @@ -248,10 +239,5 @@ static IOR_offset_t PMDK_GetFileSize(IOR_param_t * test, } aggFileSizeFromStat = stat_buf.st_size; - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, testComm), - "cannot total data moved"); - aggFileSizeFromStat = tmpSum; - return(aggFileSizeFromStat); } /* PMDK_GetFileSize() */ From 81a7a3ab81545a9002d1697de8c50f5ba40b44d7 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 13:41:59 +0100 Subject: [PATCH 014/154] Renamed existing S3 impl. which uses (apparently) outdated library. Added dummy support for libS3 with library search. 
--- configure.ac | 53 ++++++-- src/Makefile.am | 9 +- src/{aiori-S3.c => aiori-S3-4c.c} | 6 +- src/aiori-S3-libs3.c | 199 ++++++++++++++++++++++++++++++ src/aiori.c | 7 +- src/aiori.h | 3 +- 6 files changed, 260 insertions(+), 17 deletions(-) rename src/{aiori-S3.c => aiori-S3-4c.c} (99%) create mode 100644 src/aiori-S3-libs3.c diff --git a/configure.ac b/configure.ac index e1b1932..a7d5085 100755 --- a/configure.ac +++ b/configure.ac @@ -308,19 +308,54 @@ AM_COND_IF([AWS4C_DIR],[ ]) -# Amazon S3 support [see also: --with-aws4c] -AC_ARG_WITH([S3], - [AS_HELP_STRING([--with-S3], - [support IO with Amazon S3 backend @<:@default=no@:>@])], + +# Amazon S3 support using the libs3 API +AC_ARG_WITH([S3-libs3], + [AS_HELP_STRING([--with-S3-libs3], + [support IO with Amazon libS3 @<:@default=no@:>@])], [], - [with_S3=no]) -AM_CONDITIONAL([USE_S3_AIORI], [test x$with_S3 = xyes]) -AM_COND_IF([USE_S3_AIORI],[ - AC_DEFINE([USE_S3_AIORI], [], [Build Amazon-S3 backend AIORI]) + [with_S3_libs3=no]) +AM_CONDITIONAL([USE_S3_LIBS3_AIORI], [test x$with_S3_libs3 = xyes]) +AM_COND_IF([USE_S3_LIBS3_AIORI],[ + AC_DEFINE([USE_S3_LIBS3_AIORI], [], [Build Amazon-S3 backend AIORI using libs3]) ]) err=0 -AS_IF([test "x$with_S3" != xno], [ +AS_IF([test "x$with_S3_libs3" != xno], [ + AC_MSG_NOTICE([beginning of S3-related checks]) + ORIG_CPPFLAGS=$CPPFLAGS + ORIG_LDFLAGS=$LDFLAGS + + AC_CHECK_HEADERS([libs3.h], [], [err=1]) + + # Autotools thinks searching for a library means I want it added to LIBS + ORIG_LIBS=$LIBS + AC_CHECK_LIB([s3], [S3_initialize], [], [err=1]) + LIBS=$ORIG_LIBS + + AC_MSG_NOTICE([end of S3-related checks]) + if test "$err" == 1; then + AC_MSG_FAILURE([S3 support is missing. dnl Make sure you have access to libs3. 
dnl]) + fi + + # restore user's values + CPPFLAGS=$ORIG_CPPFLAGS + LDFLAGS=$ORIG_LDFLAGS +]) + +# Amazon S3 support [see also: --with-aws4c] +AC_ARG_WITH([S3-4c], + [AS_HELP_STRING([--with-S3-4c], + [support IO with Amazon S3 backend @<:@default=no@:>@])], + [], + [with_S3_4c=no]) +AM_CONDITIONAL([USE_S3_4C_AIORI], [test x$with_S3_4c = xyes]) +AM_COND_IF([USE_S3_4C_AIORI],[ + AC_DEFINE([USE_S3_4C_AIORI], [], [Build Amazon-S3 backend AIORI using lib4c]) +]) + +err=0 +AS_IF([test "x$with_S3_4c" != xno], [ AC_MSG_NOTICE([beginning of S3-related checks]) # save user's values, while we use AC_CHECK_HEADERS with $AWS4C_DIR diff --git a/src/Makefile.am b/src/Makefile.am index 7cbd448..c718169 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -90,8 +90,8 @@ extraSOURCES += aiori-Gfarm.c extraLDADD += -lgfarm endif -if USE_S3_AIORI -extraSOURCES += aiori-S3.c +if USE_S3_4C_AIORI +extraSOURCES += aiori-S3-4c.c if AWS4C_DIR extraCPPFLAGS += $(AWS4C_CPPFLAGS) extraLDFLAGS += $(AWS4C_LDFLAGS) @@ -102,6 +102,11 @@ extraLDADD += -laws4c extraLDADD += -laws4c_extra endif +if USE_S3_LIBS3_AIORI +extraSOURCES += aiori-S3-libs3.c +extraLDADD += -ls3 +endif + if WITH_LUSTRE extraLDADD += -llustreapi endif diff --git a/src/aiori-S3.c b/src/aiori-S3-4c.c similarity index 99% rename from src/aiori-S3.c rename to src/aiori-S3-4c.c index 3999739..a1465b7 100755 --- a/src/aiori-S3.c +++ b/src/aiori-S3-4c.c @@ -130,7 +130,7 @@ const char* bucket_name = "ior"; # define IOR_CURL_NOCONTINUE 0x02 # define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? 
*/ -#ifdef USE_S3_AIORI +#ifdef USE_S3_4C_AIORI # include # include "aws4c.h" #else @@ -167,8 +167,8 @@ static int S3_check_params(IOR_param_t *); // "Pure S3" // N:1 writes use multi-part upload // N:N fails if "transfer-size" != "block-size" (because that requires "append") -ior_aiori_t s3_aiori = { - .name = "S3", +ior_aiori_t s3_4c_aiori = { + .name = "S3-4c", .name_legacy = NULL, .create = S3_Create, .open = S3_Open, diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c new file mode 100644 index 0000000..5c9129f --- /dev/null +++ b/src/aiori-S3-libs3.c @@ -0,0 +1,199 @@ +/* +* S3 implementation using the newer libs3 +*/ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include + +#include "ior.h" +#include "aiori.h" +#include "utilities.h" + + +/************************** O P T I O N S *****************************/ +typedef struct { + uint64_t delay_creates; + uint64_t delay_xfer; + int delay_rank_0_only; +} dummy_options_t; + +static char * current = (char*) 1; + +static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ + dummy_options_t * o = malloc(sizeof(dummy_options_t)); + if (init_values != NULL){ + memcpy(o, init_values, sizeof(dummy_options_t)); + }else{ + memset(o, 0, sizeof(dummy_options_t)); + } + + *init_backend_options = (aiori_mod_opt_t*) o; + + option_help h [] = { + {0, "dummy.delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_creates}, + {0, "dummy.delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_xfer}, + {0, "dummy.delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o->delay_rank_0_only}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} + +static int count_init = 0; + +static aiori_fd_t *S3_Create(char *testFileName, int iorflags, aiori_mod_opt_t * options) +{ + if(count_init <= 0){ + ERR("S3 missing initialization in 
create\n"); + } + if(verbose > 4){ + fprintf(out_logfile, "S3 create: %s = %p\n", testFileName, current); + } + dummy_options_t * o = (dummy_options_t*) options; + if (o->delay_creates){ + if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ + struct timespec wait = { o->delay_creates / 1000 / 1000, 1000l * (o->delay_creates % 1000000)}; + nanosleep( & wait, NULL); + } + } + return (aiori_fd_t*) current++; +} + +static aiori_fd_t *S3_Open(char *testFileName, int flags, aiori_mod_opt_t * options) +{ + if(count_init <= 0){ + ERR("S3 missing initialization in open\n"); + } + if(verbose > 4){ + fprintf(out_logfile, "S3 open: %s = %p\n", testFileName, current); + } + return (aiori_fd_t*) current++; +} + +static void S3_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options) +{ + if(verbose > 4){ + fprintf(out_logfile, "S3 fsync %p\n", fd); + } +} + + +static void S3_Sync(aiori_mod_opt_t * options) +{ +} + +static void S3_Close(aiori_fd_t *fd, aiori_mod_opt_t * options) +{ + if(verbose > 4){ + fprintf(out_logfile, "S3 close %p\n", fd); + } +} + +static void S3_Delete(char *testFileName, aiori_mod_opt_t * options) +{ + if(verbose > 4){ + fprintf(out_logfile, "S3 delete: %s\n", testFileName); + } +} + +static char * S3_getVersion() +{ + return "0.5"; +} + +static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName) +{ + if(verbose > 4){ + fprintf(out_logfile, "S3 getFileSize: %s\n", testFileName); + } + return 0; +} + +static IOR_offset_t S3_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ + if(verbose > 4){ + fprintf(out_logfile, "S3 xfer: %p\n", file); + } + dummy_options_t * o = (dummy_options_t*) options; + if (o->delay_xfer){ + if (! 
o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ + struct timespec wait = {o->delay_xfer / 1000 / 1000, 1000l * (o->delay_xfer % 1000000)}; + nanosleep( & wait, NULL); + } + } + return length; +} + +static int S3_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options){ + stat->f_bsize = 1; + stat->f_blocks = 1; + stat->f_bfree = 1; + stat->f_bavail = 1; + stat->f_files = 1; + stat->f_ffree = 1; + return 0; +} + +static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ + return 0; +} + +static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ + return 0; +} + +static int S3_access (const char *path, int mode, aiori_mod_opt_t * options){ + return 0; +} + +static int S3_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options){ + return 0; +} + +static int S3_check_params(aiori_mod_opt_t * options){ + return 0; +} + +static void S3_init(aiori_mod_opt_t * options){ + WARN("S3 initialized"); + count_init++; +} + +static void S3_final(aiori_mod_opt_t * options){ + WARN("S3 finalized"); + if(count_init <= 0){ + ERR("S3 invalid finalization\n"); + } + count_init--; +} + + +ior_aiori_t S3_libS3_aiori = { + .name = "S3-libs3", + .name_legacy = NULL, + .create = S3_Create, + .open = S3_Open, + .xfer = S3_Xfer, + .close = S3_Close, + .delete = S3_Delete, + .get_version = S3_getVersion, + .fsync = S3_Fsync, + .get_file_size = S3_GetFileSize, + .statfs = S3_statfs, + .mkdir = S3_mkdir, + .rmdir = S3_rmdir, + .access = S3_access, + .stat = S3_stat, + .initialize = S3_init, + .finalize = S3_final, + .get_options = S3_options, + .check_params = S3_check_params, + .sync = S3_Sync, + .enable_mdtest = true +}; diff --git a/src/aiori.c b/src/aiori.c index 303f367..7b0c160 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -68,8 +68,11 @@ ior_aiori_t *available_aiori[] = { #ifdef USE_MMAP_AIORI &mmap_aiori, #endif -#ifdef USE_S3_AIORI - &s3_aiori, +#ifdef USE_S3_LIBS3_AIORI + &S3_libS3_aiori, +#endif +#ifdef 
USE_S3_4C_AIORI + &s3_4c_aiori, &s3_plus_aiori, &s3_emc_aiori, #endif diff --git a/src/aiori.h b/src/aiori.h index f7205e0..6b185d7 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -130,7 +130,8 @@ extern ior_aiori_t ncmpi_aiori; extern ior_aiori_t posix_aiori; extern ior_aiori_t pmdk_aiori; extern ior_aiori_t mmap_aiori; -extern ior_aiori_t s3_aiori; +extern ior_aiori_t S3_libS3_aiori; +extern ior_aiori_t s3_4c_aiori; extern ior_aiori_t s3_plus_aiori; extern ior_aiori_t s3_emc_aiori; extern ior_aiori_t rados_aiori; From 2a3838c360031ca73361965d866cd738a58ed87b Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 16:50:13 +0100 Subject: [PATCH 015/154] S3: Partial implementation. --- src/aiori-S3-libs3.c | 355 ++++++++++++++++++++++++++++++++----------- testing/s3.sh | 24 +++ 2 files changed, 292 insertions(+), 87 deletions(-) create mode 100755 testing/s3.sh diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 5c9129f..7d5bfb1 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -11,166 +11,346 @@ #include #include +#include + #include "ior.h" #include "aiori.h" +#include "aiori-debug.h" #include "utilities.h" +static aiori_xfer_hint_t * hints = NULL; + +static void s3_xfer_hints(aiori_xfer_hint_t * params){ + hints = params; +} + /************************** O P T I O N S *****************************/ typedef struct { - uint64_t delay_creates; - uint64_t delay_xfer; - int delay_rank_0_only; -} dummy_options_t; + int bucket_per_dir; + char * access_key; + char * secret_key; + char * host; + char * bucket_prefix; + char * bucket_prefix_cur; + char * locationConstraint; -static char * current = (char*) 1; + int dont_suffix; + int s3_compatible; + int use_ssl; + S3BucketContext bucket_context; + S3Protocol s3_protocol; +} s3_options_t; static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ - dummy_options_t * o = malloc(sizeof(dummy_options_t)); + s3_options_t * o = 
malloc(sizeof(s3_options_t)); if (init_values != NULL){ - memcpy(o, init_values, sizeof(dummy_options_t)); + memcpy(o, init_values, sizeof(s3_options_t)); }else{ - memset(o, 0, sizeof(dummy_options_t)); + memset(o, 0, sizeof(s3_options_t)); } *init_backend_options = (aiori_mod_opt_t*) o; + o->bucket_prefix = "ior"; option_help h [] = { - {0, "dummy.delay-create", "Delay per create in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_creates}, - {0, "dummy.delay-xfer", "Delay per xfer in usec", OPTION_OPTIONAL_ARGUMENT, 'l', & o->delay_xfer}, - {0, "dummy.delay-only-rank0", "Delay only Rank0", OPTION_FLAG, 'd', & o->delay_rank_0_only}, - LAST_OPTION + //{0, "S3.bucket-per-directory", "Use one bucket to map one dir, otherwise only one bucket is used.", OPTION_FLAG, 'd', & o->bucket_per_dir}, + {0, "S3.bucket-name-prefix", "The name of the bucket (when using without -b), otherwise it is used as prefix.", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, + {0, "S3.dont-suffix-bucket", "If not selected, then a hash will be added to the bucket name to increase uniqueness.", OPTION_FLAG, 'd', & o->dont_suffix }, + {0, "S3.s3-compatible", "to be selected when using S3 compatible storage", OPTION_FLAG, 'd', & o->s3_compatible }, + {0, "S3.use-ssl", "used to specify that SSL is needed for the connection", OPTION_FLAG, 'd', & o->use_ssl }, + {0, "S3.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, + {0, "S3.secret-key", "The secret key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->secret_key}, + {0, "S3.access-key", "The access key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->access_key}, + LAST_OPTION }; option_help * help = malloc(sizeof(h)); memcpy(help, h, sizeof(h)); return help; } -static int count_init = 0; - -static aiori_fd_t *S3_Create(char *testFileName, int iorflags, aiori_mod_opt_t * options) -{ - if(count_init <= 0){ - ERR("S3 missing initialization in create\n"); - } - if(verbose > 4){ - fprintf(out_logfile, "S3 create: %s = %p\n", 
testFileName, current); - } - dummy_options_t * o = (dummy_options_t*) options; - if (o->delay_creates){ - if (! o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ - struct timespec wait = { o->delay_creates / 1000 / 1000, 1000l * (o->delay_creates % 1000000)}; - nanosleep( & wait, NULL); +static void def_file_name(s3_options_t * o, char * out_name, char const * path){ + // duplicate path except "/" + while(*path != 0){ + char c = *path; + if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') )){ + *out_name = *path; + out_name++; } + path++; } - return (aiori_fd_t*) current++; + *out_name = '\0'; } -static aiori_fd_t *S3_Open(char *testFileName, int flags, aiori_mod_opt_t * options) -{ - if(count_init <= 0){ - ERR("S3 missing initialization in open\n"); +static void def_bucket_name(s3_options_t * o, char * out_name, char const * path){ + // S3_MAX_BUCKET_NAME_SIZE + if (o->bucket_per_dir){ + out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); } - if(verbose > 4){ - fprintf(out_logfile, "S3 open: %s = %p\n", testFileName, current); - } - return (aiori_fd_t*) current++; -} -static void S3_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options) -{ - if(verbose > 4){ - fprintf(out_logfile, "S3 fsync %p\n", fd); - } -} - - -static void S3_Sync(aiori_mod_opt_t * options) -{ -} - -static void S3_Close(aiori_fd_t *fd, aiori_mod_opt_t * options) -{ - if(verbose > 4){ - fprintf(out_logfile, "S3 close %p\n", fd); - } -} - -static void S3_Delete(char *testFileName, aiori_mod_opt_t * options) -{ - if(verbose > 4){ - fprintf(out_logfile, "S3 delete: %s\n", testFileName); + // duplicate path except "/" + while(*path != 0){ + char c = *path; + if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') )){ + *out_name = *path; + out_name++; } + path++; + } + *out_name = '\0'; + + // S3Status S3_validate_bucket_name(const char *bucketName, S3UriStyle uriStyle); } +struct data_handling{ + char * buf; + int64_t size; +}; + 
+static S3Status s3status = S3StatusInterrupted; +static S3ErrorDetails s3error = {NULL}; + +static S3Status responsePropertiesCallback(const S3ResponseProperties *properties, void *callbackData){ + s3status = S3StatusOK; + return s3status; +} + +static void responseCompleteCallback(S3Status status, const S3ErrorDetails *error, void *callbackData) { + s3status = status; + if (error == NULL){ + s3error.message = NULL; + }else{ + s3error = *error; + } + return; +} + +#define CHECK_ERROR if (s3status != S3StatusOK){ \ + EWARNF("S3 \"%s\": %s - %s", S3_get_status_name(s3status), s3error.message, s3error.furtherDetails); \ +} + + +static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData){ + printf("CALLBACK\n"); + struct data_handling * dh = (struct data_handling *) callbackData; + const int64_t size = dh->size > bufferSize ? bufferSize : dh->size; + if(size == 0) return 0; + memcpy(buffer, dh->buf, size); + dh->buf += size; + dh->size -= size; + + return size; +} + +static S3PutObjectHandler putObjectHandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & putObjectDataCallback }; + +static S3ResponseHandler responseHandler = { &responsePropertiesCallback, &responseCompleteCallback }; + static char * S3_getVersion() { return "0.5"; } -static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName) +static void S3_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options) { - if(verbose > 4){ - fprintf(out_logfile, "S3 getFileSize: %s\n", testFileName); - } - return 0; + // Not needed } -static IOR_offset_t S3_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ - if(verbose > 4){ - fprintf(out_logfile, "S3 xfer: %p\n", file); - } - dummy_options_t * o = (dummy_options_t*) options; - if (o->delay_xfer){ - if (! 
o->delay_rank_0_only || (o->delay_rank_0_only && rank == 0)){ - struct timespec wait = {o->delay_xfer / 1000 / 1000, 1000l * (o->delay_xfer % 1000000)}; - nanosleep( & wait, NULL); - } - } - return length; + +static void S3_Sync(aiori_mod_opt_t * options) +{ + // Not needed } +static S3Status S3ListResponseCallback(const char *ownerId, const char *ownerDisplayName, const char *bucketName, int64_t creationDateSeconds, void *callbackData){ + uint64_t * count = (uint64_t*) callbackData; + *count++; + return S3StatusOK; +} + +static S3ListServiceHandler listhandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & S3ListResponseCallback}; + static int S3_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options){ stat->f_bsize = 1; stat->f_blocks = 1; stat->f_bfree = 1; stat->f_bavail = 1; - stat->f_files = 1; stat->f_ffree = 1; + s3_options_t * o = (s3_options_t*) options; + + // use the number of bucket as files + uint64_t buckets = 0; + S3_list_service(o->s3_protocol, o->access_key, o->secret_key, o->host, + NULL, & listhandler, & buckets); + stat->f_files = buckets; + CHECK_ERROR + return 0; } +static aiori_fd_t *S3_Create(char *testFileName, int iorflags, aiori_mod_opt_t * options) +{ + s3_options_t * o = (s3_options_t*) options; + + return (aiori_fd_t*) 1; +} + +static aiori_fd_t *S3_Open(char *testFileName, int flags, aiori_mod_opt_t * options) +{ + s3_options_t * o = (s3_options_t*) options; + + return (aiori_fd_t*) 1; +} + +static void S3_Close(aiori_fd_t *fd, aiori_mod_opt_t * options) +{ +} + +static void S3_Delete(char *path, aiori_mod_opt_t * options) +{ + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + def_file_name(o, p, path); + S3_delete_object(& o->bucket_context, p, NULL, & responseHandler, NULL); + CHECK_ERROR +} + +static IOR_offset_t S3_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ + + return length; +} + 
static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ - return 0; + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + + def_bucket_name(o, p, path); + if (o->bucket_per_dir){ + S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, p, S3CannedAclPrivate, o->locationConstraint, NULL, & responseHandler, NULL); + CHECK_ERROR + return 0; + }else{ + struct data_handling dh = { .buf = NULL, .size = 0 }; + S3_put_object(& o->bucket_context, p, 0, NULL, NULL, & putObjectHandler, & dh); + if (! o->s3_compatible){ + CHECK_ERROR + } + return 0; + } } static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ - return 0; + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + + def_bucket_name(o, p, path); + if (o->bucket_per_dir){ + S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, p, NULL, & responseHandler, NULL); + CHECK_ERROR + return 0; + }else{ + S3_delete_object(& o->bucket_context, p, NULL, & responseHandler, NULL); + CHECK_ERROR + return 0; + } } + +static S3Status statResponsePropertiesCallback(const S3ResponseProperties *properties, void *callbackData){ + // check the size + struct stat *buf = (struct stat*) callbackData; + if(buf != NULL){ + buf->st_size = properties->contentLength; + buf->st_mtime = properties->lastModified; + } + s3status = S3StatusOK; + return s3status; +} + +static S3ResponseHandler statResponseHandler = { &statResponsePropertiesCallback, &responseCompleteCallback }; + static int S3_access (const char *path, int mode, aiori_mod_opt_t * options){ + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + def_file_name(o, p, path); + + S3_head_object(& o->bucket_context, p, NULL, & statResponseHandler, NULL); + if (s3status != S3StatusOK){ + return -1; + } return 0; } static int S3_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options){ + s3_options_t * o = (s3_options_t*) options; + char 
p[FILENAME_MAX]; + def_file_name(o, p, path); + + memset(buf, 0, sizeof(struct stat)); + S3_head_object(& o->bucket_context, p, NULL, & statResponseHandler, buf); + if (s3status != S3StatusOK){ + return -1; + } return 0; } +static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName) +{ + struct stat buf; + if(S3_stat(testFileName, & buf, options) != 0) return -1; + return buf.st_size; +} + + static int S3_check_params(aiori_mod_opt_t * options){ return 0; } static void S3_init(aiori_mod_opt_t * options){ - WARN("S3 initialized"); - count_init++; + s3_options_t * o = (s3_options_t*) options; + int ret = S3_initialize(NULL, S3_INIT_ALL, o->host); + + // create a bucket id based on access-key using a trivial checksumming + if(! o->dont_suffix){ + uint64_t c = 0; + char * r = o->access_key; + for(uint64_t pos = 1; (*r) != '\0' ; r++, pos*=10) { + c += (*r) * pos; + } + int count = snprintf(NULL, 0, "%s%lu", o->bucket_prefix, c % 1000); + char * old_prefix = o->bucket_prefix; + o->bucket_prefix_cur = malloc(count + 1); + sprintf(o->bucket_prefix_cur, "%s%lu", old_prefix, c % 1000); + }else{ + o->bucket_prefix_cur = o->bucket_prefix; + } + + // init bucket context + memset(& o->bucket_context, 0, sizeof(o->bucket_context)); + o->bucket_context.hostName = o->host; + o->bucket_context.bucketName = o->bucket_prefix_cur; + if (! 
o->bucket_per_dir){ + S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->bucket_context.bucketName, S3CannedAclPrivate, o->locationConstraint, NULL, & responseHandler, NULL); + CHECK_ERROR + } + + if (o->use_ssl){ + o->s3_protocol = S3ProtocolHTTPS; + }else{ + o->s3_protocol = S3ProtocolHTTP; + } + o->bucket_context.protocol = o->s3_protocol; + o->bucket_context.uriStyle = S3UriStylePath; + o->bucket_context.accessKeyId = o->access_key; + o->bucket_context.secretAccessKey = o->secret_key; + + if ( ret != S3StatusOK ){ + FAIL("S3 error %s", S3_get_status_name(ret)); + } } static void S3_final(aiori_mod_opt_t * options){ - WARN("S3 finalized"); - if(count_init <= 0){ - ERR("S3 invalid finalization\n"); - } - count_init--; + S3_deinitialize(); } @@ -184,6 +364,7 @@ ior_aiori_t S3_libS3_aiori = { .delete = S3_Delete, .get_version = S3_getVersion, .fsync = S3_Fsync, + .xfer_hints = s3_xfer_hints, .get_file_size = S3_GetFileSize, .statfs = S3_statfs, .mkdir = S3_mkdir, diff --git a/testing/s3.sh b/testing/s3.sh new file mode 100755 index 0000000..d1bf50b --- /dev/null +++ b/testing/s3.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# Test basic S3 behavior using minio. + +ROOT="$(dirname ${BASH_SOURCE[0]})" +TYPE="basic" + +cd $ROOT + +if [[ ! -e minio ]] ; then + wget https://dl.min.io/server/minio/release/linux-amd64/minio + chmod +x minio +fi + +export MINIO_ACCESS_KEY=accesskey +export MINIO_SECRET_KEY=secretkey + +./minio --quiet server /dev/shm & + +source $ROOT/test-lib.sh +IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey +MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey + +kill -9 %1 From 154cf2cde7ca218f18ea8b4450aff043682651df Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 18:36:49 +0100 Subject: [PATCH 016/154] LibS3 version that stores fragments as one object each. 
--- src/aiori-S3-libs3.c | 235 +++++++++++++++++++++++++++++-------------- testing/s3.sh | 17 ++-- testing/test-lib.sh | 4 +- 3 files changed, 170 insertions(+), 86 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 7d5bfb1..2267c53 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -1,5 +1,7 @@ /* * S3 implementation using the newer libs3 +* https://github.com/bji/libs3 +* Use one object per file chunk */ #ifdef HAVE_CONFIG_H @@ -27,14 +29,16 @@ static void s3_xfer_hints(aiori_xfer_hint_t * params){ /************************** O P T I O N S *****************************/ typedef struct { - int bucket_per_dir; + int bucket_per_file; char * access_key; char * secret_key; char * host; char * bucket_prefix; char * bucket_prefix_cur; char * locationConstraint; + char * authRegion; + int timeout; int dont_suffix; int s3_compatible; int use_ssl; @@ -54,7 +58,7 @@ static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_m o->bucket_prefix = "ior"; option_help h [] = { - //{0, "S3.bucket-per-directory", "Use one bucket to map one dir, otherwise only one bucket is used.", OPTION_FLAG, 'd', & o->bucket_per_dir}, + {0, "S3.bucket-per-file", "Use one bucket to map one file, otherwise only one bucket is used to store all files.", OPTION_FLAG, 'd', & o->bucket_per_file}, {0, "S3.bucket-name-prefix", "The name of the bucket (when using without -b), otherwise it is used as prefix.", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, {0, "S3.dont-suffix-bucket", "If not selected, then a hash will be added to the bucket name to increase uniqueness.", OPTION_FLAG, 'd', & o->dont_suffix }, {0, "S3.s3-compatible", "to be selected when using S3 compatible storage", OPTION_FLAG, 'd', & o->s3_compatible }, @@ -84,7 +88,7 @@ static void def_file_name(s3_options_t * o, char * out_name, char const * path){ static void def_bucket_name(s3_options_t * o, char * out_name, char const * path){ // S3_MAX_BUCKET_NAME_SIZE - if 
(o->bucket_per_dir){ + if (o->bucket_per_file){ out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); } @@ -103,7 +107,7 @@ static void def_bucket_name(s3_options_t * o, char * out_name, char const * path } struct data_handling{ - char * buf; + IOR_size_t * buf; int64_t size; }; @@ -126,24 +130,10 @@ static void responseCompleteCallback(S3Status status, const S3ErrorDetails *erro } #define CHECK_ERROR if (s3status != S3StatusOK){ \ - EWARNF("S3 \"%s\": %s - %s", S3_get_status_name(s3status), s3error.message, s3error.furtherDetails); \ + EWARNF("S3 %s:%d \"%s\": %s - %s", __FUNCTION__, __LINE__, S3_get_status_name(s3status), s3error.message, s3error.furtherDetails); \ } -static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData){ - printf("CALLBACK\n"); - struct data_handling * dh = (struct data_handling *) callbackData; - const int64_t size = dh->size > bufferSize ? bufferSize : dh->size; - if(size == 0) return 0; - memcpy(buffer, dh->buf, size); - dh->buf += size; - dh->size -= size; - - return size; -} - -static S3PutObjectHandler putObjectHandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & putObjectDataCallback }; - static S3ResponseHandler responseHandler = { &responsePropertiesCallback, &responseCompleteCallback }; static char * S3_getVersion() @@ -180,79 +170,54 @@ static int S3_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_op // use the number of bucket as files uint64_t buckets = 0; - S3_list_service(o->s3_protocol, o->access_key, o->secret_key, o->host, - NULL, & listhandler, & buckets); + S3_list_service(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, + o->authRegion, NULL, o->timeout, & listhandler, & buckets); stat->f_files = buckets; CHECK_ERROR return 0; } -static aiori_fd_t *S3_Create(char *testFileName, int iorflags, aiori_mod_opt_t * options) -{ - s3_options_t * o = (s3_options_t*) options; - - return (aiori_fd_t*) 1; +static S3Status S3multipart_handler(const char 
*upload_id, void *callbackData){ + *((char const**)(callbackData)) = upload_id; + return S3StatusOK; } -static aiori_fd_t *S3_Open(char *testFileName, int flags, aiori_mod_opt_t * options) -{ - s3_options_t * o = (s3_options_t*) options; +static S3MultipartInitialHandler multipart_handler = { {&responsePropertiesCallback, &responseCompleteCallback }, & S3multipart_handler}; - return (aiori_fd_t*) 1; +typedef struct{ + char * object; +} S3_fd_t; + +static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData){ + struct data_handling * dh = (struct data_handling *) callbackData; + const int64_t size = dh->size > bufferSize ? bufferSize : dh->size; + if(size == 0) return 0; + memcpy(buffer, dh->buf, size); + dh->buf = (IOR_size_t*) ((char*)(dh->buf) + size); + dh->size -= size; + + return size; } -static void S3_Close(aiori_fd_t *fd, aiori_mod_opt_t * options) -{ -} +static S3PutObjectHandler putObjectHandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & putObjectDataCallback }; -static void S3_Delete(char *path, aiori_mod_opt_t * options) +static aiori_fd_t *S3_Create(char *path, int iorflags, aiori_mod_opt_t * options) { + char * upload_id; s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - S3_delete_object(& o->bucket_context, p, NULL, & responseHandler, NULL); - CHECK_ERROR -} + S3_fd_t * fd = malloc(sizeof(S3_fd_t)); + fd->object = strdup(p); -static IOR_offset_t S3_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ - - return length; -} - -static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ - s3_options_t * o = (s3_options_t*) options; - char p[FILENAME_MAX]; - - def_bucket_name(o, p, path); - if (o->bucket_per_dir){ - S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, p, S3CannedAclPrivate, o->locationConstraint, NULL, & responseHandler, NULL); - CHECK_ERROR 
- return 0; - }else{ + if(iorflags & IOR_CREAT){ struct data_handling dh = { .buf = NULL, .size = 0 }; - S3_put_object(& o->bucket_context, p, 0, NULL, NULL, & putObjectHandler, & dh); - if (! o->s3_compatible){ - CHECK_ERROR - } - return 0; - } -} - -static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ - s3_options_t * o = (s3_options_t*) options; - char p[FILENAME_MAX]; - - def_bucket_name(o, p, path); - if (o->bucket_per_dir){ - S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, p, NULL, & responseHandler, NULL); + S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, &putObjectHandler, & dh); CHECK_ERROR - return 0; - }else{ - S3_delete_object(& o->bucket_context, p, NULL, & responseHandler, NULL); - CHECK_ERROR - return 0; } + + return (aiori_fd_t*) fd; } @@ -269,25 +234,135 @@ static S3Status statResponsePropertiesCallback(const S3ResponseProperties *prope static S3ResponseHandler statResponseHandler = { &statResponsePropertiesCallback, &responseCompleteCallback }; +static aiori_fd_t *S3_Open(char *path, int flags, aiori_mod_opt_t * options) +{ + if(flags & IOR_CREAT){ + return S3_Create(path, flags, options); + } + if(flags & IOR_WRONLY){ + WARN("S3 IOR_WRONLY is not supported"); + } + if(flags & IOR_RDWR){ + WARN("S3 IOR_RDWR is not supported"); + } + + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + def_file_name(o, p, path); + + struct stat buf; + S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, & buf); + if (s3status != S3StatusOK){ + return NULL; + } + + S3_fd_t * fd = malloc(sizeof(S3_fd_t)); + fd->object = strdup(p); + + return (aiori_fd_t*) fd; +} + +static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *callbackData){ + struct data_handling * dh = (struct data_handling *) callbackData; + const int64_t size = dh->size > bufferSize ? 
bufferSize : dh->size; + memcpy(dh->buf, buffer, size); + dh->buf = (IOR_size_t*) ((char*)(dh->buf) + size); + dh->size -= size; + + return S3StatusOK; +} + +static S3GetObjectHandler getObjectHandler = { { &responsePropertiesCallback, &responseCompleteCallback }, & getObjectDataCallback }; + +static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options){ + S3_fd_t * fd = (S3_fd_t *) afd; + struct data_handling dh = { .buf = buffer, .size = length }; + + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + sprintf(p, "%s-%ld-%ld", fd->object, (long) offset, (long) length); + if(access == WRITE){ + S3_put_object(& o->bucket_context, p, length, NULL, NULL, o->timeout, &putObjectHandler, & dh); + }else{ + S3_get_object(& o->bucket_context, p, NULL, 0, length, NULL, o->timeout, &getObjectHandler, & dh); + } + if (! o->s3_compatible){ + CHECK_ERROR + } + return length; +} + + +static void S3_Close(aiori_fd_t * afd, aiori_mod_opt_t * options) +{ + S3_fd_t * fd = (S3_fd_t *) afd; + free(fd->object); + free(afd); +} + +static void S3_Delete(char *path, aiori_mod_opt_t * options) +{ + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + def_file_name(o, p, path); + S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); + CHECK_ERROR +} + +static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + + def_bucket_name(o, p, path); + if (o->bucket_per_file){ + S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); + CHECK_ERROR + return 0; + }else{ + struct data_handling dh = { .buf = NULL, .size = 0 }; + S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, & putObjectHandler, & dh); + if (! 
o->s3_compatible){ + CHECK_ERROR + } + return 0; + } +} + +static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ + s3_options_t * o = (s3_options_t*) options; + char p[FILENAME_MAX]; + + def_bucket_name(o, p, path); + if (o->bucket_per_file){ + S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); + CHECK_ERROR + return 0; + }else{ + S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); + CHECK_ERROR + return 0; + } +} + static int S3_access (const char *path, int mode, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - S3_head_object(& o->bucket_context, p, NULL, & statResponseHandler, NULL); + S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, NULL); if (s3status != S3StatusOK){ return -1; } return 0; } -static int S3_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options){ +static int S3_stat(const char *path, struct stat *buf, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); memset(buf, 0, sizeof(struct stat)); - S3_head_object(& o->bucket_context, p, NULL, & statResponseHandler, buf); + S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, buf); if (s3status != S3StatusOK){ return -1; } @@ -303,12 +378,18 @@ static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName static int S3_check_params(aiori_mod_opt_t * options){ + if(hints->blockSize != hints->transferSize){ + ERR("S3 Blocksize must be transferSize"); + } + return 0; } static void S3_init(aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; int ret = S3_initialize(NULL, S3_INIT_ALL, o->host); + if(ret != S3StatusOK) + FAIL("Could not initialize S3 library"); // create a bucket id based on access-key using a 
trivial checksumming if(! o->dont_suffix){ @@ -329,8 +410,8 @@ static void S3_init(aiori_mod_opt_t * options){ memset(& o->bucket_context, 0, sizeof(o->bucket_context)); o->bucket_context.hostName = o->host; o->bucket_context.bucketName = o->bucket_prefix_cur; - if (! o->bucket_per_dir){ - S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->bucket_context.bucketName, S3CannedAclPrivate, o->locationConstraint, NULL, & responseHandler, NULL); + if (! o->bucket_per_file){ + S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); CHECK_ERROR } diff --git a/testing/s3.sh b/testing/s3.sh index d1bf50b..2e79e29 100755 --- a/testing/s3.sh +++ b/testing/s3.sh @@ -5,20 +5,23 @@ ROOT="$(dirname ${BASH_SOURCE[0]})" TYPE="basic" -cd $ROOT - -if [[ ! -e minio ]] ; then +if [[ ! -e $ROOT/minio ]] ; then wget https://dl.min.io/server/minio/release/linux-amd64/minio - chmod +x minio + mv minio $ROOT + chmod +x $ROOT/minio fi export MINIO_ACCESS_KEY=accesskey export MINIO_SECRET_KEY=secretkey -./minio --quiet server /dev/shm & +$ROOT/minio --quiet server /dev/shm & +export IOR_EXTRA="-o test" +export MDTEST_EXTRA="-d test" source $ROOT/test-lib.sh -IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey + +I=100 # Start with this ID +IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024*1024)) -t $((10*1024*1024)) +MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey kill -9 %1 diff --git a/testing/test-lib.sh b/testing/test-lib.sh index 444873d..e35b245 100644 --- a/testing/test-lib.sh +++ b/testing/test-lib.sh @@ -40,7 +40,7 @@ I=0 function IOR(){ RANKS=$1 shift - 
WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/ior ${@} ${IOR_EXTRA} -o ${IOR_TMP}/ior" + WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/ior ${@} -o ${IOR_TMP}/ior ${IOR_EXTRA}" $WHAT 1>"${IOR_OUT}/test_out.$I" 2>&1 if [[ $? != 0 ]]; then echo -n "ERR" @@ -56,7 +56,7 @@ function MDTEST(){ RANKS=$1 shift rm -rf ${IOR_TMP}/mdest - WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} ${MDTEST_EXTRA} -d ${IOR_TMP}/mdest -V=4" + WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/mdtest ${@} -d ${IOR_TMP}/mdest ${MDTEST_EXTRA} -V=4" $WHAT 1>"${IOR_OUT}/test_out.$I" 2>&1 if [[ $? != 0 ]]; then echo -n "ERR" From 87c9906d70bc39dddea5753deca39d6cc528b037 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 19:41:49 +0100 Subject: [PATCH 017/154] Error checking for NULL create/opens --- src/ior.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ior.c b/src/ior.c index 6263a04..ed2a565 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1379,6 +1379,7 @@ static void TestIoSys(IOR_test_t *test) params->open = WRITE; timer[0] = GetTimeStamp(); fd = backend->create(testFileName, IOR_WRONLY | IOR_CREAT | IOR_TRUNC, params->backend_options); + if(fd == NULL) FAIL("Cannot create file"); timer[1] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), @@ -1449,6 +1450,7 @@ static void TestIoSys(IOR_test_t *test) GetTestFileName(testFileName, params); params->open = WRITECHECK; fd = backend->open(testFileName, IOR_RDONLY, params->backend_options); + if(fd == NULL) FAIL("Cannot open file"); dataMoved = WriteOrRead(params, &results[rep], fd, WRITECHECK, &ioBuffers); backend->close(fd, params->backend_options); rankOffset = 0; @@ -1521,6 +1523,7 @@ static void TestIoSys(IOR_test_t *test) params->open = READ; timer[0] = GetTimeStamp(); fd = backend->open(testFileName, IOR_RDONLY, params->backend_options); + if(fd == NULL) FAIL("Cannot open file"); timer[1] = GetTimeStamp(); if (params->intraTestBarriers) MPI_CHECK(MPI_Barrier(testComm), From 
fcae6ff0b9f5cdc4de8ca9bc84c9c2ba2c102087 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 30 Jun 2020 20:02:36 +0100 Subject: [PATCH 018/154] S3: Fine tuning, supporting bucket per file/dir and single bucket. --- src/aiori-S3-libs3.c | 145 +++++++++++++++++++++++++++++-------------- testing/s3.sh | 4 ++ 2 files changed, 102 insertions(+), 47 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 2267c53..bb7dc20 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -58,9 +58,9 @@ static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_m o->bucket_prefix = "ior"; option_help h [] = { - {0, "S3.bucket-per-file", "Use one bucket to map one file, otherwise only one bucket is used to store all files.", OPTION_FLAG, 'd', & o->bucket_per_file}, - {0, "S3.bucket-name-prefix", "The name of the bucket (when using without -b), otherwise it is used as prefix.", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, - {0, "S3.dont-suffix-bucket", "If not selected, then a hash will be added to the bucket name to increase uniqueness.", OPTION_FLAG, 'd', & o->dont_suffix }, + {0, "S3.bucket-per-file", "Use one bucket to map one file/directory, otherwise one bucket is used to store all dirs/files.", OPTION_FLAG, 'd', & o->bucket_per_file}, + {0, "S3.bucket-name-prefix", "The prefix of the bucket(s).", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, + {0, "S3.dont-suffix-bucket", "By default a hash will be added to the bucket name to increase uniqueness, this disables the option.", OPTION_FLAG, 'd', & o->dont_suffix }, {0, "S3.s3-compatible", "to be selected when using S3 compatible storage", OPTION_FLAG, 'd', & o->s3_compatible }, {0, "S3.use-ssl", "used to specify that SSL is needed for the connection", OPTION_FLAG, 'd', & o->use_ssl }, {0, "S3.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, @@ -74,12 +74,18 @@ static option_help * S3_options(aiori_mod_opt_t ** 
init_backend_options, aiori_m } static void def_file_name(s3_options_t * o, char * out_name, char const * path){ + if(o->bucket_per_file){ + out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); + } // duplicate path except "/" while(*path != 0){ char c = *path; - if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') )){ + if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') )){ *out_name = *path; out_name++; + }else if(c >= 'A' && c <= 'Z'){ + *out_name = *path + ('a' - 'A'); + out_name++; } path++; } @@ -88,16 +94,16 @@ static void def_file_name(s3_options_t * o, char * out_name, char const * path){ static void def_bucket_name(s3_options_t * o, char * out_name, char const * path){ // S3_MAX_BUCKET_NAME_SIZE - if (o->bucket_per_file){ - out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); - } - + out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); // duplicate path except "/" while(*path != 0){ char c = *path; - if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') )){ + if(((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') )){ *out_name = *path; out_name++; + }else if(c >= 'A' && c <= 'Z'){ + *out_name = *path + ('a' - 'A'); + out_name++; } path++; } @@ -129,8 +135,9 @@ static void responseCompleteCallback(S3Status status, const S3ErrorDetails *erro return; } -#define CHECK_ERROR if (s3status != S3StatusOK){ \ - EWARNF("S3 %s:%d \"%s\": %s - %s", __FUNCTION__, __LINE__, S3_get_status_name(s3status), s3error.message, s3error.furtherDetails); \ +#define CHECK_ERROR(p) \ +if (s3status != S3StatusOK){ \ + EWARNF("S3 %s:%d (path:%s) \"%s\": %s %s", __FUNCTION__, __LINE__, p, S3_get_status_name(s3status), s3error.message, s3error.furtherDetails ? 
s3error.furtherDetails : ""); \ } @@ -173,7 +180,7 @@ static int S3_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_op S3_list_service(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, o->authRegion, NULL, o->timeout, & listhandler, & buckets); stat->f_files = buckets; - CHECK_ERROR + CHECK_ERROR(o->authRegion); return 0; } @@ -208,15 +215,22 @@ static aiori_fd_t *S3_Create(char *path, int iorflags, aiori_mod_opt_t * options s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - S3_fd_t * fd = malloc(sizeof(S3_fd_t)); - fd->object = strdup(p); if(iorflags & IOR_CREAT){ - struct data_handling dh = { .buf = NULL, .size = 0 }; - S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, &putObjectHandler, & dh); - CHECK_ERROR + if(o->bucket_per_file){ + S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); + }else{ + struct data_handling dh = { .buf = NULL, .size = 0 }; + S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, &putObjectHandler, & dh); + } + if (s3status != S3StatusOK){ + CHECK_ERROR(p); + return NULL; + } } + S3_fd_t * fd = malloc(sizeof(S3_fd_t)); + fd->object = strdup(p); return (aiori_fd_t*) fd; } @@ -250,15 +264,21 @@ static aiori_fd_t *S3_Open(char *path, int flags, aiori_mod_opt_t * options) char p[FILENAME_MAX]; def_file_name(o, p, path); - struct stat buf; - S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, & buf); + if (o->bucket_per_file){ + S3_test_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, + NULL, o->host, p, o->authRegion, 0, NULL, + NULL, o->timeout, & responseHandler, NULL); + }else{ + struct stat buf; + S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, & buf); + } if (s3status != S3StatusOK){ + CHECK_ERROR(p); return NULL; } S3_fd_t * fd = 
malloc(sizeof(S3_fd_t)); fd->object = strdup(p); - return (aiori_fd_t*) fd; } @@ -280,14 +300,29 @@ static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, I s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; - sprintf(p, "%s-%ld-%ld", fd->object, (long) offset, (long) length); + + if(o->bucket_per_file){ + o->bucket_context.bucketName = fd->object; + if(offset != 0){ + sprintf(p, "%ld-%ld", (long) offset, (long) length); + }else{ + sprintf(p, "0"); + } + }else{ + if(offset != 0){ + sprintf(p, "%s-%ld-%ld", fd->object, (long) offset, (long) length); + }else{ + sprintf(p, "%s", fd->object); + } + } + if(access == WRITE){ S3_put_object(& o->bucket_context, p, length, NULL, NULL, o->timeout, &putObjectHandler, & dh); }else{ S3_get_object(& o->bucket_context, p, NULL, 0, length, NULL, o->timeout, &getObjectHandler, & dh); } if (! o->s3_compatible){ - CHECK_ERROR + CHECK_ERROR(p); } return length; } @@ -300,13 +335,30 @@ static void S3_Close(aiori_fd_t * afd, aiori_mod_opt_t * options) free(afd); } +S3Status list_delete_cb(int isTruncated, const char *nextMarker, int contentsCount, const S3ListBucketContent *contents, int commonPrefixesCount, const char **commonPrefixes, void *callbackData){ + s3_options_t * o = (s3_options_t*) callbackData; + S3_delete_object(& o->bucket_context, contents->key, NULL, o->timeout, & responseHandler, NULL); + + return S3StatusOK; +} + +static S3ListBucketHandler list_delete_handler = {{&responsePropertiesCallback, &responseCompleteCallback }, list_delete_cb}; + static void S3_Delete(char *path, aiori_mod_opt_t * options) { s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); - CHECK_ERROR + + if(o->bucket_per_file){ + o->bucket_context.bucketName = p; + S3_list_bucket(& o->bucket_context, NULL, NULL, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, o); + + 
S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); + }else{ + S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); + } + CHECK_ERROR(p); } static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ @@ -316,13 +368,13 @@ static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ def_bucket_name(o, p, path); if (o->bucket_per_file){ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); - CHECK_ERROR + CHECK_ERROR(p); return 0; }else{ struct data_handling dh = { .buf = NULL, .size = 0 }; S3_put_object(& o->bucket_context, p, 0, NULL, NULL, o->timeout, & putObjectHandler, & dh); if (! o->s3_compatible){ - CHECK_ERROR + CHECK_ERROR(p); } return 0; } @@ -335,40 +387,39 @@ static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ def_bucket_name(o, p, path); if (o->bucket_per_file){ S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); - CHECK_ERROR + CHECK_ERROR(p); return 0; }else{ S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); - CHECK_ERROR + CHECK_ERROR(p); return 0; } } -static int S3_access (const char *path, int mode, aiori_mod_opt_t * options){ - s3_options_t * o = (s3_options_t*) options; - char p[FILENAME_MAX]; - def_file_name(o, p, path); - - S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, NULL); - if (s3status != S3StatusOK){ - return -1; - } - return 0; -} - static int S3_stat(const char *path, struct stat *buf, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - memset(buf, 0, sizeof(struct stat)); - 
S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, buf); + // TODO count the individual file fragment sizes together + if (o->bucket_per_file){ + S3_test_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, + NULL, o->host, p, o->authRegion, 0, NULL, + NULL, o->timeout, & responseHandler, NULL); + }else{ + S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, buf); + } if (s3status != S3StatusOK){ return -1; } return 0; } +static int S3_access (const char *path, int mode, aiori_mod_opt_t * options){ + struct stat buf; + return S3_stat(path, & buf, options); +} + static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName) { struct stat buf; @@ -410,11 +461,6 @@ static void S3_init(aiori_mod_opt_t * options){ memset(& o->bucket_context, 0, sizeof(o->bucket_context)); o->bucket_context.hostName = o->host; o->bucket_context.bucketName = o->bucket_prefix_cur; - if (! o->bucket_per_file){ - S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); - CHECK_ERROR - } - if (o->use_ssl){ o->s3_protocol = S3ProtocolHTTPS; }else{ @@ -425,6 +471,11 @@ static void S3_init(aiori_mod_opt_t * options){ o->bucket_context.accessKeyId = o->access_key; o->bucket_context.secretAccessKey = o->secret_key; + if (! 
o->bucket_per_file){ + S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); + CHECK_ERROR(o->bucket_context.bucketName); + } + if ( ret != S3StatusOK ){ FAIL("S3 error %s", S3_get_status_name(ret)); } diff --git a/testing/s3.sh b/testing/s3.sh index 2e79e29..2fcb12a 100755 --- a/testing/s3.sh +++ b/testing/s3.sh @@ -24,4 +24,8 @@ I=100 # Start with this ID IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024*1024)) -t $((10*1024*1024)) MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey +IOR 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024)) -t $((10*1024)) --S3.bucket-per-file +MDTEST 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file + + kill -9 %1 From 484cb420d28f3de262c7f6d2cd4eed8bf3ebd252 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 1 Jul 2020 09:19:10 +0100 Subject: [PATCH 019/154] Generate config file with build options --- src/Makefile.am | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Makefile.am b/src/Makefile.am index c718169..7f1be40 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -133,3 +133,9 @@ MDTEST_CPPFLAGS = $(mdtest_CPPFLAGS) libaiori_a_SOURCES += $(extraSOURCES) libaiori_a_CPPFLAGS = $(extraCPPFLAGS) + +# Generate config file with build flags to allow reuse of library +all-local: build.conf +build.conf: + @echo LDFLAGS=$(LDFLAGS) $(extraLDFLAGS) $(extraLDADD) > build.conf + @echo CFLAGS=$(CFLAGS) $(extraCPPFLAGS) >> build.conf From 371335195baecbf49d6ccc50f286a97a85f2b674 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 1 Jul 2020 09:42:13 +0100 Subject: [PATCH 020/154] Initialize logfile for warnings if not set. 
--- src/aiori.c | 1 + src/utilities.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aiori.c b/src/aiori.c index 7b0c160..05e4935 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -103,6 +103,7 @@ void * airoi_update_module_options(const ior_aiori_t * backend, options_all_t * } options_all_t * airoi_create_all_module_options(option_help * global_options){ + if(! out_logfile) out_logfile = stdout; int airoi_c = aiori_count(); options_all_t * opt = malloc(sizeof(options_all_t)); opt->module_count = airoi_c + 1; diff --git a/src/utilities.c b/src/utilities.c index 19ef0d6..5b65e55 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -60,8 +60,8 @@ int rankOffset = 0; int verbose = VERBOSE_0; /* verbose output */ MPI_Comm testComm; MPI_Comm mpi_comm_world; -FILE * out_logfile; -FILE * out_resultfile; +FILE * out_logfile = NULL; +FILE * out_resultfile = NULL; enum OutputFormat_t outputFormat; /***************************** F U N C T I O N S ******************************/ From fb8f43fcfec43edea0160ceab2cb0867bd76b739 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 1 Jul 2020 10:05:09 +0100 Subject: [PATCH 021/154] Reduce verbosity for verification --- src/mdtest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index ce07981..d2a3e23 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -354,8 +354,9 @@ void mdtest_verify_data(int item, char * buffer, size_t bytes){ for( ; i < bytes; i++){ if(buffer[i] != (char) (i + 1)){ - VERBOSE(0, -1, "Error verifying byte %zu for item %d", i, item); + VERBOSE(5, -1, "Error verifying byte %zu for item %d", i, item); verification_error++; + break; } } } From fb9fa7cc9fe1dbdceaecf7a392e8decdec7c1fe8 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 2 Jul 2020 15:57:46 +0100 Subject: [PATCH 022/154] S3: Better cleanup. 
--- src/aiori-S3-libs3.c | 42 ++++++++++++++++++++++++++++++++++++------ testing/s3.sh | 4 ++-- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index bb7dc20..8bf36e2 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -335,10 +335,22 @@ static void S3_Close(aiori_fd_t * afd, aiori_mod_opt_t * options) free(afd); } -S3Status list_delete_cb(int isTruncated, const char *nextMarker, int contentsCount, const S3ListBucketContent *contents, int commonPrefixesCount, const char **commonPrefixes, void *callbackData){ - s3_options_t * o = (s3_options_t*) callbackData; - S3_delete_object(& o->bucket_context, contents->key, NULL, o->timeout, & responseHandler, NULL); +typedef struct { + int status; // do not reorder! + s3_options_t * o; + int truncated; + char const *nextMarker; +} s3_delete_req; +S3Status list_delete_cb(int isTruncated, const char *nextMarker, int contentsCount, const S3ListBucketContent *contents, int commonPrefixesCount, const char **commonPrefixes, void *callbackData){ + s3_delete_req * req = (s3_delete_req*) callbackData; + for(int i=0; i < contentsCount; i++){ + S3_delete_object(& req->o->bucket_context, contents[i].key, NULL, req->o->timeout, & responseHandler, NULL); + } + req->truncated = isTruncated; + if(isTruncated){ + req->nextMarker = nextMarker; + } return S3StatusOK; } @@ -352,10 +364,16 @@ static void S3_Delete(char *path, aiori_mod_opt_t * options) if(o->bucket_per_file){ o->bucket_context.bucketName = p; - S3_list_bucket(& o->bucket_context, NULL, NULL, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, o); - + s3_delete_req req = {0, o, 1, NULL}; + while(req.truncated){ + S3_list_bucket(& o->bucket_context, NULL, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); + } S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); }else{ + 
s3_delete_req req = {0, o, 1, NULL}; + while(req.truncated){ + S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); + } S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); } CHECK_ERROR(p); @@ -386,6 +404,12 @@ static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ def_bucket_name(o, p, path); if (o->bucket_per_file){ + o->bucket_context.bucketName = p; + s3_delete_req req = {0, o, 1, NULL}; + while(req.truncated){ + S3_list_bucket(& o->bucket_context, req.nextMarker, NULL, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); + } + S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); CHECK_ERROR(p); return 0; @@ -471,7 +495,7 @@ static void S3_init(aiori_mod_opt_t * options){ o->bucket_context.accessKeyId = o->access_key; o->bucket_context.secretAccessKey = o->secret_key; - if (! o->bucket_per_file){ + if (! o->bucket_per_file && rank == 0){ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); CHECK_ERROR(o->bucket_context.bucketName); } @@ -482,6 +506,12 @@ static void S3_init(aiori_mod_opt_t * options){ } static void S3_final(aiori_mod_opt_t * options){ + s3_options_t * o = (s3_options_t*) options; + if (! 
o->bucket_per_file && rank == 0){ + S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, o->bucket_context.bucketName, o->authRegion, NULL, o->timeout, & responseHandler, NULL); + CHECK_ERROR(o->bucket_context.bucketName); + } + S3_deinitialize(); } diff --git a/testing/s3.sh b/testing/s3.sh index 2fcb12a..28b1b04 100755 --- a/testing/s3.sh +++ b/testing/s3.sh @@ -22,10 +22,10 @@ source $ROOT/test-lib.sh I=100 # Start with this ID IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024*1024)) -t $((10*1024*1024)) -MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey +MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -n 10 IOR 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024)) -t $((10*1024)) --S3.bucket-per-file -MDTEST 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file +MDTEST 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file -n 10 kill -9 %1 From 4f7350dc6abf66ff8e5ac0ad27e3e45402c080ca Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 2 Jul 2020 16:26:05 +0100 Subject: [PATCH 023/154] MDTest remove testdir if created by MDTest. 
--- src/mdtest.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mdtest.c b/src/mdtest.c index d2a3e23..c8caa4f 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1932,6 +1932,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * int last = 0; int stride = 1; int iterations = 1; + int created_root_dir = 0; // was the root directory existing or newly created verbose = 0; int no_barriers = 0; @@ -2189,6 +2190,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if (backend->mkdir(testdirpath, DIRMODE, backend_options) != 0) { FAIL("Unable to create test directory path %s", testdirpath); } + created_root_dir = 1; } /* display disk usage */ @@ -2295,6 +2297,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } } + if (created_root_dir && backend->rmdir(testdirpath, backend_options) != 0) { + FAIL("Unable to remote test directory path %s", testdirpath); + } + if(verification_error){ VERBOSE(0, -1, "\nERROR: verifying the data read! Take the performance values with care!\n"); } From a12ed015a6012704b00403cf4e1721401bd2437e Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 2 Jul 2020 16:40:20 +0100 Subject: [PATCH 024/154] S3: better testing, stat is semi-fake, other functions aren't. 
--- src/aiori-S3-libs3.c | 34 ++++++++++++++++------------------ testing/s3.sh | 6 ++++-- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 8bf36e2..ef11c43 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -94,7 +94,9 @@ static void def_file_name(s3_options_t * o, char * out_name, char const * path){ static void def_bucket_name(s3_options_t * o, char * out_name, char const * path){ // S3_MAX_BUCKET_NAME_SIZE - out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); + if(o->bucket_per_file){ + out_name += sprintf(out_name, "%s-", o->bucket_prefix_cur); + } // duplicate path except "/" while(*path != 0){ char c = *path; @@ -215,6 +217,7 @@ static aiori_fd_t *S3_Create(char *path, int iorflags, aiori_mod_opt_t * options s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); + if(iorflags & IOR_CREAT){ if(o->bucket_per_file){ @@ -314,8 +317,7 @@ static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, I }else{ sprintf(p, "%s", fd->object); } - } - + } if(access == WRITE){ S3_put_object(& o->bucket_context, p, length, NULL, NULL, o->timeout, &putObjectHandler, & dh); }else{ @@ -361,19 +363,20 @@ static void S3_Delete(char *path, aiori_mod_opt_t * options) s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); + if(o->bucket_per_file){ o->bucket_context.bucketName = p; - s3_delete_req req = {0, o, 1, NULL}; - while(req.truncated){ + s3_delete_req req = {0, o, 0, NULL}; + do{ S3_list_bucket(& o->bucket_context, NULL, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); - } + }while(req.truncated); S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); }else{ - s3_delete_req req = {0, o, 1, NULL}; - while(req.truncated){ + s3_delete_req req = {0, o, 0, NULL}; + do{ 
S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); - } + }while(req.truncated); S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); } CHECK_ERROR(p); @@ -382,8 +385,9 @@ static void S3_Delete(char *path, aiori_mod_opt_t * options) static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; - def_bucket_name(o, p, path); + + if (o->bucket_per_file){ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); CHECK_ERROR(p); @@ -402,14 +406,8 @@ static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; - def_bucket_name(o, p, path); + def_bucket_name(o, p, path); if (o->bucket_per_file){ - o->bucket_context.bucketName = p; - s3_delete_req req = {0, o, 1, NULL}; - while(req.truncated){ - S3_list_bucket(& o->bucket_context, req.nextMarker, NULL, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); - } - S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); CHECK_ERROR(p); return 0; @@ -425,7 +423,7 @@ static int S3_stat(const char *path, struct stat *buf, aiori_mod_opt_t * options char p[FILENAME_MAX]; def_file_name(o, p, path); memset(buf, 0, sizeof(struct stat)); - // TODO count the individual file fragment sizes together + // TODO count the individual file fragment sizes together if (o->bucket_per_file){ S3_test_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, 0, NULL, diff --git a/testing/s3.sh b/testing/s3.sh index 28b1b04..b38d339 100755 --- a/testing/s3.sh +++ b/testing/s3.sh @@ -22,10 +22,12 @@ source $ROOT/test-lib.sh 
I=100 # Start with this ID IOR 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024*1024)) -t $((10*1024*1024)) -MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -n 10 +MDTEST 2 -a S3-libs3 -L --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -n 10 +MDTEST 2 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -n 5 -w 1024 -e 1024 IOR 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey -b $((10*1024)) -t $((10*1024)) --S3.bucket-per-file -MDTEST 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file -n 10 +MDTEST 1 -a S3-libs3 -L --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file -n 5 +MDTEST 1 -a S3-libs3 --S3.host=localhost:9000 --S3.secret-key=secretkey --S3.access-key=accesskey --S3.bucket-per-file -n 10 -w 1024 -e 1024 kill -9 %1 From f280123d0b08c7f54987a33f3209c20d8e288b1f Mon Sep 17 00:00:00 2001 From: Karsten Weiss Date: Fri, 3 Jul 2020 09:09:40 +0200 Subject: [PATCH 025/154] Spelling fixes (found by codespell) --- .travis.yml | 4 ++-- NEWS | 4 ++-- README.md | 6 +++--- README_DAOS | 4 ++-- doc/USER_GUIDE | 8 ++++---- doc/mdtest.1 | 2 +- doc/sphinx/userDoc/faq.rst | 2 +- doc/sphinx/userDoc/options.rst | 4 ++-- doc/sphinx/userDoc/tutorial.rst | 2 +- src/aiori-DFS.c | 4 ++-- src/aiori-IME.c | 2 +- src/aiori-MMAP.c | 2 +- src/aiori-POSIX.c | 10 +++++----- src/aiori-S3.c | 14 +++++++------- src/ior.c | 10 +++++----- src/ior.h | 2 +- src/mdtest.c | 2 +- src/parse_options.c | 6 +++--- testing/docker/ceph/NOTES | 2 +- testing/docker/run-all-tests.sh | 2 +- 20 files changed, 46 insertions(+), 46 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7d8202d..aea9647 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,12 +20,12 @@ install: # TODO: Not in repos 
for 14.04 trustz but comes 16.04 xenial #- sudo apt-get install -y libpnetcdf-dev pnetcdf-bin # Install HDFS - # TODO: Not sure with which c libray hdfs should be used and if it is in + # TODO: Not sure with which c library hdfs should be used and if it is in # the ubuntu repos # Probably hadoop needs to be installed an provides native API. # Install Amazon S3 # TODO: The needed library needs to be installed. Follow the instructions in - # aiori-S3.c to achive this. + # aiori-S3.c to achieve this. # GPFS # NOTE: Think GPFS need a license and is therefore not testable with travis. script: diff --git a/NEWS b/NEWS index 4349765..195e645 100644 --- a/NEWS +++ b/NEWS @@ -133,7 +133,7 @@ Version 2.10.3 Contributed by demyn@users.sourceforge.net - Ported to Windows. Required changes related to 'long' types, which on Windows are always 32-bits, even on 64-bit systems. Missing system headers and - functions acount for most of the remaining changes. + functions account for most of the remaining changes. New files for Windows: - IOR/ior.vcproj - Visual C project file - IOR/src/C/win/getopt.{h,c} - GNU getopt() support @@ -193,7 +193,7 @@ Version 2.9.5 - Added notification for "Using reorderTasks '-C' (expecting block, not cyclic, task assignment)" - Corrected bug with read performance with stonewalling (was using full size, - stat'ed file instead of bytes transfered). + stat'ed file instead of bytes transferred). Version 2.9.4 -------------------------------------------------------------------------------- diff --git a/README.md b/README.md index c1c73a8..8f1c0c8 100755 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=master)](https://travis-ci.org/hpc/ior) This repository contains the IOR and mdtest parallel I/O benchmarks. The -[official IOR/mdtest documention][] can be found in the `docs/` subdirectory or -on Read the Docs. 
+[official IOR/mdtest documentation][] can be found in the `docs/` subdirectory +or on Read the Docs. ## Building @@ -28,4 +28,4 @@ on Read the Docs. distributions at once. [official IOR release]: https://github.com/hpc/ior/releases -[official IOR/mdtest documention]: http://ior.readthedocs.org/ +[official IOR/mdtest documentation]: http://ior.readthedocs.org/ diff --git a/README_DAOS b/README_DAOS index ed98bd6..f54b426 100644 --- a/README_DAOS +++ b/README_DAOS @@ -40,7 +40,7 @@ Required Options: Optional Options: --daos.group : group name of servers with the pool --daos.chunk_size : Chunk size of the array object controlling striping over DKEYs ---daos.destroy flag to destory the container on finalize +--daos.destroy flag to destroy the container on finalize --daos.oclass : specific object class for array object Examples that should work include: @@ -66,7 +66,7 @@ Required Options: Optional Options: --dfs.group : group name of servers with the pool --dfs.chunk_size : Chunk size of the files ---dfs.destroy flag to destory the container on finalize +--dfs.destroy flag to destroy the container on finalize --dfs.oclass : specific object class for files In the IOR options, the file name should be specified on the root dir directly diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index 3d6b4e4..2962753 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -47,7 +47,7 @@ Two ways to run IOR: E.g., to execute: IOR -W -f script This defaults all tests in 'script' to use write data checking. - * The Command line supports to specify additional parameters for the choosen API. + * The Command line supports to specify additional parameters for the chosen API. For example, username and password for the storage. Available options are listed in the help text after selecting the API when running with -h. For example, 'IOR -a DUMMY -h' shows the supported options for the DUMMY backend. 
@@ -361,7 +361,7 @@ GPFS-SPECIFIC: * gpfsReleaseToken - immediately after opening or creating file, release all locks. Might help mitigate lock-revocation - traffic when many proceses write/read to same file. + traffic when many processes write/read to same file. BeeGFS-SPECIFIC (POSIX only): ================ @@ -499,7 +499,7 @@ zip, gzip, and bzip. 3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed). To avoid compression a transfer size of greater than the bzip block size is required - (default = 900KB). I suggest a transfer size of greather than 1MB to avoid bzip2 compression. + (default = 900KB). I suggest a transfer size of greater than 1MB to avoid bzip2 compression. Be aware of the block size your compression algorithm will look at, and adjust the transfer size accordingly. @@ -660,7 +660,7 @@ HOW DO I USE HINTS? 'setenv IOR_HINT__MPI__ ' -HOW DO I EXPLICITY SET THE FILE DATA SIGNATURE? +HOW DO I EXPLICITLY SET THE FILE DATA SIGNATURE? The data signature for a transfer contains the MPI task number, transfer- buffer offset, and also timestamp for the start of iteration. As IOR works diff --git a/doc/mdtest.1 b/doc/mdtest.1 index 3cfc082..81468d9 100644 --- a/doc/mdtest.1 +++ b/doc/mdtest.1 @@ -28,7 +28,7 @@ Use ``collective creates'', meaning task 0 does all the creates. Only perform the create phase of the tests. .TP .I "-d" testdir[@testdir2] -The directory in which the tests will run. For multiple pathes, must use fully-qualified pathnames. +The directory in which the tests will run. For multiple paths, must use fully-qualified pathnames. [default: working directory of mdtest]. .TP .I "-D" diff --git a/doc/sphinx/userDoc/faq.rst b/doc/sphinx/userDoc/faq.rst index 0e9a8a9..df07cbb 100644 --- a/doc/sphinx/userDoc/faq.rst +++ b/doc/sphinx/userDoc/faq.rst @@ -146,7 +146,7 @@ HOW DO I USE HINTS? 'setenv IOR_HINT__MPI__ ' -HOW DO I EXPLICITY SET THE FILE DATA SIGNATURE? +HOW DO I EXPLICITLY SET THE FILE DATA SIGNATURE? 
The data signature for a transfer contains the MPI task number, transfer- buffer offset, and also timestamp for the start of iteration. As IOR works diff --git a/doc/sphinx/userDoc/options.rst b/doc/sphinx/userDoc/options.rst index 31240f0..6751749 100644 --- a/doc/sphinx/userDoc/options.rst +++ b/doc/sphinx/userDoc/options.rst @@ -302,7 +302,7 @@ GPFS-SPECIFIC * ``gpfsReleaseToken`` - release all locks immediately after opening or creating file. Might help mitigate lock-revocation traffic when many - proceses write/read to same file. (default: 0) + processes write/read to same file. (default: 0) Verbosity levels ---------------- @@ -338,7 +338,7 @@ bzip. 3) bzip2: For bziped files a transfer size of 1k is insufficient (~50% compressed). To avoid compression a transfer size of greater than the bzip block size is required - (default = 900KB). I suggest a transfer size of greather than 1MB to avoid bzip2 compression. + (default = 900KB). I suggest a transfer size of greater than 1MB to avoid bzip2 compression. Be aware of the block size your compression algorithm will look at, and adjust the transfer size accordingly. diff --git a/doc/sphinx/userDoc/tutorial.rst b/doc/sphinx/userDoc/tutorial.rst index 5fa6814..449d980 100644 --- a/doc/sphinx/userDoc/tutorial.rst +++ b/doc/sphinx/userDoc/tutorial.rst @@ -4,7 +4,7 @@ First Steps with IOR ==================== This is a short tutorial for the basic usage of IOR and some tips on how to use -IOR to handel caching effects as these are very likely to affect your +IOR to handle caching effects as these are very likely to affect your measurements. 
Running IOR diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 86f014c..ea87c67 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -514,7 +514,7 @@ DFS_Finalize(aiori_mod_opt_t *options) uuid_t uuid; double t1, t2; - INFO(VERBOSE_1, "Destorying DFS Container: %s\n", o->cont); + INFO(VERBOSE_1, "Destroying DFS Container: %s\n", o->cont); uuid_parse(o->cont, uuid); t1 = MPI_Wtime(); rc = daos_cont_destroy(poh, uuid, 1, NULL); @@ -561,7 +561,7 @@ DFS_Finalize(aiori_mod_opt_t *options) } /* - * Creat and open a file through the DFS interface. + * Create and open a file through the DFS interface. */ static aiori_fd_t * DFS_Create(char *testFileName, int flags, aiori_mod_opt_t *param) diff --git a/src/aiori-IME.c b/src/aiori-IME.c index 500f380..2ecabae 100755 --- a/src/aiori-IME.c +++ b/src/aiori-IME.c @@ -149,7 +149,7 @@ static int IME_Access(const char *path, int mode, IOR_param_t *param) } /* - * Creat and open a file through the IME interface. + * Create and open a file through the IME interface. */ static void *IME_Create(char *testFileName, IOR_param_t *param) { diff --git a/src/aiori-MMAP.c b/src/aiori-MMAP.c index 7ed3b90..2c0db42 100644 --- a/src/aiori-MMAP.c +++ b/src/aiori-MMAP.c @@ -128,7 +128,7 @@ static void ior_mmap_file(int *file, int mflags, void *param) } /* - * Creat and open a file through the POSIX interface, then setup mmap. + * Create and open a file through the POSIX interface, then setup mmap. */ static aiori_fd_t *MMAP_Create(char *testFileName, int flags, aiori_mod_opt_t * param) { diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 648b7c1..59d88dc 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -368,7 +368,7 @@ bool beegfs_createFilePath(char* filepath, mode_t mode, int numTargets, int chun /* - * Creat and open a file through the POSIX interface. + * Create and open a file through the POSIX interface. 
*/ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) { @@ -394,9 +394,9 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) #define FASYNC 00020000 /* fcntl, for BSD compatibility */ #endif if (o->lustre_set_striping) { - /* In the single-shared-file case, task 0 has to creat the - file with the Lustre striping options before any other processes - open the file */ + /* In the single-shared-file case, task 0 has to create the + file with the Lustre striping options before any other + processes open the file */ if (!hints->filePerProc && rank != 0) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); fd_oflag |= O_RDWR; @@ -485,7 +485,7 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) } /* - * Creat a file through mknod interface. + * Create a file through mknod interface. */ int POSIX_Mknod(char *testFileName) { diff --git a/src/aiori-S3.c b/src/aiori-S3.c index 3999739..2c97e51 100755 --- a/src/aiori-S3.c +++ b/src/aiori-S3.c @@ -126,7 +126,7 @@ const char* bucket_name = "ior"; /* TODO: The following stuff goes into options! */ /* REST/S3 variables */ // CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */ -# define IOR_CURL_INIT 0x01 /* curl top-level inits were perfomed once? */ +# define IOR_CURL_INIT 0x01 /* curl top-level inits were performed once? */ # define IOR_CURL_NOCONTINUE 0x02 # define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */ @@ -286,7 +286,7 @@ static int S3_check_params(IOR_param_t * test){ * NOTE: Our custom version of aws4c can be configured so that connections * are reused, instead of opened and closed on every operation. We * do configure it that way, but you still need to call these - * connect/disconnet functions, in order to insure that aws4c has + * connect/disconnect functions, in order to insure that aws4c has * been configured. 
* --------------------------------------------------------------------------- */ @@ -322,7 +322,7 @@ static void s3_connect( IOR_param_t* param ) { aws_read_config(getenv("USER")); // requires ~/.awsAuth aws_reuse_connections(1); - // initalize IOBufs. These are basically dynamically-extensible + // initialize IOBufs. These are basically dynamically-extensible // linked-lists. "growth size" controls the increment of new memory // allocated, whenever storage is used up. param->io_buf = aws_iobuf_new(); @@ -714,7 +714,7 @@ EMC_Open( char *testFileName, IOR_param_t * param ) { * impose two scaling problems: (1) requires all ETags to be shipped at * the BW available to a single process, (1) requires either that they * all fit into memory of a single process, or be written to disk - * (imposes additional BW contraints), or make a more-complex + * (imposes additional BW constraints), or make a more-complex * interaction with a threaded curl writefunction, to present the * appearance of a single thread to curl, whilst allowing streaming * reception of non-local ETags. @@ -777,7 +777,7 @@ S3_Xfer_internal(int access, // // In the N:1 case, the global order of part-numbers we're writing // depends on whether wer're writing strided or segmented, in - // other words, how and are acutally + // other words, how and are actually // positioning the parts being written. [See discussion at // S3_Close_internal().] // @@ -1014,7 +1014,7 @@ S3_Fsync( void *fd, IOR_param_t * param ) { * * ISSUE: The S3 spec says that a multi-part upload can have at most 10,000 * parts. Does EMC allow more than this? (NOTE the spec also says - * parts must be at leaast 5MB, but EMC definitely allows smaller + * parts must be at least 5MB, but EMC definitely allows smaller * parts than that.) * * ISSUE: All Etags must be sent from a single rank, in a single @@ -1126,7 +1126,7 @@ S3_Close_internal( void* fd, // add XML for *all* the parts. The XML must be ordered by // part-number. 
Each rank wrote parts, // locally. At rank0, the etags for each rank are now - // stored as a continguous block of text, with the blocks + // stored as a contiguous block of text, with the blocks // stored in rank order in etag_vec. In other words, our // internal rep at rank 0 matches the "segmented" format. // From this, we must select etags in an order matching how diff --git a/src/ior.c b/src/ior.c index e70bf1d..5937a8e 100755 --- a/src/ior.c +++ b/src/ior.c @@ -641,9 +641,9 @@ FillBuffer(void *buffer, unsigned long long hi, lo; unsigned long long *buf = (unsigned long long *)buffer; - if(test->dataPacketType == incompressible ) { /* Make for some non compressable buffers with randomish data */ + if(test->dataPacketType == incompressible ) { /* Make for some non compressible buffers with randomish data */ - /* In order for write checks to work, we have to restart the psuedo random sequence */ + /* In order for write checks to work, we have to restart the pseudo random sequence */ if(reseed_incompressible_prng == TRUE) { test->incompressibleSeed = test->setTimeStampSignature + rank; /* We copied seed into timestampSignature at initialization, also add the rank to add randomness between processes */ reseed_incompressible_prng = FALSE; @@ -1637,7 +1637,7 @@ static void ValidateTests(IOR_param_t * test) && (strcasecmp(test->api, "CEPHFS") != 0)) && test->fsync) WARN_RESET("fsync() not supported in selected backend", test, &defaults, fsync); - /* parameter consitency */ + /* parameter consistency */ if (test->reorderTasks == TRUE && test->reorderTasksRandom == TRUE) ERR("Both Constant and Random task re-ordering specified. Choose one and resubmit"); if (test->randomOffset && test->reorderTasksRandom @@ -1672,7 +1672,7 @@ static void ValidateTests(IOR_param_t * test) * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. * They are sequential and the last element is set to -1 as end marker. 
* @param test IOR_param_t for getting transferSize, blocksize and SegmentCount - * @param pretendRank int pretended Rank for shifting the offsest corectly + * @param pretendRank int pretended Rank for shifting the offsets correctly * @return IOR_offset_t */ IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) @@ -1720,7 +1720,7 @@ IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) * diversion in accesse as it dose with filePerProc. This is expected but * should be mined. * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount - * @param pretendRank int pretended Rank for shifting the offsest corectly + * @param pretendRank int pretended Rank for shifting the offsets correctly * @return IOR_offset_t * @return */ diff --git a/src/ior.h b/src/ior.h index a5c34b9..6767629 100755 --- a/src/ior.h +++ b/src/ior.h @@ -127,7 +127,7 @@ typedef struct int useExistingTestFile; /* do not delete test file before access */ int storeFileOffset; /* use file offset as stored signature */ int deadlineForStonewalling; /* max time in seconds to run any test phase */ - int stoneWallingWearOut; /* wear out the stonewalling, once the timout is over, each process has to write the same amount */ + int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */ uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */ char * stoneWallingStatusFile; diff --git a/src/mdtest.c b/src/mdtest.c index ce07981..d0cf14c 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -492,7 +492,7 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 progress->items_done = progress->items_per_dir; } -/* recusive function to create and remove files/directories from the +/* recursive function to create and remove files/directories from the directory tree */ void create_remove_items(int currDepth, 
const int dirs, const int create, const int collective, const char *path, uint64_t dirNum, rank_progress_t * progress) { unsigned i; diff --git a/src/parse_options.c b/src/parse_options.c index 31fac13..c2b5b8c 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -282,7 +282,7 @@ int contains_only(char *haystack, char *needle) /* check for "needle" */ if (strncasecmp(ptr, needle, strlen(needle)) != 0) return 0; - /* make sure the rest of the line is only whitspace as well */ + /* make sure the rest of the line is only whitespace as well */ for (ptr += strlen(needle); ptr < end; ptr++) { if (!isspace(*ptr)) return 0; @@ -395,7 +395,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'C', NULL, "reorderTasks -- changes task ordering for readback (useful to avoid client cache)", OPTION_FLAG, 'd', & params->reorderTasks}, {'d', NULL, "interTestDelay -- delay between reps in seconds", OPTION_OPTIONAL_ARGUMENT, 'd', & params->interTestDelay}, {'D', NULL, "deadlineForStonewalling -- seconds before stopping write or read phase", OPTION_OPTIONAL_ARGUMENT, 'd', & params->deadlineForStonewalling}, - {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timeout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, {'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync}, @@ -436,7 +436,7 @@ option_help * 
createGlobalOptions(IOR_param_t * params){ {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, - {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputting the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, {0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun}, LAST_OPTION, }; diff --git a/testing/docker/ceph/NOTES b/testing/docker/ceph/NOTES index 2023922..398e4c4 100644 --- a/testing/docker/ceph/NOTES +++ b/testing/docker/ceph/NOTES @@ -7,7 +7,7 @@ Following are basic notes on how to deploy the 'ceph/demo' docker container. The Run `docker pull ceph/demo` to download the image to your system. ################################ -# Deploy 'ceph/demo' conatiner # +# Deploy 'ceph/demo' container # ################################ To deploy the Ceph cluster, execute the following command: diff --git a/testing/docker/run-all-tests.sh b/testing/docker/run-all-tests.sh index 172576f..15d576d 100755 --- a/testing/docker/run-all-tests.sh +++ b/testing/docker/run-all-tests.sh @@ -46,7 +46,7 @@ for IMAGE in $(find -type d | cut -b 3- |grep -v "^$") ; do done if [[ $ERROR != 0 ]] ; then - echo "Errors occured!" + echo "Errors occurred!" else echo "OK: all tests passed!" fi From 49746b99d9a1da953aa94995445c27fe596428a0 Mon Sep 17 00:00:00 2001 From: Jean-Yves VET Date: Thu, 2 Jul 2020 17:48:52 +0200 Subject: [PATCH 026/154] aiori-IME: Update to new aiori interface This patch updates IME backend to support new aiori interface. It also fixes some indentation issues. 
--- src/aiori-IME.c | 298 ++++++++++++++++++++++++++---------------------- 1 file changed, 159 insertions(+), 139 deletions(-) diff --git a/src/aiori-IME.c b/src/aiori-IME.c index 500f380..cf7a1b3 100755 --- a/src/aiori-IME.c +++ b/src/aiori-IME.c @@ -21,8 +21,8 @@ #include #include #include -#include /* sys_errlist */ -#include /* IO operations */ +#include /* sys_errlist */ +#include /* IO operations */ #include "ior.h" #include "iordef.h" @@ -30,63 +30,70 @@ #include "utilities.h" #include "ime_native.h" -#ifndef O_BINARY /* Required on Windows */ +#define IME_UNUSED(x) (void)(x) /* Silence compiler warnings */ + +#ifndef O_BINARY /* Required on Windows */ # define O_BINARY 0 #endif /**************************** P R O T O T Y P E S *****************************/ -static void *IME_Create(char *, IOR_param_t *); -static void *IME_Open(char *, IOR_param_t *); -static void IME_Close(void *, IOR_param_t *); -static void IME_Delete(char *, IOR_param_t *); -static char *IME_GetVersion(); -static void IME_Fsync(void *, IOR_param_t *); -static int IME_Access(const char *, int, IOR_param_t *); -static IOR_offset_t IME_GetFileSize(IOR_param_t *, MPI_Comm, char *); -static IOR_offset_t IME_Xfer(int, void *, IOR_size_t *, - IOR_offset_t, IOR_param_t *); -static int IME_StatFS(const char *, ior_aiori_statfs_t *, - IOR_param_t *); -static int IME_RmDir(const char *, IOR_param_t *); -static int IME_MkDir(const char *, mode_t, IOR_param_t *); -static int IME_Stat(const char *, struct stat *, IOR_param_t *); +aiori_fd_t *IME_Create(char *, int, aiori_mod_opt_t *); +aiori_fd_t *IME_Open(char *, int, aiori_mod_opt_t *); +void IME_Close(aiori_fd_t *, aiori_mod_opt_t *); +void IME_Delete(char *, aiori_mod_opt_t *); +char *IME_GetVersion(); +void IME_Fsync(aiori_fd_t *, aiori_mod_opt_t *); +int IME_Access(const char *, int, aiori_mod_opt_t *); +IOR_offset_t IME_GetFileSize(aiori_mod_opt_t *, MPI_Comm, char *); +IOR_offset_t IME_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, + 
IOR_offset_t, aiori_mod_opt_t *); +int IME_Statfs(const char *, ior_aiori_statfs_t *, + aiori_mod_opt_t *); +int IME_Rmdir(const char *, aiori_mod_opt_t *); +int IME_Mkdir(const char *, mode_t, aiori_mod_opt_t *); +int IME_Stat(const char *, struct stat *, aiori_mod_opt_t *); +void IME_Xferhints(aiori_xfer_hint_t *params); #if (IME_NATIVE_API_VERSION >= 132) -static int IME_Mknod(char *); -static void IME_Sync(IOR_param_t *); +int IME_Mknod(char *); +void IME_Sync(aiori_mod_opt_t *param); #endif -static void IME_Initialize(); -static void IME_Finalize(); +void IME_Initialize(); +void IME_Finalize(); -/************************** O P T I O N S *****************************/ + +/****************************** O P T I O N S *********************************/ + typedef struct{ - int direct_io; + int direct_io; } ime_options_t; +option_help *IME_Options(aiori_mod_opt_t **init_backend_options, + aiori_mod_opt_t *init_values) +{ + ime_options_t *o = malloc(sizeof(ime_options_t)); -option_help * IME_options(void ** init_backend_options, void * init_values){ - ime_options_t * o = malloc(sizeof(ime_options_t)); + if (init_values != NULL) + memcpy(o, init_values, sizeof(ime_options_t)); + else + o->direct_io = 0; - if (init_values != NULL){ - memcpy(o, init_values, sizeof(ime_options_t)); - }else{ - o->direct_io = 0; - } + *init_backend_options = (aiori_mod_opt_t*)o; - *init_backend_options = o; + option_help h[] = { + {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, + LAST_OPTION + }; + option_help *help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); - option_help h [] = { - {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, - LAST_OPTION - }; - option_help * help = malloc(sizeof(h)); - memcpy(help, h, sizeof(h)); - return help; + return help; } + /************************** D E C L A R A T I O N S ***************************/ extern int rank; @@ -100,19 +107,20 @@ ior_aiori_t ime_aiori = { .create = IME_Create, .open = 
IME_Open, .xfer = IME_Xfer, + .xfer_hints = IME_Xferhints, .close = IME_Close, .delete = IME_Delete, .get_version = IME_GetVersion, .fsync = IME_Fsync, .get_file_size = IME_GetFileSize, .access = IME_Access, - .statfs = IME_StatFS, - .rmdir = IME_RmDir, - .mkdir = IME_MkDir, + .statfs = IME_Statfs, + .rmdir = IME_Rmdir, + .mkdir = IME_Mkdir, .stat = IME_Stat, .initialize = IME_Initialize, .finalize = IME_Finalize, - .get_options = IME_options, + .get_options = IME_Options, #if (IME_NATIVE_API_VERSION >= 132) .sync = IME_Sync, .mknod = IME_Mknod, @@ -120,30 +128,48 @@ ior_aiori_t ime_aiori = { .enable_mdtest = true, }; +static aiori_xfer_hint_t *hints = NULL; +static bool ime_initialized = false; + + /***************************** F U N C T I O N S ******************************/ +void IME_Xferhints(aiori_xfer_hint_t *params) +{ + hints = params; +} + /* * Initialize IME (before MPI is started). */ -static void IME_Initialize() +void IME_Initialize() { + if (ime_initialized) + return; + ime_native_init(); + ime_initialized = true; } /* * Finlize IME (after MPI is shutdown). */ -static void IME_Finalize() +void IME_Finalize() { + if (!ime_initialized) + return; + (void)ime_native_finalize(); + ime_initialized = false; } /* * Try to access a file through the IME interface. */ -static int IME_Access(const char *path, int mode, IOR_param_t *param) + +int IME_Access(const char *path, int mode, aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); return ime_native_access(path, mode); } @@ -151,41 +177,43 @@ static int IME_Access(const char *path, int mode, IOR_param_t *param) /* * Creat and open a file through the IME interface. */ -static void *IME_Create(char *testFileName, IOR_param_t *param) +aiori_fd_t *IME_Create(char *testFileName, int flags, aiori_mod_opt_t *param) { - return IME_Open(testFileName, param); + return IME_Open(testFileName, flags, param); } /* * Open a file through the IME interface.
*/ -static void *IME_Open(char *testFileName, IOR_param_t *param) +aiori_fd_t *IME_Open(char *testFileName, int flags, aiori_mod_opt_t *param) { int fd_oflag = O_BINARY; int *fd; + if (hints->dryRun) + return NULL; + fd = (int *)malloc(sizeof(int)); if (fd == NULL) ERR("Unable to malloc file descriptor"); - ime_options_t * o = (ime_options_t*) param->backend_options; - if (o->direct_io == TRUE){ - set_o_direct_flag(&fd_oflag); - } + ime_options_t *o = (ime_options_t*) param; + if (o->direct_io == TRUE) + set_o_direct_flag(&fd_oflag); - if (param->openFlags & IOR_RDONLY) + if (flags & IOR_RDONLY) fd_oflag |= O_RDONLY; - if (param->openFlags & IOR_WRONLY) + if (flags & IOR_WRONLY) fd_oflag |= O_WRONLY; - if (param->openFlags & IOR_RDWR) + if (flags & IOR_RDWR) fd_oflag |= O_RDWR; - if (param->openFlags & IOR_APPEND) + if (flags & IOR_APPEND) fd_oflag |= O_APPEND; - if (param->openFlags & IOR_CREAT) + if (flags & IOR_CREAT) fd_oflag |= O_CREAT; - if (param->openFlags & IOR_EXCL) + if (flags & IOR_EXCL) fd_oflag |= O_EXCL; - if (param->openFlags & IOR_TRUNC) + if (flags & IOR_TRUNC) fd_oflag |= O_TRUNC; *fd = ime_native_open(testFileName, fd_oflag, 0664); @@ -194,14 +222,14 @@ static void *IME_Open(char *testFileName, IOR_param_t *param) ERR("cannot open file"); } - return((void *)fd); + return (aiori_fd_t*) fd; } /* * Write or read access to file using the IM interface. 
*/ -static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, - IOR_offset_t length, IOR_param_t *param) +IOR_offset_t IME_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, + IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *param) { int xferRetries = 0; long long remaining = (long long)length; @@ -209,25 +237,28 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, int fd = *(int *)file; long long rc; + if (hints->dryRun) + return length; + while (remaining > 0) { /* write/read file */ if (access == WRITE) { /* WRITE */ if (verbose >= VERBOSE_4) { fprintf(stdout, "task %d writing to offset %lld\n", - rank, param->offset + length - remaining); + rank, offset + length - remaining); } - rc = ime_native_pwrite(fd, ptr, remaining, param->offset); + rc = ime_native_pwrite(fd, ptr, remaining, offset); - if (param->fsyncPerWrite) - IME_Fsync(&fd, param); + if (hints->fsyncPerWrite) + IME_Fsync(file, param); } else { /* READ or CHECK */ if (verbose >= VERBOSE_4) { fprintf(stdout, "task %d reading from offset %lld\n", - rank, param->offset + length - remaining); + rank, offset + length - remaining); } - rc = ime_native_pread(fd, ptr, remaining, param->offset); + rc = ime_native_pread(fd, ptr, remaining, offset); if (rc == 0) ERR("hit EOF prematurely"); else if (rc < 0) @@ -238,9 +269,9 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, fprintf(stdout, "WARNING: Task %d, partial %s, %lld of " "%lld bytes at offset %lld\n", rank, access == WRITE ? "write" : "read", rc, - remaining, param->offset + length - remaining ); + remaining, offset + length - remaining ); - if (param->singleXferAttempt) { + if (hints->singleXferAttempt) { MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "barrier error"); } @@ -264,7 +295,7 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, /* * Perform fsync(). 
*/ -static void IME_Fsync(void *fd, IOR_param_t *param) +void IME_Fsync(aiori_fd_t *fd, aiori_mod_opt_t *param) { if (ime_native_fsync(*(int *)fd) != 0) WARN("cannot perform fsync on file"); @@ -273,33 +304,34 @@ static void IME_Fsync(void *fd, IOR_param_t *param) /* * Close a file through the IME interface. */ -static void IME_Close(void *fd, IOR_param_t *param) +void IME_Close(aiori_fd_t *file, aiori_mod_opt_t *param) { - if (ime_native_close(*(int *)fd) != 0) - { - free(fd); - ERR("cannot close file"); - } - else - free(fd); + if (hints->dryRun) + return; + + if (ime_native_close(*(int*)file) != 0) + ERRF("Cannot close file descriptor: %d", *(int*)file); + + free(file); } /* * Delete a file through the IME interface. */ -static void IME_Delete(char *testFileName, IOR_param_t *param) +void IME_Delete(char *testFileName, aiori_mod_opt_t *param) { - char errmsg[256]; - sprintf(errmsg, "[RANK %03d]:cannot delete file %s\n", - rank, testFileName); + if (hints->dryRun) + return; + if (ime_native_unlink(testFileName) != 0) - WARN(errmsg); + EWARNF("[RANK %03d]: cannot delete file \"%s\"\n", + rank, testFileName); } /* * Determine API version. 
*/ -static char *IME_GetVersion() +char *IME_GetVersion() { static char ver[1024] = {}; #if (IME_NATIVE_API_VERSION >= 120) @@ -310,18 +342,17 @@ static char *IME_GetVersion() return ver; } -static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, - IOR_param_t *param) +int IME_Statfs(const char *path, ior_aiori_statfs_t *stat_buf, + aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); #if (IME_NATIVE_API_VERSION >= 130) struct statvfs statfs_buf; int ret = ime_native_statvfs(path, &statfs_buf); if (ret) - return ret; - + return ret; stat_buf->f_bsize = statfs_buf.f_bsize; stat_buf->f_blocks = statfs_buf.f_blocks; stat_buf->f_bfree = statfs_buf.f_bfree; @@ -330,38 +361,37 @@ static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, return 0; #else - (void)path; - (void)stat_buf; + IME_UNUSED(path); + IME_UNUSED(stat_buf); WARN("statfs is currently not supported in IME backend!"); return -1; #endif } - -static int IME_MkDir(const char *path, mode_t mode, IOR_param_t *param) +int IME_Mkdir(const char *path, mode_t mode, aiori_mod_opt_t * module_options) { - (void)param; + IME_UNUSED(module_options); #if (IME_NATIVE_API_VERSION >= 130) return ime_native_mkdir(path, mode); #else - (void)path; - (void)mode; + IME_UNUSED(path); + IME_UNUSED(mode); WARN("mkdir not supported in IME backend!"); return -1; #endif } -static int IME_RmDir(const char *path, IOR_param_t *param) +int IME_Rmdir(const char *path, aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); #if (IME_NATIVE_API_VERSION >= 130) return ime_native_rmdir(path); #else - (void)path; + IME_UNUSED(path); WARN("rmdir not supported in IME backend!"); return -1; @@ -371,9 +401,10 @@ static int IME_RmDir(const char *path, IOR_param_t *param) /* * Perform stat() through the IME interface. 
*/ -static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param) +int IME_Stat(const char *path, struct stat *buf, + aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); return ime_native_stat(path, buf); } @@ -381,62 +412,51 @@ static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param) /* * Use IME stat() to return aggregate file size. */ -static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm, - char *testFileName) +IOR_offset_t IME_GetFileSize(aiori_mod_opt_t *test, MPI_Comm testComm, + char *testFileName) { struct stat stat_buf; - IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; + IOR_offset_t size, tmpSum; - if (ime_native_stat(testFileName, &stat_buf) != 0) { - ERR("cannot get status of written file"); - } - aggFileSizeFromStat = stat_buf.st_size; + if (hints->dryRun) + return 0; - if (test->filePerProc) { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, + if (ime_native_stat(testFileName, &stat_buf) != 0) + ERRF("cannot get status of written file %s", + testFileName); + + size = stat_buf.st_size; + + if (hints->filePerProc) { + MPI_CHECK(MPI_Allreduce(&size, &tmpSum, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); - aggFileSizeFromStat = tmpSum; - } else { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, - MPI_LONG_LONG_INT, MPI_MIN, testComm), - "cannot total data moved"); - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, - MPI_LONG_LONG_INT, MPI_MAX, testComm), - "cannot total data moved"); - - if (tmpMin != tmpMax) { - if (rank == 0) { - WARN("inconsistent file size by different tasks"); - } - /* incorrect, but now consistent across tasks */ - aggFileSizeFromStat = tmpMin; - } + size = tmpSum; } - return(aggFileSizeFromStat); + return size; } #if (IME_NATIVE_API_VERSION >= 132) /* * Create a file through mknod interface. 
*/ -static int IME_Mknod(char *testFileName) +int IME_Mknod(char *testFileName) { - int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); - if (ret < 0) - ERR("mknod failed"); + int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); + if (ret < 0) + ERR("mknod failed"); - return ret; + return ret; } /* * Use IME sync to flush page cache of all opened files. */ -static void IME_Sync(IOR_param_t * param) +void IME_Sync(aiori_mod_opt_t *param) { - int ret = ime_native_sync(0); - if (ret != 0) - FAIL("Error executing the sync command."); + int ret = ime_native_sync(0); + if (ret != 0) + FAIL("Error executing the sync command."); } #endif From f005147c4d42bef3de9b39812ccf7fe1c50b9b33 Mon Sep 17 00:00:00 2001 From: Jean-Yves VET Date: Fri, 3 Jul 2020 16:37:33 +0200 Subject: [PATCH 027/154] aiori-IME: Update to new aiori interface This patch updates IME backend to support new aiori interface. It also fixes some indentation issues. --- src/aiori-IME.c | 293 +++++++++++++++++++++++++----------------------- 1 file changed, 151 insertions(+), 142 deletions(-) diff --git a/src/aiori-IME.c b/src/aiori-IME.c index 500f380..a972b42 100755 --- a/src/aiori-IME.c +++ b/src/aiori-IME.c @@ -21,8 +21,8 @@ #include #include #include -#include /* sys_errlist */ -#include /* IO operations */ +#include /* sys_errlist */ +#include /* IO operations */ #include "ior.h" #include "iordef.h" @@ -30,63 +30,70 @@ #include "utilities.h" #include "ime_native.h" -#ifndef O_BINARY /* Required on Windows */ +#define IME_UNUSED(x) (void)(x) /* Silence compiler warnings */ + +#ifndef O_BINARY /* Required on Windows */ # define O_BINARY 0 #endif /**************************** P R O T O T Y P E S *****************************/ -static void *IME_Create(char *, IOR_param_t *); -static void *IME_Open(char *, IOR_param_t *); -static void IME_Close(void *, IOR_param_t *); -static void IME_Delete(char *, IOR_param_t *); -static char *IME_GetVersion(); -static void IME_Fsync(void *, IOR_param_t 
*); -static int IME_Access(const char *, int, IOR_param_t *); -static IOR_offset_t IME_GetFileSize(IOR_param_t *, MPI_Comm, char *); -static IOR_offset_t IME_Xfer(int, void *, IOR_size_t *, - IOR_offset_t, IOR_param_t *); -static int IME_StatFS(const char *, ior_aiori_statfs_t *, - IOR_param_t *); -static int IME_RmDir(const char *, IOR_param_t *); -static int IME_MkDir(const char *, mode_t, IOR_param_t *); -static int IME_Stat(const char *, struct stat *, IOR_param_t *); +aiori_fd_t *IME_Create(char *, int, aiori_mod_opt_t *); +aiori_fd_t *IME_Open(char *, int, aiori_mod_opt_t *); +void IME_Close(aiori_fd_t *, aiori_mod_opt_t *); +void IME_Delete(char *, aiori_mod_opt_t *); +char *IME_GetVersion(); +void IME_Fsync(aiori_fd_t *, aiori_mod_opt_t *); +int IME_Access(const char *, int, aiori_mod_opt_t *); +IOR_offset_t IME_GetFileSize(aiori_mod_opt_t *, char *); +IOR_offset_t IME_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, + IOR_offset_t, aiori_mod_opt_t *); +int IME_Statfs(const char *, ior_aiori_statfs_t *, + aiori_mod_opt_t *); +int IME_Rmdir(const char *, aiori_mod_opt_t *); +int IME_Mkdir(const char *, mode_t, aiori_mod_opt_t *); +int IME_Stat(const char *, struct stat *, aiori_mod_opt_t *); +void IME_Xferhints(aiori_xfer_hint_t *params); #if (IME_NATIVE_API_VERSION >= 132) -static int IME_Mknod(char *); -static void IME_Sync(IOR_param_t *); +int IME_Mknod(char *); +void IME_Sync(aiori_mod_opt_t *param); #endif -static void IME_Initialize(); -static void IME_Finalize(); +void IME_Initialize(); +void IME_Finalize(); -/************************** O P T I O N S *****************************/ + +/****************************** O P T I O N S *********************************/ + typedef struct{ - int direct_io; + int direct_io; } ime_options_t; +option_help *IME_Options(aiori_mod_opt_t **init_backend_options, + aiori_mod_opt_t *init_values) +{ + ime_options_t *o = malloc(sizeof(ime_options_t)); -option_help * IME_options(void ** init_backend_options, void * 
init_values){ - ime_options_t * o = malloc(sizeof(ime_options_t)); + if (init_values != NULL) + memcpy(o, init_values, sizeof(ime_options_t)); + else + o->direct_io = 0; - if (init_values != NULL){ - memcpy(o, init_values, sizeof(ime_options_t)); - }else{ - o->direct_io = 0; - } + *init_backend_options = (aiori_mod_opt_t*)o; - *init_backend_options = o; + option_help h[] = { + {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, + LAST_OPTION + }; + option_help *help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); - option_help h [] = { - {0, "ime.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, - LAST_OPTION - }; - option_help * help = malloc(sizeof(h)); - memcpy(help, h, sizeof(h)); - return help; + return help; } + /************************** D E C L A R A T I O N S ***************************/ extern int rank; @@ -100,19 +107,20 @@ ior_aiori_t ime_aiori = { .create = IME_Create, .open = IME_Open, .xfer = IME_Xfer, + .xfer_hints = IME_Xferhints, .close = IME_Close, .delete = IME_Delete, .get_version = IME_GetVersion, .fsync = IME_Fsync, .get_file_size = IME_GetFileSize, .access = IME_Access, - .statfs = IME_StatFS, - .rmdir = IME_RmDir, - .mkdir = IME_MkDir, + .statfs = IME_Statfs, + .rmdir = IME_Rmdir, + .mkdir = IME_Mkdir, .stat = IME_Stat, .initialize = IME_Initialize, .finalize = IME_Finalize, - .get_options = IME_options, + .get_options = IME_Options, #if (IME_NATIVE_API_VERSION >= 132) .sync = IME_Sync, .mknod = IME_Mknod, @@ -120,30 +128,48 @@ ior_aiori_t ime_aiori = { .enable_mdtest = true, }; +static aiori_xfer_hint_t *hints = NULL; +static bool ime_initialized = false; + + /***************************** F U N C T I O N S ******************************/ +void IME_Xferhints(aiori_xfer_hint_t *params) +{ + hints = params; +} + /* * Initialize IME (before MPI is started). 
*/ -static void IME_Initialize() +void IME_Initialize() { + if (ime_initialized) + return; + ime_native_init(); + ime_initialized = true; } /* * Finlize IME (after MPI is shutdown). */ -static void IME_Finalize() +void IME_Finalize() { + if (!ime_initialized) + return; + (void)ime_native_finalize(); + ime_initialized = true; } /* * Try to access a file through the IME interface. */ -static int IME_Access(const char *path, int mode, IOR_param_t *param) + +int IME_Access(const char *path, int mode, aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); return ime_native_access(path, mode); } @@ -151,41 +177,43 @@ static int IME_Access(const char *path, int mode, IOR_param_t *param) /* * Creat and open a file through the IME interface. */ -static void *IME_Create(char *testFileName, IOR_param_t *param) +aiori_fd_t *IME_Create(char *testFileName, int flags, aiori_mod_opt_t *param) { - return IME_Open(testFileName, param); + return IME_Open(testFileName, flags, param); } /* * Open a file through the IME interface. 
*/ -static void *IME_Open(char *testFileName, IOR_param_t *param) +aiori_fd_t *IME_Open(char *testFileName, int flags, aiori_mod_opt_t *param) { int fd_oflag = O_BINARY; int *fd; + if (hints->dryRun) + return NULL; + fd = (int *)malloc(sizeof(int)); if (fd == NULL) ERR("Unable to malloc file descriptor"); - ime_options_t * o = (ime_options_t*) param->backend_options; - if (o->direct_io == TRUE){ - set_o_direct_flag(&fd_oflag); - } + ime_options_t *o = (ime_options_t*) param; + if (o->direct_io == TRUE) + set_o_direct_flag(&fd_oflag); - if (param->openFlags & IOR_RDONLY) + if (flags & IOR_RDONLY) fd_oflag |= O_RDONLY; - if (param->openFlags & IOR_WRONLY) + if (flags & IOR_WRONLY) fd_oflag |= O_WRONLY; - if (param->openFlags & IOR_RDWR) + if (flags & IOR_RDWR) fd_oflag |= O_RDWR; - if (param->openFlags & IOR_APPEND) + if (flags & IOR_APPEND) fd_oflag |= O_APPEND; - if (param->openFlags & IOR_CREAT) + if (flags & IOR_CREAT) fd_oflag |= O_CREAT; - if (param->openFlags & IOR_EXCL) + if (flags & IOR_EXCL) fd_oflag |= O_EXCL; - if (param->openFlags & IOR_TRUNC) + if (flags & IOR_TRUNC) fd_oflag |= O_TRUNC; *fd = ime_native_open(testFileName, fd_oflag, 0664); @@ -194,14 +222,14 @@ static void *IME_Open(char *testFileName, IOR_param_t *param) ERR("cannot open file"); } - return((void *)fd); + return (aiori_fd_t*) fd; } /* * Write or read access to file using the IM interface. 
*/ -static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, - IOR_offset_t length, IOR_param_t *param) +IOR_offset_t IME_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, + IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t *param) { int xferRetries = 0; long long remaining = (long long)length; @@ -209,25 +237,28 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, int fd = *(int *)file; long long rc; + if (hints->dryRun) + return length; + while (remaining > 0) { /* write/read file */ if (access == WRITE) { /* WRITE */ if (verbose >= VERBOSE_4) { fprintf(stdout, "task %d writing to offset %lld\n", - rank, param->offset + length - remaining); + rank, offset + length - remaining); } - rc = ime_native_pwrite(fd, ptr, remaining, param->offset); + rc = ime_native_pwrite(fd, ptr, remaining, offset); - if (param->fsyncPerWrite) - IME_Fsync(&fd, param); + if (hints->fsyncPerWrite) + IME_Fsync(file, param); } else { /* READ or CHECK */ if (verbose >= VERBOSE_4) { fprintf(stdout, "task %d reading from offset %lld\n", - rank, param->offset + length - remaining); + rank, offset + length - remaining); } - rc = ime_native_pread(fd, ptr, remaining, param->offset); + rc = ime_native_pread(fd, ptr, remaining, offset); if (rc == 0) ERR("hit EOF prematurely"); else if (rc < 0) @@ -238,9 +269,9 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, fprintf(stdout, "WARNING: Task %d, partial %s, %lld of " "%lld bytes at offset %lld\n", rank, access == WRITE ? "write" : "read", rc, - remaining, param->offset + length - remaining ); + remaining, offset + length - remaining ); - if (param->singleXferAttempt) { + if (hints->singleXferAttempt) { MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "barrier error"); } @@ -264,7 +295,7 @@ static IOR_offset_t IME_Xfer(int access, void *file, IOR_size_t *buffer, /* * Perform fsync(). 
*/ -static void IME_Fsync(void *fd, IOR_param_t *param) +void IME_Fsync(aiori_fd_t *fd, aiori_mod_opt_t *param) { if (ime_native_fsync(*(int *)fd) != 0) WARN("cannot perform fsync on file"); @@ -273,33 +304,34 @@ static void IME_Fsync(void *fd, IOR_param_t *param) /* * Close a file through the IME interface. */ -static void IME_Close(void *fd, IOR_param_t *param) +void IME_Close(aiori_fd_t *file, aiori_mod_opt_t *param) { - if (ime_native_close(*(int *)fd) != 0) - { - free(fd); - ERR("cannot close file"); - } - else - free(fd); + if (hints->dryRun) + return; + + if (ime_native_close(*(int*)file) != 0) + ERRF("Cannot close file descriptor: %d", *(int*)file); + + free(file); } /* * Delete a file through the IME interface. */ -static void IME_Delete(char *testFileName, IOR_param_t *param) +void IME_Delete(char *testFileName, aiori_mod_opt_t *param) { - char errmsg[256]; - sprintf(errmsg, "[RANK %03d]:cannot delete file %s\n", - rank, testFileName); + if (hints->dryRun) + return; + if (ime_native_unlink(testFileName) != 0) - WARN(errmsg); + EWARNF("[RANK %03d]: cannot delete file \"%s\"\n", + rank, testFileName); } /* * Determine API version. 
*/ -static char *IME_GetVersion() +char *IME_GetVersion() { static char ver[1024] = {}; #if (IME_NATIVE_API_VERSION >= 120) @@ -310,18 +342,17 @@ static char *IME_GetVersion() return ver; } -static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, - IOR_param_t *param) +int IME_Statfs(const char *path, ior_aiori_statfs_t *stat_buf, + aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); #if (IME_NATIVE_API_VERSION >= 130) struct statvfs statfs_buf; int ret = ime_native_statvfs(path, &statfs_buf); if (ret) - return ret; - + return ret; stat_buf->f_bsize = statfs_buf.f_bsize; stat_buf->f_blocks = statfs_buf.f_blocks; stat_buf->f_bfree = statfs_buf.f_bfree; @@ -330,38 +361,37 @@ static int IME_StatFS(const char *path, ior_aiori_statfs_t *stat_buf, return 0; #else - (void)path; - (void)stat_buf; + IME_UNUSED(path); + IME_UNUSED(stat_buf); WARN("statfs is currently not supported in IME backend!"); return -1; #endif } - -static int IME_MkDir(const char *path, mode_t mode, IOR_param_t *param) +int IME_Mkdir(const char *path, mode_t mode, aiori_mod_opt_t * module_options) { - (void)param; + IME_UNUSED(module_options); #if (IME_NATIVE_API_VERSION >= 130) return ime_native_mkdir(path, mode); #else - (void)path; - (void)mode; + IME_UNUSED(path); + IME_UNUSED(mode); WARN("mkdir not supported in IME backend!"); return -1; #endif } -static int IME_RmDir(const char *path, IOR_param_t *param) +int IME_Rmdir(const char *path, aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); #if (IME_NATIVE_API_VERSION >= 130) return ime_native_rmdir(path); #else - (void)path; + IME_UNUSED(path); WARN("rmdir not supported in IME backend!"); return -1; @@ -371,9 +401,10 @@ static int IME_RmDir(const char *path, IOR_param_t *param) /* * Perform stat() through the IME interface. 
*/ -static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param) +int IME_Stat(const char *path, struct stat *buf, + aiori_mod_opt_t *module_options) { - (void)param; + IME_UNUSED(module_options); return ime_native_stat(path, buf); } @@ -381,62 +412,40 @@ static int IME_Stat(const char *path, struct stat *buf, IOR_param_t *param) /* * Use IME stat() to return aggregate file size. */ -static IOR_offset_t IME_GetFileSize(IOR_param_t *test, MPI_Comm testComm, - char *testFileName) +IOR_offset_t IME_GetFileSize(aiori_mod_opt_t *test, char *testFileName) { struct stat stat_buf; - IOR_offset_t aggFileSizeFromStat, tmpMin, tmpMax, tmpSum; - if (ime_native_stat(testFileName, &stat_buf) != 0) { - ERR("cannot get status of written file"); - } - aggFileSizeFromStat = stat_buf.st_size; + if (hints->dryRun) + return 0; - if (test->filePerProc) { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpSum, 1, - MPI_LONG_LONG_INT, MPI_SUM, testComm), - "cannot total data moved"); - aggFileSizeFromStat = tmpSum; - } else { - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMin, 1, - MPI_LONG_LONG_INT, MPI_MIN, testComm), - "cannot total data moved"); - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, &tmpMax, 1, - MPI_LONG_LONG_INT, MPI_MAX, testComm), - "cannot total data moved"); + if (ime_native_stat(testFileName, &stat_buf) != 0) + ERRF("cannot get status of written file %s", + testFileName); - if (tmpMin != tmpMax) { - if (rank == 0) { - WARN("inconsistent file size by different tasks"); - } - /* incorrect, but now consistent across tasks */ - aggFileSizeFromStat = tmpMin; - } - } - - return(aggFileSizeFromStat); + return stat_buf.st_size; } #if (IME_NATIVE_API_VERSION >= 132) /* * Create a file through mknod interface. 
*/ -static int IME_Mknod(char *testFileName) +int IME_Mknod(char *testFileName) { - int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); - if (ret < 0) - ERR("mknod failed"); + int ret = ime_native_mknod(testFileName, S_IFREG | S_IRUSR, 0); + if (ret < 0) + ERR("mknod failed"); - return ret; + return ret; } /* * Use IME sync to flush page cache of all opened files. */ -static void IME_Sync(IOR_param_t * param) +void IME_Sync(aiori_mod_opt_t *param) { - int ret = ime_native_sync(0); - if (ret != 0) - FAIL("Error executing the sync command."); + int ret = ime_native_sync(0); + if (ret != 0) + FAIL("Error executing the sync command."); } #endif From 4258e14c1181313bc18ff5a6f2fd58ab7008e10a Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 21 Jul 2020 09:31:19 +0100 Subject: [PATCH 028/154] mdtest: Bugfix of changed behavior, can only remove dir if all children were deleted. --- src/mdtest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 0981481..083e2d4 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2297,8 +2297,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } } - if (created_root_dir && backend->rmdir(testdirpath, backend_options) != 0) { - FAIL("Unable to remote test directory path %s", testdirpath); + if (created_root_dir && remove_only && backend->rmdir(testdirpath, backend_options) != 0) { + FAIL("Unable to remove test directory path %s", testdirpath); } if(verification_error){ From 5e465ac8bf961dcce7329237fb15c14abe9fa300 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 21 Jul 2020 13:54:50 +0100 Subject: [PATCH 029/154] Option: merge function to supported AIORI module reuse. 
--- src/option.c | 17 +++++++++++++++++ src/option.h | 1 + 2 files changed, 18 insertions(+) diff --git a/src/option.c b/src/option.c index 618360f..c44dc9b 100644 --- a/src/option.c +++ b/src/option.c @@ -7,6 +7,23 @@ #include + +/* merge two option lists and return the total size */ +option_help * option_merge(option_help * a, option_help * b){ + int count_a = 0; + for(option_help * i = a; i->type != 0; i++){ + count_a++; + } + int count = count_a + 1; // LAST_OPTION is one + for(option_help * i = b; i->type != 0; i++){ + count++; + } + option_help * h = malloc(sizeof(option_help) * count); + memcpy(h, a, sizeof(option_help) * count_a); + memcpy(h + count_a, b, sizeof(option_help) * (count - count_a)); + return h; +} + /* * Takes a string of the form 64, 8m, 128k, 4g, etc. and converts to bytes. */ diff --git a/src/option.h b/src/option.h index 5ca305f..0afa519 100644 --- a/src/option.h +++ b/src/option.h @@ -43,6 +43,7 @@ void option_print_current(option_help * args); //@return the number of parsed arguments int option_parse(int argc, char ** argv, options_all_t * args); int option_parse_str(char*val, options_all_t * opt_all); +option_help * option_merge(option_help * a, option_help * b); /* Parse a single line */ int option_parse_key_value(char * key, char * value, options_all_t * opt_all); From f275671cc94e35f61b15407e763b5fb035f36971 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 21 Jul 2020 16:16:13 +0100 Subject: [PATCH 030/154] AIORI POSIX AIO support. Collect ops until granularity is reached, then submit pending IOs. Synchronize latest on close. Doesn't work with data verification and reuses the existing buffer. The implementation shows the potential AIO may have. Extract also the POSIX header from AIORI to allow better reuse. 
#240 --- configure.ac | 13 +++ src/Makefile.am | 5 + src/aiori-MMAP.c | 3 +- src/aiori-POSIX.c | 41 ++------ src/aiori-POSIX.h | 42 ++++++++ src/aiori-aio.c | 255 ++++++++++++++++++++++++++++++++++++++++++++++ src/aiori.c | 3 + src/aiori.h | 10 +- src/mdtest.c | 17 +++- 9 files changed, 340 insertions(+), 49 deletions(-) create mode 100644 src/aiori-POSIX.h create mode 100644 src/aiori-aio.c diff --git a/configure.ac b/configure.ac index a7d5085..dc05ee7 100755 --- a/configure.ac +++ b/configure.ac @@ -200,6 +200,19 @@ AS_IF([test "x$with_pmdk" != xno], [ [AC_MSG_ERROR([Library containing pmdk symbols not found])]) ]) +# LINUX AIO support +AC_ARG_WITH([aio], + [AS_HELP_STRING([--with-aio], + [support Linux AIO @<:@default=no@:>@])], + [], + [with_aio=no]) +AM_CONDITIONAL([USE_AIO_AIORI], [test x$with_aio = xyes]) +AS_IF([test "x$with_aio" != xno], [ + AC_DEFINE([USE_AIO_AIORI], [], [Build AIO backend]) + AC_CHECK_HEADERS(libaio.h,, [unset AIO]) + AC_SEARCH_LIBS([aio], [io_setup], [AC_MSG_ERROR([Library containing AIO symbol io_setup not found])]) +]) + # RADOS support AC_ARG_WITH([rados], diff --git a/src/Makefile.am b/src/Makefile.am index 7f1be40..03148d2 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -65,6 +65,11 @@ if USE_POSIX_AIORI extraSOURCES += aiori-POSIX.c endif +if USE_AIO_AIORI +extraSOURCES += aiori-aio.c +extraLDADD += -laio +endif + if USE_PMDK_AIORI extraSOURCES += aiori-PMDK.c extraLDADD += -lpmem diff --git a/src/aiori-MMAP.c b/src/aiori-MMAP.c index 2c0db42..5fa13f8 100644 --- a/src/aiori-MMAP.c +++ b/src/aiori-MMAP.c @@ -22,6 +22,7 @@ #include "ior.h" #include "aiori.h" +#include "aiori-POSIX.h" #include "iordef.h" #include "utilities.h" @@ -86,7 +87,7 @@ static aiori_xfer_hint_t * hints = NULL; static void MMAP_xfer_hints(aiori_xfer_hint_t * params){ hints = params; - aiori_posix_xfer_hints(params); + POSIX_xfer_hints(params); } static int MMAP_check_params(aiori_mod_opt_t * options){ diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c 
index ec95625..c46c99b 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -55,6 +55,8 @@ #include "iordef.h" #include "utilities.h" +#include "aiori-POSIX.h" + #ifndef open64 /* necessary for TRU64 -- */ # define open64 open /* unlikely, but may pose */ #endif /* not open64 */ /* conflicting prototypes */ @@ -70,32 +72,6 @@ /**************************** P R O T O T Y P E S *****************************/ static IOR_offset_t POSIX_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); -static void POSIX_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -static void POSIX_Sync(aiori_mod_opt_t * ); -static int POSIX_check_params(aiori_mod_opt_t * options); - -/************************** O P T I O N S *****************************/ -typedef struct{ - /* in case of a change, please update depending MMAP module too */ - int direct_io; - - /* Lustre variables */ - int lustre_set_striping; /* flag that we need to set lustre striping */ - int lustre_stripe_count; - int lustre_stripe_size; - int lustre_start_ost; - int lustre_ignore_locks; - - /* gpfs variables */ - int gpfs_hint_access; /* use gpfs "access range" hint */ - int gpfs_release_token; /* immediately release GPFS tokens after - creating or opening a file */ - /* beegfs variables */ - int beegfs_numTargets; /* number storage targets to use */ - int beegfs_chunkSize; /* srtipe pattern for new files */ - -} posix_options_t; - option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ posix_options_t * o = malloc(sizeof(posix_options_t)); @@ -149,7 +125,7 @@ ior_aiori_t posix_aiori = { .xfer = POSIX_Xfer, .close = POSIX_Close, .delete = POSIX_Delete, - .xfer_hints = aiori_posix_xfer_hints, + .xfer_hints = POSIX_xfer_hints, .get_version = aiori_get_version, .fsync = POSIX_Fsync, .get_file_size = POSIX_GetFileSize, @@ -168,11 +144,11 @@ ior_aiori_t posix_aiori = { static aiori_xfer_hint_t * hints = NULL; -void aiori_posix_xfer_hints(aiori_xfer_hint_t 
* params){ +void POSIX_xfer_hints(aiori_xfer_hint_t * params){ hints = params; } -static int POSIX_check_params(aiori_mod_opt_t * param){ +int POSIX_check_params(aiori_mod_opt_t * param){ posix_options_t * o = (posix_options_t*) param; if (o->beegfs_chunkSize != -1 && (!ISPOWEROFTWO(o->beegfs_chunkSize) || o->beegfs_chunkSize < (1<<16))) ERR("beegfsChunkSize must be a power of two and >64k"); @@ -630,17 +606,14 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer return (length); } -/* - * Perform fsync(). - */ -static void POSIX_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) +void POSIX_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) { if (fsync(*(int *)fd) != 0) EWARNF("fsync(%d) failed", *(int *)fd); } -static void POSIX_Sync(aiori_mod_opt_t * param) +void POSIX_Sync(aiori_mod_opt_t * param) { int ret = system("sync"); if (ret != 0){ diff --git a/src/aiori-POSIX.h b/src/aiori-POSIX.h new file mode 100644 index 0000000..1780cf7 --- /dev/null +++ b/src/aiori-POSIX.h @@ -0,0 +1,42 @@ +#ifndef AIORI_POSIX_H +#define AIORI_POSIX_H + +#include "aiori.h" + +/************************** O P T I O N S *****************************/ +typedef struct{ + /* in case of a change, please update depending MMAP module too */ + int direct_io; + + /* Lustre variables */ + int lustre_set_striping; /* flag that we need to set lustre striping */ + int lustre_stripe_count; + int lustre_stripe_size; + int lustre_start_ost; + int lustre_ignore_locks; + + /* gpfs variables */ + int gpfs_hint_access; /* use gpfs "access range" hint */ + int gpfs_release_token; /* immediately release GPFS tokens after + creating or opening a file */ + /* beegfs variables */ + int beegfs_numTargets; /* number storage targets to use */ + int beegfs_chunkSize; /* srtipe pattern for new files */ + +} posix_options_t; + +void POSIX_Sync(aiori_mod_opt_t * param); +int POSIX_check_params(aiori_mod_opt_t * param); +void POSIX_Fsync(aiori_fd_t *, aiori_mod_opt_t *); +int 
POSIX_check_params(aiori_mod_opt_t * options); +aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * module_options); +int POSIX_Mknod(char *testFileName); +aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * module_options); +IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName); +void POSIX_Delete(char *testFileName, aiori_mod_opt_t * module_options); +void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * module_options); +option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); +void POSIX_xfer_hints(aiori_xfer_hint_t * params); + + +#endif diff --git a/src/aiori-aio.c b/src/aiori-aio.c new file mode 100644 index 0000000..3e21f64 --- /dev/null +++ b/src/aiori-aio.c @@ -0,0 +1,255 @@ +/* + This backend uses linux-aio + Requires: libaio-dev + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ior.h" +#include "aiori.h" +#include "iordef.h" +#include "utilities.h" + +#include "aiori-POSIX.h" + +/************************** O P T I O N S *****************************/ +typedef struct{ + aiori_mod_opt_t * p; // posix options + int max_pending; + int granularity; // how frequent to submit, submit ever granularity elements + + // runtime data + io_context_t ioctx; // one context per fs + struct iocb ** iocbs; + int iocbs_pos; // how many are pending in iocbs + + int in_flight; // total pending ops + IOR_offset_t pending_bytes; // track pending IO volume for error checking +} aio_options_t; + +option_help * aio_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ + aio_options_t * o = malloc(sizeof(aio_options_t)); + + if (init_values != NULL){ + memcpy(o, init_values, sizeof(aio_options_t)); + }else{ + memset(o, 0, sizeof(aio_options_t)); + o->max_pending = 128; + o->granularity = 16; + } + option_help * p_help = 
POSIX_options((aiori_mod_opt_t**)& o->p, init_values == NULL ? NULL : (aiori_mod_opt_t*) ((aio_options_t*)init_values)->p); + *init_backend_options = (aiori_mod_opt_t*) o; + + option_help h [] = { + {0, "aio.max-pending", "Max number of pending ops", OPTION_OPTIONAL_ARGUMENT, 'd', & o->max_pending}, + {0, "aio.granularity", "How frequent to submit pending IOs, submit every *granularity* elements", OPTION_OPTIONAL_ARGUMENT, 'd', & o->granularity}, + LAST_OPTION + }; + option_help * help = option_merge(h, p_help); + free(p_help); + return help; +} + + +/************************** D E C L A R A T I O N S ***************************/ + +typedef struct{ + aiori_fd_t * pfd; // the underlying POSIX fd +} aio_fd_t; + +/***************************** F U N C T I O N S ******************************/ + +static aiori_xfer_hint_t * hints = NULL; + +static void aio_xfer_hints(aiori_xfer_hint_t * params){ + hints = params; + POSIX_xfer_hints(params); +} + +static void aio_initialize(aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + if(io_setup(o->max_pending, & o->ioctx) != 0){ + ERRF("Couldn't initialize io context %s", strerror(errno)); + } + printf("%d\n", (o->max_pending)); + + o->iocbs = malloc(sizeof(struct iocb *) * o->granularity); + o->iocbs_pos = 0; + o->in_flight = 0; +} + +static void aio_finalize(aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + io_destroy(o->ioctx); +} + +static int aio_check_params(aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + POSIX_check_params((aiori_mod_opt_t*) o->p); + if(o->max_pending < 8){ + ERRF("max-pending = %d < 8", o->max_pending); + } + return 0; +} + +static aiori_fd_t *aio_Open(char *testFileName, int flags, aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + aio_fd_t * fd = malloc(sizeof(aio_fd_t)); + fd->pfd = POSIX_Open(testFileName, flags, o->p); + return (aiori_fd_t*) fd; +} + +static aiori_fd_t *aio_create(char *testFileName, int 
flags, aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + aio_fd_t * fd = malloc(sizeof(aio_fd_t)); + fd->pfd = POSIX_Create(testFileName, flags, o->p); + return (aiori_fd_t*) fd; +} + +/* called whenever the granularity is met */ +static void submit_pending(aio_options_t * o){ + if(o->iocbs_pos == 0){ + return; + } + int res; + res = io_submit(o->ioctx, o->iocbs_pos, o->iocbs); + //printf("AIO submit %d jobs\n", o->iocbs_pos); + if(res != o->iocbs_pos){ + if(errno == EAGAIN){ + ERR("AIO: errno == EAGAIN; this should't happen"); + } + ERRF("AIO: submitted %d, error: \"%s\" ; this should't happen", res, strerror(errno)); + } + o->iocbs_pos = 0; +} + +/* complete all pending ops */ +static void complete_all(aio_options_t * o){ + submit_pending(o); + + struct io_event events[o->in_flight]; + int num_events; + num_events = io_getevents(o->ioctx, o->in_flight, o->in_flight, events, NULL); + for (int i = 0; i < num_events; i++) { + struct io_event event = events[i]; + if(event.res == -1){ + ERR("AIO, error in io_getevents(), IO incomplete!"); + }else{ + o->pending_bytes -= event.res; + } + free(event.obj); + } + if(o->pending_bytes != 0){ + ERRF("AIO, error in flushing data, pending bytes: %lld", o->pending_bytes); + } + o->in_flight = 0; +} + +/* called if we must make *some* progress */ +static void process_some(aio_options_t * o){ + if(o->in_flight == 0){ + return; + } + struct io_event events[o->in_flight]; + int num_events; + int mn = o->in_flight < o->granularity ? 
o->in_flight : o->granularity; + num_events = io_getevents(o->ioctx, mn, o->in_flight, events, NULL); + //printf("Completed: %d\n", num_events); + for (int i = 0; i < num_events; i++) { + struct io_event event = events[i]; + if(event.res == -1){ + ERR("AIO, error in io_getevents(), IO incomplete!"); + }else{ + o->pending_bytes -= event.res; + } + free(event.obj); + } + o->in_flight -= num_events; +} + +static IOR_offset_t aio_Xfer(int access, aiori_fd_t *fd, IOR_size_t * buffer, + IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + aio_fd_t * afd = (aio_fd_t*) fd; + + if(o->in_flight >= o->max_pending){ + process_some(o); + } + o->pending_bytes += length; + + struct iocb * iocb = malloc(sizeof(struct iocb)); + if(access == WRITE){ + io_prep_pwrite(iocb, *(int*)afd->pfd, buffer, length, offset); + }else{ + io_prep_pread(iocb, *(int*)afd->pfd, buffer, length, offset); + } + o->iocbs[o->iocbs_pos] = iocb; + o->iocbs_pos++; + o->in_flight++; + + if(o->iocbs_pos == o->granularity){ + submit_pending(o); + } + return length; +} + +static void aio_Close(aiori_fd_t *fd, aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + aio_fd_t * afd = (aio_fd_t*) fd; + complete_all(o); + POSIX_Close(afd->pfd, o->p); +} + +static void aio_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + complete_all(o); + aio_fd_t * afd = (aio_fd_t*) fd; + POSIX_Fsync(afd->pfd, o->p); +} + +static void aio_Sync(aiori_mod_opt_t * param){ + aio_options_t * o = (aio_options_t*) param; + complete_all(o); + POSIX_Sync((aiori_mod_opt_t*) o->p); +} + + + +ior_aiori_t aio_aiori = { + .name = "AIO", + .name_legacy = NULL, + .create = aio_create, + .get_options = aio_options, + .initialize = aio_initialize, + .finalize = aio_finalize, + .xfer_hints = aio_xfer_hints, + .get_options = aio_options, + .fsync = aio_Fsync, + .open = aio_Open, + .xfer = aio_Xfer, + .close = aio_Close, + 
.sync = aio_Sync, + .check_params = aio_check_params, + .delete = POSIX_Delete, + .get_version = aiori_get_version, + .get_file_size = POSIX_GetFileSize, + .statfs = aiori_posix_statfs, + .mkdir = aiori_posix_mkdir, + .rmdir = aiori_posix_rmdir, + .access = aiori_posix_access, + .stat = aiori_posix_stat, + .enable_mdtest = true +}; diff --git a/src/aiori.c b/src/aiori.c index 05e4935..2d8b6c8 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -42,6 +42,9 @@ ior_aiori_t *available_aiori[] = { #ifdef USE_POSIX_AIORI &posix_aiori, #endif +#ifdef USE_AIO_AIORI + &aio_aiori, +#endif #ifdef USE_PMDK_AIORI &pmdk_aiori, #endif diff --git a/src/aiori.h b/src/aiori.h index 6b185d7..a1adc6d 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -120,6 +120,7 @@ enum bench_type { }; extern ior_aiori_t dummy_aiori; +extern ior_aiori_t aio_aiori; extern ior_aiori_t daos_aiori; extern ior_aiori_t dfs_aiori; extern ior_aiori_t hdf5_aiori; @@ -154,15 +155,6 @@ int aiori_posix_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * module_o int aiori_posix_rmdir (const char *path, aiori_mod_opt_t * module_options); int aiori_posix_access (const char *path, int mode, aiori_mod_opt_t * module_options); int aiori_posix_stat (const char *path, struct stat *buf, aiori_mod_opt_t * module_options); -void aiori_posix_xfer_hints(aiori_xfer_hint_t * params); - -aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * module_options); -int POSIX_Mknod(char *testFileName); -aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * module_options); -IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName); -void POSIX_Delete(char *testFileName, aiori_mod_opt_t * module_options); -void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * module_options); -option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); /* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ diff --git a/src/mdtest.c b/src/mdtest.c 
index 083e2d4..c713796 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -850,6 +850,9 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* create phase */ if(create_only) { + progress->stone_wall_timer_seconds = stone_wall_timer_seconds; + progress->items_done = 0; + progress->start_time = GetTimeStamp(); for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (unique_dir_per_task) { @@ -873,6 +876,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran create_remove_items(0, 1, 1, 0, temp_path, 0, progress); } } + progress->stone_wall_timer_seconds = 0; } phase_end(); @@ -1048,6 +1052,10 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* create phase */ if (create_only ) { + progress->stone_wall_timer_seconds = stone_wall_timer_seconds; + progress->items_done = 0; + progress->start_time = GetTimeStamp(); + for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -1061,8 +1069,6 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro sprintf( temp_path, "%s/%s", testdir, path ); } - - VERBOSE(3,-1,"file_test: create path is '%s'", temp_path ); /* "touch" the files */ @@ -1663,8 +1669,7 @@ void create_remove_directory_tree(int create, static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t * summary_table){ rank_progress_t progress_o; memset(& progress_o, 0 , sizeof(progress_o)); - progress_o.start_time = GetTimeStamp(); - progress_o.stone_wall_timer_seconds = stone_wall_timer_seconds; + progress_o.stone_wall_timer_seconds = 0; progress_o.items_per_dir = items_per_dir; rank_progress_t * progress = & progress_o; @@ -1748,6 +1753,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t summary_table->stonewall_last_item[8] = num_dirs_in_tree; VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, 
%14.3f ops/sec", (endCreate - startCreate), summary_table->rate[8]); } + sprintf(unique_mk_dir, "%s.0", base_tree_name); sprintf(unique_chdir_dir, "%s.0", base_tree_name); sprintf(unique_stat_dir, "%s.0", base_tree_name); @@ -1790,6 +1796,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t DelaySecs(pre_delay); } VERBOSE(3,5,"will file_test on %s", unique_mk_dir); + file_test(j, i, unique_mk_dir, progress); } } @@ -1980,7 +1987,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'v', NULL, "verbosity (each instance of option increments by one)", OPTION_FLAG, 'd', & verbose}, {'V', NULL, "verbosity value", OPTION_OPTIONAL_ARGUMENT, 'd', & verbose}, {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & write_bytes}, - {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase)", OPTION_OPTIONAL_ARGUMENT, 'd', & stone_wall_timer_seconds}, + {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase and files)", OPTION_OPTIONAL_ARGUMENT, 'd', & stone_wall_timer_seconds}, {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & verify_read}, {0, "verify-write", "Verify the data after a write by reading it back immediately", OPTION_FLAG, 'd', & verify_write}, From e1741c188ad04bdbd878c6ed86c1cd7fe58bc475 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 21 Jul 2020 16:23:25 +0100 Subject: [PATCH 031/154] AIO plugin: missing reference to include. 
--- src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index 03148d2..9531850 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -5,7 +5,7 @@ if USE_CAPS bin_PROGRAMS += IOR MDTEST endif -noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h aiori-debug.h +noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h aiori-debug.h aiori-POSIX.h lib_LIBRARIES = libaiori.a libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c From e3f302ca532029ba0e23288b2cae4c44d511668f Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 21 Jul 2020 16:50:13 +0100 Subject: [PATCH 032/154] AIORI AIO check params updated. --- src/aiori-aio.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiori-aio.c b/src/aiori-aio.c index 3e21f64..f9ee475 100644 --- a/src/aiori-aio.c +++ b/src/aiori-aio.c @@ -100,7 +100,10 @@ static int aio_check_params(aiori_mod_opt_t * param){ aio_options_t * o = (aio_options_t*) param; POSIX_check_params((aiori_mod_opt_t*) o->p); if(o->max_pending < 8){ - ERRF("max-pending = %d < 8", o->max_pending); + ERRF("AIO max-pending = %d < 8", o->max_pending); + } + if(o->granularity > o->max_pending){ + ERRF("AIO granularity must be < max-pending, is %d > %d", o->granularity, o->max_pending); } return 0; } From db3d06ec80effd95ae8d7b0c361c39ccb2d75578 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 22 Jul 2020 19:39:11 +0000 Subject: [PATCH 033/154] remove comm from get size cb in DAOS driver Signed-off-by: Mohamad Chaarawi --- src/aiori-DAOS.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c index 969507c..f2a096e 100644 --- a/src/aiori-DAOS.c +++ b/src/aiori-DAOS.c @@ -86,7 +86,7 @@ static void DAOS_Close(aiori_fd_t *, aiori_mod_opt_t *); static void DAOS_Delete(char *, aiori_mod_opt_t 
*); static char* DAOS_GetVersion(); static void DAOS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -static IOR_offset_t DAOS_GetFileSize(aiori_mod_opt_t *, MPI_Comm, char *); +static IOR_offset_t DAOS_GetFileSize(aiori_mod_opt_t *, char *); static option_help * DAOS_options(); static void DAOS_init_xfer_options(aiori_xfer_hint_t *); static int DAOS_check_params(aiori_mod_opt_t *); @@ -532,7 +532,7 @@ DAOS_Fsync(aiori_fd_t *file, aiori_mod_opt_t *param) } static IOR_offset_t -DAOS_GetFileSize(aiori_mod_opt_t *param, MPI_Comm comm, char *testFileName) +DAOS_GetFileSize(aiori_mod_opt_t *param, char *testFileName) { daos_obj_id_t oid; daos_size_t size; From 4e2d1790529a416ea6b28d6f619b33d44625c5fc Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Mon, 3 Aug 2020 12:30:21 +0100 Subject: [PATCH 034/154] Detection of system features for number of sockets #245 (#246) * Configure checks for architecture-specific functions to detect nr of sockets. #245 --- configure.ac | 24 ++++++++++++++++++++++++ src/ior.c | 3 +-- src/utilities.c | 24 ++++++++++++++++++------ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index dc05ee7..1253b51 100755 --- a/configure.ac +++ b/configure.ac @@ -400,6 +400,30 @@ Consider --with-aws4c=, CPPFLAGS, LDFLAGS, etc]) LDFLAGS=$ORIG_LDFLAGS ]) +# Check for existence of the function to detect the CPU socket ID (for multi-socket systems) +AC_COMPILE_IFELSE( + [AC_LANG_SOURCE([[ + int main(){ + unsigned long a,d,c; + __asm__ volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c)); + return 0; + } + ]])], + AC_DEFINE([HAVE_RDTSCP_ASM], [], [Has ASM to detect CPU socket ID])) + +AC_COMPILE_IFELSE( + [AC_LANG_SOURCE([[ + #define _GNU_SOURCE + #include + #include + unsigned long GetProcessorAndCore(int *chip, int *core){ + return syscall(SYS_getcpu, core, chip, NULL); + } + int main(){ + } + ]])], + AC_DEFINE([HAVE_GETCPU_SYSCALL], [], [Has syscall to detect CPU socket ID])) + # Enable building "IOR", in all capitals 
AC_ARG_ENABLE([caps], diff --git a/src/ior.c b/src/ior.c index c220ae0..196f6ec 100755 --- a/src/ior.c +++ b/src/ior.c @@ -792,8 +792,7 @@ void GetTestFileName(char *testFileName, IOR_param_t * test) strcpy(initialTestFileName, test->testFileName); if(test->dualMount){ GetProcessorAndCore(&socket, &core); - sprintf(tmpString, "%s%d/%s",initialTestFileName, - socket, "data"); + sprintf(tmpString, "%s%d/%s",initialTestFileName, socket, "data"); strcpy(initialTestFileName, tmpString); } fileNames = ParseFileName(initialTestFileName, &count); diff --git a/src/utilities.c b/src/utilities.c index 5b65e55..36db9c9 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -16,6 +16,12 @@ # include "config.h" #endif +#ifdef HAVE_GETCPU_SYSCALL +# define _GNU_SOURCE +# include +# include +#endif + #ifdef __linux__ # define _GNU_SOURCE /* Needed for O_DIRECT in fcntl */ #endif /* __linux__ */ @@ -869,17 +875,15 @@ char *HumanReadable(IOR_offset_t value, int base) return valueStr; } -#if defined(__aarch64__) -// TODO: This might be general enough to provide the functionality for any system -// regardless of processor type given we aren't worried about thread/process migration. +#if defined(HAVE_GETCPU_SYSCALL) +// Assume we aren't worried about thread/process migration. // Test on Intel systems and see if we can get rid of the architecture specificity // of the code. unsigned long GetProcessorAndCore(int *chip, int *core){ return syscall(SYS_getcpu, core, chip, NULL); } -// TODO: Add in AMD function -#else -// If we're not on an ARM processor assume we're on an intel processor and use the +#elif defined(HAVE_RDTSCP_ASM) +// We're on an intel processor and use the // rdtscp instruction. 
unsigned long GetProcessorAndCore(int *chip, int *core){ unsigned long a,d,c; @@ -888,4 +892,12 @@ unsigned long GetProcessorAndCore(int *chip, int *core){ *core = c & 0xFFF; return ((unsigned long)a) | (((unsigned long)d) << 32);; } +#else +// TODO: Add in AMD function +unsigned long GetProcessorAndCore(int *chip, int *core){ +#warning GetProcessorAndCore is implemented as a dummy + *chip = 0; + *core = 0; + return 1; +} #endif From af2429b47e9bdc8ba9dd9992083d03e94f09a9d1 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 13 Aug 2020 16:25:36 +0100 Subject: [PATCH 035/154] Fix s3-4c implementation (#247) * Ported S3-4c version to current AIORI interface. * S3-4c: add crypto dependency * S3: Store username/host in options. --- src/Makefile.am | 1 + src/aiori-S3-4c.c | 724 ++++++++++++++++++++-------------------------- src/ior.c | 1 - src/ior.h | 4 +- 4 files changed, 310 insertions(+), 420 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 9531850..0adbf32 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -105,6 +105,7 @@ extraLDADD += -lcurl extraLDADD += -lxml2 extraLDADD += -laws4c extraLDADD += -laws4c_extra +extraLDADD += -lcrypto endif if USE_S3_LIBS3_AIORI diff --git a/src/aiori-S3-4c.c b/src/aiori-S3-4c.c index 11cc549..f34fadb 100755 --- a/src/aiori-S3-4c.c +++ b/src/aiori-S3-4c.c @@ -91,16 +91,6 @@ #include #include -/* -#ifdef HAVE_LUSTRE_USER -#include -#endif -*/ - -#include "ior.h" -#include "aiori.h" -#include "iordef.h" - #include #include // from libxml2 @@ -109,20 +99,17 @@ #include "aws4c.h" // extended vers of "aws4c" lib for S3 via libcurl #include "aws4c_extra.h" // utilities, e.g. 
for parsing XML in responses +#include "ior.h" +#include "aiori.h" +#include "aiori-debug.h" +extern int rank; +extern MPI_Comm testComm; - -/* buffer is used to generate URLs, err_msgs, etc */ #define BUFF_SIZE 1024 -static char buff[BUFF_SIZE]; - const int ETAG_SIZE = 32; - CURLcode rc; -/* Any objects we create or delete will be under this bucket */ -const char* bucket_name = "ior"; - /* TODO: The following stuff goes into options! */ /* REST/S3 variables */ // CURL* curl; /* for libcurl "easy" fns (now managed by aws4c) */ @@ -130,6 +117,9 @@ const char* bucket_name = "ior"; # define IOR_CURL_NOCONTINUE 0x02 # define IOR_CURL_S3_EMC_EXT 0x04 /* allow EMC extensions to S3? */ +#define MAX_UPLOAD_ID_SIZE 256 /* TODO don't know the actual value */ + + #ifdef USE_S3_4C_AIORI # include # include "aws4c.h" @@ -138,29 +128,47 @@ const char* bucket_name = "ior"; typedef void IOBuf; /* unused, but needs a type */ #endif - IOBuf* io_buf; /* aws4c places parsed header values here */ - IOBuf* etags; /* accumulate ETags for N:1 parts */ + +typedef struct { + /* Any objects we create or delete will be under this bucket */ + char* bucket_name; + char* user; + char* host; + /* Runtime data, this data isn't yet safe to allow concurrent access to multiple files, only open one file at a time */ + int curl_flags; + IOBuf* io_buf; /* aws4c places parsed header values here */ + IOBuf* etags; /* accumulate ETags for N:1 parts */ + size_t part_number; + char UploadId[MAX_UPLOAD_ID_SIZE]; /* key for multi-part-uploads */ + int written; /* did we write to the file */ +} s3_options_t; /////////////////////////////////////////////// +static aiori_xfer_hint_t * hints = NULL; + +static void S3_xfer_hints(aiori_xfer_hint_t * params){ + hints = params; +} + /**************************** P R O T O T Y P E S *****************************/ -static void* S3_Create(char*, IOR_param_t*); -static void* S3_Open(char*, IOR_param_t*); -static IOR_offset_t S3_Xfer(int, void*, IOR_size_t*, IOR_offset_t, 
IOR_param_t*); -static void S3_Close(void*, IOR_param_t*); +static aiori_fd_t* S3_Create(char *path, int iorflags, aiori_mod_opt_t * options); +static aiori_fd_t* S3_Open(char *path, int flags, aiori_mod_opt_t * options); +static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options); +static void S3_Close(aiori_fd_t * afd, aiori_mod_opt_t * options); -static void* EMC_Create(char*, IOR_param_t*); -static void* EMC_Open(char*, IOR_param_t*); -static IOR_offset_t EMC_Xfer(int, void*, IOR_size_t*, IOR_offset_t, IOR_param_t*); -static void EMC_Close(void*, IOR_param_t*); - -static void S3_Delete(char*, IOR_param_t*); -static void S3_Fsync(void*, IOR_param_t*); -static IOR_offset_t S3_GetFileSize(IOR_param_t*, MPI_Comm, char*); -static void S3_init(void * options); -static void S3_finalize(void * options); -static int S3_check_params(IOR_param_t *); +static aiori_fd_t* EMC_Create(char *path, int iorflags, aiori_mod_opt_t * options); +static aiori_fd_t* EMC_Open(char *path, int flags, aiori_mod_opt_t * options); +static IOR_offset_t EMC_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * options); +static void EMC_Close(aiori_fd_t * afd, aiori_mod_opt_t * options); +static void S3_Delete(char *path, aiori_mod_opt_t * options); +static void S3_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * options); +static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName); +static void S3_init(aiori_mod_opt_t * options); +static void S3_finalize(aiori_mod_opt_t * options); +static int S3_check_params(aiori_mod_opt_t * options); +static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); /************************** D E C L A R A T I O N S ***************************/ @@ -173,6 +181,7 @@ ior_aiori_t s3_4c_aiori = { .create = S3_Create, .open = S3_Open, .xfer = S3_Xfer, + 
.xfer_hints = S3_xfer_hints, .close = S3_Close, .delete = S3_Delete, .get_version = aiori_get_version, @@ -180,7 +189,9 @@ ior_aiori_t s3_4c_aiori = { .get_file_size = S3_GetFileSize, .initialize = S3_init, .finalize = S3_finalize, - .check_params = S3_check_params + .check_params = S3_check_params, + .get_options = S3_options, + .enable_mdtest = true }; // "S3", plus EMC-extensions enabled @@ -193,7 +204,7 @@ ior_aiori_t s3_plus_aiori = { .xfer = S3_Xfer, .close = S3_Close, .delete = S3_Delete, - .set_version = S3_SetVersion, + .get_version = aiori_get_version, .fsync = S3_Fsync, .get_file_size = S3_GetFileSize, .initialize = S3_init, @@ -210,7 +221,7 @@ ior_aiori_t s3_emc_aiori = { .xfer = EMC_Xfer, .close = EMC_Close, .delete = S3_Delete, - .set_version = S3_SetVersion, + .get_version = aiori_get_version, .fsync = S3_Fsync, .get_file_size = S3_GetFileSize, .initialize = S3_init, @@ -218,26 +229,50 @@ ior_aiori_t s3_emc_aiori = { }; -static void S3_init(void * options){ +static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ + s3_options_t * o = malloc(sizeof(s3_options_t)); + if (init_values != NULL){ + memcpy(o, init_values, sizeof(s3_options_t)); + }else{ + memset(o, 0, sizeof(s3_options_t)); + } + + *init_backend_options = (aiori_mod_opt_t*) o; + o->bucket_name = "ior"; + + option_help h [] = { + {0, "S3-4c.user", "The username (in ~/.awsAuth).", OPTION_OPTIONAL_ARGUMENT, 's', & o->user}, + {0, "S3-4C.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, + {0, "S3-4c.bucket-name", "The name of the bucket.", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_name}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} + + +static void S3_init(aiori_mod_opt_t * options){ /* This is supposed to be done before *any* threads are created. * Could MPI_Init() create threads (or call multi-threaded * libraries)? We'll assume so. 
*/ AWS4C_CHECK( aws_init() ); } -static void S3_finalize(void * options){ +static void S3_finalize(aiori_mod_opt_t * options){ /* done once per program, after exiting all threads. * NOTE: This fn doesn't return a value that can be checked for success. */ aws_cleanup(); } -static int S3_check_params(IOR_param_t * test){ +static int S3_check_params(aiori_mod_opt_t * test){ + if(! hints) return 0; /* N:1 and N:N */ - IOR_offset_t NtoN = test->filePerProc; + IOR_offset_t NtoN = hints->filePerProc; IOR_offset_t Nto1 = ! NtoN; - IOR_offset_t s = test->segmentCount; - IOR_offset_t t = test->transferSize; - IOR_offset_t b = test->blockSize; + IOR_offset_t s = hints->segmentCount; + IOR_offset_t t = hints->transferSize; + IOR_offset_t b = hints->blockSize; if (Nto1 && (s != 1) && (b != t)) { ERR("N:1 (strided) requires xfer-size == block-size"); @@ -292,15 +327,15 @@ static int S3_check_params(IOR_param_t * test){ */ -static void s3_connect( IOR_param_t* param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> s3_connect\n"); /* DEBUGGING */ - } +static void s3_connect( s3_options_t* param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> s3_connect\n"); /* DEBUGGING */ + //} if ( param->curl_flags & IOR_CURL_INIT ) { - if (param->verbose >= VERBOSE_2) { - printf("<- s3_connect [nothing to do]\n"); /* DEBUGGING */ - } + //if (param->verbose >= VERBOSE_2) { + // printf("<- s3_connect [nothing to do]\n"); /* DEBUGGING */ + //} return; } @@ -318,8 +353,8 @@ static void s3_connect( IOR_param_t* param ) { // NOTE: These inits could be done in init_IORParam_t(), in ior.c, but // would require conditional compilation, there. - aws_set_debug(param->verbose >= 4); - aws_read_config(getenv("USER")); // requires ~/.awsAuth + aws_set_debug(0); // param->verbose >= 4 + aws_read_config(param->user); // requires ~/.awsAuth aws_reuse_connections(1); // initialize IOBufs. 
These are basically dynamically-extensible @@ -346,8 +381,8 @@ static void s3_connect( IOR_param_t* param ) { // snprintf(buff, BUFF_SIZE, "10.140.0.%d", 15 + (rank % 4)); // s3_set_host(buff); - snprintf(buff, BUFF_SIZE, "10.140.0.%d:9020", 15 + (rank % 4)); - s3_set_host(buff); + //snprintf(options->buff, BUFF_SIZE, "10.140.0.%d:9020", 15 + (rank % 4)); + //s3_set_host(options->buff); #else /* @@ -366,23 +401,25 @@ static void s3_connect( IOR_param_t* param ) { // s3_set_host( "10.143.0.1:80"); #endif + s3_set_host(param->host); + // make sure test-bucket exists - s3_set_bucket((char*)bucket_name); + s3_set_bucket((char*) param->bucket_name); if (rank == 0) { AWS4C_CHECK( s3_head(param->io_buf, "") ); if ( param->io_buf->code == 404 ) { // "404 Not Found" - printf(" bucket '%s' doesn't exist\n", bucket_name); + printf(" bucket '%s' doesn't exist\n", param->bucket_name); AWS4C_CHECK( s3_put(param->io_buf, "") ); /* creates URL as bucket + obj */ AWS4C_CHECK_OK( param->io_buf ); // assure "200 OK" - printf("created bucket '%s'\n", bucket_name); + printf("created bucket '%s'\n", param->bucket_name); } else { // assure "200 OK" AWS4C_CHECK_OK( param->io_buf ); } } - MPI_CHECK(MPI_Barrier(param->testComm), "barrier error"); + MPI_CHECK(MPI_Barrier(testComm), "barrier error"); // Maybe allow EMC extensions to S3 @@ -391,24 +428,22 @@ static void s3_connect( IOR_param_t* param ) { // don't perform these inits more than once param->curl_flags |= IOR_CURL_INIT; - - if (param->verbose >= VERBOSE_2) { - printf("<- s3_connect [success]\n"); - } + //if (param->verbose >= VERBOSE_2) { + // printf("<- s3_connect [success]\n"); + //} } static void -s3_disconnect( IOR_param_t* param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> s3_disconnect\n"); - } - +s3_disconnect( s3_options_t* param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> s3_disconnect\n"); + //} // nothing to do here, if using new aws4c ... 
- if (param->verbose >= VERBOSE_2) { - printf("<- s3_disconnect\n"); - } + //if (param->verbose >= VERBOSE_2) { + // printf("<- s3_disconnect\n"); + //} } @@ -416,8 +451,7 @@ s3_disconnect( IOR_param_t* param ) { // After finalizing an S3 multi-part-upload, you must reset some things // before you can use multi-part-upload again. This will also avoid (one // particular set of) memory-leaks. -void -s3_MPU_reset(IOR_param_t* param) { +void s3_MPU_reset(s3_options_t* param) { aws_iobuf_reset(param->io_buf); aws_iobuf_reset(param->etags); param->part_number = 0; @@ -453,46 +487,44 @@ s3_MPU_reset(IOR_param_t* param) { * */ -static -void * -S3_Create_Or_Open_internal(char* testFileName, - IOR_param_t* param, - unsigned char createFile, - int multi_part_upload_p ) { +static aiori_fd_t * S3_Create_Or_Open_internal(char* testFileName, int openFlags, s3_options_t* param, int multi_part_upload_p ) { + unsigned char createFile = openFlags & IOR_CREAT; - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n", - testFileName, createFile, multi_part_upload_p); - } + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Create_Or_Open('%s', ,%d, %d)\n", + // testFileName, createFile, multi_part_upload_p); + //} /* initialize curl, if needed */ s3_connect( param ); /* Check for unsupported flags */ - if ( param->openFlags & IOR_EXCL ) { - fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" ); - } - if ( param->useO_DIRECT == TRUE ) { - fprintf( stdout, "Direct I/O mode is not implemented in S3\n" ); - } + //if ( param->openFlags & IOR_EXCL ) { + // fprintf( stdout, "Opening in Exclusive mode is not implemented in S3\n" ); + //} + //if ( param->useO_DIRECT == TRUE ) { + // fprintf( stdout, "Direct I/O mode is not implemented in S3\n" ); + //} // easier to think - int n_to_n = param->filePerProc; + int n_to_n = hints->filePerProc; int n_to_1 = ! n_to_n; /* check whether object needs reset to zero-length */ int needs_reset = 0; if (! 
multi_part_upload_p) needs_reset = 1; /* so "append" can work */ - else if ( param->openFlags & IOR_TRUNC ) + else if ( openFlags & IOR_TRUNC ) needs_reset = 1; /* so "append" can work */ else if (createFile) { // AWS4C_CHECK( s3_head(param->io_buf, testFileName) ); // if ( ! AWS4C_OK(param->io_buf) ) needs_reset = 1; } - - if ( param->open == WRITE ) { + char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ + param->written = 0; + if ( openFlags & IOR_WRONLY || openFlags & IOR_RDWR ) { + param->written = 1; /* initializations for N:1 or N:N writes using multi-part upload */ if (multi_part_upload_p) { @@ -522,23 +554,21 @@ S3_Create_Or_Open_internal(char* testFileName, response->first->len, NULL, NULL, 0); if (doc == NULL) - ERR_SIMPLE("Rank0 Failed to find POST response\n"); + ERR("Rank0 Failed to find POST response\n"); // navigate parsed XML-tree to find UploadId xmlNode* root_element = xmlDocGetRootElement(doc); const char* upload_id = find_element_named(root_element, (char*)"UploadId"); if (! 
upload_id) - ERR_SIMPLE("couldn't find 'UploadId' in returned XML\n"); + ERR("couldn't find 'UploadId' in returned XML\n"); - if (param->verbose >= VERBOSE_3) - printf("got UploadId = '%s'\n", upload_id); + //if (param->verbose >= VERBOSE_3) + // printf("got UploadId = '%s'\n", upload_id); const size_t upload_id_len = strlen(upload_id); if (upload_id_len > MAX_UPLOAD_ID_SIZE) { - snprintf(buff, BUFF_SIZE, - "UploadId length %d exceeds expected max (%d)", - upload_id_len, MAX_UPLOAD_ID_SIZE); - ERR_SIMPLE(buff); + snprintf(buff, BUFF_SIZE, "UploadId length %zd exceeds expected max (%d)", upload_id_len, MAX_UPLOAD_ID_SIZE); + ERR(buff); } // save the UploadId we found @@ -551,16 +581,15 @@ S3_Create_Or_Open_internal(char* testFileName, // For N:1, share UploadId across all ranks if (n_to_1) - MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm); + MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, testComm); } else // N:1, and we're not rank0. recv UploadID from Rank 0 - MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, param->testComm); + MPI_Bcast(param->UploadId, MAX_UPLOAD_ID_SIZE, MPI_BYTE, 0, testComm); } /* initializations for N:N or N:1 writes using EMC byte-range extensions */ else { - /* maybe reset to zero-length, so "append" can work */ if (needs_reset) { @@ -576,84 +605,48 @@ S3_Create_Or_Open_internal(char* testFileName, } } - - if (param->verbose >= VERBOSE_2) { - printf("<- S3_Create_Or_Open\n"); - } - return ((void *) testFileName ); + //if (param->verbose >= VERBOSE_2) { + // printf("<- S3_Create_Or_Open\n"); + //} + return ((aiori_fd_t *) testFileName ); } +static aiori_fd_t * S3_Create( char *testFileName, int iorflags, aiori_mod_opt_t * param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Create\n"); + //} - -static -void * -S3_Create( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Create\n"); - } - - if (param->verbose >= VERBOSE_2) { - 
printf("<- S3_Create\n"); - } - return S3_Create_Or_Open_internal( testFileName, param, TRUE, TRUE ); -} -static -void * -EMC_Create( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> EMC_Create\n"); - } - - if (param->verbose >= VERBOSE_2) { - printf("<- EMC_Create\n"); - } - return S3_Create_Or_Open_internal( testFileName, param, TRUE, FALSE ); + //if (param->verbose >= VERBOSE_2) { + // printf("<- S3_Create\n"); + //} + return S3_Create_Or_Open_internal( testFileName, iorflags, (s3_options_t*) param, TRUE ); } +static aiori_fd_t * EMC_Create( char *testFileName, int iorflags, aiori_mod_opt_t * param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> EMC_Create\n"); + //} - - - - -static -void * -S3_Open( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Open\n"); - } - - if ( param->openFlags & IOR_CREAT ) { - if (param->verbose >= VERBOSE_2) { - printf("<- S3_Open( ... TRUE)\n"); - } - return S3_Create_Or_Open_internal( testFileName, param, TRUE, TRUE ); - } - else { - if (param->verbose >= VERBOSE_2) { - printf("<- S3_Open( ... FALSE)\n"); - } - return S3_Create_Or_Open_internal( testFileName, param, FALSE, TRUE ); - } + //if (param->verbose >= VERBOSE_2) { + // printf("<- EMC_Create\n"); + //} + return S3_Create_Or_Open_internal( testFileName, iorflags, (s3_options_t*) param, FALSE ); } -static -void * -EMC_Open( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Open\n"); - } - if ( param->openFlags & IOR_CREAT ) { - if (param->verbose >= VERBOSE_2) { - printf("<- EMC_Open( ... TRUE)\n"); - } - return S3_Create_Or_Open_internal( testFileName, param, TRUE, FALSE ); - } - else { - if (param->verbose >= VERBOSE_2) { - printf("<- EMC_Open( ... 
FALSE)\n"); - } - return S3_Create_Or_Open_internal( testFileName, param, FALSE, FALSE ); - } +static aiori_fd_t * S3_Open( char *testFileName, int flags, aiori_mod_opt_t * param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Open\n"); + //} + + return S3_Create_Or_Open_internal( testFileName, flags, (s3_options_t*) param, TRUE ); +} + +static aiori_fd_t * EMC_Open( char *testFileName, int flags, aiori_mod_opt_t * param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Open\n"); + //} + + return S3_Create_Or_Open_internal( testFileName, flags, (s3_options_t*) param, FALSE ); } @@ -730,39 +723,35 @@ EMC_Open( char *testFileName, IOR_param_t * param ) { */ -static -IOR_offset_t -S3_Xfer_internal(int access, - void* file, +static IOR_offset_t S3_Xfer_internal(int access, + aiori_fd_t* file, IOR_size_t* buffer, IOR_offset_t length, - IOR_param_t* param, + IOR_offset_t offset, + s3_options_t* param, int multi_part_upload_p ) { - - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n", - access, (char*)file, buffer, length, param); - } + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Xfer(acc:%d, target:%s, buf:0x%llx, len:%llu, 0x%llx)\n", + // access, (char*)file, buffer, length, param); + //} char* fname = (char*)file; /* see NOTE above S3_Create_Or_Open() */ size_t remaining = (size_t)length; char* data_ptr = (char *)buffer; - off_t offset = param->offset; // easier to think - int n_to_n = param->filePerProc; + int n_to_n = hints->filePerProc; int n_to_1 = (! 
n_to_n); - int segmented = (param->segmentCount == 1); + int segmented = (hints->segmentCount == 1); if (access == WRITE) { /* WRITE */ - - if (verbose >= VERBOSE_3) { - fprintf( stdout, "rank %d writing length=%lld to offset %lld\n", - rank, - remaining, - param->offset + length - remaining); - } + //if (verbose >= VERBOSE_3) { + // fprintf( stdout, "rank %d writing length=%lld to offset %lld\n", + // rank, + // remaining, + // param->offset + length - remaining); + //} if (multi_part_upload_p) { @@ -790,11 +779,11 @@ S3_Xfer_internal(int access, size_t part_number; if (n_to_1) { if (segmented) { // segmented - size_t parts_per_rank = param->blockSize / param->transferSize; + size_t parts_per_rank = hints->blockSize / hints->transferSize; part_number = (rank * parts_per_rank) + param->part_number; } else // strided - part_number = (param->part_number * param->numTasks) + rank; + part_number = (param->part_number * hints->numTasks) + rank; } else part_number = param->part_number; @@ -804,14 +793,15 @@ S3_Xfer_internal(int access, // if (verbose >= VERBOSE_3) { // fprintf( stdout, "rank %d of %d writing (%s,%s) part_number %lld\n", // rank, - // param->numTasks, + // hints->numTasks, // (n_to_1 ? "N:1" : "N:N"), // (segmented ? "segmented" : "strided"), // part_number); // } + char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ snprintf(buff, BUFF_SIZE, - "%s?partNumber=%d&uploadId=%s", + "%s?partNumber=%zd&uploadId=%s", fname, part_number, param->UploadId); // For performance, we append directly into the linked list @@ -838,16 +828,16 @@ S3_Xfer_internal(int access, // } // } - if (verbose >= VERBOSE_3) { - fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n", - rank, - param->numTasks, - (n_to_1 ? "N:1" : "N:N"), - (segmented ? 
"segmented" : "strided"), - offset, - part_number, - param->io_buf->eTag); // incl quote-marks at [0] and [len-1] - } + //if (verbose >= VERBOSE_3) { + // fprintf( stdout, "rank %d of %d (%s,%s) offset %lld, part# %lld --> ETag %s\n", + // rank, + // hints->numTasks, + // (n_to_1 ? "N:1" : "N:N"), + // (segmented ? "segmented" : "strided"), + // offset, + // part_number, + // param->io_buf->eTag); // incl quote-marks at [0] and [len-1] + //} if (strlen(param->io_buf->eTag) != ETAG_SIZE+2) { /* quotes at both ends */ fprintf(stderr, "Rank %d: ERROR: expected ETag to be %d hex digits\n", rank, ETAG_SIZE); @@ -862,9 +852,9 @@ S3_Xfer_internal(int access, param->io_buf->eTag +1, strlen(param->io_buf->eTag) -2); // DEBUGGING - if (verbose >= VERBOSE_4) { - printf("rank %d: part %d = ETag %s\n", rank, part_number, param->io_buf->eTag); - } + //if (verbose >= VERBOSE_4) { + // printf("rank %d: part %d = ETag %s\n", rank, part_number, param->io_buf->eTag); + //} // drop ptrs to , in param->io_buf aws_iobuf_reset(param->io_buf); @@ -885,7 +875,7 @@ S3_Xfer_internal(int access, // than empty storage. aws_iobuf_reset(param->io_buf); aws_iobuf_append_static(param->io_buf, data_ptr, remaining); - AWS4C_CHECK ( s3_put(param->io_buf, file) ); + AWS4C_CHECK ( s3_put(param->io_buf, (char*) file) ); AWS4C_CHECK_OK( param->io_buf ); // drop ptrs to , in param->io_buf @@ -893,18 +883,18 @@ S3_Xfer_internal(int access, } - if ( param->fsyncPerWrite == TRUE ) { + if ( hints->fsyncPerWrite == TRUE ) { WARN("S3 doesn't support 'fsync'" ); /* does it? */ } } else { /* READ or CHECK */ - if (verbose >= VERBOSE_3) { - fprintf( stdout, "rank %d reading from offset %lld\n", - rank, - param->offset + length - remaining ); - } + //if (verbose >= VERBOSE_3) { + // fprintf( stdout, "rank %d reading from offset %lld\n", + // rank, + // hints->offset + length - remaining ); + //} // read specific byte-range from the object // [This is included in the "pure" S3 spec.] 
@@ -917,43 +907,45 @@ S3_Xfer_internal(int access, // libcurl writefunction, invoked via aws4c. aws_iobuf_reset(param->io_buf); aws_iobuf_extend_static(param->io_buf, data_ptr, remaining); - AWS4C_CHECK( s3_get(param->io_buf, file) ); + AWS4C_CHECK( s3_get(param->io_buf, (char*) file) ); if (param->io_buf->code != 206) { /* '206 Partial Content' */ + char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ snprintf(buff, BUFF_SIZE, "Unexpected result (%d, '%s')", param->io_buf->code, param->io_buf->result); - ERR_SIMPLE(buff); + ERR(buff); } // drop refs to , in param->io_buf aws_iobuf_reset(param->io_buf); } - - if (param->verbose >= VERBOSE_2) { - printf("<- S3_Xfer\n"); - } + //if (verbose >= VERBOSE_2) { + // printf("<- S3_Xfer\n"); + //} return ( length ); } -static -IOR_offset_t -S3_Xfer(int access, - void* file, +static IOR_offset_t S3_Xfer(int access, + aiori_fd_t* file, IOR_size_t* buffer, IOR_offset_t length, - IOR_param_t* param ) { - S3_Xfer_internal(access, file, buffer, length, param, TRUE); + IOR_offset_t offset, + aiori_mod_opt_t* param ) { + S3_Xfer_internal(access, file, buffer, length, offset, (s3_options_t*) param, TRUE); } + + static IOR_offset_t EMC_Xfer(int access, - void* file, + aiori_fd_t* file, IOR_size_t* buffer, IOR_offset_t length, - IOR_param_t* param ) { - S3_Xfer_internal(access, file, buffer, length, param, FALSE); + IOR_offset_t offset, + aiori_mod_opt_t* param ) { + S3_Xfer_internal(access, file, buffer, length, offset, (s3_options_t*) param, FALSE); } @@ -992,16 +984,10 @@ EMC_Xfer(int access, * MPI_COMM_WORLD. 
*/ -static -void -S3_Fsync( void *fd, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Fsync [no-op]\n"); - } - - if (param->verbose >= VERBOSE_2) { - printf("<- S3_Fsync\n"); - } +static void S3_Fsync( aiori_fd_t *fd, aiori_mod_opt_t * param ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Fsync [no-op]\n"); + //} } @@ -1030,29 +1016,17 @@ S3_Fsync( void *fd, IOR_param_t * param ) { * See S3_Fsync() for some possible considerations. */ -static -void -S3_Close_internal( void* fd, - IOR_param_t* param, - int multi_part_upload_p ) { +static void S3_Close_internal(aiori_fd_t* fd, s3_options_t* param, int multi_part_upload_p) { char* fname = (char*)fd; /* see NOTE above S3_Create_Or_Open() */ // easier to think - int n_to_n = param->filePerProc; + int n_to_n = hints->filePerProc; int n_to_1 = (! n_to_n); - int segmented = (param->segmentCount == 1); - - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Close('%s', ,%d) %s\n", - fname, - multi_part_upload_p, - ((n_to_n) ? "N:N" : ((segmented) ? "N:1(seg)" : "N:1(str)"))); - } - - if (param->open == WRITE) { + int segmented = (hints->segmentCount == 1); + if (param->written) { // finalizing Multi-Part Upload (for N:1 or N:N) if (multi_part_upload_p) { @@ -1078,11 +1052,11 @@ S3_Close_internal( void* fd, // Everybody should have the same number of ETags (?) 
size_t etag_count_max = 0; /* highest number on any proc */ MPI_Allreduce(&etags_per_rank, &etag_count_max, - 1, mpi_size_t, MPI_MAX, param->testComm); + 1, mpi_size_t, MPI_MAX, testComm); if (etags_per_rank != etag_count_max) { - printf("Rank %d: etag count mismatch: max:%d, mine:%d\n", + printf("Rank %d: etag count mismatch: max:%zd, mine:%zd\n", rank, etag_count_max, etags_per_rank); - MPI_Abort(param->testComm, 1); + MPI_Abort(testComm, 1); } // collect ETag data at Rank0 @@ -1095,26 +1069,25 @@ S3_Close_internal( void* fd, int j; int rnk; - char* etag_vec = (char*)malloc((param->numTasks * etag_data_size) +1); + char* etag_vec = (char*)malloc((hints->numTasks * etag_data_size) +1); if (! etag_vec) { - fprintf(stderr, "rank 0 failed to malloc %d bytes\n", - param->numTasks * etag_data_size); - MPI_Abort(param->testComm, 1); + fprintf(stderr, "rank 0 failed to malloc %zd bytes\n", + hints->numTasks * etag_data_size); + MPI_Abort(testComm, 1); } MPI_Gather(etag_data, etag_data_size, MPI_BYTE, etag_vec, etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD); // --- debugging: show the gathered etag data // (This shows the raw concatenated etag-data from each node.) 
- if (param->verbose >= VERBOSE_4) { - - printf("rank 0: gathered %d etags from all ranks:\n", etags_per_rank); + if (verbose >= VERBOSE_4) { + printf("rank 0: gathered %zd etags from all ranks:\n", etags_per_rank); etag_ptr=etag_vec; - for (rnk=0; rnknumTasks; ++rnk) { + for (rnk=0; rnk < hints->numTasks; ++rnk) { printf("\t[%d]: '", rnk); int ii; - for (ii=0; iinumTasks; + i_max = hints->numTasks; j_max = etags_per_rank; start_multiplier = etag_data_size; /* one rank's-worth of Etag data */ stride = ETAG_SIZE; /* one ETag */ } else { // strided i_max = etags_per_rank; - j_max = param->numTasks; + j_max = hints->numTasks; start_multiplier = ETAG_SIZE; /* one ETag */ stride = etag_data_size; /* one rank's-worth of Etag data */ } @@ -1203,7 +1176,7 @@ S3_Close_internal( void* fd, char etag[ETAG_SIZE +1]; memcpy(etag, etag_ptr, ETAG_SIZE); etag[ETAG_SIZE] = 0; - + char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ // write XML for next part, with Etag ... snprintf(buff, BUFF_SIZE, " \n" @@ -1221,15 +1194,11 @@ S3_Close_internal( void* fd, // write XML tail ... aws_iobuf_append_str(xml, "\n"); - } - - else { + } else { MPI_Gather(etag_data, etag_data_size, MPI_BYTE, NULL, etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD); } - } - - else { /* N:N */ + } else { /* N:N */ xml = aws_iobuf_new(); aws_iobuf_growth_size(xml, 1024 * 8); @@ -1241,6 +1210,7 @@ S3_Close_internal( void* fd, char etag[ETAG_SIZE +1]; int part = 0; int i; + char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ for (i=0; i\n"); } - - // send request to finalize MPU if (n_to_n || (rank == 0)) { // DEBUGGING: show the XML we constructed - if (param->verbose >= VERBOSE_3) + if (verbose >= VERBOSE_3) debug_iobuf(xml, 1, 1); - + char buff[BUFF_SIZE]; /* buffer is used to generate URLs, err_msgs, etc */ // --- POST our XML to the server. 
snprintf(buff, BUFF_SIZE, "%s?uploadId=%s", @@ -1300,42 +1268,36 @@ S3_Close_internal( void* fd, // N:1 file until rank0 has finished the S3 multi-part finalize. // The object will not appear to exist, until then. if (n_to_1) - MPI_CHECK(MPI_Barrier(param->testComm), "barrier error"); - } - else { + MPI_CHECK(MPI_Barrier(testComm), "barrier error"); + } else { // No finalization is needed, when using EMC's byte-range writing // support. However, we do need to make sure everyone has // finished writing, before anyone starts reading. if (n_to_1) { - MPI_CHECK(MPI_Barrier(param->testComm), "barrier error"); - if (param->verbose >= VERBOSE_2) - printf("rank %d: passed barrier\n", rank); - } - } + MPI_CHECK(MPI_Barrier(testComm), "barrier error"); + //if (verbose >= VERBOSE_2) + // printf("rank %d: passed barrier\n", rank); + //} + } + } // After writing, reset the CURL connection, so that caches won't be // used for reads. aws_reset_connection(); } - - if (param->verbose >= VERBOSE_2) { - printf("<- S3_Close\n"); - } + //if (param->verbose >= VERBOSE_2) { + // printf("<- S3_Close\n"); + //} } -static -void -S3_Close( void* fd, - IOR_param_t* param ) { - S3_Close_internal(fd, param, TRUE); +static void S3_Close( aiori_fd_t* fd, aiori_mod_opt_t* param ) { + S3_Close_internal(fd, (s3_options_t*) param, TRUE); } -static -void -EMC_Close( void* fd, - IOR_param_t* param ) { - S3_Close_internal(fd, param, FALSE); + +static void EMC_Close( aiori_fd_t* fd, aiori_mod_opt_t* param ) { + S3_Close_internal(fd, (s3_options_t*) param, FALSE); } @@ -1349,13 +1311,36 @@ EMC_Close( void* fd, * successfully read. 
*/ -static -void -S3_Delete( char *testFileName, IOR_param_t * param ) { +static void S3_Delete( char *testFileName, aiori_mod_opt_t * options ) { + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_Delete(%s)\n", testFileName); + //} + /* maybe initialize curl */ + s3_options_t * param = (s3_options_t*) options; + s3_connect(param ); - if (param->verbose >= VERBOSE_2) { - printf("-> S3_Delete(%s)\n", testFileName); - } +#if 0 + // EMC BUG: If file was written with appends, and is deleted, + // Then any future recreation will result in an object that can't be read. + // this + AWS4C_CHECK( s3_delete(param->io_buf, testFileName) ); +#else + // just replace with a zero-length object for now + aws_iobuf_reset(param->io_buf); + AWS4C_CHECK ( s3_put(param->io_buf, testFileName) ); +#endif + + AWS4C_CHECK_OK( param->io_buf ); + //if (verbose >= VERBOSE_2) + // printf("<- S3_Delete\n"); +} + + +static void EMC_Delete( char *testFileName, aiori_mod_opt_t * options ) { + s3_options_t * param = (s3_options_t*) options; + //if (param->verbose >= VERBOSE_2) { + // printf("-> EMC_Delete(%s)\n", testFileName); + //} /* maybe initialize curl */ s3_connect( param ); @@ -1372,45 +1357,10 @@ S3_Delete( char *testFileName, IOR_param_t * param ) { #endif AWS4C_CHECK_OK( param->io_buf ); - - if (param->verbose >= VERBOSE_2) - printf("<- S3_Delete\n"); + //if (param->verbose >= VERBOSE_2) + // printf("<- EMC_Delete\n"); } - -static -void -EMC_Delete( char *testFileName, IOR_param_t * param ) { - - if (param->verbose >= VERBOSE_2) { - printf("-> EMC_Delete(%s)\n", testFileName); - } - - /* maybe initialize curl */ - s3_connect( param ); - -#if 0 - // EMC BUG: If file was written with appends, and is deleted, - // Then any future recreation will result in an object that can't be read. 
- // this - AWS4C_CHECK( s3_delete(param->io_buf, testFileName) ); -#else - // just replace with a zero-length object for now - aws_iobuf_reset(param->io_buf); - AWS4C_CHECK ( s3_put(param->io_buf, testFileName) ); -#endif - - AWS4C_CHECK_OK( param->io_buf ); - - if (param->verbose >= VERBOSE_2) - printf("<- EMC_Delete\n"); -} - - - - - - /* * HTTP HEAD returns meta-data for a "file". * @@ -1420,15 +1370,11 @@ EMC_Delete( char *testFileName, IOR_param_t * param ) { * request more data than the header actually takes? */ -static -IOR_offset_t -S3_GetFileSize(IOR_param_t * param, - MPI_Comm testComm, - char * testFileName) { - - if (param->verbose >= VERBOSE_2) { - printf("-> S3_GetFileSize(%s)\n", testFileName); - } +static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char * testFileName) { + s3_options_t * param = (s3_options_t*) options; + //if (param->verbose >= VERBOSE_2) { + // printf("-> S3_GetFileSize(%s)\n", testFileName); + //} IOR_offset_t aggFileSizeFromStat; /* i.e. "long long int" */ IOR_offset_t tmpMin, tmpMax, tmpSum; @@ -1442,63 +1388,9 @@ S3_GetFileSize(IOR_param_t * param, if ( ! 
AWS4C_OK(param->io_buf) ) { fprintf(stderr, "rank %d: couldn't stat '%s': %s\n", rank, testFileName, param->io_buf->result); - MPI_Abort(param->testComm, 1); + MPI_Abort(testComm, 1); } aggFileSizeFromStat = param->io_buf->contentLen; - if (param->verbose >= VERBOSE_2) { - printf("\trank %d: file-size %llu\n", rank, aggFileSizeFromStat); - } - - if ( param->filePerProc == TRUE ) { - if (param->verbose >= VERBOSE_2) { - printf("\tall-reduce (1)\n"); - } - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, - &tmpSum, /* sum */ - 1, - MPI_LONG_LONG_INT, - MPI_SUM, - testComm ), - "cannot total data moved" ); - - aggFileSizeFromStat = tmpSum; - } - else { - if (param->verbose >= VERBOSE_2) { - printf("\tall-reduce (2a)\n"); - } - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, - &tmpMin, /* min */ - 1, - MPI_LONG_LONG_INT, - MPI_MIN, - testComm ), - "cannot total data moved" ); - - if (param->verbose >= VERBOSE_2) { - printf("\tall-reduce (2b)\n"); - } - MPI_CHECK(MPI_Allreduce(&aggFileSizeFromStat, - &tmpMax, /* max */ - 1, - MPI_LONG_LONG_INT, - MPI_MAX, - testComm ), - "cannot total data moved" ); - - if ( tmpMin != tmpMax ) { - if ( rank == 0 ) { - WARN( "inconsistent file size by different tasks" ); - } - - /* incorrect, but now consistent across tasks */ - aggFileSizeFromStat = tmpMin; - } - } - - if (param->verbose >= VERBOSE_2) { - printf("<- S3_GetFileSize [%llu]\n", aggFileSizeFromStat); - } return ( aggFileSizeFromStat ); } diff --git a/src/ior.c b/src/ior.c index 196f6ec..6601125 100755 --- a/src/ior.c +++ b/src/ior.c @@ -246,7 +246,6 @@ void init_IOR_Param_t(IOR_param_t * p) p->hdfs_block_size = 0; p->URI = NULL; - p->part_number = 0; } static void diff --git a/src/ior.h b/src/ior.h index ac258e0..843884d 100755 --- a/src/ior.h +++ b/src/ior.h @@ -168,9 +168,7 @@ typedef struct int hdfs_block_size; /* internal blk-size. (0 gets default) */ char* URI; /* "path" to target object */ - size_t part_number; /* multi-part upload increment (PER-RANK!) 
*/ - char* UploadId; /* key for multi-part-uploads */ - + /* RADOS variables */ rados_t rados_cluster; /* RADOS cluster handle */ rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */ From cc0ac5008660c4979f596d328c93dcc16fcb321b Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Wed, 2 Sep 2020 10:08:52 +0100 Subject: [PATCH 036/154] POSIX GFPS adjusted to new API #248 (#253) --- src/aiori-POSIX.c | 12 ++++++------ src/ior-output.c | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index c46c99b..b099903 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -179,7 +179,7 @@ void gpfs_free_all_locks(int fd) EWARNF("gpfs_fcntl(%d, ...) release all locks hint failed.", fd); } } -void gpfs_access_start(int fd, IOR_offset_t length, int access) +void gpfs_access_start(int fd, IOR_offset_t length, IOR_offset_t offset, int access) { int rc; struct { @@ -193,7 +193,7 @@ void gpfs_access_start(int fd, IOR_offset_t length, int access) take_locks.access.structLen = sizeof(take_locks.access); take_locks.access.structType = GPFS_ACCESS_RANGE; - take_locks.access.start = hints->offset; + take_locks.access.start = offset; take_locks.access.length = length; take_locks.access.isWrite = (access == WRITE); @@ -203,7 +203,7 @@ void gpfs_access_start(int fd, IOR_offset_t length, int access) } } -void gpfs_access_end(int fd, IOR_offset_t length, int access) +void gpfs_access_end(int fd, IOR_offset_t length, IOR_offset_t offset, int access) { int rc; struct { @@ -218,7 +218,7 @@ void gpfs_access_end(int fd, IOR_offset_t length, int access) free_locks.free.structLen = sizeof(free_locks.free); free_locks.free.structType = GPFS_FREE_RANGE; - free_locks.free.start = hints->offset; + free_locks.free.start = offset; free_locks.free.length = length; rc = gpfs_fcntl(fd, &free_locks); @@ -539,7 +539,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer #ifdef 
HAVE_GPFS_FCNTL_H if (o->gpfs_hint_access) { - gpfs_access_start(fd, length, access); + gpfs_access_start(fd, length, offset, access); } #endif @@ -600,7 +600,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer } #ifdef HAVE_GPFS_FCNTL_H if (o->gpfs_hint_access) { - gpfs_access_end(fd, length, param, access); + gpfs_access_end(fd, length, offset, access); } #endif return (length); diff --git a/src/ior-output.c b/src/ior-output.c index 25366eb..4c05170 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -371,10 +371,10 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("setTimeStampSignature/incompressibleSeed", test->setTimeStampSignature); /* Seed value was copied into setTimeStampSignature as well */ PrintKeyValInt("collective", test->collective); PrintKeyValInt("segmentCount", test->segmentCount); - #ifdef HAVE_GPFS_FCNTL_H - PrintKeyValInt("gpfsHintAccess", test->gpfs_hint_access); - PrintKeyValInt("gpfsReleaseToken", test->gpfs_release_token); - #endif + //#ifdef HAVE_GPFS_FCNTL_H + //PrintKeyValInt("gpfsHintAccess", test->gpfs_hint_access); + //PrintKeyValInt("gpfsReleaseToken", test->gpfs_release_token); + //#endif PrintKeyValInt("transferSize", test->transferSize); PrintKeyValInt("blockSize", test->blockSize); PrintEndSection(); From 7c201c0d9c8f359c8566de62894de2c197388d09 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 2 Sep 2020 12:12:17 -0500 Subject: [PATCH 037/154] HDF5: fix segfault in HDF5 driver after latest AIORI changes (#255) - the hints structure was not getting initialized in MPIIO when HDF5 backend is used. Since HDF5 utilizes the MPIIO backend, this causes a segfault. 
Signed-off-by: Mohamad Chaarawi --- src/aiori-HDF5.c | 2 ++ src/aiori-MPIIO.c | 5 ++--- src/aiori.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/aiori-HDF5.c b/src/aiori-HDF5.c index 560dfbb..bd4f29f 100755 --- a/src/aiori-HDF5.c +++ b/src/aiori-HDF5.c @@ -171,6 +171,8 @@ static aiori_xfer_hint_t * hints = NULL; static void HDF5_init_xfer_options(aiori_xfer_hint_t * params){ hints = params; + /** HDF5 utilizes the MPIIO backend too, so init hints there */ + MPIIO_xfer_hints(params); } static int HDF5_check_params(aiori_mod_opt_t * options){ diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 8462248..2ed0c6d 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -40,7 +40,6 @@ static IOR_offset_t MPIIO_Xfer(int, aiori_fd_t *, IOR_size_t *, static void MPIIO_Close(aiori_fd_t *, aiori_mod_opt_t *); static char* MPIIO_GetVersion(); static void MPIIO_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -static void MPIIO_xfer_hints(aiori_xfer_hint_t * params); static int MPIIO_check_params(aiori_mod_opt_t * options); /************************** D E C L A R A T I O N S ***************************/ @@ -108,7 +107,7 @@ ior_aiori_t mpiio_aiori = { /***************************** F U N C T I O N S ******************************/ static aiori_xfer_hint_t * hints = NULL; -static void MPIIO_xfer_hints(aiori_xfer_hint_t * params){ +void MPIIO_xfer_hints(aiori_xfer_hint_t * params){ hints = params; } @@ -140,10 +139,10 @@ static int MPIIO_check_params(aiori_mod_opt_t * module_options){ */ int MPIIO_Access(const char *path, int mode, aiori_mod_opt_t *module_options) { - mpiio_options_t * param = (mpiio_options_t*) module_options; if(hints->dryRun){ return MPI_SUCCESS; } + mpiio_options_t * param = (mpiio_options_t*) module_options; MPI_File fd; int mpi_mode = MPI_MODE_UNIQUE_OPEN; MPI_Info mpiHints = MPI_INFO_NULL; diff --git a/src/aiori.h b/src/aiori.h index a1adc6d..5dbbcb1 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -157,9 +157,10 @@ int 
aiori_posix_access (const char *path, int mode, aiori_mod_opt_t * module_opt int aiori_posix_stat (const char *path, struct stat *buf, aiori_mod_opt_t * module_options); -/* NOTE: these 3 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ +/* NOTE: these 4 MPI-IO functions are exported for reuse by HDF5/PNetCDF */ void MPIIO_Delete(char *testFileName, aiori_mod_opt_t * module_options); IOR_offset_t MPIIO_GetFileSize(aiori_mod_opt_t * options, char *testFileName); int MPIIO_Access(const char *, int, aiori_mod_opt_t * module_options); +void MPIIO_xfer_hints(aiori_xfer_hint_t * params); #endif /* not _AIORI_H */ From e60801cf8e224369c88b637a9f3ec531fe969bf9 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Sun, 13 Sep 2020 08:52:02 +0100 Subject: [PATCH 038/154] Prefix S3 options correctly for S3-libs3 (#257) * Option renamed to be consistent --- src/aiori-S3-libs3.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index ef11c43..6a35238 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -58,14 +58,14 @@ static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_m o->bucket_prefix = "ior"; option_help h [] = { - {0, "S3.bucket-per-file", "Use one bucket to map one file/directory, otherwise one bucket is used to store all dirs/files.", OPTION_FLAG, 'd', & o->bucket_per_file}, - {0, "S3.bucket-name-prefix", "The prefix of the bucket(s).", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, - {0, "S3.dont-suffix-bucket", "By default a hash will be added to the bucket name to increase uniqueness, this disables the option.", OPTION_FLAG, 'd', & o->dont_suffix }, - {0, "S3.s3-compatible", "to be selected when using S3 compatible storage", OPTION_FLAG, 'd', & o->s3_compatible }, - {0, "S3.use-ssl", "used to specify that SSL is needed for the connection", OPTION_FLAG, 'd', & o->use_ssl }, - {0, "S3.host", "The host optionally followed by:port.", 
OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, - {0, "S3.secret-key", "The secret key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->secret_key}, - {0, "S3.access-key", "The access key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->access_key}, + {0, "S3-libs3.bucket-per-file", "Use one bucket to map one file/directory, otherwise one bucket is used to store all dirs/files.", OPTION_FLAG, 'd', & o->bucket_per_file}, + {0, "S3-libs3.bucket-name-prefix", "The prefix of the bucket(s).", OPTION_OPTIONAL_ARGUMENT, 's', & o->bucket_prefix}, + {0, "S3-libs3.dont-suffix-bucket", "By default a hash will be added to the bucket name to increase uniqueness, this disables the option.", OPTION_FLAG, 'd', & o->dont_suffix }, + {0, "S3-libs3.s3-compatible", "to be selected when using S3 compatible storage", OPTION_FLAG, 'd', & o->s3_compatible }, + {0, "S3-libs3.use-ssl", "used to specify that SSL is needed for the connection", OPTION_FLAG, 'd', & o->use_ssl }, + {0, "S3-libs3.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, + {0, "S3-libs3.secret-key", "The secret key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->secret_key}, + {0, "S3-libs3.access-key", "The access key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->access_key}, LAST_OPTION }; option_help * help = malloc(sizeof(h)); From 8f14166a7939d644e258b4afaf40e5f3f25f8b1c Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Sun, 13 Sep 2020 09:20:46 +0100 Subject: [PATCH 039/154] S3: removed unneded check. 
(#258) --- src/aiori-S3-libs3.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 6a35238..c8e29c2 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -217,7 +217,7 @@ static aiori_fd_t *S3_Create(char *path, int iorflags, aiori_mod_opt_t * options s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - + if(iorflags & IOR_CREAT){ if(o->bucket_per_file){ @@ -317,7 +317,7 @@ static IOR_offset_t S3_Xfer(int access, aiori_fd_t * afd, IOR_size_t * buffer, I }else{ sprintf(p, "%s", fd->object); } - } + } if(access == WRITE){ S3_put_object(& o->bucket_context, p, length, NULL, NULL, o->timeout, &putObjectHandler, & dh); }else{ @@ -363,7 +363,7 @@ static void S3_Delete(char *path, aiori_mod_opt_t * options) s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_file_name(o, p, path); - + if(o->bucket_per_file){ o->bucket_context.bucketName = p; @@ -386,7 +386,7 @@ static int S3_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; def_bucket_name(o, p, path); - + if (o->bucket_per_file){ S3_create_bucket(o->s3_protocol, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, S3CannedAclPrivate, o->locationConstraint, NULL, o->timeout, & responseHandler, NULL); @@ -406,7 +406,7 @@ static int S3_rmdir (const char *path, aiori_mod_opt_t * options){ s3_options_t * o = (s3_options_t*) options; char p[FILENAME_MAX]; - def_bucket_name(o, p, path); + def_bucket_name(o, p, path); if (o->bucket_per_file){ S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); CHECK_ERROR(p); @@ -423,7 +423,7 @@ static int S3_stat(const char *path, struct stat *buf, aiori_mod_opt_t * options char p[FILENAME_MAX]; def_file_name(o, p, path); memset(buf, 0, sizeof(struct 
stat)); - // TODO count the individual file fragment sizes together + // TODO count the individual file fragment sizes together if (o->bucket_per_file){ S3_test_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, 0, NULL, @@ -451,10 +451,6 @@ static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName static int S3_check_params(aiori_mod_opt_t * options){ - if(hints->blockSize != hints->transferSize){ - ERR("S3 Blocksize must be transferSize"); - } - return 0; } From bd76b45ef9db100373f7d005de9a866cd44575d0 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Mon, 28 Sep 2020 20:09:48 +0100 Subject: [PATCH 040/154] =?UTF-8?q?Integrated=20comparison=20checks=20into?= =?UTF-8?q?=20CompareData()=20which=20prevents=20additi=E2=80=A6=20(#254)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Integrated comparison checks into CompareData() which prevents additional buffer comparisons at the expense of some computation #249. * IOR: Added code documentation to warn people for changing the creation pattern but not the verification pattern. --- src/ior.c | 168 +++++++++++++++++++----------------------------------- 1 file changed, 58 insertions(+), 110 deletions(-) diff --git a/src/ior.c b/src/ior.c index 6601125..2b8c8ce 100755 --- a/src/ior.c +++ b/src/ior.c @@ -374,36 +374,56 @@ static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t da * difference in buffers and returns total errors counted. 
*/ static size_t -CompareBuffers(void *expectedBuffer, - void *unknownBuffer, - size_t size, - IOR_offset_t transferCount, IOR_param_t *test, int access) +CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_param_t *test, IOR_offset_t offset, int fillrank, int access) { char testFileName[MAX_PATHLEN]; char bufferLabel1[MAX_STR]; char bufferLabel2[MAX_STR]; - size_t i, j, length, first, last; + size_t i, j, length; size_t errorCount = 0; - int inError = 0; - unsigned long long *goodbuf = (unsigned long long *)expectedBuffer; - unsigned long long *testbuf = (unsigned long long *)unknownBuffer; + + IOR_offset_t offsetSignature = 0; + unsigned long long hi, lo, val; // for data verification + hi = ((unsigned long long)fillrank) << 32; + lo = (unsigned long long)test->timeStampSignatureValue; + if (test->storeFileOffset){ + offsetSignature = offset; + } + + unsigned long long *testbuf = (unsigned long long *)expectedBuffer; if (access == WRITECHECK || access == READCHECK) { strcpy(bufferLabel1, "Expected: "); strcpy(bufferLabel2, "Actual: "); } else { - ERR("incorrect argument for CompareBuffers()"); + ERR("incorrect argument for CompareData()"); } length = size / sizeof(IOR_size_t); - first = -1; if (verbose >= VERBOSE_3) { fprintf(out_logfile, "[%d] At file byte offset %lld, comparing %llu-byte transfer\n", rank, (long long) offset, (long long)size); } + + int incompressibleSeed = test->setTimeStampSignature + fillrank; for (i = 0; i < length; i++) { - if (testbuf[i] != goodbuf[i]) { + if(test->dataPacketType == incompressible ) { + /* same logic as in FillIncompressibleBuffer() */ + /* WARNING: make sure that both functions are changed at the same time */ + hi = ((unsigned long long) rand_r(& incompressibleSeed) << 32); + lo = (unsigned long long) rand_r(& incompressibleSeed); + val = hi | lo; + }else{ + if ((i % 2) == 0) { + /* evens contain MPI rank and time in seconds */ + val = hi | lo; + } else { + /* odds contain offset */ + val = 
offsetSignature + (i * sizeof(unsigned long long)); + } + } + if (testbuf[i] != val) { errorCount++; if (verbose >= VERBOSE_2) { fprintf(out_logfile, @@ -412,58 +432,28 @@ CompareBuffers(void *expectedBuffer, (long long) offset + (IOR_size_t) (i * sizeof(IOR_size_t))); fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1); - fprintf(out_logfile, "%016llx\n", goodbuf[i]); + fprintf(out_logfile, "%016llx\n", val); fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel2); fprintf(out_logfile, "%016llx\n", testbuf[i]); } - if (!inError) { - inError = 1; - first = i; - last = i; - } else { - last = i; - } - } else if (verbose >= VERBOSE_5 && i % 4 == 0) { + + } else if (verbose >= VERBOSE_5) { fprintf(out_logfile, - "[%d] PASSED offset = %lu bytes, transfer %lld\n", - rank, - ((i * sizeof(unsigned long long)) + - offset), transferCount); + "[%d] PASSED offset = %llu bytes, transfer %lld\n", + rank, ((i * sizeof(unsigned long long)) + offset), transferCount); fprintf(out_logfile, "[%d] GOOD %s0x", rank, bufferLabel1); - for (j = 0; j < 4; j++) - fprintf(out_logfile, "%016llx ", goodbuf[i + j]); + fprintf(out_logfile, "%016llx ", val); fprintf(out_logfile, "\n[%d] GOOD %s0x", rank, bufferLabel2); - for (j = 0; j < 4; j++) - fprintf(out_logfile, "%016llx ", testbuf[i + j]); + fprintf(out_logfile, "%016llx ", testbuf[i]); fprintf(out_logfile, "\n"); } } - if (inError) { - inError = 0; + if (errorCount > 0) { GetTestFileName(testFileName, test); - EWARNF("[%d] FAILED comparison of buffer containing %d-byte ints:\n", - rank, (int)sizeof(unsigned long long int)); - fprintf(out_logfile, "[%d] File name = %s\n", rank, testFileName); - fprintf(out_logfile, "[%d] In transfer %lld, ", rank, - transferCount); - fprintf(out_logfile, - "%lld errors between buffer indices %lld and %lld.\n", - (long long)errorCount, (long long)first, - (long long)last); - fprintf(out_logfile, "[%d] File byte offset = %lu:\n", rank, - ((first * sizeof(unsigned long long)) + offset)); - - 
fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1); - for (j = first; j < length && j < first + 4; j++) - fprintf(out_logfile, "%016llx ", goodbuf[j]); - if (j == length) - fprintf(out_logfile, "[end of buffer]"); - fprintf(out_logfile, "\n[%d] %s0x", rank, bufferLabel2); - for (j = first; j < length && j < first + 4; j++) - fprintf(out_logfile, "%016llx ", testbuf[j]); - if (j == length) - fprintf(out_logfile, "[end of buffer]"); - fprintf(out_logfile, "\n"); + EWARNF("[%d] FAILED comparison of buffer in file %s during transfer %lld offset %lld containing %d-byte ints (%zd errors)", + rank, testFileName, transferCount, offset, (int)sizeof(unsigned long long int),errorCount); + }else if(verbose >= VERBOSE_2){ + fprintf(out_logfile, "[%d] comparison successful during transfer %lld offset %lld\n", rank, transferCount, offset); } return (errorCount); } @@ -639,15 +629,22 @@ void DistributeHints(void) * ints, store transfer offset. If storeFileOffset option is used, the file * (not transfer) offset is stored instead. 
*/ +static unsigned int reseed_incompressible_prng = TRUE; static void FillIncompressibleBuffer(void* buffer, IOR_param_t * test) - { size_t i; unsigned long long hi, lo; unsigned long long *buf = (unsigned long long *)buffer; + /* In order for write checks to work, we have to restart the pseudo random sequence */ + /* This function has the same logic as CompareData() */ + /* WARNING: make sure that both functions are changed at the same time */ + if(reseed_incompressible_prng == TRUE) { + test->incompressibleSeed = test->setTimeStampSignature + rank; /* We copied seed into timestampSignature at initialization, also add the rank to add randomness between processes */ + reseed_incompressible_prng = FALSE; + } for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) { hi = ((unsigned long long) rand_r(&test->incompressibleSeed) << 32); lo = (unsigned long long) rand_r(&test->incompressibleSeed); @@ -655,8 +652,6 @@ FillIncompressibleBuffer(void* buffer, IOR_param_t * test) } } -unsigned int reseed_incompressible_prng = TRUE; - static void FillBuffer(void *buffer, IOR_param_t * test, unsigned long long offset, int fillrank) @@ -666,16 +661,8 @@ FillBuffer(void *buffer, unsigned long long *buf = (unsigned long long *)buffer; if(test->dataPacketType == incompressible ) { /* Make for some non compressible buffers with randomish data */ - - /* In order for write checks to work, we have to restart the pseudo random sequence */ - if(reseed_incompressible_prng == TRUE) { - test->incompressibleSeed = test->setTimeStampSignature + rank; /* We copied seed into timestampSignature at initialization, also add the rank to add randomness between processes */ - reseed_incompressible_prng = FALSE; - } FillIncompressibleBuffer(buffer, test); - } - - else { + } else { hi = ((unsigned long long)fillrank) << 32; lo = (unsigned long long)test->timeStampSignatureValue; for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) { @@ -1051,15 +1038,6 @@ static void 
XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, int pretendRank) { ioBuffers->buffer = aligned_buffer_alloc(test->transferSize); - - if (test->checkWrite || test->checkRead) { - ioBuffers->checkBuffer = aligned_buffer_alloc(test->transferSize); - } - if (test->checkRead || test->checkWrite) { - ioBuffers->readCheckBuffer = aligned_buffer_alloc(test->transferSize); - } - - return; } /* @@ -1069,15 +1047,6 @@ static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test) { aligned_buffer_free(ioBuffers->buffer); - - if (test->checkWrite || test->checkRead) { - aligned_buffer_free(ioBuffers->checkBuffer); - } - if (test->checkRead) { - aligned_buffer_free(ioBuffers->readCheckBuffer); - } - - return; } @@ -1439,10 +1408,6 @@ static void TestIoSys(IOR_test_t *test) } rankOffset = (2 * shift) % params->numTasks; } - - // update the check buffer - FillBuffer(ioBuffers.readCheckBuffer, params, 0, (rank + rankOffset) % params->numTasks); - reseed_incompressible_prng = TRUE; /* Re-Seed the PRNG to get same sequence back, if random */ GetTestFileName(testFileName, params); @@ -1505,10 +1470,6 @@ static void TestIoSys(IOR_test_t *test) file_hits_histogram(params); } } - if(operation_flag == READCHECK){ - FillBuffer(ioBuffers.readCheckBuffer, params, 0, (rank + rankOffset) % params->numTasks); - } - /* Using globally passed rankOffset, following function generates testFileName to read */ GetTestFileName(testFileName, params); @@ -1816,8 +1777,6 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset IOR_offset_t transfer; void *buffer = ioBuffers->buffer; - void *checkBuffer = ioBuffers->checkBuffer; - void *readCheckBuffer = ioBuffers->readCheckBuffer; IOR_offset_t offset = offsetArray[pairCnt]; // this looks inappropriate @@ -1846,30 +1805,19 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset nanosleep( & wait, NULL); } } else if (access == WRITECHECK) { - memset(checkBuffer, 'a', 
transfer); - - if (test->storeFileOffset == TRUE) { - FillBuffer(readCheckBuffer, test, offset, pretendRank); - } - - amtXferred = backend->xfer(access, fd, checkBuffer, transfer, offset, test->backend_options); + ((long long int*) buffer)[0] = ~((long long int*) buffer)[0]; // changes the buffer, no memset to reduce the memory pressure + amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); if (amtXferred != transfer) ERR("cannot read from file write check"); (*transferCount)++; - *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer, - *transferCount, test, - WRITECHECK); + *errors += CompareData(buffer, transfer, *transferCount, test, offset, pretendRank, WRITECHECK); } else if (access == READCHECK) { - memset(checkBuffer, 'a', transfer); - - amtXferred = backend->xfer(access, fd, checkBuffer, transfer, offset, test->backend_options); + ((long long int*) buffer)[0] = ~((long long int*) buffer)[0]; // changes the buffer, no memset to reduce the memory pressure + amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); if (amtXferred != transfer){ ERR("cannot read from file"); } - if (test->storeFileOffset == TRUE) { - FillBuffer(readCheckBuffer, test, offset, pretendRank); - } - *errors += CompareBuffers(readCheckBuffer, checkBuffer, transfer, *transferCount, test, READCHECK); + *errors += CompareData(buffer, transfer, *transferCount, test, offset, pretendRank, READCHECK); } return amtXferred; } From 65ddc53950c9df73b0428400dc2d47884da25524 Mon Sep 17 00:00:00 2001 From: Sebastian Oeste Date: Fri, 9 Oct 2020 10:35:01 +0200 Subject: [PATCH 041/154] CSV output for IOR results (#260) This commit adds CSV output for IOR. The output will just include the `Results` section. All other sections are omitted, since it's hard to produce a useful csv with all information included. 
--- src/ior-output.c | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index 4c05170..b1a547a 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -20,6 +20,8 @@ void PrintTableHeader(){ fprintf(out_resultfile, "\n"); fprintf(out_resultfile, "access bw(MiB/s) IOPS Latency(s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); fprintf(out_resultfile, "------ --------- ---- ---------- ---------- --------- -------- -------- -------- -------- ----\n"); + }else if(outputFormat == OUTPUT_CSV){ + fprintf(out_resultfile, "access,bw(MiB/s),IOPS,Latency,block(KiB),xfer(KiB),open(s),wr/rd(s),close(s),total(s),iter\n"); } } @@ -45,8 +47,6 @@ static void PrintKeyValStart(char * key){ } if(outputFormat == OUTPUT_JSON){ fprintf(out_resultfile, "\"%s\": \"", key); - }else if(outputFormat == OUTPUT_CSV){ - } } @@ -84,7 +84,7 @@ static void PrintKeyVal(char * key, char * value){ if(outputFormat == OUTPUT_JSON){ fprintf(out_resultfile, "\"%s\": \"%s\"", key, value); }else if(outputFormat == OUTPUT_CSV){ - fprintf(out_resultfile, "%s", value); + fprintf(out_resultfile, "%s,", value); } } @@ -98,7 +98,7 @@ static void PrintKeyValDouble(char * key, double value){ if(outputFormat == OUTPUT_JSON){ fprintf(out_resultfile, "\"%s\": %.4f", key, value); }else if(outputFormat == OUTPUT_CSV){ - fprintf(out_resultfile, "%.4f", value); + fprintf(out_resultfile, "%.4f,", value); } } @@ -113,7 +113,7 @@ static void PrintKeyValInt(char * key, int64_t value){ if(outputFormat == OUTPUT_JSON){ fprintf(out_resultfile, "\"%s\": %lld", key, (long long) value); }else if(outputFormat == OUTPUT_CSV){ - fprintf(out_resultfile, "%lld", (long long) value); + fprintf(out_resultfile, "%lld,", (long long) value); } } @@ -203,13 +203,16 @@ void PrintRepeatEnd(){ void PrintRepeatStart(){ if (rank != 0) return; - if( outputFormat == OUTPUT_DEFAULT){ + if(outputFormat == OUTPUT_DEFAULT){ return; } 
PrintArrayStart(); } void PrintTestEnds(){ + if (outputFormat == OUTPUT_CSV){ + return; + } if (rank != 0 || verbose <= VERBOSE_0) { PrintEndSection(); return; @@ -246,7 +249,20 @@ void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, do PrintKeyValDouble("closeTime", diff_subset[2]); PrintKeyValDouble("totalTime", totalTime); PrintEndSection(); + }else if (outputFormat == OUTPUT_CSV){ + PrintKeyVal("access", access == WRITE ? "write" : "read"); + PrintKeyValDouble("bwMiB", bw / MEBIBYTE); + PrintKeyValDouble("iops", iops); + PrintKeyValDouble("latency", latency); + PrintKeyValDouble("blockKiB", (double)test->params.blockSize / KIBIBYTE); + PrintKeyValDouble("xferKiB", (double)test->params.transferSize / KIBIBYTE); + PrintKeyValDouble("openTime", diff_subset[0]); + PrintKeyValDouble("wrRdTime", diff_subset[1]); + PrintKeyValDouble("closeTime", diff_subset[2]); + PrintKeyValDouble("totalTime", totalTime); + fprintf(out_resultfile, "%d\n", rep); } + fflush(out_resultfile); } @@ -258,6 +274,10 @@ void PrintHeader(int argc, char **argv) if (rank != 0) return; + if (outputFormat == OUTPUT_CSV){ + return; + } + PrintStartSection(); if (outputFormat != OUTPUT_DEFAULT){ PrintKeyVal("Version", META_VERSION); @@ -319,6 +339,9 @@ void PrintHeader(int argc, char **argv) */ void ShowTestStart(IOR_param_t *test) { + if (outputFormat == OUTPUT_CSV){ + return; + } PrintStartSection(); PrintKeyValInt("TestID", test->id); PrintKeyVal("StartTime", CurrentTimeString()); @@ -401,6 +424,9 @@ void ShowTestEnd(IOR_test_t *tptr){ */ void ShowSetup(IOR_param_t *params) { + if (outputFormat == OUTPUT_CSV){ + return; + } if (params->debug) { fprintf(out_logfile, "\n*** DEBUG MODE ***\n"); fprintf(out_logfile, "*** %s ***\n\n", params->debug); @@ -612,8 +638,6 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) } PrintKeyValDouble("xsizeMiB", (double) point->aggFileSizeForBW / MEBIBYTE); PrintEndSection(); - }else if (outputFormat == 
OUTPUT_CSV){ - } fflush(out_resultfile); @@ -638,7 +662,7 @@ void PrintLongSummaryHeader() if (rank != 0 || verbose <= VERBOSE_0) return; if(outputFormat != OUTPUT_DEFAULT){ - return; + return; } fprintf(out_resultfile, "\n"); @@ -665,8 +689,6 @@ void PrintLongSummaryAllTests(IOR_test_t *tests_head) fprintf(out_resultfile, "Summary of all tests:"); }else if (outputFormat == OUTPUT_JSON){ PrintNamedArrayStart("summary"); - }else if (outputFormat == OUTPUT_CSV){ - } PrintLongSummaryHeader(); From 7bc9680b85df871c7d5d490de59153386445d69d Mon Sep 17 00:00:00 2001 From: otatebe <39575743+otatebe@users.noreply.github.com> Date: Mon, 26 Oct 2020 20:26:10 +0900 Subject: [PATCH 042/154] aiori-Gfarm: update to the new aiori interface (#262) --- src/aiori-Gfarm.c | 92 ++++++++++++++++++++++------------------------- 1 file changed, 42 insertions(+), 50 deletions(-) diff --git a/src/aiori-Gfarm.c b/src/aiori-Gfarm.c index a7af0ea..fecda08 100644 --- a/src/aiori-Gfarm.c +++ b/src/aiori-Gfarm.c @@ -14,6 +14,14 @@ struct gfarm_file { GFS_File gf; }; +static aiori_xfer_hint_t *hints = NULL; + +void +Gfarm_xfer_hints(aiori_xfer_hint_t *params) +{ + hints = params; +} + void Gfarm_initialize() { @@ -26,14 +34,14 @@ Gfarm_finalize() gfarm_terminate(); } -void * -Gfarm_create(char *fn, IOR_param_t *param) +aiori_fd_t * +Gfarm_create(char *fn, int flag, aiori_mod_opt_t *param) { GFS_File gf; struct gfarm_file *fp; gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return (NULL); e = gfs_pio_create(fn, GFARM_FILE_RDWR, 0664, &gf); @@ -43,17 +51,17 @@ Gfarm_create(char *fn, IOR_param_t *param) if (fp == NULL) ERR("no memory"); fp->gf = gf; - return (fp); + return ((aiori_fd_t *)fp); } -void * -Gfarm_open(char *fn, IOR_param_t *param) +aiori_fd_t * +Gfarm_open(char *fn, int flag, aiori_mod_opt_t *param) { GFS_File gf; struct gfarm_file *fp; gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return (NULL); e = gfs_pio_open(fn, GFARM_FILE_RDWR, &gf); @@ -63,14 +71,14 @@ 
Gfarm_open(char *fn, IOR_param_t *param) if (fp == NULL) ERR("no memory"); fp->gf = gf; - return (fp); + return ((aiori_fd_t *)fp); } IOR_offset_t -Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, - IOR_param_t *param) +Gfarm_xfer(int access, aiori_fd_t *fd, IOR_size_t *buffer, + IOR_offset_t len, IOR_offset_t offset, aiori_mod_opt_t *param) { - struct gfarm_file *fp = fd; + struct gfarm_file *fp = (struct gfarm_file *)fd; IOR_offset_t rem = len; gfarm_off_t off; gfarm_error_t e; @@ -78,7 +86,7 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, int sz, n; char *buf = (char *)buffer; - if (param->dryRun) + if (hints->dryRun) return (len); if (len > MAX_SZ) @@ -86,7 +94,7 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, else sz = len; - e = gfs_pio_seek(fp->gf, param->offset, GFARM_SEEK_SET, &off); + e = gfs_pio_seek(fp->gf, offset, GFARM_SEEK_SET, &off); if (e != GFARM_ERR_NO_ERROR) ERR("gfs_pio_seek failed"); while (rem > 0) { @@ -105,11 +113,11 @@ Gfarm_xfer(int access, void *fd, IOR_size_t *buffer, IOR_offset_t len, } void -Gfarm_close(void *fd, IOR_param_t *param) +Gfarm_close(aiori_fd_t *fd, aiori_mod_opt_t *param) { - struct gfarm_file *fp = fd; + struct gfarm_file *fp = (struct gfarm_file *)fd; - if (param->dryRun) + if (hints->dryRun) return; if (gfs_pio_close(fp->gf) != GFARM_ERR_NO_ERROR) @@ -118,11 +126,11 @@ Gfarm_close(void *fd, IOR_param_t *param) } void -Gfarm_delete(char *fn, IOR_param_t *param) +Gfarm_delete(char *fn, aiori_mod_opt_t *param) { gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return; e = gfs_unlink(fn); @@ -137,11 +145,11 @@ Gfarm_version() } void -Gfarm_fsync(void *fd, IOR_param_t *param) +Gfarm_fsync(aiori_fd_t *fd, aiori_mod_opt_t *param) { - struct gfarm_file *fp = fd; + struct gfarm_file *fp = (struct gfarm_file *)fd; - if (param->dryRun) + if (hints->dryRun) return; if (gfs_pio_sync(fp->gf) != GFARM_ERR_NO_ERROR) @@ -149,12 +157,12 @@ 
Gfarm_fsync(void *fd, IOR_param_t *param) } IOR_offset_t -Gfarm_get_file_size(IOR_param_t *param, MPI_Comm comm, char *fn) +Gfarm_get_file_size(aiori_mod_opt_t *param, char *fn) { struct gfs_stat st; IOR_offset_t size, sum, min, max; - if (param->dryRun) + if (hints->dryRun) return (0); if (gfs_stat(fn, &st) != GFARM_ERR_NO_ERROR) @@ -162,34 +170,17 @@ Gfarm_get_file_size(IOR_param_t *param, MPI_Comm comm, char *fn) size = st.st_size; gfs_stat_free(&st); - if (param->filePerProc == TRUE) { - MPI_CHECK(MPI_Allreduce(&size, &sum, 1, MPI_LONG_LONG_INT, - MPI_SUM, comm), "cannot total data moved"); - size = sum; - } else { - MPI_CHECK(MPI_Allreduce(&size, &min, 1, MPI_LONG_LONG_INT, - MPI_MIN, comm), "cannot total data moved"); - MPI_CHECK(MPI_Allreduce(&size, &max, 1, MPI_LONG_LONG_INT, - MPI_MAX, comm), "cannot total data moved"); - if (min != max) { - if (rank == 0) - WARN("inconsistent file size by different " - "tasks"); - /* incorrect, but now consistent across tasks */ - size = min; - } - } return (size); } int -Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, IOR_param_t *param) +Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, aiori_mod_opt_t *param) { gfarm_off_t used, avail, files; gfarm_error_t e; int bsize = 4096; - if (param->dryRun) + if (hints->dryRun) return (0); e = gfs_statfs_by_path(fn, &used, &avail, &files); @@ -206,11 +197,11 @@ Gfarm_statfs(const char *fn, ior_aiori_statfs_t *st, IOR_param_t *param) } int -Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) +Gfarm_mkdir(const char *fn, mode_t mode, aiori_mod_opt_t *param) { gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return (0); e = gfs_mkdir(fn, mode); @@ -221,11 +212,11 @@ Gfarm_mkdir(const char *fn, mode_t mode, IOR_param_t *param) } int -Gfarm_rmdir(const char *fn, IOR_param_t *param) +Gfarm_rmdir(const char *fn, aiori_mod_opt_t *param) { gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return (0); e = gfs_rmdir(fn); @@ -236,12 +227,12 @@ 
Gfarm_rmdir(const char *fn, IOR_param_t *param) } int -Gfarm_access(const char *fn, int mode, IOR_param_t *param) +Gfarm_access(const char *fn, int mode, aiori_mod_opt_t *param) { struct gfs_stat st; gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return (0); e = gfs_stat(fn, &st); @@ -259,12 +250,12 @@ Gfarm_access(const char *fn, int mode, IOR_param_t *param) #define STAT_BLKSIZ 512 /* for st_blocks */ int -Gfarm_stat(const char *fn, struct stat *buf, IOR_param_t *param) +Gfarm_stat(const char *fn, struct stat *buf, aiori_mod_opt_t *param) { struct gfs_stat st; gfarm_error_t e; - if (param->dryRun) + if (hints->dryRun) return (0); e = gfs_stat(fn, &st); @@ -298,6 +289,7 @@ ior_aiori_t gfarm_aiori = { .name_legacy = NULL, .create = Gfarm_create, .open = Gfarm_open, + .xfer_hints = Gfarm_xfer_hints, .xfer = Gfarm_xfer, .close = Gfarm_close, .delete = Gfarm_delete, From 077f4ef98dc536eb87631243250ddcdee85da1cb Mon Sep 17 00:00:00 2001 From: Jean-Yves VET Date: Mon, 26 Oct 2020 15:37:19 +0100 Subject: [PATCH 043/154] aiori-IME: Fix init after finalize issue (#263) --- src/aiori-IME.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiori-IME.c b/src/aiori-IME.c index 0fccdb0..d9c76a5 100755 --- a/src/aiori-IME.c +++ b/src/aiori-IME.c @@ -160,7 +160,7 @@ void IME_Finalize() return; (void)ime_native_finalize(); - ime_initialized = true; + ime_initialized = false; } /* From dab62b57c86f29f743aab756d287904bc711776c Mon Sep 17 00:00:00 2001 From: donko Date: Wed, 28 Oct 2020 18:38:17 +0900 Subject: [PATCH 044/154] fix JSON format, the end of Array object (#264) --- src/ior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index 2b8c8ce..010a5ce 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1529,8 +1529,8 @@ static void TestIoSys(IOR_test_t *test) params->errorFound = FALSE; rankOffset = 0; - PrintRepeatEnd(); } + PrintRepeatEnd(); MPI_CHECK(MPI_Comm_free(&testComm), "MPI_Comm_free() error"); From 
d750d323e31e94c3e8ec6c79f00e03f90089d4aa Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Wed, 28 Oct 2020 09:47:38 +0000 Subject: [PATCH 045/154] JSON output. Remove duplicated keys. (#265) --- src/ior-output.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index b1a547a..bf5f080 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -620,9 +620,6 @@ static void PrintLongSummaryOneOperation(IOR_test_t *test, const int access) PrintKeyValInt("taskPerNodeOffset", params->taskPerNodeOffset); PrintKeyValInt("reorderTasksRandom", params->reorderTasksRandom); PrintKeyValInt("reorderTasksRandomSeed", params->reorderTasksRandomSeed); - PrintKeyValInt("segmentCount", params->segmentCount); - PrintKeyValInt("blockSize", params->blockSize); - PrintKeyValInt("transferSize", params->transferSize); PrintKeyValDouble("bwMaxMIB", bw->max / MEBIBYTE); PrintKeyValDouble("bwMinMIB", bw->min / MEBIBYTE); PrintKeyValDouble("bwMeanMIB", bw->mean / MEBIBYTE); From 4898badf48ba350cb13a6bde87b39963da2a9b9f Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 29 Oct 2020 10:37:56 +0000 Subject: [PATCH 046/154] Fix compiler issue with redefinition via typedef. --- src/aiori.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiori.h b/src/aiori.h index 5dbbcb1..ba84b60 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -76,9 +76,9 @@ typedef struct aiori_xfer_hint_t{ } aiori_xfer_hint_t; /* this is a dummy structure to create some type safety */ -typedef struct aiori_mod_opt_t{ +struct aiori_mod_opt_t{ void * dummy; -} aiori_mod_opt_t; +}; typedef struct aiori_fd_t{ void * dummy; From c28ed6dc728e9a14831d0705fb5ddd60d415ba08 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 17:34:11 +0000 Subject: [PATCH 047/154] Partial conversion of existing md-workbench to IOR APIs. 
--- src/md-workbench.c | 1031 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1031 insertions(+) create mode 100644 src/md-workbench.c diff --git a/src/md-workbench.c b/src/md-workbench.c new file mode 100644 index 0000000..6e664a3 --- /dev/null +++ b/src/md-workbench.c @@ -0,0 +1,1031 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "aiori.h" +#include "utilities.h" +#include "parse_options.h" + +/* +This is the modified version md-workbench-fs that can utilize AIORI. +It follows the hierarchical file system semantics in contrast to the md-workbench (without -fs) which has dataset and object semantics. + */ + +// successfull, errors +typedef struct { + int suc; + int err; +} op_stat_t; + +// A runtime for an operation and when the operation was started +typedef struct{ + float time_since_app_start; + float runtime; +} time_result_t; + +typedef struct{ + float min; + float q1; + float median; + float q3; + float q90; + float q99; + float max; +} time_statistics_t; + +// statistics for running a single phase +typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! 
+ double t; // maximum time + double * t_all; + + op_stat_t dset_name; + op_stat_t dset_create; + op_stat_t dset_delete; + + op_stat_t obj_name; + op_stat_t obj_create; + op_stat_t obj_read; + op_stat_t obj_stat; + op_stat_t obj_delete; + + // time measurements individual runs + uint64_t repeats; + time_result_t * time_create; + time_result_t * time_read; + time_result_t * time_stat; + time_result_t * time_delete; + + time_statistics_t stats_create; + time_statistics_t stats_read; + time_statistics_t stats_stat; + time_statistics_t stats_delete; + + // the maximum time for any single operation + double max_op_time; + double phase_start_timer; + int stonewall_iterations; +} phase_stat_t; + +#define CHECK_MPI_RET(ret) if (ret != MPI_SUCCESS){ printf("Unexpected error in MPI on Line %d\n", __LINE__);} +#define LLU (long long unsigned) +#define min(a,b) (a < b ? a : b) + +struct benchmark_options{ + ior_aiori_t const * backend; + void * backend_options; + + char * interface; + int num; + int precreate; + int dset_count; + + int offset; + int iterations; + int file_size; + int read_only; + int stonewall_timer; + int stonewall_timer_wear_out; + + char * latency_file_prefix; + int latency_keep_all; + + int phase_cleanup; + int phase_precreate; + int phase_benchmark; + + //int limit_memory; + //int limit_memory_between_phases; + + int verbosity; + int process_report; + + int print_detailed_stats; + int quiet_output; + + char * run_info_file; + + int ignore_precreate_errors; + int rank; + int size; + + float relative_waiting_factor; + int adaptive_waiting_mode; + + uint64_t start_item_number; +}; + +static int global_iteration = 0; + +struct benchmark_options o; + +void init_options(){ + memset(& o, 0, sizeof(o)); + o.interface = "POSIX"; + o.num = 1000; + o.precreate = 3000; + o.dset_count = 10; + o.offset = 1; + o.iterations = 3; + o.file_size = 3901; + o.run_info_file = "mdtest.status"; +} + +static void wait(double runtime){ + double waittime = runtime * 
o.relative_waiting_factor; + //printf("waittime: %e\n", waittime); + if(waittime < 0.01){ + double start; + start = GetTimeStamp(); + double cur = GetTimeStamp(); + double end = cur + waittime; + while (cur < end){ + cur = GetTimeStamp(); + } + }else{ + struct timespec w; + w.tv_sec = (time_t) (waittime); + w.tv_nsec = (long) ((waittime - w.tv_sec) * 1000 * 1000 * 1000); + nanosleep(& w, NULL); + } +} + +static void init_stats(phase_stat_t * p, size_t repeats){ + memset(p, 0, sizeof(phase_stat_t)); + p->repeats = repeats; + size_t timer_size = repeats * sizeof(time_result_t); + p->time_create = (time_result_t *) malloc(timer_size); + p->time_read = (time_result_t *) malloc(timer_size); + p->time_stat = (time_result_t *) malloc(timer_size); + p->time_delete = (time_result_t *) malloc(timer_size); +} + +static float add_timed_result(double start, double phase_start_timer, time_result_t * results, size_t pos, double * max_time, double * out_op_time){ + float curtime = start - phase_start_timer; + double op_time = GetTimeStamp(); + results[pos].runtime = (float) op_time; + results[pos].time_since_app_start = curtime; + if (op_time > *max_time){ + *max_time = op_time; + } + *out_op_time = op_time; + return curtime; +} + +static void print_detailed_stat_header(){ + printf("phase\t\td name\tcreate\tdelete\tob nam\tcreate\tread\tstat\tdelete\tt_inc_b\tt_no_bar\tthp\tmax_t\n"); +} + +static int sum_err(phase_stat_t * p){ + return p->dset_name.err + p->dset_create.err + p->dset_delete.err + p->obj_name.err + p->obj_create.err + p->obj_read.err + p->obj_stat.err + p->obj_delete.err; +} + +static double statistics_mean(int count, double * arr){ + double sum = 0; + for(int i=0; i < o.size; i++){ + sum += arr[i]; + } + return sum / o.size; +} + +static double statistics_std_dev(int count, double * arr){ + double mean = statistics_mean(count, arr); + double sum = 0; + for(int i=0; i < o.size; i++){ + sum += (mean - arr[i])*(mean - arr[i]); + } + return sqrt(sum / (o.size-1)); +} 
+ +static void statistics_minmax(int count, double * arr, double * out_min, double * out_max){ + double min = 1e308; + double max = 0; + for(int i=0; i < o.size; i++){ + min = (arr[i] < min) ? arr[i] : min; + max = (arr[i] > max) ? arr[i] : max; + } + *out_min = min; + *out_max = max; +} + +static void print_p_stat(char * buff, const char * name, phase_stat_t * p, double t, int print_global){ + const double tp = (double)(p->obj_create.suc + p->obj_read.suc) * o.file_size / t / 1024 / 1024; + + const int errs = sum_err(p); + double r_min = 0; + double r_max = 0; + double r_mean = 0; + double r_std = 0; + + if(p->t_all){ + // we can compute several derived values that provide insight about quality of service, latency distribution and load balancing + statistics_minmax(o.size, p->t_all, & r_min, & r_max); + r_mean = statistics_mean(o.size, p->t_all); + r_std = statistics_std_dev(o.size, p->t_all); + } + + if (o.print_detailed_stats){ + sprintf(buff, "%s \t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.3fs\t%.3fs\t%.2f MiB/s %.4e", name, p->dset_name.suc, p->dset_create.suc, p->dset_delete.suc, p->obj_name.suc, p->obj_create.suc, p->obj_read.suc, p->obj_stat.suc, p->obj_delete.suc, p->t, t, tp, p->max_op_time); + + if (errs > 0){ + sprintf(buff, "%s err\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d", name, p->dset_name.err, p->dset_create.err, p->dset_delete.err, p->obj_name.err, p->obj_create.err, p->obj_read.err, p->obj_stat.err, p->obj_delete.err); + } + }else{ + int pos = 0; + // single line + pos += sprintf(buff, "%s process max:%.2fs ", name, t); + if(print_global){ + pos += sprintf(buff + pos, "min:%.1fs mean: %.1fs balance:%.1f stddev:%.1f ", r_min, r_mean, r_min/r_max * 100.0, r_std); + } + int ioops_per_iter = 4; + if(o.read_only){ + ioops_per_iter = 2; + } + + switch(name[0]){ + case('b'): + pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es", + p->obj_read.suc * ioops_per_iter / t, // write, stat, read, delete + p->obj_read.suc, + 
p->obj_read.suc / t, + tp, + p->max_op_time); + + if(o.relative_waiting_factor > 1e-9){ + pos += sprintf(buff + pos, " waiting_factor:%.2f", o.relative_waiting_factor); + } + break; + case('p'): + pos += sprintf(buff + pos, "rate:%.1f iops/s dsets: %d objects:%d rate:%.3f dset/s rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es", + (p->dset_create.suc + p->obj_create.suc) / t, + p->dset_create.suc, + p->obj_create.suc, + p->dset_create.suc / t, + p->obj_create.suc / t, + tp, + p->max_op_time); + break; + case('c'): + pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d dsets: %d rate:%.1f obj/s rate:%.3f dset/s op-max:%.4es", + (p->obj_delete.suc + p->dset_delete.suc) / t, + p->obj_delete.suc, + p->dset_delete.suc, + p->obj_delete.suc / t, + p->dset_delete.suc / t, + p->max_op_time); + break; + default: + pos = sprintf(buff, "%s: unknown phase", name); + break; + } + + if(! o.quiet_output || errs > 0){ + pos += sprintf(buff + pos, " (%d errs", errs); + if(errs > 0){ + pos += sprintf(buff + pos, "!!!)" ); + }else{ + pos += sprintf(buff + pos, ")" ); + } + } + if(! 
o.quiet_output && p->stonewall_iterations){ + pos += sprintf(buff + pos, " stonewall-iter:%d", p->stonewall_iterations); + } + + if(p->stats_read.max > 1e-9){ + time_statistics_t stat = p->stats_read; + pos += sprintf(buff + pos, " read(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); + } + if(p->stats_stat.max > 1e-9){ + time_statistics_t stat = p->stats_stat; + pos += sprintf(buff + pos, " stat(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); + } + if(p->stats_create.max > 1e-9){ + time_statistics_t stat = p->stats_create; + pos += sprintf(buff + pos, " create(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); + } + if(p->stats_delete.max > 1e-9){ + time_statistics_t stat = p->stats_delete; + pos += sprintf(buff + pos, " delete(%.4es, %.4es, %.4es, %.4es, %.4es, %.4es, %.4es)", stat.min, stat.q1, stat.median, stat.q3, stat.q90, stat.q99, stat.max); + } + } +} + +static int compare_floats(time_result_t * x, time_result_t * y){ + return x->runtime < y->runtime ? -1 : (x->runtime > y->runtime ? 
+1 : 0); +} + +static double runtime_quantile(int repeats, time_result_t * times, float quantile){ + int pos = round(quantile * repeats + 0.49); + return times[pos].runtime; +} + +static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t * times, time_result_t * global_times){ + uint64_t count = 0; + int ret; + // due to stonewall, the number of repeats may be different per process + if(o.rank == 0){ + MPI_Status status; + memcpy(global_times, times, repeats * 2 * sizeof(float)); + count += repeats; + for(int i=1; i < o.size; i++){ + int cnt; + ret = MPI_Recv(& global_times[count], max_repeats*2, MPI_FLOAT, i, 888, MPI_COMM_WORLD, & status); + CHECK_MPI_RET(ret) + MPI_Get_count(& status, MPI_FLOAT, & cnt); + count += cnt / 2; + } + }else{ + ret = MPI_Send(times, repeats * 2, MPI_FLOAT, 0, 888, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + } + + return count; +} + +static void compute_histogram(const char * name, time_result_t * times, time_statistics_t * stats, size_t repeats, int writeLatencyFile){ + if(writeLatencyFile && o.latency_file_prefix ){ + char file[1024]; + sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, global_iteration, name); + FILE * f = fopen(file, "w+"); + if(f == NULL){ + printf("%d: Error writing to latency file: %s\n", o.rank, file); + return; + } + fprintf(f, "time,runtime\n"); + for(size_t i = 0; i < repeats; i++){ + fprintf(f, "%.7f,%.4e\n", times[i].time_since_app_start, times[i].runtime); + } + fclose(f); + } + // now sort the times and pick the quantiles + qsort(times, repeats, sizeof(time_result_t), (int (*)(const void *, const void *)) compare_floats); + stats->min = times[0].runtime; + stats->q1 = runtime_quantile(repeats, times, 0.25); + if(repeats % 2 == 0){ + stats->median = (times[repeats/2].runtime + times[repeats/2 - 1].runtime)/2.0; + }else{ + stats->median = times[repeats/2].runtime; + } + stats->q3 = runtime_quantile(repeats, times, 0.75); + stats->q90 = 
runtime_quantile(repeats, times, 0.90); + stats->q99 = runtime_quantile(repeats, times, 0.99); + stats->max = times[repeats - 1].runtime; +} + +static void end_phase(const char * name, phase_stat_t * p){ + int ret; + char buff[4096]; + + //char * limit_memory_P = NULL; + MPI_Barrier(MPI_COMM_WORLD); + + int max_repeats = o.precreate * o.dset_count; + if(strcmp(name,"benchmark") == 0){ + max_repeats = o.num * o.dset_count; + } + + // prepare the summarized report + phase_stat_t g_stat; + init_stats(& g_stat, (o.rank == 0 ? 1 : 0) * ((size_t) max_repeats) * o.size); + // reduce timers + ret = MPI_Reduce(& p->t, & g_stat.t, 2, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + if(o.rank == 0) { + g_stat.t_all = (double*) malloc(sizeof(double) * o.size); + } + ret = MPI_Gather(& p->t, 1, MPI_DOUBLE, g_stat.t_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + ret = MPI_Reduce(& p->dset_name, & g_stat.dset_name, 2*(3+5), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + ret = MPI_Reduce(& p->max_op_time, & g_stat.max_op_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + if( p->stonewall_iterations ){ + ret = MPI_Reduce(& p->repeats, & g_stat.repeats, 1, MPI_UINT64_T, MPI_MIN, 0, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + g_stat.stonewall_iterations = p->stonewall_iterations; + } + int write_rank0_latency_file = (o.rank == 0) && ! 
o.latency_keep_all; + + if(strcmp(name,"precreate") == 0){ + uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_create, g_stat.time_create); + if(o.rank == 0){ + compute_histogram("precreate-all", g_stat.time_create, & g_stat.stats_create, repeats, o.latency_keep_all); + } + compute_histogram("precreate", p->time_create, & p->stats_create, p->repeats, write_rank0_latency_file); + }else if(strcmp(name,"cleanup") == 0){ + uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_delete, g_stat.time_delete); + if(o.rank == 0) { + compute_histogram("cleanup-all", g_stat.time_delete, & g_stat.stats_delete, repeats, o.latency_keep_all); + } + compute_histogram("cleanup", p->time_delete, & p->stats_delete, p->repeats, write_rank0_latency_file); + }else if(strcmp(name,"benchmark") == 0){ + uint64_t repeats = aggregate_timers(p->repeats, max_repeats, p->time_read, g_stat.time_read); + if(o.rank == 0) { + compute_histogram("read-all", g_stat.time_read, & g_stat.stats_read, repeats, o.latency_keep_all); + } + compute_histogram("read", p->time_read, & p->stats_read, p->repeats, write_rank0_latency_file); + + repeats = aggregate_timers(p->repeats, max_repeats, p->time_stat, g_stat.time_stat); + if(o.rank == 0) { + compute_histogram("stat-all", g_stat.time_stat, & g_stat.stats_stat, repeats, o.latency_keep_all); + } + compute_histogram("stat", p->time_stat, & p->stats_stat, p->repeats, write_rank0_latency_file); + + if(! 
o.read_only){ + repeats = aggregate_timers(p->repeats, max_repeats, p->time_create, g_stat.time_create); + if(o.rank == 0) { + compute_histogram("create-all", g_stat.time_create, & g_stat.stats_create, repeats, o.latency_keep_all); + } + compute_histogram("create", p->time_create, & p->stats_create, p->repeats, write_rank0_latency_file); + + repeats = aggregate_timers(p->repeats, max_repeats, p->time_delete, g_stat.time_delete); + if(o.rank == 0) { + compute_histogram("delete-all", g_stat.time_delete, & g_stat.stats_delete, repeats, o.latency_keep_all); + } + compute_histogram("delete", p->time_delete, & p->stats_delete, p->repeats, write_rank0_latency_file); + } + } + + if (o.rank == 0){ + //print the stats: + print_p_stat(buff, name, & g_stat, g_stat.t, 1); + printf("%s\n", buff); + } + + if(o.process_report){ + if(o.rank == 0){ + print_p_stat(buff, name, p, p->t, 0); + printf("0: %s\n", buff); + for(int i=1; i < o.size; i++){ + MPI_Recv(buff, 4096, MPI_CHAR, i, 4711, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + printf("%d: %s\n", i, buff); + } + }else{ + print_p_stat(buff, name, p, p->t, 0); + MPI_Send(buff, 4096, MPI_CHAR, 0, 4711, MPI_COMM_WORLD); + } + } + + if(g_stat.t_all){ + free(g_stat.t_all); + } + if(p->time_create){ + free(p->time_create); + free(p->time_read); + free(p->time_stat); + free(p->time_delete); + } + if(g_stat.time_create){ + free(g_stat.time_create); + free(g_stat.time_read); + free(g_stat.time_stat); + free(g_stat.time_delete); + } + + // allocate memory if necessary + // ret = mem_preallocate(& limit_memory_P, o.limit_memory_between_phases, o.verbosity >= 3); + // if( ret != 0){ + // printf("%d: Error allocating memory!\n", o.rank); + // } + // mem_free_preallocated(& limit_memory_P); +} + +void run_precreate(phase_stat_t * s, int current_index){ + char dset[4096]; + char obj_name[4096]; + int ret; + + for(int i=0; i < o.dset_count; i++){ + ret = o.plugin->def_dset_name(dset, o.rank, i); + if (ret != 0){ + if (! 
o.ignore_precreate_errors){ + printf("Error defining the dataset name\n"); + MPI_Abort(MPI_COMM_WORLD, 1); + } + s->dset_name.err++; + continue; + } + s->dset_name.suc++; + ret = o.plugin->create_dset(dset); + if (ret == MD_NOOP){ + // do not increment any counter + }else if (ret == 0){ + s->dset_create.suc++; + }else{ + s->dset_create.err++; + if (! o.ignore_precreate_errors){ + printf("%d: Error while creating the dset: %s\n", o.rank, dset); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + } + + char * buf = malloc(o.file_size); + memset(buf, o.rank % 256, o.file_size); + double op_timer; // timer for individual operations + size_t pos = -1; // position inside the individual measurement array + double op_time; + + // create the obj + for(int f=current_index; f < o.precreate; f++){ + for(int d=0; d < o.dset_count; d++){ + ret = o.plugin->def_dset_name(dset, o.rank, d); + pos++; + ret = o.plugin->def_obj_name(obj_name, o.rank, d, f); + if (ret != 0){ + s->dset_name.err++; + if (! o.ignore_precreate_errors){ + printf("%d: Error while creating the obj name\n", o.rank); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, 1); + } + s->obj_name.err++; + continue; + } + + op_timer = GetTimeStamp(); + ret = o.plugin->write_obj(dset, obj_name, buf, o.file_size); + add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); + + if (o.verbosity >= 2){ + printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + } + + if (ret == MD_NOOP){ + // do not increment any counter + }else if (ret == 0){ + s->obj_create.suc++; + }else{ + s->obj_create.err++; + if (! o.ignore_precreate_errors){ + printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + } + } + free(buf); +} + +/* FIFO: create a new file, write to it. Then read from the first created file, delete it... 
*/ +void run_benchmark(phase_stat_t * s, int * current_index_p){ + char dset[4096]; + char obj_name[4096]; + int ret; + char * buf = malloc(o.file_size); + memset(buf, o.rank % 256, o.file_size); + double op_timer; // timer for individual operations + size_t pos = -1; // position inside the individual measurement array + int start_index = *current_index_p; + int total_num = o.num; + int armed_stone_wall = (o.stonewall_timer > 0); + int f; + double phase_allreduce_time = 0; + + for(f=0; f < total_num; f++){ + float bench_runtime = 0; // the time since start + for(int d=0; d < o.dset_count; d++){ + double op_time; + const int prevFile = f + start_index; + pos++; + + int readRank = (o.rank - o.offset * (d+1)) % o.size; + readRank = readRank < 0 ? readRank + o.size : readRank; + ret = o.plugin->def_obj_name(obj_name, readRank, d, prevFile); + if (ret != 0){ + s->obj_name.err++; + continue; + } + ret = o.plugin->def_dset_name(dset, readRank, d); + + op_timer = GetTimeStamp(); + ret = o.plugin->stat_obj(dset, obj_name, o.file_size); + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_stat, pos, & s->max_op_time, & op_time); + if(o.relative_waiting_factor > 1e-9) { + wait(op_time); + } + + if (o.verbosity >= 2){ + printf("%d: stat %s:%s (%d)\n", o.rank, dset, obj_name, ret); + } + + if(ret != 0 && ret != MD_NOOP){ + if (o.verbosity) + printf("%d: Error while stating the obj: %s\n", o.rank, dset); + s->obj_stat.err++; + continue; + } + s->obj_stat.suc++; + + if (o.verbosity >= 2){ + printf("%d: read %s:%s \n", o.rank, dset, obj_name); + } + + op_timer = GetTimeStamp(); + ret = o.plugin->read_obj(dset, obj_name, buf, o.file_size); + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_read, pos, & s->max_op_time, & op_time); + if(o.relative_waiting_factor > 1e-9) { + wait(op_time); + } + + if (ret == 0){ + s->obj_read.suc++; + }else if (ret == MD_NOOP){ + // nothing to do + }else if (ret == MD_ERROR_FIND){ + printf("%d: Error while 
accessing the file %s (%s)\n", o.rank, dset, strerror(errno)); + s->obj_read.err++; + }else{ + printf("%d: Error while reading the file %s (%s)\n", o.rank, dset, strerror(errno)); + s->obj_read.err++; + } + + if(o.read_only){ + continue; + } + + op_timer = GetTimeStamp(); + ret = o.plugin->delete_obj(dset, obj_name); + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); + if(o.relative_waiting_factor > 1e-9) { + wait(op_time); + } + + if (o.verbosity >= 2){ + printf("%d: delete %s:%s (%d)\n", o.rank, dset, obj_name, ret); + } + + if (ret == 0){ + s->obj_delete.suc++; + }else if (ret == MD_NOOP){ + // nothing to do + }else{ + printf("%d: Error while deleting the object %s:%s\n", o.rank, dset, obj_name); + s->obj_delete.err++; + } + + int writeRank = (o.rank + o.offset * (d+1)) % o.size; + ret = o.plugin->def_obj_name(obj_name, writeRank, d, o.precreate + prevFile); + if (ret != 0){ + s->obj_name.err++; + continue; + } + ret = o.plugin->def_dset_name(dset, writeRank, d); + + op_timer = GetTimeStamp(); + ret = o.plugin->write_obj(dset, obj_name, buf, o.file_size); + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); + if(o.relative_waiting_factor > 1e-9) { + wait(op_time); + } + + if (o.verbosity >= 2){ + printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + } + + if (ret == 0){ + s->obj_create.suc++; + }else if (ret == MD_ERROR_CREATE){ + if (o.verbosity) + printf("%d: Error while creating the obj: %s\n",o.rank, dset); + s->obj_create.err++; + }else if (ret == MD_NOOP){ + // do not increment any counter + }else{ + if (o.verbosity) + printf("%d: Error while writing the obj: %s\n", o.rank, dset); + s->obj_create.err++; + } + } // end loop + + if(armed_stone_wall && bench_runtime >= o.stonewall_timer){ + if(o.verbosity){ + printf("%d: stonewall runtime %fs (%ds)\n", o.rank, bench_runtime, o.stonewall_timer); + } + if(! 
o.stonewall_timer_wear_out){ + s->stonewall_iterations = f; + break; + } + armed_stone_wall = 0; + // wear out mode, now reduce the maximum + int cur_pos = f + 1; + phase_allreduce_time = GetTimeStamp() - s->phase_start_timer; + int ret = MPI_Allreduce(& cur_pos, & total_num, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + s->phase_start_timer = GetTimeStamp(); + s->stonewall_iterations = total_num; + if(o.rank == 0){ + printf("stonewall wear out %fs (%d iter)\n", bench_runtime, total_num); + } + if(f == total_num){ + break; + } + } + } + s->t = GetTimeStamp() - s->phase_start_timer + phase_allreduce_time; + if(armed_stone_wall && o.stonewall_timer_wear_out){ + int f = total_num; + int ret = MPI_Allreduce(& f, & total_num, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + s->stonewall_iterations = total_num; + } + if(o.stonewall_timer && ! o.stonewall_timer_wear_out){ + // TODO FIXME + int sh = s->stonewall_iterations; + int ret = MPI_Allreduce(& sh, & s->stonewall_iterations, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + CHECK_MPI_RET(ret) + } + + if(! 
o.read_only) { + *current_index_p += f; + } + s->repeats = pos + 1; + free(buf); +} + +void run_cleanup(phase_stat_t * s, int start_index){ + char dset[4096]; + char obj_name[4096]; + int ret; + double op_timer; // timer for individual operations + size_t pos = -1; // position inside the individual measurement array + + for(int d=0; d < o.dset_count; d++){ + ret = o.plugin->def_dset_name(dset, o.rank, d); + + for(int f=0; f < o.precreate; f++){ + double op_time; + pos++; + ret = o.plugin->def_obj_name(obj_name, o.rank, d, f + start_index); + + op_timer = GetTimeStamp(); + ret = o.plugin->delete_obj(dset, obj_name); + add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); + + if (o.verbosity >= 2){ + printf("%d: delete %s:%s (%d)\n", o.rank, dset, obj_name, ret); + } + + if (ret == MD_NOOP){ + // nothing to do + }else if (ret == 0){ + s->obj_delete.suc++; + }else if(ret != MD_NOOP){ + s->obj_delete.err++; + } + } + + ret = o.plugin->rm_dset(dset); + + if (o.verbosity >= 2){ + printf("%d: delete dset %s (%d)\n", o.rank, dset, ret); + } + + if (ret == 0){ + s->dset_delete.suc++; + }else if (ret != MD_NOOP){ + s->dset_delete.err++; + } + } +} + + +static option_help options [] = { + {'O', "offset", "Offset in o.ranks between writers and readers. 
Writers and readers should be located on different nodes.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.offset}, + {'a', "api", "The API (plugin) to use for the benchmark, use list to show all compiled plugins.", OPTION_OPTIONAL_ARGUMENT, 's', & o.interface}, + {'I', "obj-per-proc", "Number of I/O operations per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.num}, + {'L', "latency", "Measure the latency for individual operations, prefix the result files with the provided filename.", OPTION_OPTIONAL_ARGUMENT, 's', & o.latency_file_prefix}, + {0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all}, + {'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate}, + {'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count}, + {'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output}, + //{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory}, + // {'M', "lim-free-mem-phase", "Allocate memory until this limit (in MiB) is reached between the phases, but free it before starting the next phase; the time is NOT included for the phase.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory_between_phases}, + {'S', "object-size", "Size for the created objects.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.file_size}, + {'R', "iterations", "Number of times to rerun the main phase", OPTION_OPTIONAL_ARGUMENT, 'd', & o.iterations}, + {'t', "waiting-time", "Waiting time relative to runtime (1.0 is 100%%)", OPTION_OPTIONAL_ARGUMENT, 'f', & o.relative_waiting_factor}, + {'T', "adaptive-waiting", "Compute an adaptive waiting time", OPTION_FLAG, 'd', & o.adaptive_waiting_mode}, + {'1', "run-precreate", "Run precreate phase", OPTION_FLAG, 'd', & o.phase_precreate}, + {'2', "run-benchmark", "Run benchmark phase", OPTION_FLAG, 'd', & 
o.phase_benchmark}, + {'3', "run-cleanup", "Run cleanup phase (only run explicit phases)", OPTION_FLAG, 'd', & o.phase_cleanup}, + {'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer}, + {'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out}, + {0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number}, + {0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats}, + {0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only}, + {0, "ignore-precreate-errors", "Ignore errors occuring during the pre-creation phase", OPTION_FLAG, 'd', & o.ignore_precreate_errors}, + {0, "process-reports", "Independent report per process/rank", OPTION_FLAG, 'd', & o.process_report}, + {'v', "verbose", "Increase the verbosity level", OPTION_FLAG, 'd', & o.verbosity}, + {0, "run-info-file", "The log file for resuming a previous run", OPTION_OPTIONAL_ARGUMENT, 's', & o.run_info_file}, + LAST_OPTION + }; + +static void printTime(){ + char buff[100]; + time_t now = time(0); + strftime (buff, 100, "%Y-%m-%d %H:%M:%S", localtime (&now)); + printf("%s\n", buff); +} + +static int return_position(){ + int position, ret; + if( o.rank == 0){ + FILE * f = fopen(o.run_info_file, "r"); + if(! 
f){ + printf("[ERROR] Could not open %s for restart\n", o.run_info_file); + exit(1); + } + ret = fscanf(f, "pos: %d", & position); + if (ret != 1){ + printf("Could not read from %s for restart\n", o.run_info_file); + exit(1); + } + fclose(f); + } + ret = MPI_Bcast( & position, 1, MPI_INT, 0, MPI_COMM_WORLD ); + return position; +} + +static void store_position(int position){ + if (o.rank != 0){ + return; + } + FILE * f = fopen(o.run_info_file, "w"); + if(! f){ + printf("[ERROR] Could not open %s for saving data\n", o.run_info_file); + exit(1); + } + fprintf(f, "pos: %d\n", position); + fclose(f); +} + +int main(int argc, char ** argv){ + int ret; + int printhelp = 0; + char * limit_memory_P = NULL; + + init_options(); + + MPI_Init(& argc, & argv); + MPI_Comm_rank(MPI_COMM_WORLD, & o.rank); + MPI_Comm_size(MPI_COMM_WORLD, & o.size); + + if (o.rank == 0 && ! o.quiet_output){ + printf("Args: %s", argv[0]); + for(int i=1; i < argc; i++){ + printf(" \"%s\"", argv[i]); + } + printf("\n"); + } + + options_all_t * global_options = airoi_create_all_module_options(options); + int parsed = option_parse(argc, argv, global_options); + o.backend = aiori_select(o.interface); + if (o.backend == NULL) + ERR("Unrecognized I/O API"); + if (! o.backend->enable_mdtest) + ERR("Backend doesn't support MDWorbench"); + o.backend_options = airoi_update_module_options(o.backend, global_options); + + if (!(o.phase_cleanup || o.phase_precreate || o.phase_benchmark)){ + // enable all phases + o.phase_cleanup = o.phase_precreate = o.phase_benchmark = 1; + } + if (! o.phase_precreate && o.phase_benchmark && o.stonewall_timer && ! 
o.stonewall_timer_wear_out){ + if(o.rank == 0) + printf("Invalid options, if running only the benchmark phase using -2 with stonewall option then use stonewall wear-out\n"); + exit(1); + } + + ret = o.plugin->initialize(); + if (ret != 0){ + printf("%d: Error initializing module\n", o.rank); + MPI_Abort(MPI_COMM_WORLD, 1); + } + + int current_index = 0; + + if ( (o.phase_cleanup || o.phase_benchmark) && ! o.phase_precreate ){ + current_index = return_position(); + } + + if(o.start_item_number){ + printf("Using start position %lld\n", (long long) o.start_item_number); + current_index = o.start_item_number; + } + + size_t total_obj_count = o.dset_count * (size_t) (o.num * o.iterations + o.precreate) * o.size; + if (o.rank == 0 && ! o.quiet_output){ + printf("MD-Workbench total objects: %zu workingset size: %.3f MiB (version: %s) time: ", total_obj_count, ((double) o.size) * o.dset_count * o.precreate * o.file_size / 1024.0 / 1024.0, PACKAGE_VERSION); + printTime(); + if(o.num > o.precreate){ + printf("WARNING: num > precreate, this may cause the situation that no objects are available to read\n"); + } + } + + if ( o.rank == 0 && ! o.quiet_output ){ + // print the set output options + option_print_current(options); + printf("\n"); + } + + // preallocate memory if necessary + //ret = mem_preallocate(& limit_memory_P, o.limit_memory, o.verbosity >= 3); + //if(ret != 0){ + // printf("%d: Error allocating memory\n", o.rank); + // MPI_Abort(MPI_COMM_WORLD, 1); + //} + + double bench_start; + bench_start = GetTimeStamp(); + phase_stat_t phase_stats; + + if(o.rank == 0 && o.print_detailed_stats && ! o.quiet_output){ + print_detailed_stat_header(); + } + + if (o.phase_precreate){ + if (o.rank == 0){ + ret = o.plugin->prepare_global(); + if ( ret != 0 && ret != MD_NOOP ){ + if ( ! 
(ret == MD_EXISTS && o.ignore_precreate_errors)){ + printf("Rank 0 could not prepare the run, aborting\n"); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + } + init_stats(& phase_stats, o.precreate * o.dset_count); + MPI_Barrier(MPI_COMM_WORLD); + + // pre-creation phase + phase_stats.phase_start_timer = GetTimeStamp(); + run_precreate(& phase_stats, current_index); + phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; + end_phase("precreate", & phase_stats); + } + + if (o.phase_benchmark){ + // benchmark phase + for(global_iteration = 0; global_iteration < o.iterations; global_iteration++){ + if(o.adaptive_waiting_mode){ + o.relative_waiting_factor = 0; + } + init_stats(& phase_stats, o.num * o.dset_count); + MPI_Barrier(MPI_COMM_WORLD); + phase_stats.phase_start_timer = GetTimeStamp(); + run_benchmark(& phase_stats, & current_index); + end_phase("benchmark", & phase_stats); + + if(o.adaptive_waiting_mode){ + o.relative_waiting_factor = 0.0625; + for(int r=0; r <= 6; r++){ + init_stats(& phase_stats, o.num * o.dset_count); + MPI_Barrier(MPI_COMM_WORLD); + phase_stats.phase_start_timer = GetTimeStamp(); + run_benchmark(& phase_stats, & current_index); + end_phase("benchmark", & phase_stats); + o.relative_waiting_factor *= 2; + } + } + } + } + + // cleanup phase + if (o.phase_cleanup){ + init_stats(& phase_stats, o.precreate * o.dset_count); + phase_stats.phase_start_timer = GetTimeStamp(); + run_cleanup(& phase_stats, current_index); + phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; + end_phase("cleanup", & phase_stats); + + if (o.rank == 0){ + ret = o.plugin->purge_global(); + if (ret != 0 && ret != MD_NOOP){ + printf("Rank 0: Error purging the global environment\n"); + } + } + }else{ + store_position(current_index); + } + + double t_all = GetTimeStamp(); + ret = o.plugin->finalize(); + if (ret != 0){ + printf("Error while finalization of module\n"); + } + if (o.rank == 0 && ! 
o.quiet_output){ + printf("Total runtime: %.0fs time: ", t_all); + printTime(); + } + + //mem_free_preallocated(& limit_memory_P); + + MPI_Finalize(); + return 0; +} From 9ace15cce35069062d2b067c4afd8cb10c0736ec Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 17:56:28 +0000 Subject: [PATCH 048/154] Workbench: further conversation. --- src/Makefile.am | 13 +++- src/md-workbench-main.c | 10 +++ src/md-workbench.c | 159 +++++++++++----------------------------- src/md-workbench.h | 63 ++++++++++++++++ 4 files changed, 124 insertions(+), 121 deletions(-) create mode 100644 src/md-workbench-main.c create mode 100644 src/md-workbench.h diff --git a/src/Makefile.am b/src/Makefile.am index 0adbf32..47ede87 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,20 +1,25 @@ SUBDIRS = . test -bin_PROGRAMS = ior mdtest +bin_PROGRAMS = ior mdtest md-workbench if USE_CAPS -bin_PROGRAMS += IOR MDTEST +bin_PROGRAMS += IOR MDTEST MD-WORKBENCH endif -noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h aiori-debug.h aiori-POSIX.h +noinst_HEADERS = ior.h utilities.h parse_options.h aiori.h iordef.h ior-internal.h option.h mdtest.h aiori-debug.h aiori-POSIX.h md-workbench.h lib_LIBRARIES = libaiori.a -libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c +libaiori_a_SOURCES = ior.c mdtest.c utilities.c parse_options.c ior-output.c option.c md-workbench.c extraSOURCES = aiori.c aiori-DUMMY.c extraLDADD = extraLDFLAGS = extraCPPFLAGS = +md_workbench_SOURCES = md-workbench.c md-workbench-main.c +md_workbench_LDFLAGS = +md_workbench_LDADD = libaiori.a +md_workbench_CPPFLAGS = + ior_SOURCES = ior-main.c ior_LDFLAGS = ior_LDADD = libaiori.a diff --git a/src/md-workbench-main.c b/src/md-workbench-main.c new file mode 100644 index 0000000..bdd12f2 --- /dev/null +++ b/src/md-workbench-main.c @@ -0,0 +1,10 @@ +#include + +#include "md-workbench.h" + +int main(int argc, char ** argv){ + 
MPI_Init(& argc, & argv); + int ret = md_workbench(argc, argv); + MPI_Finalize(); + return ret; +} diff --git a/src/md-workbench.c b/src/md-workbench.c index 6e664a3..ddf250d 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -1,13 +1,14 @@ #include #include -#include #include #include #include #include #include +#include "md-workbench.h" +#include "config.h" #include "aiori.h" #include "utilities.h" #include "parse_options.h" @@ -17,60 +18,6 @@ This is the modified version md-workbench-fs that can utilize AIORI. It follows the hierarchical file system semantics in contrast to the md-workbench (without -fs) which has dataset and object semantics. */ -// successfull, errors -typedef struct { - int suc; - int err; -} op_stat_t; - -// A runtime for an operation and when the operation was started -typedef struct{ - float time_since_app_start; - float runtime; -} time_result_t; - -typedef struct{ - float min; - float q1; - float median; - float q3; - float q90; - float q99; - float max; -} time_statistics_t; - -// statistics for running a single phase -typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! 
- double t; // maximum time - double * t_all; - - op_stat_t dset_name; - op_stat_t dset_create; - op_stat_t dset_delete; - - op_stat_t obj_name; - op_stat_t obj_create; - op_stat_t obj_read; - op_stat_t obj_stat; - op_stat_t obj_delete; - - // time measurements individual runs - uint64_t repeats; - time_result_t * time_create; - time_result_t * time_read; - time_result_t * time_stat; - time_result_t * time_delete; - - time_statistics_t stats_create; - time_statistics_t stats_read; - time_statistics_t stats_stat; - time_statistics_t stats_delete; - - // the maximum time for any single operation - double max_op_time; - double phase_start_timer; - int stonewall_iterations; -} phase_stat_t; #define CHECK_MPI_RET(ret) if (ret != MPI_SUCCESS){ printf("Unexpected error in MPI on Line %d\n", __LINE__);} #define LLU (long long unsigned) @@ -79,6 +26,7 @@ typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! struct benchmark_options{ ior_aiori_t const * backend; void * backend_options; + aiori_xfer_hint_t hints; char * interface; int num; @@ -501,7 +449,7 @@ void run_precreate(phase_stat_t * s, int current_index){ int ret; for(int i=0; i < o.dset_count; i++){ - ret = o.plugin->def_dset_name(dset, o.rank, i); + ret = o.backend->def_dset_name(dset, o.rank, i); if (ret != 0){ if (! 
o.ignore_precreate_errors){ printf("Error defining the dataset name\n"); @@ -511,10 +459,8 @@ void run_precreate(phase_stat_t * s, int current_index){ continue; } s->dset_name.suc++; - ret = o.plugin->create_dset(dset); - if (ret == MD_NOOP){ - // do not increment any counter - }else if (ret == 0){ + ret = o.backend->create_dset(dset); + if (ret == 0){ s->dset_create.suc++; }else{ s->dset_create.err++; @@ -534,9 +480,9 @@ void run_precreate(phase_stat_t * s, int current_index){ // create the obj for(int f=current_index; f < o.precreate; f++){ for(int d=0; d < o.dset_count; d++){ - ret = o.plugin->def_dset_name(dset, o.rank, d); + ret = o.backend->def_dset_name(dset, o.rank, d); pos++; - ret = o.plugin->def_obj_name(obj_name, o.rank, d, f); + ret = o.backend->def_obj_name(obj_name, o.rank, d, f); if (ret != 0){ s->dset_name.err++; if (! o.ignore_precreate_errors){ @@ -549,16 +495,14 @@ void run_precreate(phase_stat_t * s, int current_index){ } op_timer = GetTimeStamp(); - ret = o.plugin->write_obj(dset, obj_name, buf, o.file_size); + ret = o.backend->write_obj(dset, obj_name, buf, o.file_size); add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if (o.verbosity >= 2){ printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); } - if (ret == MD_NOOP){ - // do not increment any counter - }else if (ret == 0){ + if (ret == 0){ s->obj_create.suc++; }else{ s->obj_create.err++; @@ -597,15 +541,15 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ int readRank = (o.rank - o.offset * (d+1)) % o.size; readRank = readRank < 0 ? 
readRank + o.size : readRank; - ret = o.plugin->def_obj_name(obj_name, readRank, d, prevFile); + ret = o.backend->def_obj_name(obj_name, readRank, d, prevFile); if (ret != 0){ s->obj_name.err++; continue; } - ret = o.plugin->def_dset_name(dset, readRank, d); + ret = o.backend->def_dset_name(dset, readRank, d); op_timer = GetTimeStamp(); - ret = o.plugin->stat_obj(dset, obj_name, o.file_size); + ret = o.backend->stat_obj(dset, obj_name, o.file_size); bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_stat, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); @@ -615,7 +559,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ printf("%d: stat %s:%s (%d)\n", o.rank, dset, obj_name, ret); } - if(ret != 0 && ret != MD_NOOP){ + if(ret != 0){ if (o.verbosity) printf("%d: Error while stating the obj: %s\n", o.rank, dset); s->obj_stat.err++; @@ -628,7 +572,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } op_timer = GetTimeStamp(); - ret = o.plugin->read_obj(dset, obj_name, buf, o.file_size); + ret = o.backend->read_obj(dset, obj_name, buf, o.file_size); bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_read, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); @@ -636,11 +580,6 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if (ret == 0){ s->obj_read.suc++; - }else if (ret == MD_NOOP){ - // nothing to do - }else if (ret == MD_ERROR_FIND){ - printf("%d: Error while accessing the file %s (%s)\n", o.rank, dset, strerror(errno)); - s->obj_read.err++; }else{ printf("%d: Error while reading the file %s (%s)\n", o.rank, dset, strerror(errno)); s->obj_read.err++; @@ -651,7 +590,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } op_timer = GetTimeStamp(); - ret = o.plugin->delete_obj(dset, obj_name); + ret = o.backend->delete_obj(dset, obj_name); bench_runtime = add_timed_result(op_timer, 
s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); @@ -663,23 +602,21 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if (ret == 0){ s->obj_delete.suc++; - }else if (ret == MD_NOOP){ - // nothing to do }else{ printf("%d: Error while deleting the object %s:%s\n", o.rank, dset, obj_name); s->obj_delete.err++; } int writeRank = (o.rank + o.offset * (d+1)) % o.size; - ret = o.plugin->def_obj_name(obj_name, writeRank, d, o.precreate + prevFile); + ret = o.backend->def_obj_name(obj_name, writeRank, d, o.precreate + prevFile); if (ret != 0){ s->obj_name.err++; continue; } - ret = o.plugin->def_dset_name(dset, writeRank, d); + ret = o.backend->def_dset_name(dset, writeRank, d); op_timer = GetTimeStamp(); - ret = o.plugin->write_obj(dset, obj_name, buf, o.file_size); + ret = o.backend->write_obj(dset, obj_name, buf, o.file_size); bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); @@ -691,12 +628,6 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if (ret == 0){ s->obj_create.suc++; - }else if (ret == MD_ERROR_CREATE){ - if (o.verbosity) - printf("%d: Error while creating the obj: %s\n",o.rank, dset); - s->obj_create.err++; - }else if (ret == MD_NOOP){ - // do not increment any counter }else{ if (o.verbosity) printf("%d: Error while writing the obj: %s\n", o.rank, dset); @@ -757,31 +688,29 @@ void run_cleanup(phase_stat_t * s, int start_index){ size_t pos = -1; // position inside the individual measurement array for(int d=0; d < o.dset_count; d++){ - ret = o.plugin->def_dset_name(dset, o.rank, d); + ret = o.backend->def_dset_name(dset, o.rank, d); for(int f=0; f < o.precreate; f++){ double op_time; pos++; - ret = o.plugin->def_obj_name(obj_name, o.rank, d, f + start_index); + ret = o.backend->def_obj_name(obj_name, o.rank, d, f + start_index); op_timer = 
GetTimeStamp(); - ret = o.plugin->delete_obj(dset, obj_name); + ret = o.backend->delete_obj(dset, obj_name); add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); if (o.verbosity >= 2){ printf("%d: delete %s:%s (%d)\n", o.rank, dset, obj_name, ret); } - if (ret == MD_NOOP){ - // nothing to do - }else if (ret == 0){ + if (ret == 0){ s->obj_delete.suc++; - }else if(ret != MD_NOOP){ + }else{ s->obj_delete.err++; } } - ret = o.plugin->rm_dset(dset); + ret = o.backend->rm_dset(dset); if (o.verbosity >= 2){ printf("%d: delete dset %s (%d)\n", o.rank, dset, ret); @@ -789,7 +718,7 @@ void run_cleanup(phase_stat_t * s, int start_index){ if (ret == 0){ s->dset_delete.suc++; - }else if (ret != MD_NOOP){ + }else{ s->dset_delete.err++; } } @@ -865,14 +794,13 @@ static void store_position(int position){ fclose(f); } -int main(int argc, char ** argv){ +int md_workbench(int argc, char ** argv){ int ret; int printhelp = 0; char * limit_memory_P = NULL; init_options(); - MPI_Init(& argc, & argv); MPI_Comm_rank(MPI_COMM_WORLD, & o.rank); MPI_Comm_size(MPI_COMM_WORLD, & o.size); @@ -884,6 +812,7 @@ int main(int argc, char ** argv){ printf("\n"); } + memset(& o.hints, 0, sizeof(o.hints)); options_all_t * global_options = airoi_create_all_module_options(options); int parsed = option_parse(argc, argv, global_options); o.backend = aiori_select(o.interface); @@ -903,11 +832,14 @@ int main(int argc, char ** argv){ exit(1); } - ret = o.plugin->initialize(); - if (ret != 0){ - printf("%d: Error initializing module\n", o.rank); - MPI_Abort(MPI_COMM_WORLD, 1); + o.backend->initialize(o.backend_options); + if(o.backend->xfer_hints){ + o.backend->xfer_hints(& o.hints); } + if(o.backend->check_params){ + o.backend->check_params(o.backend_options); + } + int current_index = 0; @@ -952,12 +884,10 @@ int main(int argc, char ** argv){ if (o.phase_precreate){ if (o.rank == 0){ - ret = o.plugin->prepare_global(); - if ( ret != 0 && ret != MD_NOOP ){ - if ( ! 
(ret == MD_EXISTS && o.ignore_precreate_errors)){ - printf("Rank 0 could not prepare the run, aborting\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } + ret = o.backend->prepare_global(); + if ( ret != 0 ){ + printf("Rank 0 could not prepare the run, aborting\n"); + MPI_Abort(MPI_COMM_WORLD, 1); } } init_stats(& phase_stats, o.precreate * o.dset_count); @@ -1005,8 +935,8 @@ int main(int argc, char ** argv){ end_phase("cleanup", & phase_stats); if (o.rank == 0){ - ret = o.plugin->purge_global(); - if (ret != 0 && ret != MD_NOOP){ + ret = o.backend->purge_global(); + if (ret != 0){ printf("Rank 0: Error purging the global environment\n"); } } @@ -1015,17 +945,12 @@ int main(int argc, char ** argv){ } double t_all = GetTimeStamp(); - ret = o.plugin->finalize(); - if (ret != 0){ - printf("Error while finalization of module\n"); - } + o.backend->finalize(o.backend_options); if (o.rank == 0 && ! o.quiet_output){ printf("Total runtime: %.0fs time: ", t_all); printTime(); } //mem_free_preallocated(& limit_memory_P); - - MPI_Finalize(); return 0; } diff --git a/src/md-workbench.h b/src/md-workbench.h new file mode 100644 index 0000000..0be70b1 --- /dev/null +++ b/src/md-workbench.h @@ -0,0 +1,63 @@ +#ifndef IOR_MD_WORKBENCH_H +#define IOR_MD_WORKBENCH_H + +#include + +// successfull, errors +typedef struct { + int suc; + int err; +} op_stat_t; + +// A runtime for an operation and when the operation was started +typedef struct{ + float time_since_app_start; + float runtime; +} time_result_t; + +typedef struct{ + float min; + float q1; + float median; + float q3; + float q90; + float q99; + float max; +} time_statistics_t; + +// statistics for running a single phase +typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! 
+ double t; // maximum time + double * t_all; + + op_stat_t dset_name; + op_stat_t dset_create; + op_stat_t dset_delete; + + op_stat_t obj_name; + op_stat_t obj_create; + op_stat_t obj_read; + op_stat_t obj_stat; + op_stat_t obj_delete; + + // time measurements individual runs + uint64_t repeats; + time_result_t * time_create; + time_result_t * time_read; + time_result_t * time_stat; + time_result_t * time_delete; + + time_statistics_t stats_create; + time_statistics_t stats_read; + time_statistics_t stats_stat; + time_statistics_t stats_delete; + + // the maximum time for any single operation + double max_op_time; + double phase_start_timer; + int stonewall_iterations; +} phase_stat_t; + +int md_workbench(int argc, char ** argv); + +#endif From 348754c87a99222bb7ab20921cc6eb465dd2ae8c Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 18:35:01 +0000 Subject: [PATCH 049/154] md-workbench code ported. --- src/md-workbench.c | 229 ++++++++++++++++++++++----------------------- src/md-workbench.h | 2 - 2 files changed, 112 insertions(+), 119 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index ddf250d..5b39c45 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -18,6 +18,8 @@ This is the modified version md-workbench-fs that can utilize AIORI. It follows the hierarchical file system semantics in contrast to the md-workbench (without -fs) which has dataset and object semantics. 
*/ +#define FILEMODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH +#define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH #define CHECK_MPI_RET(ret) if (ret != MPI_SUCCESS){ printf("Unexpected error in MPI on Line %d\n", __LINE__);} #define LLU (long long unsigned) @@ -57,6 +59,7 @@ struct benchmark_options{ int quiet_output; char * run_info_file; + char * prefix; // directory to work on int ignore_precreate_errors; int rank; @@ -68,20 +71,29 @@ struct benchmark_options{ uint64_t start_item_number; }; -static int global_iteration = 0; +static int global_iteration; struct benchmark_options o; +static void def_dset_name(char * out_name, int n, int d){ + sprintf(out_name, "%s/%d_%d", o.prefix, n, d); +} + +static void def_obj_name(char * out_name, char * dset, int n, int d, int i){ + sprintf(out_name, "%s/%d_%d/file-%d", dset, n, d, i); +} + void init_options(){ memset(& o, 0, sizeof(o)); o.interface = "POSIX"; + o.prefix = "./out"; o.num = 1000; o.precreate = 3000; o.dset_count = 10; o.offset = 1; o.iterations = 3; o.file_size = 3901; - o.run_info_file = "mdtest.status"; + o.run_info_file = "md-workbench.status"; } static void wait(double runtime){ @@ -130,7 +142,7 @@ static void print_detailed_stat_header(){ } static int sum_err(phase_stat_t * p){ - return p->dset_name.err + p->dset_create.err + p->dset_delete.err + p->obj_name.err + p->obj_create.err + p->obj_read.err + p->obj_stat.err + p->obj_delete.err; + return p->dset_create.err + p->dset_delete.err + p->obj_create.err + p->obj_read.err + p->obj_stat.err + p->obj_delete.err; } static double statistics_mean(int count, double * arr){ @@ -178,10 +190,10 @@ static void print_p_stat(char * buff, const char * name, phase_stat_t * p, doubl } if (o.print_detailed_stats){ - sprintf(buff, "%s \t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.3fs\t%.3fs\t%.2f MiB/s %.4e", name, p->dset_name.suc, p->dset_create.suc, p->dset_delete.suc, p->obj_name.suc, p->obj_create.suc, p->obj_read.suc, p->obj_stat.suc, 
p->obj_delete.suc, p->t, t, tp, p->max_op_time); + sprintf(buff, "%s \t%d\t%d\t%d\t%d\t%d\t%d\t%.3fs\t%.3fs\t%.2f MiB/s %.4e", name, p->dset_create.suc, p->dset_delete.suc, p->obj_create.suc, p->obj_read.suc, p->obj_stat.suc, p->obj_delete.suc, p->t, t, tp, p->max_op_time); if (errs > 0){ - sprintf(buff, "%s err\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d", name, p->dset_name.err, p->dset_create.err, p->dset_delete.err, p->obj_name.err, p->obj_create.err, p->obj_read.err, p->obj_stat.err, p->obj_delete.err); + sprintf(buff, "%s err\t%d\t%d\t%d\t%d\t%d\t%d", name, p->dset_create.err, p->dset_delete.err, p->obj_create.err, p->obj_read.err, p->obj_stat.err, p->obj_delete.err); } }else{ int pos = 0; @@ -327,7 +339,7 @@ static void compute_histogram(const char * name, time_result_t * times, time_sta static void end_phase(const char * name, phase_stat_t * p){ int ret; - char buff[4096]; + char buff[MAX_PATHLEN]; //char * limit_memory_P = NULL; MPI_Barrier(MPI_COMM_WORLD); @@ -348,7 +360,7 @@ static void end_phase(const char * name, phase_stat_t * p){ } ret = MPI_Gather(& p->t, 1, MPI_DOUBLE, g_stat.t_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); CHECK_MPI_RET(ret) - ret = MPI_Reduce(& p->dset_name, & g_stat.dset_name, 2*(3+5), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + ret = MPI_Reduce(& p->dset_create, & g_stat.dset_create, 2*(2+4), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); CHECK_MPI_RET(ret) ret = MPI_Reduce(& p->max_op_time, & g_stat.max_op_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); CHECK_MPI_RET(ret) @@ -410,12 +422,12 @@ static void end_phase(const char * name, phase_stat_t * p){ print_p_stat(buff, name, p, p->t, 0); printf("0: %s\n", buff); for(int i=1; i < o.size; i++){ - MPI_Recv(buff, 4096, MPI_CHAR, i, 4711, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(buff, MAX_PATHLEN, MPI_CHAR, i, 4711, MPI_COMM_WORLD, MPI_STATUS_IGNORE); printf("%d: %s\n", i, buff); } }else{ print_p_stat(buff, name, p, p->t, 0); - MPI_Send(buff, 4096, MPI_CHAR, 0, 4711, MPI_COMM_WORLD); + MPI_Send(buff, 
MAX_PATHLEN, MPI_CHAR, 0, 4711, MPI_COMM_WORLD); } } @@ -444,22 +456,14 @@ static void end_phase(const char * name, phase_stat_t * p){ } void run_precreate(phase_stat_t * s, int current_index){ - char dset[4096]; - char obj_name[4096]; + char dset[MAX_PATHLEN]; + char obj_name[MAX_PATHLEN]; int ret; for(int i=0; i < o.dset_count; i++){ - ret = o.backend->def_dset_name(dset, o.rank, i); - if (ret != 0){ - if (! o.ignore_precreate_errors){ - printf("Error defining the dataset name\n"); - MPI_Abort(MPI_COMM_WORLD, 1); - } - s->dset_name.err++; - continue; - } - s->dset_name.suc++; - ret = o.backend->create_dset(dset); + def_dset_name(dset, o.rank, i); + + ret = o.backend->mkdir(dset, DIRMODE, o.backend_options); if (ret == 0){ s->dset_create.suc++; }else{ @@ -480,38 +484,32 @@ void run_precreate(phase_stat_t * s, int current_index){ // create the obj for(int f=current_index; f < o.precreate; f++){ for(int d=0; d < o.dset_count; d++){ - ret = o.backend->def_dset_name(dset, o.rank, d); + def_dset_name(dset, o.rank, d); pos++; - ret = o.backend->def_obj_name(obj_name, o.rank, d, f); - if (ret != 0){ - s->dset_name.err++; - if (! 
o.ignore_precreate_errors){ - printf("%d: Error while creating the obj name\n", o.rank); - fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, 1); - } - s->obj_name.err++; - continue; - } + def_obj_name(obj_name, dset, o.rank, d, f); op_timer = GetTimeStamp(); - ret = o.backend->write_obj(dset, obj_name, buf, o.file_size); - add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); - - if (o.verbosity >= 2){ - printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + aiori_fd_t * aiori_fh = o.backend->open(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); + if (NULL == aiori_fh){ + FAIL("unable to open file %s", obj_name); } - - if (ret == 0){ + if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; }else{ s->obj_create.err++; if (! o.ignore_precreate_errors){ - printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); - fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, 1); + printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, 1); } } + o.backend->close(aiori_fh, o.backend_options); + + add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); + + if (o.verbosity >= 2){ + printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + } } } free(buf); @@ -519,8 +517,8 @@ void run_precreate(phase_stat_t * s, int current_index){ /* FIFO: create a new file, write to it. Then read from the first created file, delete it... 
*/ void run_benchmark(phase_stat_t * s, int * current_index_p){ - char dset[4096]; - char obj_name[4096]; + char dset[MAX_PATHLEN]; + char obj_name[MAX_PATHLEN]; int ret; char * buf = malloc(o.file_size); memset(buf, o.rank % 256, o.file_size); @@ -531,25 +529,26 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ int armed_stone_wall = (o.stonewall_timer > 0); int f; double phase_allreduce_time = 0; + aiori_fd_t * aiori_fh; for(f=0; f < total_num; f++){ float bench_runtime = 0; // the time since start for(int d=0; d < o.dset_count; d++){ double op_time; + struct stat stat_buf; const int prevFile = f + start_index; pos++; int readRank = (o.rank - o.offset * (d+1)) % o.size; readRank = readRank < 0 ? readRank + o.size : readRank; - ret = o.backend->def_obj_name(obj_name, readRank, d, prevFile); - if (ret != 0){ - s->obj_name.err++; - continue; - } - ret = o.backend->def_dset_name(dset, readRank, d); + def_dset_name(dset, readRank, d); + def_obj_name(obj_name, dset, readRank, d, prevFile); op_timer = GetTimeStamp(); - ret = o.backend->stat_obj(dset, obj_name, o.file_size); + + ret = o.backend->stat(obj_name, & stat_buf, o.backend_options); + // TODO potentially check return value must be identical to o.file_size + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_stat, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); @@ -572,51 +571,61 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } op_timer = GetTimeStamp(); - ret = o.backend->read_obj(dset, obj_name, buf, o.file_size); + aiori_fh = o.backend->open(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); + if (NULL == aiori_fh){ + FAIL("unable to open file %s", obj_name); + } + if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { + s->obj_read.suc++; + }else{ + s->obj_read.err++; + printf("%d: Error while reading the obj: %s\n", o.rank, obj_name); + fflush(stdout); 
+ MPI_Abort(MPI_COMM_WORLD, 1); + } + o.backend->close(aiori_fh, o.backend_options); + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_read, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); } - - if (ret == 0){ - s->obj_read.suc++; - }else{ - printf("%d: Error while reading the file %s (%s)\n", o.rank, dset, strerror(errno)); - s->obj_read.err++; - } - if(o.read_only){ continue; } op_timer = GetTimeStamp(); - ret = o.backend->delete_obj(dset, obj_name); + o.backend->delete(obj_name, o.backend_options); bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); } if (o.verbosity >= 2){ - printf("%d: delete %s:%s (%d)\n", o.rank, dset, obj_name, ret); - } - - if (ret == 0){ - s->obj_delete.suc++; - }else{ - printf("%d: Error while deleting the object %s:%s\n", o.rank, dset, obj_name); - s->obj_delete.err++; + printf("%d: delete %s:%s\n", o.rank, dset, obj_name); } + s->obj_delete.suc++; int writeRank = (o.rank + o.offset * (d+1)) % o.size; - ret = o.backend->def_obj_name(obj_name, writeRank, d, o.precreate + prevFile); - if (ret != 0){ - s->obj_name.err++; - continue; - } - ret = o.backend->def_dset_name(dset, writeRank, d); + def_dset_name(dset, writeRank, d); + def_obj_name(obj_name, dset, writeRank, d, o.precreate + prevFile); op_timer = GetTimeStamp(); - ret = o.backend->write_obj(dset, obj_name, buf, o.file_size); + aiori_fh = o.backend->open(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); + if (NULL == aiori_fh){ + FAIL("unable to open file %s", obj_name); + } + if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { + s->obj_create.suc++; + }else{ + s->obj_create.err++; + if (! 
o.ignore_precreate_errors){ + printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, 1); + } + } + o.backend->close(aiori_fh, o.backend_options); + bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { wait(op_time); @@ -625,14 +634,6 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if (o.verbosity >= 2){ printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); } - - if (ret == 0){ - s->obj_create.suc++; - }else{ - if (o.verbosity) - printf("%d: Error while writing the obj: %s\n", o.rank, dset); - s->obj_create.err++; - } } // end loop if(armed_stone_wall && bench_runtime >= o.stonewall_timer){ @@ -681,45 +682,36 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } void run_cleanup(phase_stat_t * s, int start_index){ - char dset[4096]; - char obj_name[4096]; - int ret; + char dset[MAX_PATHLEN]; + char obj_name[MAX_PATHLEN]; double op_timer; // timer for individual operations size_t pos = -1; // position inside the individual measurement array for(int d=0; d < o.dset_count; d++){ - ret = o.backend->def_dset_name(dset, o.rank, d); + def_dset_name(dset, o.rank, d); for(int f=0; f < o.precreate; f++){ double op_time; pos++; - ret = o.backend->def_obj_name(obj_name, o.rank, d, f + start_index); + def_obj_name(obj_name, dset, o.rank, d, f + start_index); op_timer = GetTimeStamp(); - ret = o.backend->delete_obj(dset, obj_name); + o.backend->delete(obj_name, o.backend_options); add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); if (o.verbosity >= 2){ - printf("%d: delete %s:%s (%d)\n", o.rank, dset, obj_name, ret); - } - - if (ret == 0){ - s->obj_delete.suc++; - }else{ - s->obj_delete.err++; + printf("%d: delete %s\n", o.rank, obj_name); } + s->obj_delete.suc++; } - ret = o.backend->rm_dset(dset); - - if (o.verbosity >= 2){ - 
printf("%d: delete dset %s (%d)\n", o.rank, dset, ret); - } - - if (ret == 0){ + if (o.backend->rmdir(dset, o.backend_options)) { s->dset_delete.suc++; }else{ - s->dset_delete.err++; + printf("unable to remove directory %s", dset); + } + if (o.verbosity >= 2){ + printf("%d: delete dset %s\n", o.rank, dset); } } } @@ -733,6 +725,7 @@ static option_help options [] = { {0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all}, {'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate}, {'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count}, + {'o', NULL, "Output directory", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, {'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output}, //{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory}, // {'M', "lim-free-mem-phase", "Allocate memory until this limit (in MiB) is reached between the phases, but free it before starting the next phase; the time is NOT included for the phase.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory_between_phases}, @@ -799,6 +792,7 @@ int md_workbench(int argc, char ** argv){ int printhelp = 0; char * limit_memory_P = NULL; + global_iteration = 0; init_options(); MPI_Comm_rank(MPI_COMM_WORLD, & o.rank); @@ -816,10 +810,12 @@ int md_workbench(int argc, char ** argv){ options_all_t * global_options = airoi_create_all_module_options(options); int parsed = option_parse(argc, argv, global_options); o.backend = aiori_select(o.interface); - if (o.backend == NULL) + if (o.backend == NULL){ ERR("Unrecognized I/O API"); - if (! o.backend->enable_mdtest) + } + if (! 
o.backend->enable_mdtest){ ERR("Backend doesn't support MDWorbench"); + } o.backend_options = airoi_update_module_options(o.backend, global_options); if (!(o.phase_cleanup || o.phase_precreate || o.phase_benchmark)){ @@ -832,7 +828,9 @@ int md_workbench(int argc, char ** argv){ exit(1); } - o.backend->initialize(o.backend_options); + if (o.backend->initialize){ + o.backend->initialize(o.backend_options); + } if(o.backend->xfer_hints){ o.backend->xfer_hints(& o.hints); } @@ -884,10 +882,8 @@ int md_workbench(int argc, char ** argv){ if (o.phase_precreate){ if (o.rank == 0){ - ret = o.backend->prepare_global(); - if ( ret != 0 ){ - printf("Rank 0 could not prepare the run, aborting\n"); - MPI_Abort(MPI_COMM_WORLD, 1); + if (o.backend->mkdir(o.prefix, DIRMODE, o.backend_options) != 0) { + EWARNF("Unable to create test directory %s", o.prefix); } } init_stats(& phase_stats, o.precreate * o.dset_count); @@ -935,9 +931,8 @@ int md_workbench(int argc, char ** argv){ end_phase("cleanup", & phase_stats); if (o.rank == 0){ - ret = o.backend->purge_global(); - if (ret != 0){ - printf("Rank 0: Error purging the global environment\n"); + if (! o.backend->rmdir(o.prefix, o.backend_options)) { + FAIL("unable to remove directory %s", o.prefix); } } }else{ diff --git a/src/md-workbench.h b/src/md-workbench.h index 0be70b1..c556af8 100644 --- a/src/md-workbench.h +++ b/src/md-workbench.h @@ -30,11 +30,9 @@ typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! double t; // maximum time double * t_all; - op_stat_t dset_name; op_stat_t dset_create; op_stat_t dset_delete; - op_stat_t obj_name; op_stat_t obj_create; op_stat_t obj_read; op_stat_t obj_stat; From d39ae556f0d8c8fc6e85da87762942c74dc2c64c Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 19:04:27 +0000 Subject: [PATCH 050/154] Bugfixing md-workbench errors. 
--- src/md-workbench.c | 54 +++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index 5b39c45..a1a916d 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -18,7 +18,6 @@ This is the modified version md-workbench-fs that can utilize AIORI. It follows the hierarchical file system semantics in contrast to the md-workbench (without -fs) which has dataset and object semantics. */ -#define FILEMODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH #define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH #define CHECK_MPI_RET(ret) if (ret != MPI_SUCCESS){ printf("Unexpected error in MPI on Line %d\n", __LINE__);} @@ -79,8 +78,8 @@ static void def_dset_name(char * out_name, int n, int d){ sprintf(out_name, "%s/%d_%d", o.prefix, n, d); } -static void def_obj_name(char * out_name, char * dset, int n, int d, int i){ - sprintf(out_name, "%s/%d_%d/file-%d", dset, n, d, i); +static void def_obj_name(char * out_name, int n, int d, int i){ + sprintf(out_name, "%s/%d_%d/file-%d", o.prefix, n, d, i); } void init_options(){ @@ -484,14 +483,13 @@ void run_precreate(phase_stat_t * s, int current_index){ // create the obj for(int f=current_index; f < o.precreate; f++){ for(int d=0; d < o.dset_count; d++){ - def_dset_name(dset, o.rank, d); pos++; - def_obj_name(obj_name, dset, o.rank, d, f); + def_obj_name(obj_name, o.rank, d, f); op_timer = GetTimeStamp(); - aiori_fd_t * aiori_fh = o.backend->open(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); + aiori_fd_t * aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL == aiori_fh){ - FAIL("unable to open file %s", obj_name); + FAIL("Unable to open file %s", obj_name); } if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; @@ -517,7 +515,6 @@ void run_precreate(phase_stat_t * s, int current_index){ 
/* FIFO: create a new file, write to it. Then read from the first created file, delete it... */ void run_benchmark(phase_stat_t * s, int * current_index_p){ - char dset[MAX_PATHLEN]; char obj_name[MAX_PATHLEN]; int ret; char * buf = malloc(o.file_size); @@ -541,8 +538,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ int readRank = (o.rank - o.offset * (d+1)) % o.size; readRank = readRank < 0 ? readRank + o.size : readRank; - def_dset_name(dset, readRank, d); - def_obj_name(obj_name, dset, readRank, d, prevFile); + def_obj_name(obj_name, readRank, d, prevFile); op_timer = GetTimeStamp(); @@ -555,25 +551,25 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - printf("%d: stat %s:%s (%d)\n", o.rank, dset, obj_name, ret); + printf("%d: stat %s (%d)\n", o.rank, obj_name, ret); } if(ret != 0){ if (o.verbosity) - printf("%d: Error while stating the obj: %s\n", o.rank, dset); + printf("%d: Error while stating the obj: %s\n", o.rank, obj_name); s->obj_stat.err++; continue; } s->obj_stat.suc++; if (o.verbosity >= 2){ - printf("%d: read %s:%s \n", o.rank, dset, obj_name); + printf("%d: read %s \n", o.rank, obj_name); } op_timer = GetTimeStamp(); - aiori_fh = o.backend->open(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); + aiori_fh = o.backend->open(obj_name, IOR_RDONLY, o.backend_options); if (NULL == aiori_fh){ - FAIL("unable to open file %s", obj_name); + FAIL("Unable to open file %s", obj_name); } if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_read.suc++; @@ -601,18 +597,17 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - printf("%d: delete %s:%s\n", o.rank, dset, obj_name); + printf("%d: delete %s\n", o.rank, obj_name); } s->obj_delete.suc++; int writeRank = (o.rank + o.offset * (d+1)) % o.size; - def_dset_name(dset, writeRank, d); - def_obj_name(obj_name, dset, writeRank, d, o.precreate + 
prevFile); + def_obj_name(obj_name, writeRank, d, o.precreate + prevFile); op_timer = GetTimeStamp(); - aiori_fh = o.backend->open(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); + aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL == aiori_fh){ - FAIL("unable to open file %s", obj_name); + FAIL("Unable to open file %s", obj_name); } if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; @@ -632,7 +627,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + printf("%d: write %s (%d)\n", o.rank, obj_name, ret); } } // end loop @@ -688,12 +683,10 @@ void run_cleanup(phase_stat_t * s, int start_index){ size_t pos = -1; // position inside the individual measurement array for(int d=0; d < o.dset_count; d++){ - def_dset_name(dset, o.rank, d); - for(int f=0; f < o.precreate; f++){ double op_time; pos++; - def_obj_name(obj_name, dset, o.rank, d, f + start_index); + def_obj_name(obj_name, o.rank, d, f + start_index); op_timer = GetTimeStamp(); o.backend->delete(obj_name, o.backend_options); @@ -705,10 +698,11 @@ void run_cleanup(phase_stat_t * s, int start_index){ s->obj_delete.suc++; } - if (o.backend->rmdir(dset, o.backend_options)) { + def_dset_name(dset, o.rank, d); + if (o.backend->rmdir(dset, o.backend_options) == 0) { s->dset_delete.suc++; }else{ - printf("unable to remove directory %s", dset); + printf("Unable to remove directory %s\n", dset); } if (o.verbosity >= 2){ printf("%d: delete dset %s\n", o.rank, dset); @@ -931,8 +925,8 @@ int md_workbench(int argc, char ** argv){ end_phase("cleanup", & phase_stats); if (o.rank == 0){ - if (! 
o.backend->rmdir(o.prefix, o.backend_options)) { - FAIL("unable to remove directory %s", o.prefix); + if (o.backend->rmdir(o.prefix, o.backend_options) != 0) { + printf("Unable to remove directory %s\n", o.prefix); } } }else{ @@ -940,7 +934,9 @@ int md_workbench(int argc, char ** argv){ } double t_all = GetTimeStamp(); - o.backend->finalize(o.backend_options); + if(o.backend->finalize){ + o.backend->finalize(o.backend_options); + } if (o.rank == 0 && ! o.quiet_output){ printf("Total runtime: %.0fs time: ", t_all); printTime(); From dc89a593712604a8ff0d0150bcfe4403e9683bc1 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 19:23:13 +0000 Subject: [PATCH 051/154] Workbench, adjusting the interface to IOR tool interfaces. --- src/aiori-POSIX.c | 2 +- src/md-workbench-main.c | 2 +- src/md-workbench.c | 63 +++++++++++++++++++++-------------------- src/md-workbench.h | 4 ++- 4 files changed, 38 insertions(+), 33 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index b099903..e8933b7 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -497,7 +497,7 @@ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) *fd = open64(testFileName, fd_oflag); if (*fd < 0) - ERRF("open64(\"%s\", %d) failed", testFileName, fd_oflag); + ERRF("open64(\"%s\", %d) failed: %s", testFileName, fd_oflag, strerror(errno)); #ifdef HAVE_LUSTRE_USER if (o->lustre_ignore_locks) { diff --git a/src/md-workbench-main.c b/src/md-workbench-main.c index bdd12f2..0165824 100644 --- a/src/md-workbench-main.c +++ b/src/md-workbench-main.c @@ -4,7 +4,7 @@ int main(int argc, char ** argv){ MPI_Init(& argc, & argv); - int ret = md_workbench(argc, argv); + int ret = md_workbench_run(argc, argv, MPI_COMM_WORLD, stdout); MPI_Finalize(); return ret; } diff --git a/src/md-workbench.c b/src/md-workbench.c index a1a916d..d0b37e3 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -28,6 +28,8 @@ struct benchmark_options{ ior_aiori_t const * backend; 
void * backend_options; aiori_xfer_hint_t hints; + MPI_Comm com; + FILE * logfile; char * interface; int num; @@ -36,6 +38,7 @@ struct benchmark_options{ int offset; int iterations; + int global_iteration; int file_size; int read_only; int stonewall_timer; @@ -70,8 +73,6 @@ struct benchmark_options{ uint64_t start_item_number; }; -static int global_iteration; - struct benchmark_options o; static void def_dset_name(char * out_name, int n, int d){ @@ -293,13 +294,13 @@ static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t * t count += repeats; for(int i=1; i < o.size; i++){ int cnt; - ret = MPI_Recv(& global_times[count], max_repeats*2, MPI_FLOAT, i, 888, MPI_COMM_WORLD, & status); + ret = MPI_Recv(& global_times[count], max_repeats*2, MPI_FLOAT, i, 888, o.com, & status); CHECK_MPI_RET(ret) MPI_Get_count(& status, MPI_FLOAT, & cnt); count += cnt / 2; } }else{ - ret = MPI_Send(times, repeats * 2, MPI_FLOAT, 0, 888, MPI_COMM_WORLD); + ret = MPI_Send(times, repeats * 2, MPI_FLOAT, 0, 888, o.com); CHECK_MPI_RET(ret) } @@ -309,7 +310,7 @@ static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t * t static void compute_histogram(const char * name, time_result_t * times, time_statistics_t * stats, size_t repeats, int writeLatencyFile){ if(writeLatencyFile && o.latency_file_prefix ){ char file[1024]; - sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, global_iteration, name); + sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name); FILE * f = fopen(file, "w+"); if(f == NULL){ printf("%d: Error writing to latency file: %s\n", o.rank, file); @@ -341,7 +342,7 @@ static void end_phase(const char * name, phase_stat_t * p){ char buff[MAX_PATHLEN]; //char * limit_memory_P = NULL; - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(o.com); int max_repeats = o.precreate * o.dset_count; if(strcmp(name,"benchmark") == 0){ @@ -352,19 +353,19 @@ static void 
end_phase(const char * name, phase_stat_t * p){ phase_stat_t g_stat; init_stats(& g_stat, (o.rank == 0 ? 1 : 0) * ((size_t) max_repeats) * o.size); // reduce timers - ret = MPI_Reduce(& p->t, & g_stat.t, 2, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + ret = MPI_Reduce(& p->t, & g_stat.t, 2, MPI_DOUBLE, MPI_MAX, 0, o.com); CHECK_MPI_RET(ret) if(o.rank == 0) { g_stat.t_all = (double*) malloc(sizeof(double) * o.size); } - ret = MPI_Gather(& p->t, 1, MPI_DOUBLE, g_stat.t_all, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + ret = MPI_Gather(& p->t, 1, MPI_DOUBLE, g_stat.t_all, 1, MPI_DOUBLE, 0, o.com); CHECK_MPI_RET(ret) - ret = MPI_Reduce(& p->dset_create, & g_stat.dset_create, 2*(2+4), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + ret = MPI_Reduce(& p->dset_create, & g_stat.dset_create, 2*(2+4), MPI_INT, MPI_SUM, 0, o.com); CHECK_MPI_RET(ret) - ret = MPI_Reduce(& p->max_op_time, & g_stat.max_op_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + ret = MPI_Reduce(& p->max_op_time, & g_stat.max_op_time, 1, MPI_DOUBLE, MPI_MAX, 0, o.com); CHECK_MPI_RET(ret) if( p->stonewall_iterations ){ - ret = MPI_Reduce(& p->repeats, & g_stat.repeats, 1, MPI_UINT64_T, MPI_MIN, 0, MPI_COMM_WORLD); + ret = MPI_Reduce(& p->repeats, & g_stat.repeats, 1, MPI_UINT64_T, MPI_MIN, 0, o.com); CHECK_MPI_RET(ret) g_stat.stonewall_iterations = p->stonewall_iterations; } @@ -421,12 +422,12 @@ static void end_phase(const char * name, phase_stat_t * p){ print_p_stat(buff, name, p, p->t, 0); printf("0: %s\n", buff); for(int i=1; i < o.size; i++){ - MPI_Recv(buff, MAX_PATHLEN, MPI_CHAR, i, 4711, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(buff, MAX_PATHLEN, MPI_CHAR, i, 4711, o.com, MPI_STATUS_IGNORE); printf("%d: %s\n", i, buff); } }else{ print_p_stat(buff, name, p, p->t, 0); - MPI_Send(buff, MAX_PATHLEN, MPI_CHAR, 0, 4711, MPI_COMM_WORLD); + MPI_Send(buff, MAX_PATHLEN, MPI_CHAR, 0, 4711, o.com); } } @@ -469,7 +470,7 @@ void run_precreate(phase_stat_t * s, int current_index){ s->dset_create.err++; if (! 
o.ignore_precreate_errors){ printf("%d: Error while creating the dset: %s\n", o.rank, dset); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(o.com, 1); } } } @@ -498,7 +499,7 @@ void run_precreate(phase_stat_t * s, int current_index){ if (! o.ignore_precreate_errors){ printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(o.com, 1); } } o.backend->close(aiori_fh, o.backend_options); @@ -577,7 +578,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ s->obj_read.err++; printf("%d: Error while reading the obj: %s\n", o.rank, obj_name); fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(o.com, 1); } o.backend->close(aiori_fh, o.backend_options); @@ -616,7 +617,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if (! o.ignore_precreate_errors){ printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(o.com, 1); } } o.backend->close(aiori_fh, o.backend_options); @@ -643,7 +644,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ // wear out mode, now reduce the maximum int cur_pos = f + 1; phase_allreduce_time = GetTimeStamp() - s->phase_start_timer; - int ret = MPI_Allreduce(& cur_pos, & total_num, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + int ret = MPI_Allreduce(& cur_pos, & total_num, 1, MPI_INT, MPI_MAX, o.com); CHECK_MPI_RET(ret) s->phase_start_timer = GetTimeStamp(); s->stonewall_iterations = total_num; @@ -658,14 +659,14 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ s->t = GetTimeStamp() - s->phase_start_timer + phase_allreduce_time; if(armed_stone_wall && o.stonewall_timer_wear_out){ int f = total_num; - int ret = MPI_Allreduce(& f, & total_num, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + int ret = MPI_Allreduce(& f, & total_num, 1, MPI_INT, MPI_MAX, o.com); CHECK_MPI_RET(ret) s->stonewall_iterations = total_num; } if(o.stonewall_timer && ! 
o.stonewall_timer_wear_out){ // TODO FIXME int sh = s->stonewall_iterations; - int ret = MPI_Allreduce(& sh, & s->stonewall_iterations, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + int ret = MPI_Allreduce(& sh, & s->stonewall_iterations, 1, MPI_INT, MPI_MAX, o.com); CHECK_MPI_RET(ret) } @@ -764,7 +765,7 @@ static int return_position(){ } fclose(f); } - ret = MPI_Bcast( & position, 1, MPI_INT, 0, MPI_COMM_WORLD ); + ret = MPI_Bcast( & position, 1, MPI_INT, 0, o.com ); return position; } @@ -781,16 +782,18 @@ static void store_position(int position){ fclose(f); } -int md_workbench(int argc, char ** argv){ +int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){ int ret; int printhelp = 0; char * limit_memory_P = NULL; - global_iteration = 0; init_options(); - MPI_Comm_rank(MPI_COMM_WORLD, & o.rank); - MPI_Comm_size(MPI_COMM_WORLD, & o.size); + o.com = world_com; + o.logfile = out_logfile; + + MPI_Comm_rank(o.com, & o.rank); + MPI_Comm_size(o.com, & o.size); if (o.rank == 0 && ! 
o.quiet_output){ printf("Args: %s", argv[0]); @@ -863,7 +866,7 @@ int md_workbench(int argc, char ** argv){ //ret = mem_preallocate(& limit_memory_P, o.limit_memory, o.verbosity >= 3); //if(ret != 0){ // printf("%d: Error allocating memory\n", o.rank); - // MPI_Abort(MPI_COMM_WORLD, 1); + // MPI_Abort(o.com, 1); //} double bench_start; @@ -881,7 +884,7 @@ int md_workbench(int argc, char ** argv){ } } init_stats(& phase_stats, o.precreate * o.dset_count); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(o.com); // pre-creation phase phase_stats.phase_start_timer = GetTimeStamp(); @@ -892,12 +895,12 @@ int md_workbench(int argc, char ** argv){ if (o.phase_benchmark){ // benchmark phase - for(global_iteration = 0; global_iteration < o.iterations; global_iteration++){ + for(o.global_iteration = 0; o.global_iteration < o.iterations; o.global_iteration++){ if(o.adaptive_waiting_mode){ o.relative_waiting_factor = 0; } init_stats(& phase_stats, o.num * o.dset_count); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(o.com); phase_stats.phase_start_timer = GetTimeStamp(); run_benchmark(& phase_stats, & current_index); end_phase("benchmark", & phase_stats); @@ -906,7 +909,7 @@ int md_workbench(int argc, char ** argv){ o.relative_waiting_factor = 0.0625; for(int r=0; r <= 6; r++){ init_stats(& phase_stats, o.num * o.dset_count); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(o.com); phase_stats.phase_start_timer = GetTimeStamp(); run_benchmark(& phase_stats, & current_index); end_phase("benchmark", & phase_stats); diff --git a/src/md-workbench.h b/src/md-workbench.h index c556af8..2bfbddc 100644 --- a/src/md-workbench.h +++ b/src/md-workbench.h @@ -2,6 +2,8 @@ #define IOR_MD_WORKBENCH_H #include +#include +#include // successfull, errors typedef struct { @@ -56,6 +58,6 @@ typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! 
int stonewall_iterations; } phase_stat_t; -int md_workbench(int argc, char ** argv); +int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile); #endif From 654b797788d923a939578c10d5565badf11e5eff Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 19:34:15 +0000 Subject: [PATCH 052/154] Converted output to IOR output. --- src/md-workbench.c | 77 ++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 41 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index d0b37e3..2e0c47b 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -24,6 +24,8 @@ It follows the hierarchical file system semantics in contrast to the md-workbenc #define LLU (long long unsigned) #define min(a,b) (a < b ? a : b) +#define oprintf(...) do { fprintf(o.logfile, __VA_ARGS__); fflush(o.logfile); } while(0); + struct benchmark_options{ ior_aiori_t const * backend; void * backend_options; @@ -309,11 +311,11 @@ static uint64_t aggregate_timers(int repeats, int max_repeats, time_result_t * t static void compute_histogram(const char * name, time_result_t * times, time_statistics_t * stats, size_t repeats, int writeLatencyFile){ if(writeLatencyFile && o.latency_file_prefix ){ - char file[1024]; + char file[MAX_PATHLEN]; sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name); FILE * f = fopen(file, "w+"); if(f == NULL){ - printf("%d: Error writing to latency file: %s\n", o.rank, file); + ERRF("%d: Error writing to latency file: %s\n", o.rank, file); return; } fprintf(f, "time,runtime\n"); @@ -414,16 +416,16 @@ static void end_phase(const char * name, phase_stat_t * p){ if (o.rank == 0){ //print the stats: print_p_stat(buff, name, & g_stat, g_stat.t, 1); - printf("%s\n", buff); + oprintf("%s\n", buff); } if(o.process_report){ if(o.rank == 0){ print_p_stat(buff, name, p, p->t, 0); - printf("0: %s\n", buff); + oprintf("0: %s\n", buff); for(int i=1; i < 
o.size; i++){ MPI_Recv(buff, MAX_PATHLEN, MPI_CHAR, i, 4711, o.com, MPI_STATUS_IGNORE); - printf("%d: %s\n", i, buff); + oprintf("%d: %s\n", i, buff); } }else{ print_p_stat(buff, name, p, p->t, 0); @@ -469,8 +471,7 @@ void run_precreate(phase_stat_t * s, int current_index){ }else{ s->dset_create.err++; if (! o.ignore_precreate_errors){ - printf("%d: Error while creating the dset: %s\n", o.rank, dset); - MPI_Abort(o.com, 1); + ERRF("%d: Error while creating the dset: %s\n", o.rank, dset); } } } @@ -497,9 +498,7 @@ void run_precreate(phase_stat_t * s, int current_index){ }else{ s->obj_create.err++; if (! o.ignore_precreate_errors){ - printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); - fflush(stdout); - MPI_Abort(o.com, 1); + ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name); } } o.backend->close(aiori_fh, o.backend_options); @@ -507,7 +506,7 @@ void run_precreate(phase_stat_t * s, int current_index){ add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if (o.verbosity >= 2){ - printf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + oprintf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); } } } @@ -552,19 +551,19 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - printf("%d: stat %s (%d)\n", o.rank, obj_name, ret); + oprintf("%d: stat %s (%d)\n", o.rank, obj_name, ret); } if(ret != 0){ if (o.verbosity) - printf("%d: Error while stating the obj: %s\n", o.rank, obj_name); + ERRF("%d: Error while stating the obj: %s\n", o.rank, obj_name); s->obj_stat.err++; continue; } s->obj_stat.suc++; if (o.verbosity >= 2){ - printf("%d: read %s \n", o.rank, obj_name); + oprintf("%d: read %s \n", o.rank, obj_name); } op_timer = GetTimeStamp(); @@ -576,9 +575,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ s->obj_read.suc++; }else{ s->obj_read.err++; - printf("%d: Error while reading the obj: %s\n", o.rank, obj_name); - 
fflush(stdout); - MPI_Abort(o.com, 1); + ERRF("%d: Error while reading the obj: %s\n", o.rank, obj_name); } o.backend->close(aiori_fh, o.backend_options); @@ -598,7 +595,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - printf("%d: delete %s\n", o.rank, obj_name); + oprintf("%d: delete %s\n", o.rank, obj_name); } s->obj_delete.suc++; @@ -615,9 +612,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ }else{ s->obj_create.err++; if (! o.ignore_precreate_errors){ - printf("%d: Error while creating the obj: %s\n", o.rank, obj_name); - fflush(stdout); - MPI_Abort(o.com, 1); + ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name); } } o.backend->close(aiori_fh, o.backend_options); @@ -628,13 +623,13 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - printf("%d: write %s (%d)\n", o.rank, obj_name, ret); + oprintf("%d: write %s (%d)\n", o.rank, obj_name, ret); } } // end loop if(armed_stone_wall && bench_runtime >= o.stonewall_timer){ if(o.verbosity){ - printf("%d: stonewall runtime %fs (%ds)\n", o.rank, bench_runtime, o.stonewall_timer); + oprintf("%d: stonewall runtime %fs (%ds)\n", o.rank, bench_runtime, o.stonewall_timer); } if(! 
o.stonewall_timer_wear_out){ s->stonewall_iterations = f; @@ -649,7 +644,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ s->phase_start_timer = GetTimeStamp(); s->stonewall_iterations = total_num; if(o.rank == 0){ - printf("stonewall wear out %fs (%d iter)\n", bench_runtime, total_num); + oprintf("stonewall wear out %fs (%d iter)\n", bench_runtime, total_num); } if(f == total_num){ break; @@ -694,7 +689,7 @@ void run_cleanup(phase_stat_t * s, int start_index){ add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); if (o.verbosity >= 2){ - printf("%d: delete %s\n", o.rank, obj_name); + oprintf("%d: delete %s\n", o.rank, obj_name); } s->obj_delete.suc++; } @@ -703,10 +698,10 @@ void run_cleanup(phase_stat_t * s, int start_index){ if (o.backend->rmdir(dset, o.backend_options) == 0) { s->dset_delete.suc++; }else{ - printf("Unable to remove directory %s\n", dset); + oprintf("Unable to remove directory %s\n", dset); } if (o.verbosity >= 2){ - printf("%d: delete dset %s\n", o.rank, dset); + oprintf("%d: delete dset %s\n", o.rank, dset); } } } @@ -747,7 +742,7 @@ static void printTime(){ char buff[100]; time_t now = time(0); strftime (buff, 100, "%Y-%m-%d %H:%M:%S", localtime (&now)); - printf("%s\n", buff); + oprintf("%s\n", buff); } static int return_position(){ @@ -755,12 +750,12 @@ static int return_position(){ if( o.rank == 0){ FILE * f = fopen(o.run_info_file, "r"); if(! f){ - printf("[ERROR] Could not open %s for restart\n", o.run_info_file); + ERRF("[ERROR] Could not open %s for restart\n", o.run_info_file); exit(1); } ret = fscanf(f, "pos: %d", & position); if (ret != 1){ - printf("Could not read from %s for restart\n", o.run_info_file); + ERRF("Could not read from %s for restart\n", o.run_info_file); exit(1); } fclose(f); @@ -775,7 +770,7 @@ static void store_position(int position){ } FILE * f = fopen(o.run_info_file, "w"); if(! 
f){ - printf("[ERROR] Could not open %s for saving data\n", o.run_info_file); + ERRF("[ERROR] Could not open %s for saving data\n", o.run_info_file); exit(1); } fprintf(f, "pos: %d\n", position); @@ -796,11 +791,11 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf MPI_Comm_size(o.com, & o.size); if (o.rank == 0 && ! o.quiet_output){ - printf("Args: %s", argv[0]); + oprintf("Args: %s", argv[0]); for(int i=1; i < argc; i++){ - printf(" \"%s\"", argv[i]); + oprintf(" \"%s\"", argv[i]); } - printf("\n"); + oprintf("\n"); } memset(& o.hints, 0, sizeof(o.hints)); @@ -821,7 +816,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf } if (! o.phase_precreate && o.phase_benchmark && o.stonewall_timer && ! o.stonewall_timer_wear_out){ if(o.rank == 0) - printf("Invalid options, if running only the benchmark phase using -2 with stonewall option then use stonewall wear-out\n"); + ERR("Invalid options, if running only the benchmark phase using -2 with stonewall option then use stonewall wear-out"); exit(1); } @@ -843,23 +838,23 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf } if(o.start_item_number){ - printf("Using start position %lld\n", (long long) o.start_item_number); + oprintf("Using start position %lld\n", (long long) o.start_item_number); current_index = o.start_item_number; } size_t total_obj_count = o.dset_count * (size_t) (o.num * o.iterations + o.precreate) * o.size; if (o.rank == 0 && ! 
o.quiet_output){ - printf("MD-Workbench total objects: %zu workingset size: %.3f MiB (version: %s) time: ", total_obj_count, ((double) o.size) * o.dset_count * o.precreate * o.file_size / 1024.0 / 1024.0, PACKAGE_VERSION); + oprintf("MD-Workbench total objects: %zu workingset size: %.3f MiB (version: %s) time: ", total_obj_count, ((double) o.size) * o.dset_count * o.precreate * o.file_size / 1024.0 / 1024.0, PACKAGE_VERSION); printTime(); if(o.num > o.precreate){ - printf("WARNING: num > precreate, this may cause the situation that no objects are available to read\n"); + oprintf("WARNING: num > precreate, this may cause the situation that no objects are available to read\n"); } } if ( o.rank == 0 && ! o.quiet_output ){ // print the set output options option_print_current(options); - printf("\n"); + oprintf("\n"); } // preallocate memory if necessary @@ -929,7 +924,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf if (o.rank == 0){ if (o.backend->rmdir(o.prefix, o.backend_options) != 0) { - printf("Unable to remove directory %s\n", o.prefix); + oprintf("Unable to remove directory %s\n", o.prefix); } } }else{ @@ -941,7 +936,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf o.backend->finalize(o.backend_options); } if (o.rank == 0 && ! o.quiet_output){ - printf("Total runtime: %.0fs time: ", t_all); + oprintf("Total runtime: %.0fs time: ", t_all); printTime(); } From 82d20f27445a1a5b15946a0951b26584776d1cdc Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 2 Nov 2020 20:12:15 +0000 Subject: [PATCH 053/154] Basic API converted. 
--- src/aiori-POSIX.c | 2 +- src/md-workbench-main.c | 7 +++++-- src/md-workbench.c | 22 ++++++++++++++-------- src/md-workbench.h | 5 +++-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index e8933b7..2f5bcd7 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -100,7 +100,7 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o #endif #ifdef HAVE_LUSTRE_USER - {0, "posix.lustre.stripecount", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_count}, + {0, "posix.lustre.stripecount", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_count}, {0, "posix.lustre.stripesize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size}, {0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost}, {0, "posix.lustre.ignorelocks", "", OPTION_FLAG, 'd', & o->lustre_ignore_locks}, diff --git a/src/md-workbench-main.c b/src/md-workbench-main.c index 0165824..bb94126 100644 --- a/src/md-workbench-main.c +++ b/src/md-workbench-main.c @@ -4,7 +4,10 @@ int main(int argc, char ** argv){ MPI_Init(& argc, & argv); - int ret = md_workbench_run(argc, argv, MPI_COMM_WORLD, stdout); + //phase_stat_t* results = + md_workbench_run(argc, argv, MPI_COMM_WORLD, stdout); + // API check, access the results of the first phase which is precreate.
+ //printf("Max op runtime: %f\n", results->max_op_time); MPI_Finalize(); - return ret; + return 0; } diff --git a/src/md-workbench.c b/src/md-workbench.c index 2e0c47b..b1c4dc8 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -38,6 +38,8 @@ struct benchmark_options{ int precreate; int dset_count; + int result_position; // in the global structure + int offset; int iterations; int global_iteration; @@ -339,7 +341,7 @@ static void compute_histogram(const char * name, time_result_t * times, time_sta stats->max = times[repeats - 1].runtime; } -static void end_phase(const char * name, phase_stat_t * p){ +static void end_phase(const char * name, phase_stat_t * p, phase_stat_t * result){ int ret; char buff[MAX_PATHLEN]; @@ -449,6 +451,10 @@ static void end_phase(const char * name, phase_stat_t * p){ free(g_stat.time_delete); } + // copy the result back for the API + memcpy(& result[o.result_position], & g_stat, sizeof(g_stat)); + o.result_position++; + // allocate memory if necessary // ret = mem_preallocate(& limit_memory_P, o.limit_memory_between_phases, o.verbosity >= 3); // if( ret != 0){ @@ -777,7 +783,7 @@ static void store_position(int position){ fclose(f); } -int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){ +phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){ int ret; int printhelp = 0; char * limit_memory_P = NULL; @@ -867,6 +873,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf double bench_start; bench_start = GetTimeStamp(); phase_stat_t phase_stats; + phase_stat_t* all_phases_stats = malloc(sizeof(phase_stat_t) * (2 + o.iterations)); if(o.rank == 0 && o.print_detailed_stats && ! 
o.quiet_output){ print_detailed_stat_header(); @@ -885,7 +892,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf phase_stats.phase_start_timer = GetTimeStamp(); run_precreate(& phase_stats, current_index); phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; - end_phase("precreate", & phase_stats); + end_phase("precreate", & phase_stats, all_phases_stats); } if (o.phase_benchmark){ @@ -898,7 +905,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf MPI_Barrier(o.com); phase_stats.phase_start_timer = GetTimeStamp(); run_benchmark(& phase_stats, & current_index); - end_phase("benchmark", & phase_stats); + end_phase("benchmark", & phase_stats, all_phases_stats); if(o.adaptive_waiting_mode){ o.relative_waiting_factor = 0.0625; @@ -907,7 +914,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf MPI_Barrier(o.com); phase_stats.phase_start_timer = GetTimeStamp(); run_benchmark(& phase_stats, & current_index); - end_phase("benchmark", & phase_stats); + end_phase("benchmark", & phase_stats, all_phases_stats); o.relative_waiting_factor *= 2; } } @@ -920,7 +927,7 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf phase_stats.phase_start_timer = GetTimeStamp(); run_cleanup(& phase_stats, current_index); phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; - end_phase("cleanup", & phase_stats); + end_phase("cleanup", & phase_stats, all_phases_stats); if (o.rank == 0){ if (o.backend->rmdir(o.prefix, o.backend_options) != 0) { @@ -939,7 +946,6 @@ int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logf oprintf("Total runtime: %.0fs time: ", t_all); printTime(); } - //mem_free_preallocated(& limit_memory_P); - return 0; + return all_phases_stats; } diff --git a/src/md-workbench.h b/src/md-workbench.h index 2bfbddc..e8794f5 100644 --- a/src/md-workbench.h +++ b/src/md-workbench.h @@ -40,7 +40,7 @@ typedef 
struct{ // NOTE: if this type is changed, adjust end_phase() !!! op_stat_t obj_stat; op_stat_t obj_delete; - // time measurements individual runs + // time measurements of individual runs, these are not returned for now by the API! uint64_t repeats; time_result_t * time_create; time_result_t * time_read; @@ -58,6 +58,7 @@ typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! int stonewall_iterations; } phase_stat_t; -int md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile); +// @Return The first statistics returned are precreate, then iteration many benchmark runs, the last is cleanup +phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile); #endif From cb397242f9b896acd3429d3125303a340112b629 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 3 Nov 2020 04:01:09 -0600 Subject: [PATCH 054/154] DAOS backend cleanup (#266) - remove legacy DAOS driver & update Readme. - update configure options to remove cart requirement - Optimize DFS file get_size Signed-off-by: Mohamad Chaarawi --- README_DAOS | 73 ++---- configure.ac | 26 +-- src/Makefile.am | 3 +- src/aiori-DAOS.c | 570 ----------------------------------------------- src/aiori-DFS.c | 32 ++- src/aiori.c | 1 - src/ior.c | 4 +- 7 files changed, 49 insertions(+), 660 deletions(-) delete mode 100644 src/aiori-DAOS.c diff --git a/README_DAOS b/README_DAOS index f54b426..0314277 100644 --- a/README_DAOS +++ b/README_DAOS @@ -4,55 +4,13 @@ Building The DAOS library must be installed on the system. ./bootstrap -./configure --prefix=iorInstallDir --with-daos=DIR --with-cart=DIR - -One must specify "--with-daos=/path/to/daos/install and --with-cart". When that -is specified the DAOS and DFS driver will be built. - -The DAOS driver uses the DAOS API to open a container (or create it if it -doesn't exist first) then create an array object in that container (file) and -read/write to the array object using the daos Array API. 
The DAOS driver works -with IOR only (no mdtest support yet). The file name used by IOR (passed by -o -option) is hashed to an object ID that is used as the array oid. +./configure --prefix=iorInstallDir --with-daos=DIR The DFS (DAOS File System) driver creates an encapsulated namespace and emulates the POSIX driver using the DFS API directly on top of DAOS. The DFS driver works with both IOR and mdtest. -Running with DAOS API ---------------------- - -ior -a DAOS [ior_options] [daos_options] - -In the IOR options, the file name should be specified as a container uuid using -"-o ". If the "-E" option is given, then this UUID shall denote -an existing container created by a "matching" IOR run. Otherwise, IOR will -create a new container with this UUID. In the latter case, one may use -uuidgen(1) to generate the UUID of the new container. - -The DAOS options include: - -Required Options: ---daos.pool : pool uuid to connect to (has to be created beforehand) ---daos.svcl : pool svcl list (: separated) ---daos.cont : container for the IOR files/objects (can use `uuidgen`) - -Optional Options: ---daos.group : group name of servers with the pool ---daos.chunk_size : Chunk size of the array object controlling striping over DKEYs ---daos.destroy flag to destroy the container on finalize ---daos.oclass : specific object class for array object - -Examples that should work include: - - - "ior -a DAOS -w -W -o file_name --daos.pool --daos.svcl \ - --daos.cont " - - - "ior -a DAOS -w -W -r -R -o file_name -b 1g -t 4m \ - --daos.pool --daos.svcl --daos.cont \ - --daos.chunk_size 1024 --daos.oclass R2" - -Running with DFS API +Running --------------------- ior -a DFS [ior_options] [dfs_options] @@ -64,15 +22,17 @@ Required Options: --dfs.cont : container uuid that will hold the encapsulated namespace Optional Options: ---dfs.group : group name of servers with the pool ---dfs.chunk_size : Chunk size of the files ---dfs.destroy flag to destroy the container on finalize ---dfs.oclass 
: specific object class for files +--dfs.group : group name of servers with the pool (default: daos_server) +--dfs.chunk_size : Chunk size of the files (default: 1MiB) +--dfs.destroy: flag to destroy the container on finalize (default: no) +--dfs.oclass : specific object class for files (default: SX) +--dfs.dir_oclass : specific object class for directories (default: SX) +--dfs.prefix : absolute path to account for DFS files/dirs before the cont root -In the IOR options, the file name should be specified on the root dir directly -since ior does not create directories and the DFS container representing the -encapsulated namespace is not the same as the system namespace the user is -executing from. +If prefix is not set, in the IOR options, the file name should be specified on +the root dir directly since ior does not create directories and the DFS +container representing the encapsulated namespace is not the same as the system +namespace the user is executing from. Examples that should work include: - "ior -a DFS -w -W -o /test1 --dfs.pool --dfs.svcl --dfs.cont " @@ -80,7 +40,8 @@ Examples that should work include: - "ior -a DFS -w -r -o /test3 -b 8g -t 1m -C --dfs.pool --dfs.svcl --dfs.cont " Running mdtest, the user needs to specify a directory with -d where the test -tree will be created. Some examples: - - "mdtest -a DFS -n 100 -F -D -d /bla --dfs.pool --dfs.svcl --dfs.cont " - - "mdtest -a DFS -n 1000 -F -C -d /bla --dfs.pool --dfs.svcl --dfs.cont " - - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d /bla --dfs.pool --dfs.svcl --dfs.cont " +tree will be created (set '/' if writing to the root of the DFS container). 
Some +examples: + - "mdtest -a DFS -n 100 -F -D -d / --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -n 1000 -F -C -d / --dfs.pool --dfs.svcl --dfs.cont " + - "mdtest -a DFS -I 10 -z 5 -b 2 -L -d / --dfs.pool --dfs.svcl --dfs.cont " diff --git a/configure.ac b/configure.ac index 1253b51..8a859c8 100755 --- a/configure.ac +++ b/configure.ac @@ -239,40 +239,26 @@ AM_COND_IF([USE_CEPHFS_AIORI],[ AC_DEFINE([USE_CEPHFS_AIORI], [], [Build CEPHFS backend AIORI]) ]) -# DAOS Backends (DAOS and DFS) IO support require DAOS and CART/GURT -AC_ARG_WITH([cart], - [AS_HELP_STRING([--with-cart], - [support IO with DAOS backends @<:@default=no@:>@])], - [], [with_daos=no]) - -AS_IF([test "x$with_cart" != xno], [ - CART="yes" - LDFLAGS="$LDFLAGS -L$with_cart/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_cart/lib64" - LDFLAGS="$LDFLAGS -L$with_cart/lib -Wl,--enable-new-dtags -Wl,-rpath=$with_cart/lib" - CPPFLAGS="$CPPFLAGS -I$with_cart/include/" - AC_CHECK_HEADERS(gurt/common.h,, [unset CART]) - AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset CART]) -]) - +# DAOS-FS Backend (DFS) AC_ARG_WITH([daos], [AS_HELP_STRING([--with-daos], - [support IO with DAOS backends @<:@default=no@:>@])], + [support IO with DAOS backend @<:@default=no@:>@])], [], [with_daos=no]) - AS_IF([test "x$with_daos" != xno], [ DAOS="yes" LDFLAGS="$LDFLAGS -L$with_daos/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_daos/lib64" CPPFLAGS="$CPPFLAGS -I$with_daos/include" - AC_CHECK_HEADERS(daos_types.h,, [unset DAOS]) + AC_CHECK_HEADERS(gurt/common.h,, [unset DAOS]) + AC_CHECK_HEADERS(daos.h,, [unset DAOS]) + AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset DAOS]) AC_CHECK_LIB([uuid], [uuid_generate],, [unset DAOS]) AC_CHECK_LIB([daos_common], [daos_sgl_init],, [unset DAOS]) AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS]) ]) - AM_CONDITIONAL([USE_DAOS_AIORI], [test x$DAOS = xyes]) AM_COND_IF([USE_DAOS_AIORI],[ - AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS backends 
AIORI]) + AC_DEFINE([USE_DAOS_AIORI], [], [Build DAOS-FS backend AIORI]) ]) # Gfarm support diff --git a/src/Makefile.am b/src/Makefile.am index 0adbf32..41bccc9 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -87,7 +87,8 @@ endif if USE_DAOS_AIORI -extraSOURCES += aiori-DAOS.c aiori-DFS.c +extraSOURCES += aiori-DFS.c +extraLDADD += -lgurt -ldaos_common -ldaos -ldfs -luuid endif if USE_GFARM_AIORI diff --git a/src/aiori-DAOS.c b/src/aiori-DAOS.c deleted file mode 100644 index f2a096e..0000000 --- a/src/aiori-DAOS.c +++ /dev/null @@ -1,570 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -/* - * Copyright (C) 2018-2020 Intel Corporation - * See the file COPYRIGHT for a complete copyright notice and license. - */ - -/* - * This file implements the abstract I/O interface for DAOS Array API. - */ - -#define _BSD_SOURCE - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "aiori.h" -#include "utilities.h" -#include "iordef.h" - -/************************** O P T I O N S *****************************/ -typedef struct { - char *pool; - char *svcl; - char *group; - char *cont; - int chunk_size; - int destroy; - char *oclass; -} DAOS_options_t; - -static option_help * DAOS_options(aiori_mod_opt_t ** init_backend_options, - aiori_mod_opt_t * init_values){ - DAOS_options_t * o = malloc(sizeof(DAOS_options_t)); - - if (init_values != NULL) { - memcpy(o, init_values, sizeof(DAOS_options_t)); - } else { - memset(o, 0, sizeof(DAOS_options_t)); - /* initialize the options properly */ - o->chunk_size = 1048576; - } - - *init_backend_options = (aiori_mod_opt_t *) o; - - option_help h [] = { - {0, "daos.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o->pool}, - {0, "daos.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', &o->svcl}, - {0, "daos.group", "server group", 
OPTION_OPTIONAL_ARGUMENT, 's', &o->group}, - {0, "daos.cont", "container uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o->cont}, - {0, "daos.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o->chunk_size}, - {0, "daos.destroy", "Destroy Container", OPTION_FLAG, 'd', &o->destroy}, - {0, "daos.oclass", "object class", OPTION_OPTIONAL_ARGUMENT, 's', &o->oclass}, - LAST_OPTION - }; - - option_help * help = malloc(sizeof(h)); - memcpy(help, h, sizeof(h)); - return help; -} - -/**************************** P R O T O T Y P E S *****************************/ - -static void DAOS_Init(aiori_mod_opt_t *); -static void DAOS_Fini(aiori_mod_opt_t *); -static aiori_fd_t *DAOS_Create(char *, int, aiori_mod_opt_t *); -static aiori_fd_t *DAOS_Open(char *, int, aiori_mod_opt_t *); -static int DAOS_Access(const char *, int, aiori_mod_opt_t *); -static IOR_offset_t DAOS_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, - IOR_offset_t, aiori_mod_opt_t *); -static void DAOS_Close(aiori_fd_t *, aiori_mod_opt_t *); -static void DAOS_Delete(char *, aiori_mod_opt_t *); -static char* DAOS_GetVersion(); -static void DAOS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); -static IOR_offset_t DAOS_GetFileSize(aiori_mod_opt_t *, char *); -static option_help * DAOS_options(); -static void DAOS_init_xfer_options(aiori_xfer_hint_t *); -static int DAOS_check_params(aiori_mod_opt_t *); - -/************************** D E C L A R A T I O N S ***************************/ - -ior_aiori_t daos_aiori = { - .name = "DAOS", - .initialize = DAOS_Init, - .finalize = DAOS_Fini, - .create = DAOS_Create, - .open = DAOS_Open, - .access = DAOS_Access, - .xfer = DAOS_Xfer, - .close = DAOS_Close, - .delete = DAOS_Delete, - .get_version = DAOS_GetVersion, - .xfer_hints = DAOS_init_xfer_options, - .fsync = DAOS_Fsync, - .get_file_size = DAOS_GetFileSize, - .statfs = aiori_posix_statfs, - .mkdir = aiori_posix_mkdir, - .rmdir = aiori_posix_rmdir, - .stat = aiori_posix_stat, - .get_options = DAOS_options, - .xfer_hints = 
DAOS_init_xfer_options, - .check_params = DAOS_check_params, - .enable_mdtest = false, -}; - -#define IOR_DAOS_MUR_SEED 0xDEAD10CC - -enum handleType { - POOL_HANDLE, - CONT_HANDLE, - ARRAY_HANDLE -}; - -static daos_handle_t poh; -static daos_handle_t coh; -static daos_handle_t aoh; -static daos_oclass_id_t objectClass = OC_SX; -static bool daos_initialized = false; - -/***************************** F U N C T I O N S ******************************/ - -/* For DAOS methods. */ -#define DCHECK(rc, format, ...) \ -do { \ - int _rc = (rc); \ - \ - if (_rc < 0) { \ - fprintf(stderr, "ior ERROR (%s:%d): %d: %d: " \ - format"\n", __FILE__, __LINE__, rank, _rc, \ - ##__VA_ARGS__); \ - fflush(stdout); \ - MPI_Abort(MPI_COMM_WORLD, -1); \ - } \ -} while (0) - -#define INFO(level, format, ...) \ -do { \ - if (verbose >= level) \ - printf("[%d] "format"\n", rank, ##__VA_ARGS__); \ -} while (0) - -/* For generic errors like invalid command line options. */ -#define GERR(format, ...) \ -do { \ - fprintf(stderr, format"\n", ##__VA_ARGS__); \ - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "MPI_Abort() error"); \ -} while (0) - -static aiori_xfer_hint_t * hints = NULL; - -void DAOS_init_xfer_options(aiori_xfer_hint_t * params) -{ - hints = params; -} - -static int DAOS_check_params(aiori_mod_opt_t * options){ - DAOS_options_t *o = (DAOS_options_t *) options; - - if (o->pool == NULL || o->svcl == NULL || o->cont == NULL) - ERR("Invalid pool or container options\n"); - - return 0; -} - -/* Distribute process 0's pool or container handle to others. */ -static void -HandleDistribute(daos_handle_t *handle, enum handleType type) -{ - d_iov_t global; - int rc; - - global.iov_buf = NULL; - global.iov_buf_len = 0; - global.iov_len = 0; - - if (rank == 0) { - /* Get the global handle size. 
*/ - if (type == POOL_HANDLE) - rc = daos_pool_local2global(*handle, &global); - else if (type == CONT_HANDLE) - rc = daos_cont_local2global(*handle, &global); - else - rc = daos_array_local2global(*handle, &global); - DCHECK(rc, "Failed to get global handle size"); - } - - MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, - MPI_COMM_WORLD), - "Failed to bcast global handle buffer size"); - - global.iov_len = global.iov_buf_len; - global.iov_buf = malloc(global.iov_buf_len); - if (global.iov_buf == NULL) - ERR("Failed to allocate global handle buffer"); - - if (rank == 0) { - if (type == POOL_HANDLE) - rc = daos_pool_local2global(*handle, &global); - else if (type == CONT_HANDLE) - rc = daos_cont_local2global(*handle, &global); - else - rc = daos_array_local2global(*handle, &global); - DCHECK(rc, "Failed to create global handle"); - } - - MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, - MPI_COMM_WORLD), - "Failed to bcast global pool handle"); - - if (rank != 0) { - if (type == POOL_HANDLE) - rc = daos_pool_global2local(global, handle); - else if (type == CONT_HANDLE) - rc = daos_cont_global2local(poh, global, handle); - else - rc = daos_array_global2local(coh, global, 0, handle); - DCHECK(rc, "Failed to get local handle"); - } - - free(global.iov_buf); -} - -static void -DAOS_Init(aiori_mod_opt_t * options) -{ - DAOS_options_t *o = (DAOS_options_t *)options; - int rc; - - if (daos_initialized) - return; - - if (o->pool == NULL || o->svcl == NULL || o->cont == NULL) - return; - - if (o->oclass) { - objectClass = daos_oclass_name2id(o->oclass); - if (objectClass == OC_UNKNOWN) - GERR("Invalid DAOS Object class %s\n", o->oclass); - } - - rc = daos_init(); - if (rc) - DCHECK(rc, "Failed to initialize daos"); - - if (rank == 0) { - uuid_t uuid; - d_rank_list_t *svcl = NULL; - static daos_pool_info_t po_info; - static daos_cont_info_t co_info; - - INFO(VERBOSE_1, "Connecting to pool %s", o->pool); - - rc = uuid_parse(o->pool, uuid); - 
DCHECK(rc, "Failed to parse 'pool': %s", o->pool); - - svcl = daos_rank_list_parse(o->svcl, ":"); - if (svcl == NULL) - ERR("Failed to allocate svcl"); - - rc = daos_pool_connect(uuid, o->group, svcl, DAOS_PC_RW, - &poh, &po_info, NULL); - d_rank_list_free(svcl); - DCHECK(rc, "Failed to connect to pool %s", o->pool); - - INFO(VERBOSE_1, "Create/Open Container %s", o->cont); - - uuid_clear(uuid); - rc = uuid_parse(o->cont, uuid); - DCHECK(rc, "Failed to parse 'cont': %s", o->cont); - - rc = daos_cont_open(poh, uuid, DAOS_COO_RW, &coh, &co_info, - NULL); - /* If NOEXIST we create it */ - if (rc == -DER_NONEXIST) { - INFO(VERBOSE_2, "Creating DAOS Container...\n"); - rc = daos_cont_create(poh, uuid, NULL, NULL); - if (rc == 0) - rc = daos_cont_open(poh, uuid, DAOS_COO_RW, - &coh, &co_info, NULL); - } - DCHECK(rc, "Failed to create container"); - } - - HandleDistribute(&poh, POOL_HANDLE); - HandleDistribute(&coh, CONT_HANDLE); - aoh.cookie = 0; - - daos_initialized = true; -} - -static void -DAOS_Fini(aiori_mod_opt_t *options) -{ - DAOS_options_t *o = (DAOS_options_t *)options; - int rc; - - if (!daos_initialized) - return; - - MPI_Barrier(MPI_COMM_WORLD); - rc = daos_cont_close(coh, NULL); - if (rc) { - DCHECK(rc, "Failed to close container %s (%d)", o->cont, rc); - MPI_Abort(MPI_COMM_WORLD, -1); - } - MPI_Barrier(MPI_COMM_WORLD); - - if (o->destroy) { - if (rank == 0) { - uuid_t uuid; - double t1, t2; - - INFO(VERBOSE_1, "Destroying DAOS Container %s", o->cont); - uuid_parse(o->cont, uuid); - t1 = MPI_Wtime(); - rc = daos_cont_destroy(poh, uuid, 1, NULL); - t2 = MPI_Wtime(); - if (rc == 0) - INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); - } - - MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); - if (rc) { - if (rank == 0) - DCHECK(rc, "Failed to destroy container %s (%d)", o->cont, rc); - MPI_Abort(MPI_COMM_WORLD, -1); - } - } - - if (rank == 0) - INFO(VERBOSE_1, "Disconnecting from DAOS POOL.."); - - rc = daos_pool_disconnect(poh, NULL); - DCHECK(rc, 
"Failed to disconnect from pool %s", o->pool); - - MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); - if (rank == 0) - INFO(VERBOSE_1, "Finalizing DAOS.."); - - rc = daos_fini(); - DCHECK(rc, "Failed to finalize daos"); - - daos_initialized = false; -} - -static void -gen_oid(const char *name, daos_obj_id_t *oid) -{ - oid->lo = d_hash_murmur64(name, strlen(name), IOR_DAOS_MUR_SEED); - oid->hi = 0; - - daos_array_generate_id(oid, objectClass, true, 0); -} - -static aiori_fd_t * -DAOS_Create(char *testFileName, int flags, aiori_mod_opt_t *param) -{ - DAOS_options_t *o = (DAOS_options_t*) param; - daos_obj_id_t oid; - int rc; - - /** Convert file name into object ID */ - gen_oid(testFileName, &oid); - - /** Create the array */ - if (hints->filePerProc || rank == 0) { - rc = daos_array_create(coh, oid, DAOS_TX_NONE, 1, o->chunk_size, - &aoh, NULL); - DCHECK(rc, "Failed to create array object\n"); - } - - /** Distribute the array handle if not FPP */ - if (!hints->filePerProc) - HandleDistribute(&aoh, ARRAY_HANDLE); - - return (aiori_fd_t*)(&aoh); -} - -static int -DAOS_Access(const char *testFileName, int mode, aiori_mod_opt_t * param) -{ - daos_obj_id_t oid; - daos_size_t cell_size, chunk_size; - int rc; - - /** Convert file name into object ID */ - gen_oid(testFileName, &oid); - - rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, - &cell_size, &chunk_size, &aoh, NULL); - if (rc) - return rc; - - if (cell_size != 1) - GERR("Invalid DAOS Array object.\n"); - - rc = daos_array_close(aoh, NULL); - aoh.cookie = 0; - return rc; -} - -static aiori_fd_t * -DAOS_Open(char *testFileName, int flags, aiori_mod_opt_t *param) -{ - daos_obj_id_t oid; - - /** Convert file name into object ID */ - gen_oid(testFileName, &oid); - - /** Open the array */ - if (hints->filePerProc || rank == 0) { - daos_size_t cell_size, chunk_size; - int rc; - - rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, - &cell_size, &chunk_size, &aoh, NULL); - DCHECK(rc, "Failed to create 
array object\n"); - - if (cell_size != 1) - GERR("Invalid DAOS Array object.\n"); - } - - /** Distribute the array handle if not FPP */ - if (!hints->filePerProc) - HandleDistribute(&aoh, ARRAY_HANDLE); - - return (aiori_fd_t*)(&aoh); -} - -static IOR_offset_t -DAOS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, IOR_offset_t length, - IOR_offset_t off, aiori_mod_opt_t *param) -{ - daos_array_iod_t iod; - daos_range_t rg; - d_sg_list_t sgl; - d_iov_t iov; - int rc; - - /** set array location */ - iod.arr_nr = 1; - rg.rg_len = length; - rg.rg_idx = off; - iod.arr_rgs = &rg; - - /** set memory location */ - sgl.sg_nr = 1; - d_iov_set(&iov, buffer, length); - sgl.sg_iovs = &iov; - - if (access == WRITE) { - rc = daos_array_write(aoh, DAOS_TX_NONE, &iod, &sgl, NULL); - DCHECK(rc, "daos_array_write() failed (%d).", rc); - } else { - rc = daos_array_read(aoh, DAOS_TX_NONE, &iod, &sgl, NULL); - DCHECK(rc, "daos_array_read() failed (%d).", rc); - } - - return length; -} - -static void -DAOS_Close(aiori_fd_t *file, aiori_mod_opt_t *param) -{ - int rc; - - if (!daos_initialized) - GERR("DAOS is not initialized!"); - - rc = daos_array_close(aoh, NULL); - DCHECK(rc, "daos_array_close() failed (%d).", rc); - - aoh.cookie = 0; -} - -static void -DAOS_Delete(char *testFileName, aiori_mod_opt_t *param) -{ - daos_obj_id_t oid; - daos_size_t cell_size, chunk_size; - int rc; - - if (!daos_initialized) - GERR("DAOS is not initialized!"); - - /** Convert file name into object ID */ - gen_oid(testFileName, &oid); - - /** open the array to verify it exists */ - rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RW, - &cell_size, &chunk_size, &aoh, NULL); - DCHECK(rc, "daos_array_open() failed (%d).", rc); - - if (cell_size != 1) - GERR("Invalid DAOS Array object.\n"); - - rc = daos_array_destroy(aoh, DAOS_TX_NONE, NULL); - DCHECK(rc, "daos_array_destroy() failed (%d).", rc); - - rc = daos_array_close(aoh, NULL); - DCHECK(rc, "daos_array_close() failed (%d).", rc); - aoh.cookie = 
0; -} - -static char * -DAOS_GetVersion() -{ - static char ver[1024] = {}; - - sprintf(ver, "%s", "DAOS"); - return ver; -} - -static void -DAOS_Fsync(aiori_fd_t *file, aiori_mod_opt_t *param) -{ - return; -} - -static IOR_offset_t -DAOS_GetFileSize(aiori_mod_opt_t *param, char *testFileName) -{ - daos_obj_id_t oid; - daos_size_t size; - int rc; - - if (!daos_initialized) - GERR("DAOS is not initialized!"); - - /** Convert file name into object ID */ - gen_oid(testFileName, &oid); - - /** open the array to verify it exists */ - if (hints->filePerProc || rank == 0) { - daos_size_t cell_size, chunk_size; - - rc = daos_array_open(coh, oid, DAOS_TX_NONE, DAOS_OO_RO, - &cell_size, &chunk_size, &aoh, NULL); - DCHECK(rc, "daos_array_open() failed (%d).", rc); - - if (cell_size != 1) - GERR("Invalid DAOS Array object.\n"); - - rc = daos_array_get_size(aoh, DAOS_TX_NONE, &size, NULL); - DCHECK(rc, "daos_array_get_size() failed (%d).", rc); - - rc = daos_array_close(aoh, NULL); - DCHECK(rc, "daos_array_close() failed (%d).", rc); - aoh.cookie = 0; - } - - if (!hints->filePerProc) - MPI_Bcast(&size, 1, MPI_LONG, 0, MPI_COMM_WORLD); - - return size; -} diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 62893ae..38e99ca 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -777,21 +777,35 @@ static IOR_offset_t DFS_GetFileSize(aiori_mod_opt_t * test, char *testFileName) { dfs_obj_t *obj; - daos_size_t fsize, tmpMin, tmpMax, tmpSum; + MPI_Comm comm; + daos_size_t fsize; int rc; - rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); - if (rc) { - fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); - return -1; + if (hints->filePerProc == TRUE) { + comm = MPI_COMM_SELF; + } else { + comm = testComm; } - rc = dfs_get_size(dfs, obj, &fsize); - if (rc) - return -1; + if (hints->filePerProc || rank == 0) { + rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); + if (rc) { + fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); + 
return -1; + } - dfs_release(obj); + rc = dfs_get_size(dfs, obj, &fsize); + dfs_release(obj); + if (rc) + return -1; + } + if (!hints->filePerProc) { + rc = MPI_Bcast(&fsize, 1, MPI_UINT64_T, 0, comm); + if (rc) + return rc; + } + return (fsize); } diff --git a/src/aiori.c b/src/aiori.c index 2d8b6c8..897abb6 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -49,7 +49,6 @@ ior_aiori_t *available_aiori[] = { &pmdk_aiori, #endif #ifdef USE_DAOS_AIORI - &daos_aiori, &dfs_aiori, #endif & dummy_aiori, diff --git a/src/ior.c b/src/ior.c index 010a5ce..435fcb2 100755 --- a/src/ior.c +++ b/src/ior.c @@ -349,8 +349,7 @@ static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t da 1, MPI_LONG_LONG_INT, MPI_SUM, testComm), "cannot total data moved"); - if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0 && - strcasecmp(params->api, "DAOS") != 0) { + if (strcasecmp(params->api, "HDF5") != 0 && strcasecmp(params->api, "NCMPI") != 0) { if (verbose >= VERBOSE_0 && rank == 0) { if ((params->expectedAggFileSize != point->aggFileSizeFromXfer) @@ -1609,7 +1608,6 @@ static void ValidateTests(IOR_param_t * test) && (strcasecmp(test->api, "MMAP") != 0) && (strcasecmp(test->api, "HDFS") != 0) && (strcasecmp(test->api, "DFS") != 0) - && (strcasecmp(test->api, "DAOS") != 0) && (strcasecmp(test->api, "Gfarm") != 0) && (strcasecmp(test->api, "RADOS") != 0) && (strcasecmp(test->api, "CEPHFS") != 0)) && test->fsync) From e1dd3103cfc1b94088c54900ca8c5791d48f5ffb Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 3 Nov 2020 10:52:45 +0000 Subject: [PATCH 055/154] Fix make dist for md-workbench. 
--- src/Makefile.am | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 47ede87..d933aa6 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -15,7 +15,7 @@ extraLDADD = extraLDFLAGS = extraCPPFLAGS = -md_workbench_SOURCES = md-workbench.c md-workbench-main.c +md_workbench_SOURCES = md-workbench-main.c md_workbench_LDFLAGS = md_workbench_LDADD = libaiori.a md_workbench_CPPFLAGS = @@ -132,6 +132,16 @@ mdtest_LDFLAGS += $(extraLDFLAGS) mdtest_LDADD += $(extraLDADD) mdtest_CPPFLAGS += $(extraCPPFLAGS) +md_workbench_SOURCES += $(extraSOURCES) +md_workbench_LDFLAGS += $(extraLDFLAGS) +md_workbench_LDADD += $(extraLDADD) +md_workbench_CPPFLAGS += $(extraCPPFLAGS) + +MD_WORKBENCH_SOURCES = $(md_workbench_SOURCES) +MD_WORKBENCH_LDFLAGS = $(md_workbench_LDFLAGS) +MD_WORKBENCH_LDADD = $(md_workbench_LDADD) +MD_WORKBENCH_CPPFLAGS = $(md_workbench_CPPFLAGS) + IOR_SOURCES = $(ior_SOURCES) IOR_LDFLAGS = $(ior_LDFLAGS) IOR_LDADD = $(ior_LDADD) @@ -145,7 +155,8 @@ MDTEST_CPPFLAGS = $(mdtest_CPPFLAGS) libaiori_a_SOURCES += $(extraSOURCES) libaiori_a_CPPFLAGS = $(extraCPPFLAGS) -# Generate config file with build flags to allow reuse of library +# Generate a config file with the build flags to allow the reuse of library +.PHONY: build.conf all-local: build.conf build.conf: @echo LDFLAGS=$(LDFLAGS) $(extraLDFLAGS) $(extraLDADD) > build.conf From 4e452766b96e748da098fbd9e4339dbc14c655dc Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 4 Nov 2020 12:48:46 +0000 Subject: [PATCH 056/154] Enable random seed to be stored. 
--- src/ior.c | 22 +++++++++++----------- src/parse_options.c | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ior.c b/src/ior.c index 010a5ce..dc416e3 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1624,8 +1624,8 @@ static void ValidateTests(IOR_param_t * test) if (test->randomOffset && test->reorderTasks && test->filePerProc == FALSE) ERR("random offset and constant reorder tasks specified with single-shared-file. Choose one and resubmit"); - if (test->randomOffset && test->checkRead) - ERR("random offset not available with read check option (use write check)"); + if (test->randomOffset && test->checkRead && test->randomSeed == -1) + ERR("random offset with read check option requires to set the random seed"); if (test->randomOffset && test->storeFileOffset) ERR("random offset not available with store file offset option)"); if ((strcasecmp(test->api, "HDF5") == 0) && test->randomOffset) @@ -1711,11 +1711,11 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce IOR_offset_t fileSize; IOR_offset_t *offsetArray; - /* set up seed for random() */ - if (access == WRITE || access == READ) { + /* set up seed, each process can determine which regions to access individually */ + if (test->randomSeed == -1) { test->randomSeed = seed = rand(); } else { - seed = test->randomSeed; + seed = test->randomSeed + pretendRank; } srand(seed); @@ -1725,16 +1725,16 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce } /* count needed offsets (pass 1) */ - for (i = 0; i < fileSize; i += test->transferSize) { - if (test->filePerProc == FALSE) { + if (test->filePerProc == FALSE) { + for (i = 0; i < fileSize; i += test->transferSize) { // this counts which process get how many transferes in // a shared file if ((rand() % test->numTasks) == pretendRank) { offsets++; } - } else { - offsets++; - } + } + } else { + offsets += fileSize / test->transferSize; } /* setup empty array */ @@ -1751,7 +1751,7 @@ 
IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce } } else { /* fill with offsets (pass 2) */ - srand(seed); /* need same seed to get same transfers as counted in the beginning*/ + srand(seed); /* need same seedto get same transfers as counted in the beginning*/ for (i = 0; i < fileSize; i += test->transferSize) { if ((rand() % test->numTasks) == pretendRank) { offsetArray[offsetCnt] = i; diff --git a/src/parse_options.c b/src/parse_options.c index c2b5b8c..87e3c91 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -433,6 +433,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount}, {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, + {0, "random-offset-seed", "The seed for -z", OPTION_OPTIONAL_ARGUMENT, 'd', & params->randomSeed}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, From fb66e77072fd075ac91cdae1623bcbdddd135a0c Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Wed, 4 Nov 2020 13:47:35 +0000 Subject: [PATCH 057/154] Enable random seed to be stored. 
(#268) --- src/ior.c | 22 +++++++++++----------- src/parse_options.c | 1 + 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ior.c b/src/ior.c index 435fcb2..55733d5 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1622,8 +1622,8 @@ static void ValidateTests(IOR_param_t * test) if (test->randomOffset && test->reorderTasks && test->filePerProc == FALSE) ERR("random offset and constant reorder tasks specified with single-shared-file. Choose one and resubmit"); - if (test->randomOffset && test->checkRead) - ERR("random offset not available with read check option (use write check)"); + if (test->randomOffset && test->checkRead && test->randomSeed == -1) + ERR("random offset with read check option requires to set the random seed"); if (test->randomOffset && test->storeFileOffset) ERR("random offset not available with store file offset option)"); if ((strcasecmp(test->api, "HDF5") == 0) && test->randomOffset) @@ -1709,11 +1709,11 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce IOR_offset_t fileSize; IOR_offset_t *offsetArray; - /* set up seed for random() */ - if (access == WRITE || access == READ) { + /* set up seed, each process can determine which regions to access individually */ + if (test->randomSeed == -1) { test->randomSeed = seed = rand(); } else { - seed = test->randomSeed; + seed = test->randomSeed + pretendRank; } srand(seed); @@ -1723,16 +1723,16 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce } /* count needed offsets (pass 1) */ - for (i = 0; i < fileSize; i += test->transferSize) { - if (test->filePerProc == FALSE) { + if (test->filePerProc == FALSE) { + for (i = 0; i < fileSize; i += test->transferSize) { // this counts which process get how many transferes in // a shared file if ((rand() % test->numTasks) == pretendRank) { offsets++; } - } else { - offsets++; - } + } + } else { + offsets += fileSize / test->transferSize; } /* setup empty array */ @@ -1749,7 +1749,7 @@ 
IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce } } else { /* fill with offsets (pass 2) */ - srand(seed); /* need same seed to get same transfers as counted in the beginning*/ + srand(seed); /* need same seedto get same transfers as counted in the beginning*/ for (i = 0; i < fileSize; i += test->transferSize) { if ((rand() % test->numTasks) == pretendRank) { offsetArray[offsetCnt] = i; diff --git a/src/parse_options.c b/src/parse_options.c index c2b5b8c..87e3c91 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -433,6 +433,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount}, {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, + {0, "random-offset-seed", "The seed for -z", OPTION_OPTIONAL_ARGUMENT, 'd', & params->randomSeed}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, From b5dfeea82a8e8f7cbaf2b49c60d52f5047cec1e9 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 4 Nov 2020 18:04:46 +0000 Subject: [PATCH 058/154] Remove output for the API. --- src/md-workbench.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index b1c4dc8..2dfc90b 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -859,8 +859,8 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE if ( o.rank == 0 && ! 
o.quiet_output ){ // print the set output options - option_print_current(options); - oprintf("\n"); + // option_print_current(options); + // oprintf("\n"); } // preallocate memory if necessary From d9c74af8f302fd21b5ccb7fde94e57a5f0f647d0 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 4 Nov 2020 19:27:39 +0000 Subject: [PATCH 059/154] Fix (accidently rename of option) --- src/aiori-POSIX.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 2f5bcd7..e8933b7 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -100,7 +100,7 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o #endif #ifdef HAVE_LUSTRE_USER - {0, "posix.lustre.stribeegfs_chunkSizepecount", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_count}, + {0, "posix.lustre.stripecount", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_count}, {0, "posix.lustre.stripesize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size}, {0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost}, {0, "posix.lustre.ignorelocks", "", OPTION_FLAG, 'd', & o->lustre_ignore_locks}, From c0ffdf44d0d78cc554561bbddf37c10c56c49494 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 4 Nov 2020 20:25:27 +0000 Subject: [PATCH 060/154] Workbench API improved. --- src/md-workbench.c | 95 +++++++++++++++++++++++++++++++++++++++------- src/md-workbench.h | 48 +++++++---------------- 2 files changed, 94 insertions(+), 49 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index 2dfc90b..d981324 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -26,6 +26,50 @@ It follows the hierarchical file system semantics in contrast to the md-workbenc #define oprintf(...) 
do { fprintf(o.logfile, __VA_ARGS__); fflush(o.logfile); } while(0); +// successfull, errors +typedef struct { + int suc; + int err; +} op_stat_t; + +// A runtime for an operation and when the operation was started +typedef struct{ + float time_since_app_start; + float runtime; +} time_result_t; + + +// statistics for running a single phase +typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! + double t; // maximum time + double * t_all; + + op_stat_t dset_create; + op_stat_t dset_delete; + + op_stat_t obj_create; + op_stat_t obj_read; + op_stat_t obj_stat; + op_stat_t obj_delete; + + // time measurements of individual runs, these are not returned for now by the API! + uint64_t repeats; + time_result_t * time_create; + time_result_t * time_read; + time_result_t * time_stat; + time_result_t * time_delete; + + time_statistics_t stats_create; + time_statistics_t stats_read; + time_statistics_t stats_stat; + time_statistics_t stats_delete; + + // the maximum time for any single operation + double max_op_time; + double phase_start_timer; + int stonewall_iterations; +} phase_stat_t; + struct benchmark_options{ ior_aiori_t const * backend; void * backend_options; @@ -38,7 +82,7 @@ struct benchmark_options{ int precreate; int dset_count; - int result_position; // in the global structure + mdworkbench_results_t * results; // the results int offset; int iterations; @@ -211,10 +255,13 @@ static void print_p_stat(char * buff, const char * name, phase_stat_t * p, doubl ioops_per_iter = 2; } + double rate; + switch(name[0]){ case('b'): + rate = p->obj_read.suc * ioops_per_iter / t; pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es", - p->obj_read.suc * ioops_per_iter / t, // write, stat, read, delete + rate, // write, stat, read, delete p->obj_read.suc, p->obj_read.suc / t, tp, @@ -225,8 +272,9 @@ static void print_p_stat(char * buff, const char * name, phase_stat_t * p, doubl } break; case('p'): + rate = 
(p->dset_create.suc + p->obj_create.suc) / t; pos += sprintf(buff + pos, "rate:%.1f iops/s dsets: %d objects:%d rate:%.3f dset/s rate:%.1f obj/s tp:%.1f MiB/s op-max:%.4es", - (p->dset_create.suc + p->obj_create.suc) / t, + rate, p->dset_create.suc, p->obj_create.suc, p->dset_create.suc / t, @@ -235,8 +283,9 @@ static void print_p_stat(char * buff, const char * name, phase_stat_t * p, doubl p->max_op_time); break; case('c'): + rate = (p->obj_delete.suc + p->dset_delete.suc) / t; pos += sprintf(buff + pos, "rate:%.1f iops/s objects:%d dsets: %d rate:%.1f obj/s rate:%.3f dset/s op-max:%.4es", - (p->obj_delete.suc + p->dset_delete.suc) / t, + rate, p->obj_delete.suc, p->dset_delete.suc, p->obj_delete.suc / t, @@ -248,6 +297,16 @@ static void print_p_stat(char * buff, const char * name, phase_stat_t * p, doubl break; } + if(print_global){ + mdworkbench_result_t * res = & o.results->result[o.results->count]; + res->errors = errs; + o.results->errors += errs; + res->rate = rate; + res->max_op_time = p->max_op_time; + res->runtime = t; + res->iterations_done = p->repeats; + } + if(! 
o.quiet_output || errs > 0){ pos += sprintf(buff + pos, " (%d errs", errs); if(errs > 0){ @@ -341,7 +400,7 @@ static void compute_histogram(const char * name, time_result_t * times, time_sta stats->max = times[repeats - 1].runtime; } -static void end_phase(const char * name, phase_stat_t * p, phase_stat_t * result){ +static void end_phase(const char * name, phase_stat_t * p){ int ret; char buff[MAX_PATHLEN]; @@ -452,8 +511,13 @@ static void end_phase(const char * name, phase_stat_t * p, phase_stat_t * result } // copy the result back for the API - memcpy(& result[o.result_position], & g_stat, sizeof(g_stat)); - o.result_position++; + mdworkbench_result_t * res = & o.results->result[o.results->count]; + memcpy(& res->stats_create, & g_stat.stats_create, sizeof(time_statistics_t)); + memcpy(& res->stats_read, & g_stat.stats_read, sizeof(time_statistics_t)); + memcpy(& res->stats_stat, & g_stat.stats_stat, sizeof(time_statistics_t)); + memcpy(& res->stats_delete, & g_stat.stats_delete, sizeof(time_statistics_t)); + + o.results->count++; // allocate memory if necessary // ret = mem_preallocate(& limit_memory_P, o.limit_memory_between_phases, o.verbosity >= 3); @@ -783,7 +847,7 @@ static void store_position(int position){ fclose(f); } -phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){ +mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile){ int ret; int printhelp = 0; char * limit_memory_P = NULL; @@ -873,7 +937,10 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE double bench_start; bench_start = GetTimeStamp(); phase_stat_t phase_stats; - phase_stat_t* all_phases_stats = malloc(sizeof(phase_stat_t) * (2 + o.iterations)); + size_t result_count = (2 + o.iterations) * (o.adaptive_waiting_mode ? 
7 : 1); + o.results = malloc(sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count); + memset(o.results, 0, sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count); + o.results->count = 0; if(o.rank == 0 && o.print_detailed_stats && ! o.quiet_output){ print_detailed_stat_header(); @@ -892,7 +959,7 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE phase_stats.phase_start_timer = GetTimeStamp(); run_precreate(& phase_stats, current_index); phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; - end_phase("precreate", & phase_stats, all_phases_stats); + end_phase("precreate", & phase_stats); } if (o.phase_benchmark){ @@ -905,7 +972,7 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE MPI_Barrier(o.com); phase_stats.phase_start_timer = GetTimeStamp(); run_benchmark(& phase_stats, & current_index); - end_phase("benchmark", & phase_stats, all_phases_stats); + end_phase("benchmark", & phase_stats); if(o.adaptive_waiting_mode){ o.relative_waiting_factor = 0.0625; @@ -914,7 +981,7 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE MPI_Barrier(o.com); phase_stats.phase_start_timer = GetTimeStamp(); run_benchmark(& phase_stats, & current_index); - end_phase("benchmark", & phase_stats, all_phases_stats); + end_phase("benchmark", & phase_stats); o.relative_waiting_factor *= 2; } } @@ -927,7 +994,7 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE phase_stats.phase_start_timer = GetTimeStamp(); run_cleanup(& phase_stats, current_index); phase_stats.t = GetTimeStamp() - phase_stats.phase_start_timer; - end_phase("cleanup", & phase_stats, all_phases_stats); + end_phase("cleanup", & phase_stats); if (o.rank == 0){ if (o.backend->rmdir(o.prefix, o.backend_options) != 0) { @@ -947,5 +1014,5 @@ phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE printTime(); } 
//mem_free_preallocated(& limit_memory_P); - return all_phases_stats; + return o.results; } diff --git a/src/md-workbench.h b/src/md-workbench.h index e8794f5..394a43c 100644 --- a/src/md-workbench.h +++ b/src/md-workbench.h @@ -5,18 +5,6 @@ #include #include -// successfull, errors -typedef struct { - int suc; - int err; -} op_stat_t; - -// A runtime for an operation and when the operation was started -typedef struct{ - float time_since_app_start; - float runtime; -} time_result_t; - typedef struct{ float min; float q1; @@ -27,38 +15,28 @@ typedef struct{ float max; } time_statistics_t; + // statistics for running a single phase typedef struct{ // NOTE: if this type is changed, adjust end_phase() !!! - double t; // maximum time - double * t_all; - - op_stat_t dset_create; - op_stat_t dset_delete; - - op_stat_t obj_create; - op_stat_t obj_read; - op_stat_t obj_stat; - op_stat_t obj_delete; - - // time measurements of individual runs, these are not returned for now by the API! - uint64_t repeats; - time_result_t * time_create; - time_result_t * time_read; - time_result_t * time_stat; - time_result_t * time_delete; - time_statistics_t stats_create; time_statistics_t stats_read; time_statistics_t stats_stat; time_statistics_t stats_delete; - // the maximum time for any single operation + int errors; + double rate; double max_op_time; - double phase_start_timer; - int stonewall_iterations; -} phase_stat_t; + double runtime; + uint64_t iterations_done; +} mdworkbench_result_t; + +typedef struct{ + int count; // the number of results + int errors; + mdworkbench_result_t result[]; +} mdworkbench_results_t; // @Return The first statistics returned are precreate, then iteration many benchmark runs, the last is cleanup -phase_stat_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile); +mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_com, FILE * out_logfile); #endif From 75c08386a44c625d6d568d2f912ff5986f08b263 Mon 
Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 4 Nov 2020 20:47:30 +0000 Subject: [PATCH 061/154] Bugfix porting error for performance stats. --- src/md-workbench.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index d981324..ed9ec4e 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -175,7 +175,7 @@ static void init_stats(phase_stat_t * p, size_t repeats){ static float add_timed_result(double start, double phase_start_timer, time_result_t * results, size_t pos, double * max_time, double * out_op_time){ float curtime = start - phase_start_timer; - double op_time = GetTimeStamp(); + double op_time = GetTimeStamp() - start; results[pos].runtime = (float) op_time; results[pos].time_since_app_start = curtime; if (op_time > *max_time){ @@ -248,7 +248,7 @@ static void print_p_stat(char * buff, const char * name, phase_stat_t * p, doubl // single line pos += sprintf(buff, "%s process max:%.2fs ", name, t); if(print_global){ - pos += sprintf(buff + pos, "min:%.1fs mean: %.1fs balance:%.1f stddev:%.1f ", r_min, r_mean, r_min/r_max * 100.0, r_std); + pos += sprintf(buff + pos, "min:%.2fs mean: %.2fs balance:%.1f stddev:%.1f ", r_min, r_mean, r_min/r_max * 100.0, r_std); } int ioops_per_iter = 4; if(o.read_only){ From 3ee3e9ad5c05d3d0b799217654f2336da8060e32 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 5 Nov 2020 14:00:09 +0000 Subject: [PATCH 062/154] Integrated sequential code into the benchmark main loop. A bit ugly but shows that the code logic hasn't changed. --- src/ior.c | 109 +++++++++++++++++++++--------------------------------- 1 file changed, 42 insertions(+), 67 deletions(-) diff --git a/src/ior.c b/src/ior.c index 55733d5..52630aa 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1644,47 +1644,6 @@ static void ValidateTests(IOR_param_t * test) } } -/** - * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. 
- * They are sequential and the last element is set to -1 as end marker. - * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount - * @param pretendRank int pretended Rank for shifting the offsets correctly - * @return IOR_offset_t - */ -IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) -{ - IOR_offset_t i, j, k = 0; - IOR_offset_t offsets; - IOR_offset_t *offsetArray; - - /* count needed offsets */ - offsets = (test->blockSize / test->transferSize) * test->segmentCount; - - /* setup empty array */ - offsetArray = - (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t)); - if (offsetArray == NULL) - ERR("malloc() failed"); - offsetArray[offsets] = -1; /* set last offset with -1 */ - - /* fill with offsets */ - for (i = 0; i < test->segmentCount; i++) { - for (j = 0; j < (test->blockSize / test->transferSize); j++) { - offsetArray[k] = j * test->transferSize; - if (test->filePerProc) { - offsetArray[k] += i * test->blockSize; - } else { - offsetArray[k] += - (i * test->numTasks * test->blockSize) - + (pretendRank * test->blockSize); - } - k++; - } - } - - return (offsetArray); -} - /** * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. * They get created sequentially and mixed up in the end. 
The last array element @@ -1769,15 +1728,13 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int acce return (offsetArray); } -static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offsetArray, int pretendRank, +static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t * transferCount, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){ IOR_offset_t amtXferred = 0; IOR_offset_t transfer; void *buffer = ioBuffers->buffer; - IOR_offset_t offset = offsetArray[pairCnt]; // this looks inappropriate - transfer = test->transferSize; if (access == WRITE) { /* fills each transfer with a unique pattern @@ -1830,41 +1787,48 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, int errors = 0; IOR_offset_t transferCount = 0; uint64_t pairCnt = 0; - IOR_offset_t *offsetArray; int pretendRank; IOR_offset_t dataMoved = 0; /* for data rate calculation */ double startForStonewall; int hitStonewall; + int i, j; IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ? 
&results->write : &results->read; /* initialize values */ pretendRank = (rank + rankOffset) % test->numTasks; - if (test->randomOffset) { - offsetArray = GetOffsetArrayRandom(test, pretendRank, access); - } else { - offsetArray = GetOffsetArraySequential(test, pretendRank); - } + // offsetArray = GetOffsetArraySequential(test, pretendRank); startForStonewall = GetTimeStamp(); hitStonewall = 0; - /* loop over offsets to access */ - while ((offsetArray[pairCnt] != -1) && !hitStonewall ) { - dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); - pairCnt++; + for (i = 0; i < test->segmentCount && !hitStonewall; i++) { + for (j = 0; j < (test->blockSize / test->transferSize) && !hitStonewall ; j++) { + IOR_offset_t offset; + if (test->randomOffset) { - hitStonewall = ((test->deadlineForStonewalling != 0 - && (GetTimeStamp() - startForStonewall) - > test->deadlineForStonewalling)) || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; + }else{ + offset = j * test->transferSize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + } + dataMoved += WriteOrReadSingle(offset, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); + pairCnt++; - if ( test->collective && test->deadlineForStonewalling ) { - // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop - // it absolutely must be an 'all or none': - MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); - } + hitStonewall = ((test->deadlineForStonewalling != 0 + && (GetTimeStamp() - startForStonewall) > test->deadlineForStonewalling)) + || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; + if ( test->collective && test->deadlineForStonewalling ) { + // if 
collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop + // it absolutely must be an 'all or none': + MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); + } + } } if (test->stoneWallingWearOut){ if (verbose >= VERBOSE_1){ @@ -1891,19 +1855,30 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, } if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! - for(; pairCnt < point->pairs_accessed; pairCnt++ ) { - dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); + for ( ; pairCnt < point->pairs_accessed; i++) { + for ( ; j < (test->blockSize / test->transferSize) && pairCnt < point->pairs_accessed ; j++) { + IOR_offset_t offset; + if (test->randomOffset) { + + }else{ + offset = j * test->transferSize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + } + dataMoved += WriteOrReadSingle(offset, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); + pairCnt++; + } } } }else{ point->pairs_accessed = pairCnt; } - totalErrorCount += CountErrors(test, access, errors); - free(offsetArray); - if (access == WRITE && test->fsync == TRUE) { backend->fsync(fd, test->backend_options); /*fsync after all accesses */ } From 2d79efc0c5fd0cbbb0f4a5dff62a9b41a214068a Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Thu, 5 Nov 2020 19:13:08 +0000 Subject: [PATCH 063/154] Fix wait issue on MacOS --- src/md-workbench.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index ed9ec4e..34dfa01 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -144,7 +144,7 @@ void init_options(){ o.run_info_file = "md-workbench.status"; } -static void wait(double runtime){ +static void mdw_wait(double runtime){ double waittime = runtime * o.relative_waiting_factor; //printf("waittime: %e\n", waittime); if(waittime < 0.01){ @@ -617,7 +617,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_stat, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { - wait(op_time); + mdw_wait(op_time); } if (o.verbosity >= 2){ @@ -651,7 +651,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_read, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { - wait(op_time); + mdw_wait(op_time); } if(o.read_only){ continue; @@ -661,7 +661,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ o.backend->delete(obj_name, o.backend_options); bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_delete, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { - wait(op_time); + mdw_wait(op_time); } if (o.verbosity >= 2){ @@ -689,7 +689,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { - wait(op_time); + mdw_wait(op_time); } if (o.verbosity >= 2){ From 0e9176932836bb11f75fe212ca6c94f7a83d052a Mon Sep 17 00:00:00 2001 From: otatebe <39575743+otatebe@users.noreply.github.com> Date: Fri, 6 Nov 2020 18:15:39 +0900 
Subject: [PATCH 064/154] aiori-Gfarm: sequel to #262 - sync interface (#269) * aiori-Gfarm: update to the new aiori interface * aiori-Gfarm: Gfarm_sync --- src/aiori-Gfarm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/aiori-Gfarm.c b/src/aiori-Gfarm.c index fecda08..e94022f 100644 --- a/src/aiori-Gfarm.c +++ b/src/aiori-Gfarm.c @@ -284,6 +284,16 @@ Gfarm_stat(const char *fn, struct stat *buf, aiori_mod_opt_t *param) return (0); } +void +Gfarm_sync(aiori_mod_opt_t *param) +{ + if (hints->dryRun) + return; + + /* no cache in libgfarm */ + return; +} + ior_aiori_t gfarm_aiori = { .name = "Gfarm", .name_legacy = NULL, @@ -304,5 +314,6 @@ ior_aiori_t gfarm_aiori = { .initialize = Gfarm_initialize, .finalize = Gfarm_finalize, .get_options = NULL, + .sync = Gfarm_sync, .enable_mdtest = true, }; From ad985af76363dabbbc66837fd2aaa00182378149 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 6 Nov 2020 09:30:59 +0000 Subject: [PATCH 065/154] Location problem on non Linux systems. --- src/utilities.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/utilities.h b/src/utilities.h index 020f27b..56cc8db 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -30,13 +30,7 @@ extern enum OutputFormat_t outputFormat; /* format of the output */ * Try using the system's PATH_MAX, which is what realpath and such use. */ #define MAX_PATHLEN PATH_MAX - - -#ifdef __linux__ #define ERROR_LOCATION __func__ -#else -#define ERROR_LOCATION __LINE__ -#endif void* safeMalloc(uint64_t size); From a0e5e297d6f34284bcd7c5f366205e6fc9bb6b55 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 6 Nov 2020 09:52:35 +0000 Subject: [PATCH 066/154] Create random for one block (1 segment) only to be repeated. 
--- src/ior-internal.h | 3 +- src/ior.c | 96 +++++++++++++++++++++++----------------------- src/utilities.c | 21 ---------- src/utilities.h | 1 - 4 files changed, 50 insertions(+), 71 deletions(-) diff --git a/src/ior-internal.h b/src/ior-internal.h index fa7212e..640640d 100644 --- a/src/ior-internal.h +++ b/src/ior-internal.h @@ -25,8 +25,7 @@ void PrintTestEnds(); void PrintTableHeader(); /* End of ior-output */ -IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank); -IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access); +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank); struct results { double min; diff --git a/src/ior.c b/src/ior.c index 52630aa..93c461d 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1025,9 +1025,6 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) } init_clock(); - - /* seed random number generator */ - SeedRandGen(mpi_comm_world); } /* @@ -1645,11 +1642,10 @@ static void ValidateTests(IOR_param_t * test) } /** - * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. - * They get created sequentially and mixed up in the end. The last array element - * is set to -1 as end marker. - * It should be noted that as the seeds get synchronised across all processes - * every process computes the same random order if used with filePerProc. + * Returns a precomputed array of IOR_offset_t for the inner benchmark loop terminated by offset -1. + * They get created sequentially and mixed up in the end. + * It should be noted that as the seeds get synchronised across all processes if not FilePerProcess is set + * every process computes the same random order. * For a shared file all transfers get randomly assigned to ranks. The processes * can also have differen't numbers of transfers. This might lead to a bigger * diversion in accesse as it dose with filePerProc. 
This is expected but @@ -1657,73 +1653,79 @@ static void ValidateTests(IOR_param_t * test) * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount * @param pretendRank int pretended Rank for shifting the offsets correctly * @return IOR_offset_t - * @return */ -IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access) +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank) { int seed; - IOR_offset_t i, value, tmp; - IOR_offset_t offsets = 0; + IOR_offset_t i; + IOR_offset_t offsets; IOR_offset_t offsetCnt = 0; - IOR_offset_t fileSize; IOR_offset_t *offsetArray; - /* set up seed, each process can determine which regions to access individually */ - if (test->randomSeed == -1) { - test->randomSeed = seed = rand(); - } else { - seed = test->randomSeed + pretendRank; - } - srand(seed); - - fileSize = test->blockSize * test->segmentCount; - if (test->filePerProc == FALSE) { - fileSize *= test->numTasks; + if (test->filePerProc) { + /* set up seed, each process can determine which regions to access individually */ + if (test->randomSeed == -1) { + seed = time(NULL); + test->randomSeed = seed; + } else { + seed = test->randomSeed + pretendRank; + } + }else{ + /* Shared file requires that the seed is synchronized */ + if (test->randomSeed == -1) { + // all processes need to have the same seed. 
+ if(rank == 0){ + seed = time(NULL); + } + MPI_CHECK(MPI_Bcast(& seed, 1, MPI_INT, 0, test->testComm), "cannot broadcast random seed value"); + test->randomSeed = seed; + }else{ + seed = test->randomSeed; + } } + srandom(seed); /* count needed offsets (pass 1) */ if (test->filePerProc == FALSE) { - for (i = 0; i < fileSize; i += test->transferSize) { - // this counts which process get how many transferes in - // a shared file - if ((rand() % test->numTasks) == pretendRank) { - offsets++; - } + offsets = 0; + for (i = 0; i < test->blockSize; i += test->transferSize) { + // this counts which process get how many transferes in the shared file + if ((rand() % test->numTasks) == pretendRank) { + offsets++; + } } } else { - offsets += fileSize / test->transferSize; + offsets = test->blockSize / test->transferSize; } /* setup empty array */ - offsetArray = - (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t)); - if (offsetArray == NULL) - ERR("malloc() failed"); + offsetArray = (IOR_offset_t *) safeMalloc((offsets + 1) * sizeof(IOR_offset_t)); + offsetArray[offsets] = -1; /* set last offset with -1 */ if (test->filePerProc) { - /* fill array */ - for (i = 0; i < offsets; i++) { - offsetArray[i] = i * test->transferSize; - } + /* fill array */ + for (i = 0; i < offsets; i++) { + offsetArray[i] = i * test->transferSize; + } } else { - /* fill with offsets (pass 2) */ - srand(seed); /* need same seedto get same transfers as counted in the beginning*/ - for (i = 0; i < fileSize; i += test->transferSize) { - if ((rand() % test->numTasks) == pretendRank) { - offsetArray[offsetCnt] = i; - offsetCnt++; - } + /* fill with offsets (pass 2) */ + srandom(seed); /* need same seed to get same transfers as counted in the beginning*/ + for (i = 0; i < test->blockSize; i += test->transferSize) { + if ((rand() % test->numTasks) == pretendRank) { + offsetArray[offsetCnt] = i; + offsetCnt++; } + } } /* reorder array */ for (i = 0; i < offsets; i++) { + IOR_offset_t value, tmp; 
value = rand() % offsets; tmp = offsetArray[value]; offsetArray[value] = offsetArray[i]; offsetArray[i] = tmp; } - SeedRandGen(test->testComm); /* synchronize seeds across tasks */ return (offsetArray); } diff --git a/src/utilities.c b/src/utilities.c index 36db9c9..cc28c93 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -654,27 +654,6 @@ int Regex(char *string, char *pattern) return (retValue); } -/* - * Seed random generator. - */ -void SeedRandGen(MPI_Comm testComm) -{ - unsigned int randomSeed; - - if (rank == 0) { -#ifdef _WIN32 - rand_s(&randomSeed); -#else - struct timeval randGenTimer; - gettimeofday(&randGenTimer, (struct timezone *)NULL); - randomSeed = randGenTimer.tv_usec; -#endif - } - MPI_CHECK(MPI_Bcast(&randomSeed, 1, MPI_INT, 0, - testComm), "cannot broadcast random seed value"); - srandom(randomSeed); -} - /* * System info for Windows. */ diff --git a/src/utilities.h b/src/utilities.h index 020f27b..5a7d67d 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -46,7 +46,6 @@ char *CurrentTimeString(void); int Regex(char *, char *); void ShowFileSystemSize(IOR_param_t * test); void DumpBuffer(void *, size_t); -void SeedRandGen(MPI_Comm); void SetHints (MPI_Info *, char *); void ShowHints (MPI_Info *); char *HumanReadable(IOR_offset_t value, int base); From af0753d8ad6a1ce15ef9f3db4b17d5d1f69ec0f1 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 6 Nov 2020 10:27:11 +0000 Subject: [PATCH 067/154] Random: last modifications. 
--- src/ior-internal.h | 2 +- src/ior.c | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/src/ior-internal.h b/src/ior-internal.h index 640640d..c0af544 100644 --- a/src/ior-internal.h +++ b/src/ior-internal.h @@ -25,7 +25,7 @@ void PrintTestEnds(); void PrintTableHeader(); /* End of ior-output */ -IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank); +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count); struct results { double min; diff --git a/src/ior.c b/src/ior.c index 93c461d..af57a84 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1642,7 +1642,7 @@ static void ValidateTests(IOR_param_t * test) } /** - * Returns a precomputed array of IOR_offset_t for the inner benchmark loop terminated by offset -1. + * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. * They get created sequentially and mixed up in the end. * It should be noted that as the seeds get synchronised across all processes if not FilePerProcess is set * every process computes the same random order. 
@@ -1654,7 +1654,7 @@ static void ValidateTests(IOR_param_t * test) * @param pretendRank int pretended Rank for shifting the offsets correctly * @return IOR_offset_t */ -IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank) +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count) { int seed; IOR_offset_t i; @@ -1686,32 +1686,32 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank) srandom(seed); /* count needed offsets (pass 1) */ - if (test->filePerProc == FALSE) { + if (test->filePerProc) { + offsets = test->blockSize / test->transferSize; + }else{ offsets = 0; - for (i = 0; i < test->blockSize; i += test->transferSize) { + for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) { // this counts which process get how many transferes in the shared file if ((rand() % test->numTasks) == pretendRank) { offsets++; } } - } else { - offsets = test->blockSize / test->transferSize; } /* setup empty array */ - offsetArray = (IOR_offset_t *) safeMalloc((offsets + 1) * sizeof(IOR_offset_t)); + offsetArray = (IOR_offset_t *) safeMalloc(offsets * sizeof(IOR_offset_t)); - offsetArray[offsets] = -1; /* set last offset with -1 */ + *out_count = offsets; if (test->filePerProc) { /* fill array */ for (i = 0; i < offsets; i++) { - offsetArray[i] = i * test->transferSize; + offsetArray[i] = i * test->transferSize; } } else { /* fill with offsets (pass 2) */ srandom(seed); /* need same seed to get same transfers as counted in the beginning*/ - for (i = 0; i < test->blockSize; i += test->transferSize) { + for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) { if ((rand() % test->numTasks) == pretendRank) { offsetArray[offsetCnt] = i; offsetCnt++; @@ -1805,11 +1805,23 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, startForStonewall = GetTimeStamp(); hitStonewall = 0; + IOR_offset_t offsets; + IOR_offset_t * offsets_rnd; + if 
(test->randomOffset) { + offsets_rnd = GetOffsetArrayRandom(test, pretendRank, & offsets); + }else{ + offsets = (test->blockSize / test->transferSize); + } + for (i = 0; i < test->segmentCount && !hitStonewall; i++) { - for (j = 0; j < (test->blockSize / test->transferSize) && !hitStonewall ; j++) { + for (j = 0; j < offsets && !hitStonewall ; j++) { IOR_offset_t offset; if (test->randomOffset) { - + if(test->filePerProc){ + offset = offsets_rnd[j] + (i * test->blockSize); + }else{ + offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize); + } }else{ offset = j * test->transferSize; if (test->filePerProc) { @@ -1858,10 +1870,14 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! for ( ; pairCnt < point->pairs_accessed; i++) { - for ( ; j < (test->blockSize / test->transferSize) && pairCnt < point->pairs_accessed ; j++) { + for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) { IOR_offset_t offset; if (test->randomOffset) { - + if(test->filePerProc){ + offset = offsets_rnd[j] + (i * test->blockSize); + }else{ + offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize); + } }else{ offset = j * test->transferSize; if (test->filePerProc) { From a59e98d7a690d3a702124e4ba6dae831bd2d629f Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Fri, 6 Nov 2020 10:38:50 +0000 Subject: [PATCH 068/154] Add error to indicate changed behavior to users --- src/ior.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ior.c b/src/ior.c index af57a84..3aaf195 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1586,6 +1586,8 @@ static void ValidateTests(IOR_param_t * test) } if (test->blockSize < test->transferSize) ERR("block size must not be smaller than transfer size"); + if (test->randomOffset && test->blockSize == test->transferSize) + ERR("IOR will randomize access within a block and repeats the same pattern for all segments, therefore choose blocksize > transferSize"); /* specific APIs */ if ((strcasecmp(test->api, "MPIIO") == 0) From 306598db67a6faa76d8195a178b7d57292bc62fa Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 6 Nov 2020 11:40:31 +0000 Subject: [PATCH 069/154] Bugfix memory issue in global options. --- src/aiori.c | 4 +++- src/parse_options.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/aiori.c b/src/aiori.c index 897abb6..6c9a971 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -128,6 +128,8 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) { ior_aiori_t **tmp = available_aiori; char delimiter = ' '; + *APIs = 0; + *APIs_legacy = 0; while (*tmp != NULL) { @@ -136,7 +138,6 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) tmp++; continue; } - if (delimiter == ' ') { APIs += sprintf(APIs, "%s", (*tmp)->name); @@ -148,6 +149,7 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) if ((*tmp)->name_legacy != NULL) APIs_legacy += sprintf(APIs_legacy, "%c%s", delimiter, (*tmp)->name_legacy); + tmp++; } } diff --git a/src/parse_options.c b/src/parse_options.c index 87e3c91..12f8e0c 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -384,7 +384,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ char APIs[1024]; char APIs_legacy[1024]; 
aiori_supported_apis(APIs, APIs_legacy, IOR); - char apiStr[1024]; + char * apiStr = safeMalloc(1024); sprintf(apiStr, "API for I/O [%s]", APIs); option_help o [] = { From 65666faf8b83b0b4111f5eea693a3ee76f62f85b Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 6 Nov 2020 11:43:52 +0000 Subject: [PATCH 070/154] Updated tests to have transfer != blocksize for new random variant. --- testing/basic-tests.sh | 18 ++++++++++-------- testing/test_comments.ior | 8 ++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index 1a0841e..cf09082 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -16,15 +16,17 @@ MDTEST 2 -a POSIX -W 2 MDTEST 1 -C -T -r -F -I 1 -z 1 -b 1 -L -u MDTEST 1 -C -T -I 1 -z 1 -b 1 -u -IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 1000k -IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 100k -IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 100k +IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 2000k +IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 200k +IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 200k -IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k +IOR 2 -a POSIX -w -C -k -e -i1 -m -t 100k -b 200k + +IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 200k IOR 2 -f "$ROOT/test_comments.ior" diff --git a/testing/test_comments.ior b/testing/test_comments.ior index eaf7997..1472e8f 100644 --- a/testing/test_comments.ior +++ 
b/testing/test_comments.ior @@ -2,16 +2,16 @@ IOR START api=posix writeFile =1 - randomOffset=1 + randomOffset=1 reorderTasks=1 - filePerProc=1 + filePerProc=1 keepFile=1 fsync=1 repetitions=1 multiFile=1 # tab-prefixed comment -transferSize=100k -blockSize=100k +transferSize=10k +blockSize=20k # space-prefixed comment run --dummy.delay-create=1000 From 277f3801392b9f5fddfc19b23d676dc1b3746d26 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 9 Nov 2020 16:23:34 +0000 Subject: [PATCH 071/154] Remove duplicated functionality between mdtest and IOR. Refactored the ShowFileSystemSize function. --- src/ior-output.c | 4 +++- src/mdtest.c | 36 ++---------------------------------- src/utilities.c | 8 +++----- src/utilities.h | 2 +- 4 files changed, 9 insertions(+), 41 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index bf5f080..8cfaf12 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -346,7 +346,9 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("TestID", test->id); PrintKeyVal("StartTime", CurrentTimeString()); - ShowFileSystemSize(test); + char filename[MAX_PATHLEN]; + GetTestFileName(filename, test); + ShowFileSystemSize(filename, test->backend, test->backend_options); if (verbose >= VERBOSE_3 || outputFormat == OUTPUT_JSON) { char* data_packets[] = {"g","t","o","i"}; diff --git a/src/mdtest.c b/src/mdtest.c index c713796..6efd852 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1563,39 +1563,6 @@ void show_file_system_size(char *file_system) { return; } -void display_freespace(char *testdirpath) -{ - char dirpath[MAX_PATHLEN] = {0}; - int i; - int directoryFound = 0; - - - VERBOSE(3,5,"Entering display_freespace on %s...", testdirpath ); - - strcpy(dirpath, testdirpath); - - /* get directory for outfile */ - i = strlen(dirpath); - while (i-- > 0) { - if (dirpath[i] == '/') { - dirpath[i] = '\0'; - directoryFound = 1; - break; - } - } - - /* if no directory/, use '.' 
*/ - if (directoryFound == 0) { - strcpy(dirpath, "."); - } - - VERBOSE(3,5,"Before show_file_system_size, dirpath is '%s'", dirpath ); - show_file_system_size(dirpath); - VERBOSE(3,5, "After show_file_system_size, dirpath is '%s'\n", dirpath ); - - return; -} - void create_remove_directory_tree(int create, int currDepth, char* path, int dirNum, rank_progress_t * progress) { @@ -1921,6 +1888,7 @@ void mdtest_init_args(){ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) { testComm = world_com; out_logfile = world_out; + out_resultfile = world_out; mpi_comm_world = world_com; init_clock(); @@ -2203,7 +2171,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* display disk usage */ VERBOSE(3,-1,"main (before display_freespace): testdirpath is '%s'", testdirpath ); - if (rank == 0) display_freespace(testdirpath); + if (rank == 0) ShowFileSystemSize(testdirpath, backend, backend_options); int tasksBlockMapping = QueryNodeMapping(testComm, true); /* set the shift to mimic IOR and shift by procs per node */ diff --git a/src/utilities.c b/src/utilities.c index 36db9c9..b1b8cda 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -572,16 +572,14 @@ IOR_offset_t StringToBytes(char *size_str) /* * Displays size of file system and percent of data blocks and inodes used. */ -void ShowFileSystemSize(IOR_param_t * test) // this might be converted to an AIORI call +void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options) // this might be converted to an AIORI call { ior_aiori_statfs_t stat; - if(! test->backend->statfs){ + if(! 
backend->statfs){ WARN("Backend doesn't implement statfs"); return; } - char filename[MAX_PATHLEN]; - GetTestFileName(filename, test); - int ret = test->backend->statfs(filename, & stat, test->backend_options); + int ret = backend->statfs(filename, & stat, backend_options); if( ret != 0 ){ WARN("Backend returned error during statfs"); return; diff --git a/src/utilities.h b/src/utilities.h index 020f27b..94752d5 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -44,7 +44,7 @@ void set_o_direct_flag(int *fd); char *CurrentTimeString(void); int Regex(char *, char *); -void ShowFileSystemSize(IOR_param_t * test); +void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options); void DumpBuffer(void *, size_t); void SeedRandGen(MPI_Comm); void SetHints (MPI_Info *, char *); From b5891141d886f742b5ee0b9caf58ebba3db5a297 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sun, 22 Nov 2020 11:43:42 +0000 Subject: [PATCH 072/154] Move checks before inititalization. Add simple validation for S3. 
--- src/aiori-S3-libs3.c | 10 ++++++++++ src/md-workbench.c | 7 +++---- src/mdtest.c | 8 ++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index c8e29c2..668f6a6 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -451,6 +451,16 @@ static IOR_offset_t S3_GetFileSize(aiori_mod_opt_t * options, char *testFileName static int S3_check_params(aiori_mod_opt_t * options){ + s3_options_t * o = (s3_options_t*) options; + if(o->access_key == NULL){ + o->access_key = ""; + } + if(o->secret_key == NULL){ + o->secret_key = ""; + } + if(o->host == NULL){ + WARN("The S3 hostname should be specified"); + } return 0; } diff --git a/src/md-workbench.c b/src/md-workbench.c index 34dfa01..869b4fd 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -890,16 +890,15 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c exit(1); } - if (o.backend->initialize){ - o.backend->initialize(o.backend_options); - } if(o.backend->xfer_hints){ o.backend->xfer_hints(& o.hints); } if(o.backend->check_params){ o.backend->check_params(o.backend_options); } - + if (o.backend->initialize){ + o.backend->initialize(o.backend_options); + } int current_index = 0; diff --git a/src/mdtest.c b/src/mdtest.c index 6efd852..98c43d0 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1981,16 +1981,16 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * MPI_Comm_rank(testComm, &rank); MPI_Comm_size(testComm, &size); - if (backend->initialize){ - backend->initialize(backend_options); - } if(backend->xfer_hints){ backend->xfer_hints(& hints); } if(backend->check_params){ backend->check_params(backend_options); } - + if (backend->initialize){ + backend->initialize(backend_options); + } + pid = getpid(); uid = getuid(); From 02a47085ec8a4ff79ba257a2dd3ca9d7357e723b Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Sun, 22 Nov 2020 12:01:12 +0000 Subject: [PATCH 073/154] Bugfix: suboptimal name mapping was leading to accidential deletes in md-workbench. --- src/aiori-S3-libs3.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 668f6a6..b5b2f6c 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -86,9 +86,14 @@ static void def_file_name(s3_options_t * o, char * out_name, char const * path){ }else if(c >= 'A' && c <= 'Z'){ *out_name = *path + ('a' - 'A'); out_name++; + }else if(c == '/'){ + *out_name = '_'; + out_name++; } path++; } + *out_name = '-'; + out_name++; *out_name = '\0'; } From 76d5a77ac2bc6fc9477e746a5bd2eaebcd2f6bed Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Wed, 25 Nov 2020 01:21:12 -0700 Subject: [PATCH 074/154] Add missing options to mdtest man page --- doc/mdtest.1 | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/mdtest.1 b/doc/mdtest.1 index 81468d9..27d4d7b 100644 --- a/doc/mdtest.1 +++ b/doc/mdtest.1 @@ -78,6 +78,9 @@ Stride # between neighbor tasks for file/dir stat, 0 = local .I "-p" seconds Pre-iteration delay (in seconds). .TP +.I "-P" +Print both the file creation rate and the elapsed time. +.TP .I "-r" Only perform the remove phase of the tests. .TP @@ -121,6 +124,19 @@ Set verbosity value Set the number of Bytes to write to each file after it is created [default: 0]. .TP +.I "-W" seconds +Specify the stonewall time in seconds. When the stonewall timer has elapsed, +the rank with the highest number of creates sets +.I number_of_items +for the other ranks, so that all ranks create the same number of files. +.TP +.I "-x" filename +Filename to use for stonewall synchronization between processes. +.TP +.I "Y" +Call the sync command after each phase, which is included in the +timing. Note that it causes all IO to be flushed from the nodes. +.TP .I "-z" tree_depth The depth of the hierarchical directory tree [default: 0]. 
.SH EXAMPLES From ad6dfc5e63902ba28ee368f940ea8f518e10e0f5 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 25 Nov 2020 09:50:26 +0000 Subject: [PATCH 075/154] Allow MDTest to print per proc. --- src/mdtest.c | 71 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 98c43d0..5f0bebd 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -138,6 +138,7 @@ static uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal static int directory_loops; static int print_time; static int print_rate_and_time; +static int print_all_proc; static int random_seed; static int shared_file; static int files_only; @@ -1253,8 +1254,25 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[7]); } +char const * mdtest_test_name(int i){ + switch (i) { + case 0: return "Directory creation :"; + case 1: return "Directory stat :"; + case 2: return NULL; + case 3: return "Directory removal :"; + case 4: return "File creation :"; + case 5: return "File stat :"; + case 6: return "File read :"; + case 7: return "File removal :"; + case 8: return "Tree creation :"; + case 9: return "Tree removal :"; + default: return "ERR INVALID TESTNAME :"; + } + return NULL; +} + void summarize_results(int iterations, int print_time) { - char access[MAX_PATHLEN]; + char const * access; int i, j, k; int start, stop, tableSize = MDTEST_LAST_NUM; double min, max, mean, sd, sum = 0, var = 0, curr = 0; @@ -1277,10 +1295,6 @@ void summarize_results(int iterations, int print_time) { return; } - VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? 
"time": "rate", iterations); - VERBOSE(0,-1," Operation Max Min Mean Std Dev"); - VERBOSE(0,-1," --------- --- --- ---- -------"); - /* if files only access, skip entries 0-3 (the dir tests) */ if (files_only && !dirs_only) { start = 4; @@ -1300,6 +1314,30 @@ void summarize_results(int iterations, int print_time) { start = stop = 0; } + + if(print_all_proc){ + fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? "time" : "rate"); + for (j = 0; j < iterations; j++) { + fprintf(out_logfile, "iteration: %d\n", j); + for (i = start; i < tableSize; i++) { + access = mdtest_test_name(i); + if(access == NULL){ + continue; + } + fprintf(out_logfile, "Test %s", access); + for (k=0; k < size; k++) { + curr = all[(k*tableSize*iterations) + (j*tableSize) + i]; + fprintf(out_logfile, "%c%e", (k==0 ? ' ': ','), curr); + } + fprintf(out_logfile, "\n"); + } + } + } + + VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); + VERBOSE(0,-1," Operation Max Min Mean Std Dev"); + VERBOSE(0,-1," --------- --- --- ---- -------"); + for (i = start; i < stop; i++) { min = max = all[i]; for (k=0; k < size; k++) { @@ -1324,18 +1362,7 @@ void summarize_results(int iterations, int print_time) { } var = var / (iterations * size); sd = sqrt(var); - switch (i) { - case 0: strcpy(access, "Directory creation :"); break; - case 1: strcpy(access, "Directory stat :"); break; - /* case 2: strcpy(access, "Directory read :"); break; */ - case 2: ; break; /* N/A */ - case 3: strcpy(access, "Directory removal :"); break; - case 4: strcpy(access, "File creation :"); break; - case 5: strcpy(access, "File stat :"); break; - case 6: strcpy(access, "File read :"); break; - case 7: strcpy(access, "File removal :"); break; - default: strcpy(access, "ERR"); break; - } + access = mdtest_test_name(i); if (i != 2) { fprintf(out_logfile, " %s ", access); fprintf(out_logfile, "%14.3f ", max); @@ -1392,11 +1419,7 @@ void summarize_results(int iterations, int print_time) { 
} var = var / (iterations); sd = sqrt(var); - switch (i) { - case 8: strcpy(access, "Tree creation :"); break; - case 9: strcpy(access, "Tree removal :"); break; - default: strcpy(access, "ERR"); break; - } + access = mdtest_test_name(i); fprintf(out_logfile, " %s ", access); fprintf(out_logfile, "%14.3f ", max); fprintf(out_logfile, "%14.3f ", min); @@ -1872,6 +1895,7 @@ void mdtest_init_args(){ items = 0; num_dirs_in_tree_calc = 0; collective_creates = 0; + print_all_proc = 0; write_bytes = 0; stone_wall_timer_seconds = 0; read_bytes = 0; @@ -1945,6 +1969,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & nstride}, {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & pre_delay}, {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & print_rate_and_time}, + {0, "print-all-procs", "all processes print an excerpt of their results", OPTION_FLAG, 'd', & print_all_proc}, {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize}, {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & random_seed}, {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride}, @@ -1990,7 +2015,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if (backend->initialize){ backend->initialize(backend_options); } - + pid = getpid(); uid = getuid(); From 4377aebcf8c0966e4828021fb155937fd998800b Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 26 Nov 2020 12:48:11 +0000 Subject: [PATCH 076/154] Bugfix MDTest calculation of multiple iterations was incorrect. 
--- src/mdtest.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 5f0bebd..145fea1 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1291,6 +1291,20 @@ void summarize_results(int iterations, int print_time) { } } + if(print_all_proc && 0){ + // This code prints the result table for debugging + for (i = 0; i < tableSize; i++) { + for (j = 0; j < iterations; j++) { + access = mdtest_test_name(i); + if(access == NULL){ + continue; + } + curr = summary_table[j].rate[i]; + fprintf(out_logfile, "Rank %d Iter %d Test %s Rate: %e\n", rank, j, access, curr); + } + } + } + if (rank != 0) { return; } @@ -1314,7 +1328,6 @@ void summarize_results(int iterations, int print_time) { start = stop = 0; } - if(print_all_proc){ fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? "time" : "rate"); for (j = 0; j < iterations; j++) { @@ -1326,7 +1339,7 @@ void summarize_results(int iterations, int print_time) { } fprintf(out_logfile, "Test %s", access); for (k=0; k < size; k++) { - curr = all[(k*tableSize*iterations) + (j*tableSize) + i]; + curr = all[j*tableSize*size + k * tableSize + i]; fprintf(out_logfile, "%c%e", (k==0 ? 
' ': ','), curr); } fprintf(out_logfile, "\n"); @@ -1340,10 +1353,9 @@ void summarize_results(int iterations, int print_time) { for (i = start; i < stop; i++) { min = max = all[i]; - for (k=0; k < size; k++) { - for (j = 0; j < iterations; j++) { - curr = all[(k*tableSize*iterations) - + (j*tableSize) + i]; + for (j = 0; j < iterations; j++) { + for (k=0; k < size; k++) { + curr = all[j*tableSize*size + k*tableSize + i]; if (min > curr) { min = curr; } @@ -1372,7 +1384,6 @@ void summarize_results(int iterations, int print_time) { fflush(out_logfile); } sum = var = 0; - } // TODO generalize once more stonewall timers are supported @@ -1389,7 +1400,7 @@ void summarize_results(int iterations, int print_time) { fprintf(out_logfile, "%14s %14s %14.3f %14s\n", "NA", "NA", print_time ? stonewall_time : stonewall_items / stonewall_time, "NA"); } - /* calculate tree create/remove rates */ + /* calculate tree create/remove rates, applies only to Rank 0 */ for (i = 8; i < tableSize; i++) { min = max = all[i]; for (j = 0; j < iterations; j++) { From 980ab1dc976302c1661d4be564c8fd48a5500161 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 26 Nov 2020 15:56:34 +0000 Subject: [PATCH 077/154] Lustre stripping: Fix default value such that Lustre striping is only set if anything is changed. 
--- src/aiori-POSIX.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index e8933b7..8beaa09 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -81,6 +81,7 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o }else{ memset(o, 0, sizeof(posix_options_t)); o->direct_io = 0; + o->lustre_stripe_count = -1; o->lustre_start_ost = -1; o->beegfs_numTargets = -1; o->beegfs_chunkSize = -1; @@ -392,8 +393,7 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) /* File needs to be opened O_EXCL because we cannot set * Lustre striping information on a pre-existing file.*/ - fd_oflag |= - O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; + fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) { fprintf(stdout, "\nUnable to open '%s': %s\n", From 7542e75c825c41cb867c69f836685de3728dfefd Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 27 Nov 2020 15:23:32 +0000 Subject: [PATCH 078/154] MDTest ignore verbose potential format overflows to be able to spot real errors. --- src/mdtest.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mdtest.c b/src/mdtest.c index 5f0bebd..15c26af 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -76,6 +76,8 @@ #include +#pragma GCC diagnostic ignored "-Wformat-overflow" + #ifdef HAVE_LUSTRE_LUSTREAPI #include #endif /* HAVE_LUSTRE_LUSTREAPI */ From ae8a11b42f11015d9e504f4af33a9e31fc01bb61 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 27 Nov 2020 15:35:32 +0000 Subject: [PATCH 079/154] MDTest updated stonewall check. 
--- src/mdtest.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 15c26af..78952ca 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1015,20 +1015,20 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } /* Returns if the stonewall was hit */ -int updateStoneWallIterations(int iteration, rank_progress_t * progress, double tstart){ +int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, uint64_t * out_max_iter){ int hit = 0; - uint64_t done = progress->items_done; long long unsigned max_iter = 0; - VERBOSE(1,1,"stonewall hit with %lld items", (long long) progress->items_done ); - MPI_Allreduce(& progress->items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); + VERBOSE(1,1,"stonewall hit with %lld items", (long long) items_done ); + MPI_Allreduce(& items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; + *out_max_iter = max_iter; // continue to the maximum... 
long long min_accessed = 0; - MPI_Reduce(& progress->items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); + MPI_Reduce(& items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); long long sum_accessed = 0; - MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); + MPI_Reduce(& items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; @@ -1036,8 +1036,6 @@ int updateStoneWallIterations(int iteration, rank_progress_t * progress, double VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); hit = 1; } - progress->items_start = done; - progress->items_per_dir = max_iter; return hit; } @@ -1085,7 +1083,11 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* create files */ create_remove_items(0, 0, 1, 0, temp_path, 0, progress); if(stone_wall_timer_seconds){ - int hit = updateStoneWallIterations(iteration, progress, t[0]); + uint64_t max_iter = 0; + uint64_t items_done = progress->items_done + dir_iter * items_per_dir; + int hit = updateStoneWallIterations(iteration, items_done, t[0], & max_iter); + progress->items_start = items_done; + progress->items_per_dir = max_iter; if (hit){ progress->stone_wall_timer_seconds = 0; @@ -1096,7 +1098,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro items = progress->items_done; } if (stoneWallingStatusFile){ - StoreStoneWallingIterations(stoneWallingStatusFile, progress->items_done); + StoreStoneWallingIterations(stoneWallingStatusFile, max_iter); } // reset stone wall timer to allow proper cleanup progress->stone_wall_timer_seconds = 0; @@ -1510,8 +1512,6 @@ void md_validate_tests() { FAIL("only specify the number of items or 
the number of items per directory"); }else if( items % items_per_dir != 0){ FAIL("items must be a multiple of items per directory"); - }else if( stone_wall_timer_seconds != 0){ - FAIL("items + items_per_dir can only be set without stonewalling"); } } /* check for using mknod */ From fbf976351a3864b90fce504f51913c013d984d52 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 27 Nov 2020 17:49:45 +0000 Subject: [PATCH 080/154] MDTest refactoring: move all global static variables into the global static structure. --- src/mdtest.c | 1419 +++++++++++++++++++++++++------------------------- 1 file changed, 697 insertions(+), 722 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 78952ca..49760f4 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -90,87 +90,92 @@ #define LLU "%lu" -static int size; -static uint64_t *rand_array; -static char testdir[MAX_PATHLEN]; -static char testdirpath[MAX_PATHLEN]; -static char base_tree_name[MAX_PATHLEN]; -static char **filenames; -static char hostname[MAX_PATHLEN]; -static char mk_name[MAX_PATHLEN]; -static char stat_name[MAX_PATHLEN]; -static char read_name[MAX_PATHLEN]; -static char rm_name[MAX_PATHLEN]; -static char unique_mk_dir[MAX_PATHLEN]; -static char unique_chdir_dir[MAX_PATHLEN]; -static char unique_stat_dir[MAX_PATHLEN]; -static char unique_read_dir[MAX_PATHLEN]; -static char unique_rm_dir[MAX_PATHLEN]; -static char unique_rm_uni_dir[MAX_PATHLEN]; -static char *write_buffer; -static char *stoneWallingStatusFile; +typedef struct { + int size; + uint64_t *rand_array; + char testdir[MAX_PATHLEN]; + char testdirpath[MAX_PATHLEN]; + char base_tree_name[MAX_PATHLEN]; + char **filenames; + char hostname[MAX_PATHLEN]; + char mk_name[MAX_PATHLEN]; + char stat_name[MAX_PATHLEN]; + char read_name[MAX_PATHLEN]; + char rm_name[MAX_PATHLEN]; + char unique_mk_dir[MAX_PATHLEN]; + char unique_chdir_dir[MAX_PATHLEN]; + char unique_stat_dir[MAX_PATHLEN]; + char unique_read_dir[MAX_PATHLEN]; + char unique_rm_dir[MAX_PATHLEN]; + 
char unique_rm_uni_dir[MAX_PATHLEN]; + char *write_buffer; + char *stoneWallingStatusFile; -static int barriers; -static int create_only; -static int stat_only; -static int read_only; -static int verify_read; -static int verify_write; -static int verification_error; -static int remove_only; -static int leaf_only; -static unsigned branch_factor; -static int depth; + int barriers; + int create_only; + int stat_only; + int read_only; + int verify_read; + int verify_write; + int verification_error; + int remove_only; + int leaf_only; + unsigned branch_factor; + int depth; -/* - * This is likely a small value, but it's sometimes computed by - * branch_factor^(depth+1), so we'll make it a larger variable, - * just in case. - */ -static uint64_t num_dirs_in_tree; -/* - * As we start moving towards Exascale, we could have billions - * of files in a directory. Make room for that possibility with - * a larger variable. - */ -static uint64_t items; -static uint64_t items_per_dir; -static uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */ -static int directory_loops; -static int print_time; -static int print_rate_and_time; -static int print_all_proc; -static int random_seed; -static int shared_file; -static int files_only; -static int dirs_only; -static int pre_delay; -static int unique_dir_per_task; -static int time_unique_dir_overhead; -static int throttle; -static int collective_creates; -static size_t write_bytes; -static int stone_wall_timer_seconds; -static size_t read_bytes; -static int sync_file; -static int call_sync; -static int path_count; -static int nstride; /* neighbor stride */ -static int make_node = 0; -#ifdef HAVE_LUSTRE_LUSTREAPI -static int global_dir_layout; -#endif /* HAVE_LUSTRE_LUSTREAPI */ + /* + * This is likely a small value, but it's sometimes computed by + * branch_factor^(depth+1), so we'll make it a larger variable, + * just in case. 
+ */ + uint64_t num_dirs_in_tree; + /* + * As we start moving towards Exascale, we could have billions + * of files in a directory. Make room for that possibility with + * a larger variable. + */ + uint64_t items; + uint64_t items_per_dir; + uint64_t num_dirs_in_tree_calc; /* this is a workaround until the overal code is refactored */ + int directory_loops; + int print_time; + int print_rate_and_time; + int print_all_proc; + int random_seed; + int shared_file; + int files_only; + int dirs_only; + int pre_delay; + int unique_dir_per_task; + int time_unique_dir_overhead; + int throttle; + int collective_creates; + size_t write_bytes; + int stone_wall_timer_seconds; + size_t read_bytes; + int sync_file; + int call_sync; + int path_count; + int nstride; /* neighbor stride */ + int make_node; + #ifdef HAVE_LUSTRE_LUSTREAPI + int global_dir_layout; + #endif /* HAVE_LUSTRE_LUSTREAPI */ -static mdtest_results_t * summary_table; -static pid_t pid; -static uid_t uid; + mdtest_results_t * summary_table; + pid_t pid; + uid_t uid; + + /* Use the POSIX backend by default */ + const ior_aiori_t *backend; + void * backend_options; + aiori_xfer_hint_t hints; + char * api; +} mdtest_options_t; + +static mdtest_options_t o; -/* Use the POSIX backend by default */ -static const ior_aiori_t *backend; -static void * backend_options; -static aiori_xfer_hint_t hints; -static char * api = NULL; /* This structure describes the processing status for stonewalling */ typedef struct{ @@ -244,46 +249,46 @@ void parse_dirpath(char *dirpath_arg) { tmp = dirpath_arg; - if (* tmp != '\0') path_count++; + if (* tmp != '\0') o.path_count++; while (* tmp != '\0') { if (* tmp == '@') { - path_count++; + o.path_count++; } tmp++; } // prevent changes to the original dirpath_arg dirpath_arg = strdup(dirpath_arg); - filenames = (char **)malloc(path_count * sizeof(char **)); - if (filenames == NULL || dirpath_arg == NULL) { + o.filenames = (char **)malloc(o.path_count * sizeof(char **)); + if (o.filenames == 
NULL || dirpath_arg == NULL) { FAIL("out of memory"); } token = strtok(dirpath_arg, delimiter_string); while (token != NULL) { - filenames[i] = token; + o.filenames[i] = token; token = strtok(NULL, delimiter_string); i++; } } static void prep_testdir(int j, int dir_iter){ - int pos = sprintf(testdir, "%s", testdirpath); - if ( testdir[strlen( testdir ) - 1] != '/' ) { - pos += sprintf(& testdir[pos], "/"); + int pos = sprintf(o.testdir, "%s", o.testdirpath); + if ( o.testdir[strlen( o.testdir ) - 1] != '/' ) { + pos += sprintf(& o.testdir[pos], "/"); } - pos += sprintf(& testdir[pos], "%s", TEST_DIR); - pos += sprintf(& testdir[pos], ".%d-%d", j, dir_iter); + pos += sprintf(& o.testdir[pos], "%s", TEST_DIR); + pos += sprintf(& o.testdir[pos], ".%d-%d", j, dir_iter); } static void phase_end(){ - if (call_sync){ - if(! backend->sync){ + if (o.call_sync){ + if(! o.backend->sync){ FAIL("Error, backend does not provide the sync method, but you requested to use sync.\n"); } - backend->sync(backend_options); + o.backend->sync(o.backend_options); } - if (barriers) { + if (o.barriers) { MPI_Barrier(testComm); } } @@ -296,15 +301,15 @@ static void phase_end(){ void unique_dir_access(int opt, char *to) { if (opt == MK_UNI_DIR) { MPI_Barrier(testComm); - sprintf( to, "%s/%s", testdir, unique_chdir_dir ); + sprintf( to, "%s/%s", o.testdir, o.unique_chdir_dir ); } else if (opt == STAT_SUB_DIR) { - sprintf( to, "%s/%s", testdir, unique_stat_dir ); + sprintf( to, "%s/%s", o.testdir, o.unique_stat_dir ); } else if (opt == READ_SUB_DIR) { - sprintf( to, "%s/%s", testdir, unique_read_dir ); + sprintf( to, "%s/%s", o.testdir, o.unique_read_dir ); } else if (opt == RM_SUB_DIR) { - sprintf( to, "%s/%s", testdir, unique_rm_dir ); + sprintf( to, "%s/%s", o.testdir, o.unique_rm_dir ); } else if (opt == RM_UNI_DIR) { - sprintf( to, "%s/%s", testdir, unique_rm_uni_dir ); + sprintf( to, "%s/%s", o.testdir, o.unique_rm_uni_dir ); } VERBOSE(1,-1,"Entering unique_dir_access, set it to %s", to ); 
} @@ -318,15 +323,15 @@ static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) } //create dirs - sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? mk_name : rm_name, itemNum); + sprintf(curr_item, "%s/dir.%s%" PRIu64, path, create ? o.mk_name : o.rm_name, itemNum); VERBOSE(3,5,"create_remove_items_helper (dirs %s): curr_item is '%s'", operation, curr_item); if (create) { - if (backend->mkdir(curr_item, DIRMODE, backend_options) == -1) { + if (o.backend->mkdir(curr_item, DIRMODE, o.backend_options) == -1) { FAIL("unable to create directory %s", curr_item); } } else { - if (backend->rmdir(curr_item, backend_options) == -1) { + if (o.backend->rmdir(curr_item, o.backend_options) == -1) { FAIL("unable to remove directory %s", curr_item); } } @@ -340,17 +345,17 @@ static void remove_file (const char *path, uint64_t itemNum) { } //remove files - sprintf(curr_item, "%s/file.%s"LLU"", path, rm_name, itemNum); + sprintf(curr_item, "%s/file.%s"LLU"", path, o.rm_name, itemNum); VERBOSE(3,5,"create_remove_items_helper (non-dirs remove): curr_item is '%s'", curr_item); - if (!(shared_file && rank != 0)) { - backend->delete (curr_item, backend_options); + if (!(o.shared_file && rank != 0)) { + o.backend->delete (curr_item, o.backend_options); } } void mdtest_verify_data(int item, char * buffer, size_t bytes){ if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){ VERBOSE(2, -1, "Error verifying first element for item: %d", item); - verification_error++; + o.verification_error++; } size_t i = bytes < 8 ? 
1 : 8; // the first byte @@ -358,7 +363,7 @@ void mdtest_verify_data(int item, char * buffer, size_t bytes){ for( ; i < bytes; i++){ if(buffer[i] != (char) (i + 1)){ VERBOSE(5, -1, "Error verifying byte %zu for item %d", i, item); - verification_error++; + o.verification_error++; break; } } @@ -373,22 +378,22 @@ static void create_file (const char *path, uint64_t itemNum) { } //create files - sprintf(curr_item, "%s/file.%s"LLU"", path, mk_name, itemNum); + sprintf(curr_item, "%s/file.%s"LLU"", path, o.mk_name, itemNum); VERBOSE(3,5,"create_remove_items_helper (non-dirs create): curr_item is '%s'", curr_item); - if (make_node) { + if (o.make_node) { int ret; VERBOSE(3,5,"create_remove_items_helper : mknod..." ); - ret = backend->mknod (curr_item); + ret = o.backend->mknod (curr_item); if (ret != 0) FAIL("unable to mknode file %s", curr_item); return; - } else if (collective_creates) { + } else if (o.collective_creates) { VERBOSE(3,5,"create_remove_items_helper (collective): open..." ); - aiori_fh = backend->open (curr_item, IOR_WRONLY | IOR_CREAT, backend_options); + aiori_fh = o.backend->open (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL == aiori_fh) FAIL("unable to open file %s", curr_item); @@ -396,42 +401,42 @@ static void create_file (const char *path, uint64_t itemNum) { * !collective_creates */ } else { - hints.filePerProc = !shared_file; + o.hints.filePerProc = ! o.shared_file; VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." ); - aiori_fh = backend->create (curr_item, IOR_WRONLY | IOR_CREAT, backend_options); + aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL == aiori_fh) FAIL("unable to create file %s", curr_item); } - if (write_bytes > 0) { + if (o.write_bytes > 0) { VERBOSE(3,5,"create_remove_items_helper: write..." ); /* * According to Bill Loewe, writes are only done one time, so they are always at * offset 0 (zero). 
*/ - hints.fsyncPerWrite = sync_file; - if(write_bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible - ((uint64_t*) write_buffer)[0] = itemNum; + o.hints.fsyncPerWrite = o.sync_file; + if(o.write_bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible + ((uint64_t*) o.write_buffer)[0] = itemNum; }else{ - write_buffer[0] = (char) itemNum; + o.write_buffer[0] = (char) itemNum; } - if ( write_bytes != (size_t) backend->xfer(WRITE, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, 0, backend_options)) { + if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { FAIL("unable to write file %s", curr_item); } - if (verify_write) { - write_buffer[0] = 42; - if (write_bytes != (size_t) backend->xfer(READ, aiori_fh, (IOR_size_t *) write_buffer, write_bytes, 0, backend_options)) { + if (o.verify_write) { + o.write_buffer[0] = 42; + if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { FAIL("unable to verify write (read/back) file %s", curr_item); } - mdtest_verify_data(itemNum, write_buffer, write_bytes); + mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes); } } VERBOSE(3,5,"create_remove_items_helper: close..." ); - backend->close (aiori_fh, backend_options); + o.backend->close (aiori_fh, o.backend_options); } /* helper for creating/removing items */ @@ -471,22 +476,22 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 continue; } - sprintf(curr_item, "%s/file.%s"LLU"", path, create ? mk_name : rm_name, itemNum+i); + sprintf(curr_item, "%s/file.%s"LLU"", path, create ? 
o.mk_name : o.rm_name, itemNum+i); VERBOSE(3,5,"create file: %s", curr_item); if (create) { aiori_fd_t *aiori_fh; //create files - aiori_fh = backend->create (curr_item, IOR_WRONLY | IOR_CREAT, backend_options); + aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL == aiori_fh) { FAIL("unable to create file %s", curr_item); } - backend->close (aiori_fh, backend_options); - } else if (!(shared_file && rank != 0)) { + o.backend->close (aiori_fh, o.backend_options); + } else if (!(o.shared_file && rank != 0)) { //remove files - backend->delete (curr_item, backend_options); + o.backend->delete (curr_item, o.backend_options); } if(CHECK_STONE_WALL(progress)){ progress->items_done = i + 1; @@ -515,7 +520,7 @@ void create_remove_items(int currDepth, const int dirs, const int create, const if (currDepth == 0) { /* create items at this depth */ - if (!leaf_only || (depth == 0 && leaf_only)) { + if (! o.leaf_only || (o.depth == 0 && o.leaf_only)) { if (collective) { collective_helper(dirs, create, temp_path, 0, progress); } else { @@ -523,28 +528,28 @@ void create_remove_items(int currDepth, const int dirs, const int create, const } } - if (depth > 0) { + if (o.depth > 0) { create_remove_items(++currDepth, dirs, create, collective, temp_path, ++dirNum, progress); } - } else if (currDepth <= depth) { + } else if (currDepth <= o.depth) { /* iterate through the branches */ - for (i=0; i 0) { //item is not in tree's root directory /* prepend parent directory to item's path */ - sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); + sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); strcpy(item, temp); //still not at the tree's root dir - while (parent_dir > branch_factor) { - parent_dir = (uint64_t) ((parent_dir-1) / branch_factor); - sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); + while (parent_dir > o.branch_factor) { + parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor); + 
sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); strcpy(item, temp); } } @@ -643,7 +648,7 @@ void mdtest_stat(const int random, const int dirs, const long dir_iter, const ch /* below temp used to be hiername */ VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item); - if (-1 == backend->stat (item, &buf, backend_options)) { + if (-1 == o.backend->stat (item, &buf, o.backend_options)) { FAIL("unable to stat %s %s", dirs ? "directory" : "file", item); } } @@ -659,17 +664,17 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { char *read_buffer; /* allocate read buffer */ - if (read_bytes > 0) { - int alloc_res = posix_memalign((void**)&read_buffer, sysconf(_SC_PAGESIZE), read_bytes); + if (o.read_bytes > 0) { + int alloc_res = posix_memalign((void**)&read_buffer, sysconf(_SC_PAGESIZE), o.read_bytes); if (alloc_res) { FAIL("out of memory"); } } - uint64_t stop_items = items; + uint64_t stop_items = o.items; - if( directory_loops != 1 ){ - stop_items = items_per_dir; + if( o.directory_loops != 1 ){ + stop_items = o.items_per_dir; } /* iterate over all of the item IDs */ @@ -688,15 +693,15 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { /* determine the item number to read */ if (random) { - item_num = rand_array[i]; + item_num = o.rand_array[i]; } else { item_num = i; } /* make adjustments if in leaf only mode*/ - if (leaf_only) { - item_num += items_per_dir * - (num_dirs_in_tree - (uint64_t) pow (branch_factor, depth)); + if (o.leaf_only) { + item_num += o.items_per_dir * + (o.num_dirs_in_tree - (uint64_t) pow (o.branch_factor, o.depth)); } /* create name of file to read */ @@ -704,22 +709,22 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { if ((i%ITEM_COUNT == 0) && (i != 0)) { VERBOSE(3,5,"read file: "LLU"", i); } - sprintf(item, "file.%s"LLU"", read_name, item_num); + sprintf(item, "file.%s"LLU"", o.read_name, item_num); } /* determine the path to the 
file/dir to be read'ed */ - parent_dir = item_num / items_per_dir; + parent_dir = item_num / o.items_per_dir; if (parent_dir > 0) { //item is not in tree's root directory /* prepend parent directory to item's path */ - sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); + sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); strcpy(item, temp); /* still not at the tree's root dir */ - while (parent_dir > branch_factor) { - parent_dir = (unsigned long long) ((parent_dir-1) / branch_factor); - sprintf(temp, "%s."LLU"/%s", base_tree_name, parent_dir, item); + while (parent_dir > o.branch_factor) { + parent_dir = (unsigned long long) ((parent_dir-1) / o.branch_factor); + sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); strcpy(item, temp); } } @@ -732,29 +737,29 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { VERBOSE(3,5,"mdtest_read file: %s", item); /* open file for reading */ - aiori_fh = backend->open (item, O_RDONLY, backend_options); + aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options); if (NULL == aiori_fh) { FAIL("unable to open file %s", item); } /* read file */ - if (read_bytes > 0) { + if (o.read_bytes > 0) { read_buffer[0] = 42; - if (read_bytes != (size_t) backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, read_bytes, 0, backend_options)) { + if (o.read_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, o.read_bytes, 0, o.backend_options)) { FAIL("unable to read file %s", item); } - if(verify_read){ - mdtest_verify_data(item_num, read_buffer, read_bytes); - }else if((read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (read_bytes < 8 && read_buffer[0] != (char) item_num)){ + if(o.verify_read){ + mdtest_verify_data(item_num, read_buffer, o.read_bytes); + }else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){ // do a lightweight check, which cost is 
neglectable - verification_error++; + o.verification_error++; } } /* close file */ - backend->close (aiori_fh, backend_options); + o.backend->close (aiori_fh, o.backend_options); } - if(read_bytes){ + if(o.read_bytes){ free(read_buffer); } } @@ -770,40 +775,40 @@ void collective_create_remove(const int create, const int dirs, const int ntasks for (int i = 0 ; i < ntasks ; ++i) { memset(temp, 0, MAX_PATHLEN); - strcpy(temp, testdir); + strcpy(temp, o.testdir); strcat(temp, "/"); /* set the base tree name appropriately */ - if (unique_dir_per_task) { - sprintf(base_tree_name, "mdtest_tree.%d", i); + if (o.unique_dir_per_task) { + sprintf(o.base_tree_name, "mdtest_tree.%d", i); } else { - sprintf(base_tree_name, "mdtest_tree"); + sprintf(o.base_tree_name, "mdtest_tree"); } /* Setup to do I/O to the appropriate test dir */ - strcat(temp, base_tree_name); + strcat(temp, o.base_tree_name); strcat(temp, ".0"); /* set all item names appropriately */ - if (!shared_file) { - sprintf(mk_name, "mdtest.%d.", (i+(0*nstride))%ntasks); - sprintf(stat_name, "mdtest.%d.", (i+(1*nstride))%ntasks); - sprintf(read_name, "mdtest.%d.", (i+(2*nstride))%ntasks); - sprintf(rm_name, "mdtest.%d.", (i+(3*nstride))%ntasks); + if (! 
o.shared_file) { + sprintf(o.mk_name, "mdtest.%d.", (i+(0*o.nstride))%ntasks); + sprintf(o.stat_name, "mdtest.%d.", (i+(1*o.nstride))%ntasks); + sprintf(o.read_name, "mdtest.%d.", (i+(2*o.nstride))%ntasks); + sprintf(o.rm_name, "mdtest.%d.", (i+(3*o.nstride))%ntasks); } - if (unique_dir_per_task) { - VERBOSE(3,5,"i %d nstride %d ntasks %d", i, nstride, ntasks); - sprintf(unique_mk_dir, "%s/mdtest_tree.%d.0", testdir, - (i+(0*nstride))%ntasks); - sprintf(unique_chdir_dir, "%s/mdtest_tree.%d.0", testdir, - (i+(1*nstride))%ntasks); - sprintf(unique_stat_dir, "%s/mdtest_tree.%d.0", testdir, - (i+(2*nstride))%ntasks); - sprintf(unique_read_dir, "%s/mdtest_tree.%d.0", testdir, - (i+(3*nstride))%ntasks); - sprintf(unique_rm_dir, "%s/mdtest_tree.%d.0", testdir, - (i+(4*nstride))%ntasks); - sprintf(unique_rm_uni_dir, "%s", testdir); + if (o.unique_dir_per_task) { + VERBOSE(3,5,"i %d nstride %d ntasks %d", i, o.nstride, ntasks); + sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir, + (i+(0*o.nstride))%ntasks); + sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir, + (i+(1*o.nstride))%ntasks); + sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir, + (i+(2*o.nstride))%ntasks); + sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir, + (i+(3*o.nstride))%ntasks); + sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir, + (i+(4*o.nstride))%ntasks); + sprintf(o.unique_rm_uni_dir, "%s", o.testdir); } /* Now that everything is set up as it should be, do the create or remove */ @@ -813,29 +818,29 @@ void collective_create_remove(const int create, const int dirs, const int ntasks } /* reset all of the item names */ - if (unique_dir_per_task) { - sprintf(base_tree_name, "mdtest_tree.0"); + if (o.unique_dir_per_task) { + sprintf(o.base_tree_name, "mdtest_tree.0"); } else { - sprintf(base_tree_name, "mdtest_tree"); + sprintf(o.base_tree_name, "mdtest_tree"); } - if (!shared_file) { - sprintf(mk_name, "mdtest.%d.", (0+(0*nstride))%ntasks); - 
sprintf(stat_name, "mdtest.%d.", (0+(1*nstride))%ntasks); - sprintf(read_name, "mdtest.%d.", (0+(2*nstride))%ntasks); - sprintf(rm_name, "mdtest.%d.", (0+(3*nstride))%ntasks); + if (! o.shared_file) { + sprintf(o.mk_name, "mdtest.%d.", (0+(0*o.nstride))%ntasks); + sprintf(o.stat_name, "mdtest.%d.", (0+(1*o.nstride))%ntasks); + sprintf(o.read_name, "mdtest.%d.", (0+(2*o.nstride))%ntasks); + sprintf(o.rm_name, "mdtest.%d.", (0+(3*o.nstride))%ntasks); } - if (unique_dir_per_task) { - sprintf(unique_mk_dir, "%s/mdtest_tree.%d.0", testdir, - (0+(0*nstride))%ntasks); - sprintf(unique_chdir_dir, "%s/mdtest_tree.%d.0", testdir, - (0+(1*nstride))%ntasks); - sprintf(unique_stat_dir, "%s/mdtest_tree.%d.0", testdir, - (0+(2*nstride))%ntasks); - sprintf(unique_read_dir, "%s/mdtest_tree.%d.0", testdir, - (0+(3*nstride))%ntasks); - sprintf(unique_rm_dir, "%s/mdtest_tree.%d.0", testdir, - (0+(4*nstride))%ntasks); - sprintf(unique_rm_uni_dir, "%s", testdir); + if (o.unique_dir_per_task) { + sprintf(o.unique_mk_dir, "%s/mdtest_tree.%d.0", o.testdir, + (0+(0*o.nstride))%ntasks); + sprintf(o.unique_chdir_dir, "%s/mdtest_tree.%d.0", o.testdir, + (0+(1*o.nstride))%ntasks); + sprintf(o.unique_stat_dir, "%s/mdtest_tree.%d.0", o.testdir, + (0+(2*o.nstride))%ntasks); + sprintf(o.unique_read_dir, "%s/mdtest_tree.%d.0", o.testdir, + (0+(3*o.nstride))%ntasks); + sprintf(o.unique_rm_dir, "%s/mdtest_tree.%d.0", o.testdir, + (0+(4*o.nstride))%ntasks); + sprintf(o.unique_rm_uni_dir, "%s", o.testdir); } } @@ -852,25 +857,25 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran t[0] = GetTimeStamp(); /* create phase */ - if(create_only) { - progress->stone_wall_timer_seconds = stone_wall_timer_seconds; + if(o.create_only) { + progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; progress->items_done = 0; progress->start_time = GetTimeStamp(); - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < 
o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(MK_UNI_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (! o.time_unique_dir_overhead) { offset_timers(t, 0); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,-1,"directory_test: create path is '%s'", temp_path ); /* "touch" the files */ - if (collective_creates) { + if (o.collective_creates) { if (rank == 0) { collective_create_remove(1, 1, ntasks, temp_path, progress); } @@ -886,22 +891,22 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran t[1] = GetTimeStamp(); /* stat phase */ - if (stat_only) { - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + if (o.stat_only) { + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(STAT_SUB_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (! o.time_unique_dir_overhead) { offset_timers(t, 1); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"stat path is '%s'", temp_path ); /* stat directories */ - if (random_seed > 0) { + if (o.random_seed > 0) { mdtest_stat(1, 1, dir_iter, temp_path, progress); } else { mdtest_stat(0, 1, dir_iter, temp_path, progress); @@ -912,22 +917,22 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran t[2] = GetTimeStamp(); /* read phase */ - if (read_only) { - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + if (o.read_only) { + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(READ_SUB_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (! 
o.time_unique_dir_overhead) { offset_timers(t, 2); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"directory_test: read path is '%s'", temp_path ); /* read directories */ - if (random_seed > 0) { + if (o.random_seed > 0) { ; /* N/A */ } else { ; /* N/A */ @@ -938,22 +943,22 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran phase_end(); t[3] = GetTimeStamp(); - if (remove_only) { - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + if (o.remove_only) { + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(RM_SUB_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (!o.time_unique_dir_overhead) { offset_timers(t, 3); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"directory_test: remove directories path is '%s'", temp_path ); /* remove directories */ - if (collective_creates) { + if (o.collective_creates) { if (rank == 0) { collective_create_remove(0, 1, ntasks, temp_path, progress); } @@ -966,52 +971,52 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran phase_end(); t[4] = GetTimeStamp(); - if (remove_only) { - if (unique_dir_per_task) { + if (o.remove_only) { + if (o.unique_dir_per_task) { unique_dir_access(RM_UNI_DIR, temp_path); } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"directory_test: remove unique directories path is '%s'\n", temp_path ); } - if (unique_dir_per_task && !time_unique_dir_overhead) { + if (o.unique_dir_per_task && ! 
o.time_unique_dir_overhead) { offset_timers(t, 4); } /* calculate times */ - if (create_only) { - summary_table[iteration].rate[0] = items*size/(t[1] - t[0]); - summary_table[iteration].time[0] = t[1] - t[0]; - summary_table[iteration].items[0] = items*size; - summary_table[iteration].stonewall_last_item[0] = items; + if (o.create_only) { + o.summary_table[iteration].rate[0] = o.items*size/(t[1] - t[0]); + o.summary_table[iteration].time[0] = t[1] - t[0]; + o.summary_table[iteration].items[0] = o.items*size; + o.summary_table[iteration].stonewall_last_item[0] = o.items; } - if (stat_only) { - summary_table[iteration].rate[1] = items*size/(t[2] - t[1]); - summary_table[iteration].time[1] = t[2] - t[1]; - summary_table[iteration].items[1] = items*size; - summary_table[iteration].stonewall_last_item[1] = items; + if (o.stat_only) { + o.summary_table[iteration].rate[1] = o.items*size/(t[2] - t[1]); + o.summary_table[iteration].time[1] = t[2] - t[1]; + o.summary_table[iteration].items[1] = o.items*size; + o.summary_table[iteration].stonewall_last_item[1] = o.items; } - if (read_only) { - summary_table[iteration].rate[2] = items*size/(t[3] - t[2]); - summary_table[iteration].time[2] = t[3] - t[2]; - summary_table[iteration].items[2] = items*size; - summary_table[iteration].stonewall_last_item[2] = items; + if (o.read_only) { + o.summary_table[iteration].rate[2] = o.items*size/(t[3] - t[2]); + o.summary_table[iteration].time[2] = t[3] - t[2]; + o.summary_table[iteration].items[2] = o.items*size; + o.summary_table[iteration].stonewall_last_item[2] = o.items; } - if (remove_only) { - summary_table[iteration].rate[3] = items*size/(t[4] - t[3]); - summary_table[iteration].time[3] = t[4] - t[3]; - summary_table[iteration].items[3] = items*size; - summary_table[iteration].stonewall_last_item[3] = items; + if (o.remove_only) { + o.summary_table[iteration].rate[3] = o.items*size/(t[4] - t[3]); + o.summary_table[iteration].time[3] = t[4] - t[3]; + 
o.summary_table[iteration].items[3] = o.items*size; + o.summary_table[iteration].stonewall_last_item[3] = o.items; } - VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[0]); - VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[1]); + VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[0]); + VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[1]); /* N/A - VERBOSE(1,-1," Directory read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[2]); + VERBOSE(1,-1," Directory read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], o.summary_table[iteration].rate[2]); */ - VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[3]); + VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[3]); } /* Returns if the stonewall was hit */ @@ -1021,7 +1026,7 @@ int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, VERBOSE(1,1,"stonewall hit with %lld items", (long long) items_done ); MPI_Allreduce(& items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); - summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; + o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; *out_max_iter = max_iter; // continue to the maximum... 
@@ -1029,17 +1034,69 @@ int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, MPI_Reduce(& items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm); long long sum_accessed = 0; MPI_Reduce(& items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm); - summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; - summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size; + o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed; + o.summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * o.size; - if(items != (sum_accessed / size)){ - VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size); + if(o.items != (sum_accessed / o.size)){ + VERBOSE(0,-1, "Continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / o.size); hit = 1; } return hit; } +void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t){ + char temp_path[MAX_PATHLEN]; + int cur_dir_loops = o.directory_loops; + for (int dir_iter = 0; dir_iter < cur_dir_loops; dir_iter ++){ + prep_testdir(iteration, dir_iter); + + if (o.unique_dir_per_task) { + unique_dir_access(MK_UNI_DIR, temp_path); + VERBOSE(5,5,"operating on %s", temp_path); + if (! 
o.time_unique_dir_overhead) { + offset_timers(t, 0); + } + } else { + sprintf( temp_path, "%s/%s", o.testdir, path ); + } + + VERBOSE(3,-1,"file_test: create path is '%s'", temp_path ); + /* "touch" the files */ + if (o.collective_creates) { + if (rank == 0) { + collective_create_remove(1, 0, ntasks, temp_path, progress); + } + MPI_Barrier(testComm); + } + + /* create files */ + create_remove_items(0, 0, 1, 0, temp_path, 0, progress); + if(o.stone_wall_timer_seconds){ + // hit the stonewall + uint64_t max_iter = 0; + uint64_t items_done = progress->items_done + dir_iter * o.items_per_dir; + int hit = updateStoneWallIterations(iteration, items_done, t[0], & max_iter); + progress->items_start = items_done; + progress->items_per_dir = max_iter; + if (hit){ + progress->stone_wall_timer_seconds = 0; + VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir); + create_remove_items(0, 0, 1, 0, temp_path, 0, progress); + // now reset the values + progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; + o.items = progress->items_done; + } + if (o.stoneWallingStatusFile){ + StoreStoneWallingIterations(o.stoneWallingStatusFile, max_iter); + } + // reset stone wall timer to allow proper cleanup + progress->stone_wall_timer_seconds = 0; + break; + } + } +} + void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { int size; double t[5] = {0}; @@ -1052,72 +1109,25 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro t[0] = GetTimeStamp(); /* create phase */ - if (create_only ) { - progress->stone_wall_timer_seconds = stone_wall_timer_seconds; + if (o.create_only ) { + progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; progress->items_done = 0; progress->start_time = GetTimeStamp(); - - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ - prep_testdir(iteration, dir_iter); - - if (unique_dir_per_task) { - 
unique_dir_access(MK_UNI_DIR, temp_path); - VERBOSE(5,5,"operating on %s", temp_path); - if (!time_unique_dir_overhead) { - offset_timers(t, 0); - } - } else { - sprintf( temp_path, "%s/%s", testdir, path ); - } - - VERBOSE(3,-1,"file_test: create path is '%s'", temp_path ); - - /* "touch" the files */ - if (collective_creates) { - if (rank == 0) { - collective_create_remove(1, 0, ntasks, temp_path, progress); - } - MPI_Barrier(testComm); - } - - /* create files */ - create_remove_items(0, 0, 1, 0, temp_path, 0, progress); - if(stone_wall_timer_seconds){ - uint64_t max_iter = 0; - uint64_t items_done = progress->items_done + dir_iter * items_per_dir; - int hit = updateStoneWallIterations(iteration, items_done, t[0], & max_iter); - progress->items_start = items_done; - progress->items_per_dir = max_iter; - - if (hit){ - progress->stone_wall_timer_seconds = 0; - VERBOSE(1,1,"stonewall: %lld of %lld", (long long) progress->items_start, (long long) progress->items_per_dir); - create_remove_items(0, 0, 1, 0, temp_path, 0, progress); - // now reset the values - progress->stone_wall_timer_seconds = stone_wall_timer_seconds; - items = progress->items_done; - } - if (stoneWallingStatusFile){ - StoreStoneWallingIterations(stoneWallingStatusFile, max_iter); - } - // reset stone wall timer to allow proper cleanup - progress->stone_wall_timer_seconds = 0; - } - } + file_test_create(iteration, ntasks, path, progress, t); }else{ - if (stoneWallingStatusFile){ + if (o.stoneWallingStatusFile){ int64_t expected_items; /* The number of items depends on the stonewalling file */ - expected_items = ReadStoneWallingIterations(stoneWallingStatusFile); + expected_items = ReadStoneWallingIterations(o.stoneWallingStatusFile); if(expected_items >= 0){ - items = expected_items; - progress->items_per_dir = items; + o.items = expected_items; + progress->items_per_dir = o.items; } if (rank == 0) { if(expected_items == -1){ WARN("Could not read stonewall status file"); }else { - VERBOSE(1,1, "Read 
stonewall status; items: "LLU"\n", items); + VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", o.items); } } } @@ -1127,22 +1137,22 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro t[1] = GetTimeStamp(); /* stat phase */ - if (stat_only ) { - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + if (o.stat_only ) { + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(STAT_SUB_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (!o.time_unique_dir_overhead) { offset_timers(t, 1); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"file_test: stat path is '%s'", temp_path ); /* stat files */ - mdtest_stat((random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress); + mdtest_stat((o.random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress); } } @@ -1150,22 +1160,22 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro t[2] = GetTimeStamp(); /* read phase */ - if (read_only ) { - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + if (o.read_only ) { + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(READ_SUB_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (! 
o.time_unique_dir_overhead) { offset_timers(t, 2); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"file_test: read path is '%s'", temp_path ); /* read files */ - if (random_seed > 0) { + if (o.random_seed > 0) { mdtest_read(1,0, dir_iter, temp_path); } else { mdtest_read(0,0, dir_iter, temp_path); @@ -1176,23 +1186,23 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro phase_end(); t[3] = GetTimeStamp(); - if (remove_only) { + if (o.remove_only) { progress->items_start = 0; - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); - if (unique_dir_per_task) { + if (o.unique_dir_per_task) { unique_dir_access(RM_SUB_DIR, temp_path); - if (!time_unique_dir_overhead) { + if (! o.time_unique_dir_overhead) { offset_timers(t, 3); } } else { - sprintf( temp_path, "%s/%s", testdir, path ); + sprintf( temp_path, "%s/%s", o.testdir, path ); } VERBOSE(3,5,"file_test: rm directories path is '%s'", temp_path ); - if (collective_creates) { + if (o.collective_creates) { if (rank == 0) { collective_create_remove(0, 0, ntasks, temp_path, progress); } @@ -1205,8 +1215,8 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro phase_end(); t[4] = GetTimeStamp(); - if (remove_only) { - if (unique_dir_per_task) { + if (o.remove_only) { + if (o.unique_dir_per_task) { unique_dir_access(RM_UNI_DIR, temp_path); } else { strcpy( temp_path, path ); @@ -1215,47 +1225,47 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro VERBOSE(3,5,"file_test: rm unique directories path is '%s'", temp_path ); } - if (unique_dir_per_task && !time_unique_dir_overhead) { + if (o.unique_dir_per_task && ! 
o.time_unique_dir_overhead) { offset_timers(t, 4); } - if(num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */ - items *= num_dirs_in_tree_calc; + if(o.num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */ + o.items *= o.num_dirs_in_tree_calc; } /* calculate times */ - if (create_only) { - summary_table[iteration].rate[4] = items*size/(t[1] - t[0]); - summary_table[iteration].time[4] = t[1] - t[0]; - summary_table[iteration].items[4] = items*size; - summary_table[iteration].stonewall_last_item[4] = items; + if (o.create_only) { + o.summary_table[iteration].rate[4] = o.items*size/(t[1] - t[0]); + o.summary_table[iteration].time[4] = t[1] - t[0]; + o.summary_table[iteration].items[4] = o.items*o.size; + o.summary_table[iteration].stonewall_last_item[4] = o.items; } - if (stat_only) { - summary_table[iteration].rate[5] = items*size/(t[2] - t[1]); - summary_table[iteration].time[5] = t[2] - t[1]; - summary_table[iteration].items[5] = items*size; - summary_table[iteration].stonewall_last_item[5] = items; + if (o.stat_only) { + o.summary_table[iteration].rate[5] = o.items*size/(t[2] - t[1]); + o.summary_table[iteration].time[5] = t[2] - t[1]; + o.summary_table[iteration].items[5] = o.items*o.size; + o.summary_table[iteration].stonewall_last_item[5] = o.items; } - if (read_only) { - summary_table[iteration].rate[6] = items*size/(t[3] - t[2]); - summary_table[iteration].time[6] = t[3] - t[2]; - summary_table[iteration].items[6] = items*size; - summary_table[iteration].stonewall_last_item[6] = items; + if (o.read_only) { + o.summary_table[iteration].rate[6] = o.items*o.size/(t[3] - t[2]); + o.summary_table[iteration].time[6] = t[3] - t[2]; + o.summary_table[iteration].items[6] = o.items*o.size; + o.summary_table[iteration].stonewall_last_item[6] = o.items; } - if (remove_only) { - summary_table[iteration].rate[7] = items*size/(t[4] - t[3]); - summary_table[iteration].time[7] = t[4] - t[3]; - 
summary_table[iteration].items[7] = items*size; - summary_table[iteration].stonewall_last_item[7] = items; + if (o.remove_only) { + o.summary_table[iteration].rate[7] = o.items*o.size/(t[4] - t[3]); + o.summary_table[iteration].time[7] = t[4] - t[3]; + o.summary_table[iteration].items[7] = o.items*o.size; + o.summary_table[iteration].stonewall_last_item[7] = o.items; } - VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], summary_table[iteration].rate[4]); - if(summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ - VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); + VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[4]); + if(o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ + VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); } - VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], summary_table[iteration].rate[5]); - VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], summary_table[iteration].rate[6]); - VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], summary_table[iteration].rate[7]); + VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[5]); + VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], o.summary_table[iteration].rate[6]); + VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[7]); } char const * mdtest_test_name(int i){ @@ -1281,7 +1291,7 @@ void summarize_results(int iterations, int print_time) { int start, stop, tableSize = MDTEST_LAST_NUM; double min, max, mean, sd, 
sum = 0, var = 0, curr = 0; - double all[iterations * size * tableSize]; + double all[iterations * o.size * tableSize]; VERBOSE(1,-1,"Entering summarize_results..." ); @@ -1289,9 +1299,9 @@ void summarize_results(int iterations, int print_time) { MPI_Barrier(testComm); for(int i=0; i < iterations; i++){ if(print_time){ - MPI_Gather(& summary_table[i].time[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); + MPI_Gather(& o.summary_table[i].time[0], tableSize, MPI_DOUBLE, & all[i*tableSize * o.size], tableSize, MPI_DOUBLE, 0, testComm); }else{ - MPI_Gather(& summary_table[i].rate[0], tableSize, MPI_DOUBLE, & all[i*tableSize*size], tableSize, MPI_DOUBLE, 0, testComm); + MPI_Gather(& o.summary_table[i].rate[0], tableSize, MPI_DOUBLE, & all[i*tableSize * o.size], tableSize, MPI_DOUBLE, 0, testComm); } } @@ -1300,26 +1310,26 @@ void summarize_results(int iterations, int print_time) { } /* if files only access, skip entries 0-3 (the dir tests) */ - if (files_only && !dirs_only) { + if (o.files_only && ! o.dirs_only) { start = 4; } else { start = 0; } /* if directories only access, skip entries 4-7 (the file tests) */ - if (dirs_only && !files_only) { + if (o.dirs_only && !o.files_only) { stop = 4; } else { stop = 8; } /* special case: if no directory or file tests, skip all */ - if (!dirs_only && !files_only) { + if (!o.dirs_only && !o.files_only) { start = stop = 0; } - if(print_all_proc){ + if(o.print_all_proc){ fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? "time" : "rate"); for (j = 0; j < iterations; j++) { fprintf(out_logfile, "iteration: %d\n", j); @@ -1329,7 +1339,7 @@ void summarize_results(int iterations, int print_time) { continue; } fprintf(out_logfile, "Test %s", access); - for (k=0; k < size; k++) { + for (k=0; k < o.size; k++) { curr = all[(k*tableSize*iterations) + (j*tableSize) + i]; fprintf(out_logfile, "%c%e", (k==0 ? 
' ': ','), curr); } @@ -1344,7 +1354,7 @@ void summarize_results(int iterations, int print_time) { for (i = start; i < stop; i++) { min = max = all[i]; - for (k=0; k < size; k++) { + for (k=0; k < o.size; k++) { for (j = 0; j < iterations; j++) { curr = all[(k*tableSize*iterations) + (j*tableSize) + i]; @@ -1357,14 +1367,14 @@ void summarize_results(int iterations, int print_time) { sum += curr; } } - mean = sum / (iterations * size); - for (k=0; k curr) { @@ -1414,9 +1424,9 @@ void summarize_results(int iterations, int print_time) { mean = sum / (iterations); for (j = 0; j < iterations; j++) { if(print_time){ - curr = summary_table[j].time[i]; + curr = o.summary_table[j].time[i]; }else{ - curr = summary_table[j].rate[i]; + curr = o.summary_table[j].rate[i]; } var += pow((mean - curr), 2); @@ -1437,25 +1447,25 @@ void summarize_results(int iterations, int print_time) { /* Checks to see if the test setup is valid. If it isn't, fail. */ void md_validate_tests() { - if (((stone_wall_timer_seconds > 0) && (branch_factor > 1)) || ! barriers) { - FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", branch_factor); + if (((o.stone_wall_timer_seconds > 0) && (o.branch_factor > 1)) || ! o.barriers) { + FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", o.branch_factor); } - if (!create_only && !stat_only && !read_only && !remove_only) { - create_only = stat_only = read_only = remove_only = 1; + if (!o.create_only && ! o.stat_only && ! o.read_only && !o.remove_only) { + o.create_only = o.stat_only = o.read_only = o.remove_only = 1; VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" ); } VERBOSE(1,-1,"Entering md_validate_tests..." 
); /* if dirs_only and files_only were both left unset, set both now */ - if (!dirs_only && !files_only) { - dirs_only = files_only = 1; + if (!o.dirs_only && !o.files_only) { + o.dirs_only = o.files_only = 1; } /* if shared file 'S' access, no directory tests */ - if (shared_file) { - dirs_only = 0; + if (o.shared_file) { + o.dirs_only = 0; } /* check for no barriers with shifting processes for different phases. @@ -1463,72 +1473,72 @@ void md_validate_tests() { race conditions that may cause errors stat'ing or deleting after creates. */ - if (( barriers == 0 ) && ( nstride != 0 ) && ( rank == 0 )) { + if (( o.barriers == 0 ) && ( o.nstride != 0 ) && ( rank == 0 )) { FAIL( "Possible race conditions will occur: -B not compatible with -N"); } /* check for collective_creates incompatibilities */ - if (shared_file && collective_creates && rank == 0) { + if (o.shared_file && o.collective_creates && rank == 0) { FAIL("-c not compatible with -S"); } - if (path_count > 1 && collective_creates && rank == 0) { + if (o.path_count > 1 && o.collective_creates && rank == 0) { FAIL("-c not compatible with multiple test directories"); } - if (collective_creates && !barriers) { + if (o.collective_creates && !o.barriers) { FAIL("-c not compatible with -B"); } /* check for shared file incompatibilities */ - if (unique_dir_per_task && shared_file && rank == 0) { + if (o.unique_dir_per_task && o.shared_file && rank == 0) { FAIL("-u not compatible with -S"); } /* check multiple directory paths and strided option */ - if (path_count > 1 && nstride > 0) { + if (o.path_count > 1 && o.nstride > 0) { FAIL("cannot have multiple directory paths with -N strides between neighbor tasks"); } /* check for shared directory and multiple directories incompatibility */ - if (path_count > 1 && unique_dir_per_task != 1) { + if (o.path_count > 1 && o.unique_dir_per_task != 1) { FAIL("shared directory mode is not compatible with multiple directory paths"); } /* check if more directory paths than ranks */ 
- if (path_count > size) { + if (o.path_count > o.size) { FAIL("cannot have more directory paths than MPI tasks"); } /* check depth */ - if (depth < 0) { + if (o.depth < 0) { FAIL("depth must be greater than or equal to zero"); } /* check branch_factor */ - if (branch_factor < 1 && depth > 0) { + if (o.branch_factor < 1 && o.depth > 0) { FAIL("branch factor must be greater than or equal to zero"); } /* check for valid number of items */ - if ((items > 0) && (items_per_dir > 0)) { - if(unique_dir_per_task){ + if ((o.items > 0) && (o.items_per_dir > 0)) { + if(o.unique_dir_per_task){ FAIL("only specify the number of items or the number of items per directory"); - }else if( items % items_per_dir != 0){ + }else if( o.items % o.items_per_dir != 0){ FAIL("items must be a multiple of items per directory"); } } /* check for using mknod */ - if (write_bytes > 0 && make_node) { + if (o.write_bytes > 0 && o.make_node) { FAIL("-k not compatible with -w"); } - if(verify_read && ! read_only) + if(o.verify_read && ! 
o.read_only) FAIL("Verify read requires that the read test is used"); - if(verify_read && read_bytes <= 0) + if(o.verify_read && o.read_bytes <= 0) FAIL("Verify read requires that read bytes is > 0"); - if(read_only && read_bytes <= 0) + if(o.read_only && o.read_bytes <= 0) WARN("Read bytes is 0, thus, a read test will actually just open/close"); - if(create_only && read_only && read_bytes > write_bytes) + if(o.create_only && o.read_only && o.read_bytes > o.write_bytes) FAIL("When writing and reading files, read bytes must be smaller than write bytes"); } @@ -1550,7 +1560,7 @@ void show_file_system_size(char *file_system) { VERBOSE(1,-1,"Entering show_file_system_size on %s", file_system ); - ret = backend->statfs (file_system, &stat_buf, backend_options); + ret = o.backend->statfs (file_system, &stat_buf, o.backend_options); if (0 != ret) { FAIL("unable to stat file system %s", file_system); } @@ -1598,16 +1608,16 @@ void create_remove_directory_tree(int create, VERBOSE(1,5,"Entering create_remove_directory_tree on %s, currDepth = %d...", path, currDepth ); if (currDepth == 0) { - sprintf(dir, "%s/%s.%d/", path, base_tree_name, dirNum); + sprintf(dir, "%s/%s.%d/", path, o.base_tree_name, dirNum); if (create) { VERBOSE(2,5,"Making directory '%s'", dir); - if (-1 == backend->mkdir (dir, DIRMODE, backend_options)) { + if (-1 == o.backend->mkdir (dir, DIRMODE, o.backend_options)) { fprintf(out_logfile, "error could not create directory '%s'\n", dir); } #ifdef HAVE_LUSTRE_LUSTREAPI /* internal node for branching, can be non-striped for children */ - if (global_dir_layout && \ + if (o.global_dir_layout && \ llapi_dir_set_default_lmv_stripe(dir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { @@ -1620,34 +1630,34 @@ void create_remove_directory_tree(int create, if (!create) { VERBOSE(2,5,"Remove directory '%s'", dir); - if (-1 == backend->rmdir(dir, backend_options)) { + if (-1 == o.backend->rmdir(dir, o.backend_options)) { FAIL("Unable to remove directory %s", dir); } } 
- } else if (currDepth <= depth) { + } else if (currDepth <= o.depth) { char temp_path[MAX_PATHLEN]; strcpy(temp_path, path); int currDir = dirNum; - for (i=0; imkdir(temp_path, DIRMODE, backend_options)) { + if (-1 == o.backend->mkdir(temp_path, DIRMODE, o.backend_options)) { FAIL("Unable to create directory %s", temp_path); } } create_remove_directory_tree(create, ++currDepth, - temp_path, (branch_factor*currDir)+1, progress); + temp_path, (o.branch_factor*currDir)+1, progress); currDepth--; if (!create) { VERBOSE(2,5,"Remove directory '%s'", temp_path); - if (-1 == backend->rmdir(temp_path, backend_options)) { + if (-1 == o.backend->rmdir(temp_path, o.backend_options)) { FAIL("Unable to remove directory %s", temp_path); } } @@ -1662,7 +1672,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t rank_progress_t progress_o; memset(& progress_o, 0 , sizeof(progress_o)); progress_o.stone_wall_timer_seconds = 0; - progress_o.items_per_dir = items_per_dir; + progress_o.items_per_dir = o.items_per_dir; rank_progress_t * progress = & progress_o; /* start and end times of directory tree create/remove */ @@ -1671,197 +1681,196 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t VERBOSE(1,-1,"main: * iteration %d *", j+1); - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); - VERBOSE(2,5,"main (for j loop): making testdir, '%s'", testdir ); - if ((rank < path_count) && backend->access(testdir, F_OK, backend_options) != 0) { - if (backend->mkdir(testdir, DIRMODE, backend_options) != 0) { - FAIL("Unable to create test directory %s", testdir); + VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir ); + if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) != 0) { + if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) { + FAIL("Unable to create test 
directory %s", o.testdir); } #ifdef HAVE_LUSTRE_LUSTREAPI /* internal node for branching, can be non-striped for children */ - if (global_dir_layout && unique_dir_per_task && llapi_dir_set_default_lmv_stripe(testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { + if (o.global_dir_layout && unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { FAIL("Unable to reset to global default directory layout"); } #endif /* HAVE_LUSTRE_LUSTREAPI */ } } - if (create_only) { + if (o.create_only) { /* create hierarchical directory structure */ MPI_Barrier(testComm); startCreate = GetTimeStamp(); - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); - if (unique_dir_per_task) { - if (collective_creates && (rank == 0)) { + if (o.unique_dir_per_task) { + if (o.collective_creates && (rank == 0)) { /* * This is inside two loops, one of which already uses "i" and the other uses "j". * I don't know how this ever worked. I'm changing this loop to use "k". 
*/ - for (k=0; krate[8] = - num_dirs_in_tree / (endCreate - startCreate); + summary_table->rate[8] = o.num_dirs_in_tree / (endCreate - startCreate); summary_table->time[8] = (endCreate - startCreate); - summary_table->items[8] = num_dirs_in_tree; - summary_table->stonewall_last_item[8] = num_dirs_in_tree; + summary_table->items[8] = o.num_dirs_in_tree; + summary_table->stonewall_last_item[8] = o.num_dirs_in_tree; VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[8]); } - sprintf(unique_mk_dir, "%s.0", base_tree_name); - sprintf(unique_chdir_dir, "%s.0", base_tree_name); - sprintf(unique_stat_dir, "%s.0", base_tree_name); - sprintf(unique_read_dir, "%s.0", base_tree_name); - sprintf(unique_rm_dir, "%s.0", base_tree_name); - unique_rm_uni_dir[0] = 0; + sprintf(o.unique_mk_dir, "%s.0", o.base_tree_name); + sprintf(o.unique_chdir_dir, "%s.0", o.base_tree_name); + sprintf(o.unique_stat_dir, "%s.0", o.base_tree_name); + sprintf(o.unique_read_dir, "%s.0", o.base_tree_name); + sprintf(o.unique_rm_dir, "%s.0", o.base_tree_name); + o.unique_rm_uni_dir[0] = 0; - if (!unique_dir_per_task) { - VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", unique_mk_dir ); + if (! o.unique_dir_per_task) { + VERBOSE(3,-1,"V-3: main: Using unique_mk_dir, '%s'", o.unique_mk_dir ); } if (rank < i) { - if (!shared_file) { - sprintf(mk_name, "mdtest.%d.", (rank+(0*nstride))%i); - sprintf(stat_name, "mdtest.%d.", (rank+(1*nstride))%i); - sprintf(read_name, "mdtest.%d.", (rank+(2*nstride))%i); - sprintf(rm_name, "mdtest.%d.", (rank+(3*nstride))%i); + if (! 
o.shared_file) { + sprintf(o.mk_name, "mdtest.%d.", (rank+(0*o.nstride))%i); + sprintf(o.stat_name, "mdtest.%d.", (rank+(1*o.nstride))%i); + sprintf(o.read_name, "mdtest.%d.", (rank+(2*o.nstride))%i); + sprintf(o.rm_name, "mdtest.%d.", (rank+(3*o.nstride))%i); } - if (unique_dir_per_task) { - VERBOSE(3,5,"i %d nstride %d", i, nstride); - sprintf(unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*nstride))%i); - sprintf(unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*nstride))%i); - sprintf(unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*nstride))%i); - sprintf(unique_read_dir, "mdtest_tree.%d.0", (rank+(3*nstride))%i); - sprintf(unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*nstride))%i); - unique_rm_uni_dir[0] = 0; - VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", unique_mk_dir,unique_chdir_dir,unique_stat_dir,unique_read_dir,unique_rm_dir); + if (o.unique_dir_per_task) { + VERBOSE(3,5,"i %d nstride %d", i, o.nstride); + sprintf(o.unique_mk_dir, "mdtest_tree.%d.0", (rank+(0*o.nstride))%i); + sprintf(o.unique_chdir_dir, "mdtest_tree.%d.0", (rank+(1*o.nstride))%i); + sprintf(o.unique_stat_dir, "mdtest_tree.%d.0", (rank+(2*o.nstride))%i); + sprintf(o.unique_read_dir, "mdtest_tree.%d.0", (rank+(3*o.nstride))%i); + sprintf(o.unique_rm_dir, "mdtest_tree.%d.0", (rank+(4*o.nstride))%i); + o.unique_rm_uni_dir[0] = 0; + VERBOSE(5,5,"mk_dir %s chdir %s stat_dir %s read_dir %s rm_dir %s\n", o.unique_mk_dir, o.unique_chdir_dir, o.unique_stat_dir, o.unique_read_dir, o.unique_rm_dir); } - VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", unique_mk_dir ); + VERBOSE(3,-1,"V-3: main: Copied unique_mk_dir, '%s', to topdir", o.unique_mk_dir ); - if (dirs_only && !shared_file) { - if (pre_delay) { - DelaySecs(pre_delay); + if (o.dirs_only && ! 
o.shared_file) { + if (o.pre_delay) { + DelaySecs(o.pre_delay); } - directory_test(j, i, unique_mk_dir, progress); + directory_test(j, i, o.unique_mk_dir, progress); } - if (files_only) { - if (pre_delay) { - DelaySecs(pre_delay); + if (o.files_only) { + if (o.pre_delay) { + DelaySecs(o.pre_delay); } - VERBOSE(3,5,"will file_test on %s", unique_mk_dir); + VERBOSE(3,5,"will file_test on %s", o.unique_mk_dir); - file_test(j, i, unique_mk_dir, progress); + file_test(j, i, o.unique_mk_dir, progress); } } /* remove directory structure */ - if (!unique_dir_per_task) { - VERBOSE(3,-1,"main: Using testdir, '%s'", testdir ); + if (! o.unique_dir_per_task) { + VERBOSE(3,-1,"main: Using o.testdir, '%s'", o.testdir ); } MPI_Barrier(testComm); - if (remove_only) { + if (o.remove_only) { progress->items_start = 0; startCreate = GetTimeStamp(); - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); - if (unique_dir_per_task) { - if (collective_creates && (rank == 0)) { + if (o.unique_dir_per_task) { + if (o.collective_creates && (rank == 0)) { /* * This is inside two loops, one of which already uses "i" and the other uses "j". * I don't know how this ever worked. I'm changing this loop to use "k". 
*/ - for (k=0; krate[9] = num_dirs_in_tree / (endCreate - startCreate); + summary_table->rate[9] = o.num_dirs_in_tree / (endCreate - startCreate); summary_table->time[9] = endCreate - startCreate; - summary_table->items[9] = num_dirs_in_tree; - summary_table->stonewall_last_item[8] = num_dirs_in_tree; + summary_table->items[9] = o.num_dirs_in_tree; + summary_table->stonewall_last_item[8] = o.num_dirs_in_tree; VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[9]); - VERBOSE(2,-1,"main (at end of for j loop): Removing testdir of '%s'\n", testdir ); + VERBOSE(2,-1,"main (at end of for j loop): Removing o.testdir of '%s'\n", o.testdir ); - for (int dir_iter = 0; dir_iter < directory_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(j, dir_iter); - if ((rank < path_count) && backend->access(testdir, F_OK, backend_options) == 0) { - //if (( rank == 0 ) && access(testdir, F_OK) == 0) { - if (backend->rmdir(testdir, backend_options) == -1) { - FAIL("unable to remove directory %s", testdir); + if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) == 0) { + //if (( rank == 0 ) && access(o.testdir, F_OK) == 0) { + if (o.backend->rmdir(o.testdir, o.backend_options) == -1) { + FAIL("unable to remove directory %s", o.testdir); } } } @@ -1871,44 +1880,10 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } void mdtest_init_args(){ - barriers = 1; - branch_factor = 1; - throttle = 1; - stoneWallingStatusFile = NULL; - create_only = 0; - stat_only = 0; - read_only = 0; - verify_read = 0; - verification_error = 0; - remove_only = 0; - leaf_only = 0; - depth = 0; - num_dirs_in_tree = 0; - items_per_dir = 0; - random_seed = 0; - print_time = 0; - print_rate_and_time = 0; - shared_file = 0; - files_only = 0; - dirs_only = 0; - pre_delay = 0; - unique_dir_per_task = 0; - time_unique_dir_overhead = 0; - items = 0; - 
num_dirs_in_tree_calc = 0; - collective_creates = 0; - print_all_proc = 0; - write_bytes = 0; - stone_wall_timer_seconds = 0; - read_bytes = 0; - sync_file = 0; - call_sync = 0; - path_count = 0; - nstride = 0; - make_node = 0; -#ifdef HAVE_LUSTRE_LUSTREAPI - global_dir_layout = 0; -#endif /* HAVE_LUSTRE_LUSTREAPI */ + memset(& o, 0, sizeof(o)); + o.barriers = 1; + o.branch_factor = 1; + o.throttle = 1; } mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) { @@ -1944,82 +1919,82 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * aiori_supported_apis(APIs, APIs_legacy, MDTEST); char apiStr[1024]; sprintf(apiStr, "API for I/O [%s]", APIs); - memset(& hints, 0, sizeof(hints)); + memset(& o.hints, 0, sizeof(o.hints)); option_help options [] = { - {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & api}, - {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & branch_factor}, + {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & o.api}, + {'b', NULL, "branching factor of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.branch_factor}, {'d', NULL, "the directory in which the tests will run", OPTION_OPTIONAL_ARGUMENT, 's', & path}, {'B', NULL, "no barriers between phases", OPTION_OPTIONAL_ARGUMENT, 'd', & no_barriers}, - {'C', NULL, "only create files/dirs", OPTION_FLAG, 'd', & create_only}, - {'T', NULL, "only stat files/dirs", OPTION_FLAG, 'd', & stat_only}, - {'E', NULL, "only read files/dir", OPTION_FLAG, 'd', & read_only}, - {'r', NULL, "only remove files or directories left behind by previous runs", OPTION_FLAG, 'd', & remove_only}, - {'D', NULL, "perform test on directories only (no files)", OPTION_FLAG, 'd', & dirs_only}, - {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & read_bytes}, + {'C', NULL, "only create files/dirs", OPTION_FLAG, 'd', & o.create_only}, + {'T', NULL, "only stat files/dirs", 
OPTION_FLAG, 'd', & o.stat_only}, + {'E', NULL, "only read files/dir", OPTION_FLAG, 'd', & o.read_only}, + {'r', NULL, "only remove files or directories left behind by previous runs", OPTION_FLAG, 'd', & o.remove_only}, + {'D', NULL, "perform test on directories only (no files)", OPTION_FLAG, 'd', & o.dirs_only}, + {'e', NULL, "bytes to read from each file", OPTION_OPTIONAL_ARGUMENT, 'l', & o.read_bytes}, {'f', NULL, "first number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & first}, - {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & files_only}, + {'F', NULL, "perform test on files only (no directories)", OPTION_FLAG, 'd', & o.files_only}, #ifdef HAVE_LUSTRE_LUSTREAPI - {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & global_dir_layout}, + {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & o.global_dir_layout}, #endif /* HAVE_LUSTRE_LUSTREAPI */ {'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations}, - {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & items_per_dir}, - {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & make_node}, + {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items_per_dir}, + {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & o.make_node}, {'l', NULL, "last number of tasks on which the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & last}, - {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & leaf_only}, - {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & items}, - {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & nstride}, - {'p', NULL, 
"pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & pre_delay}, - {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & print_rate_and_time}, - {0, "print-all-procs", "all processes print an excerpt of their results", OPTION_FLAG, 'd', & print_all_proc}, + {'L', NULL, "files only at leaf level of tree", OPTION_FLAG, 'd', & o.leaf_only}, + {'n', NULL, "every process will creat/stat/read/remove # directories and files", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items}, + {'N', NULL, "stride # between tasks for file/dir operation (local=0; set to 1 to avoid client cache)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.nstride}, + {'p', NULL, "pre-iteration delay (in seconds)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.pre_delay}, + {'P', NULL, "print rate AND time", OPTION_FLAG, 'd', & o.print_rate_and_time}, + {0, "print-all-procs", "all processes print an excerpt of their results", OPTION_FLAG, 'd', & o.print_all_proc}, {'R', NULL, "random access to files (only for stat)", OPTION_FLAG, 'd', & randomize}, - {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & random_seed}, + {0, "random-seed", "random seed for -R", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed}, {'s', NULL, "stride between the number of tasks for each test", OPTION_OPTIONAL_ARGUMENT, 'd', & stride}, - {'S', NULL, "shared file access (file only, no directories)", OPTION_FLAG, 'd', & shared_file}, - {'c', NULL, "collective creates: task 0 does all creates", OPTION_FLAG, 'd', & collective_creates}, - {'t', NULL, "time unique working directory overhead", OPTION_FLAG, 'd', & time_unique_dir_overhead}, - {'u', NULL, "unique working directory for each task", OPTION_FLAG, 'd', & unique_dir_per_task}, + {'S', NULL, "shared file access (file only, no directories)", OPTION_FLAG, 'd', & o.shared_file}, + {'c', NULL, "collective creates: task 0 does all creates", OPTION_FLAG, 'd', & o.collective_creates}, + {'t', NULL, "time unique working directory overhead", OPTION_FLAG, 'd', & 
o.time_unique_dir_overhead}, + {'u', NULL, "unique working directory for each task", OPTION_FLAG, 'd', & o.unique_dir_per_task}, {'v', NULL, "verbosity (each instance of option increments by one)", OPTION_FLAG, 'd', & verbose}, {'V', NULL, "verbosity value", OPTION_OPTIONAL_ARGUMENT, 'd', & verbose}, - {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & write_bytes}, - {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase and files)", OPTION_OPTIONAL_ARGUMENT, 'd', & stone_wall_timer_seconds}, - {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", OPTION_OPTIONAL_ARGUMENT, 's', & stoneWallingStatusFile}, - {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & verify_read}, - {0, "verify-write", "Verify the data after a write by reading it back immediately", OPTION_FLAG, 'd', & verify_write}, - {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & sync_file}, - {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, - {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, - {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, + {'w', NULL, "bytes to write to each file after it is created", OPTION_OPTIONAL_ARGUMENT, 'l', & o.write_bytes}, + {'W', NULL, "number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase and files)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stone_wall_timer_seconds}, + {'x', NULL, "StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs", 
OPTION_OPTIONAL_ARGUMENT, 's', & o.stoneWallingStatusFile}, + {'X', "verify-read", "Verify the data read", OPTION_FLAG, 'd', & o.verify_read}, + {0, "verify-write", "Verify the data after a write by reading it back immediately", OPTION_FLAG, 'd', & o.verify_write}, + {'y', NULL, "sync file after writing", OPTION_FLAG, 'd', & o.sync_file}, + {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync}, + {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, + {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, LAST_OPTION }; options_all_t * global_options = airoi_create_all_module_options(options); option_parse(argc, argv, global_options); - backend = aiori_select(api); - if (backend == NULL) + o.backend = aiori_select(o.api); + if (o.backend == NULL) ERR("Unrecognized I/O API"); - if (! backend->enable_mdtest) + if (! 
o.backend->enable_mdtest) ERR("Backend doesn't support MDTest"); - backend_options = airoi_update_module_options(backend, global_options); + o.backend_options = airoi_update_module_options(o.backend, global_options); free(global_options->modules); free(global_options); MPI_Comm_rank(testComm, &rank); - MPI_Comm_size(testComm, &size); + MPI_Comm_size(testComm, &o.size); - if(backend->xfer_hints){ - backend->xfer_hints(& hints); + if(o.backend->xfer_hints){ + o.backend->xfer_hints(& o.hints); } - if(backend->check_params){ - backend->check_params(backend_options); + if(o.backend->check_params){ + o.backend->check_params(o.backend_options); } - if (backend->initialize){ - backend->initialize(backend_options); + if (o.backend->initialize){ + o.backend->initialize(o.backend_options); } - pid = getpid(); - uid = getuid(); + o.pid = getpid(); + o.uid = getuid(); numNodes = GetNumNodes(testComm); numTasksOnNode0 = GetNumTasksOnNode0(testComm); @@ -2031,118 +2006,118 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } VERBOSE(0,-1,"-- started at %s --\n", PrintTimestamp()); - VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, size, numNodes); + VERBOSE(0,-1,"mdtest-%s was launched with %d total task(s) on %d node(s)", RELEASE_VERS, o.size, numNodes); VERBOSE(0,-1,"Command line used: %s", cmd_buffer); /* adjust special variables */ - barriers = ! no_barriers; + o.barriers = ! no_barriers; if (path != NULL){ parse_dirpath(path); } if( randomize > 0 ){ - if (random_seed == 0) { + if (o.random_seed == 0) { /* Ensure all procs have the same random number */ - random_seed = time(NULL); + o.random_seed = time(NULL); MPI_Barrier(testComm); - MPI_Bcast(&random_seed, 1, MPI_INT, 0, testComm); + MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, testComm); } - random_seed += rank; + o.random_seed += rank; } - if ((items > 0) && (items_per_dir > 0) && (! 
unique_dir_per_task)) { - directory_loops = items / items_per_dir; + if ((o.items > 0) && (o.items_per_dir > 0) && (! o.unique_dir_per_task)) { + o.directory_loops = o.items / o.items_per_dir; }else{ - directory_loops = 1; + o.directory_loops = 1; } md_validate_tests(); // option_print_current(options); - VERBOSE(1,-1, "api : %s", api); - VERBOSE(1,-1, "barriers : %s", ( barriers ? "True" : "False" )); - VERBOSE(1,-1, "collective_creates : %s", ( collective_creates ? "True" : "False" )); - VERBOSE(1,-1, "create_only : %s", ( create_only ? "True" : "False" )); + VERBOSE(1,-1, "api : %s", o.api); + VERBOSE(1,-1, "barriers : %s", ( o.barriers ? "True" : "False" )); + VERBOSE(1,-1, "collective_creates : %s", ( o.collective_creates ? "True" : "False" )); + VERBOSE(1,-1, "create_only : %s", ( o.create_only ? "True" : "False" )); VERBOSE(1,-1, "dirpath(s):" ); - for ( i = 0; i < path_count; i++ ) { - VERBOSE(1,-1, "\t%s", filenames[i] ); + for ( i = 0; i < o.path_count; i++ ) { + VERBOSE(1,-1, "\t%s", o.filenames[i] ); } - VERBOSE(1,-1, "dirs_only : %s", ( dirs_only ? "True" : "False" )); - VERBOSE(1,-1, "read_bytes : "LLU"", read_bytes ); - VERBOSE(1,-1, "read_only : %s", ( read_only ? "True" : "False" )); + VERBOSE(1,-1, "dirs_only : %s", ( o.dirs_only ? "True" : "False" )); + VERBOSE(1,-1, "read_bytes : "LLU"", o.read_bytes ); + VERBOSE(1,-1, "read_only : %s", ( o.read_only ? "True" : "False" )); VERBOSE(1,-1, "first : %d", first ); - VERBOSE(1,-1, "files_only : %s", ( files_only ? "True" : "False" )); + VERBOSE(1,-1, "files_only : %s", ( o.files_only ? "True" : "False" )); #ifdef HAVE_LUSTRE_LUSTREAPI - VERBOSE(1,-1, "global_dir_layout : %s", ( global_dir_layout ? "True" : "False" )); + VERBOSE(1,-1, "global_dir_layout : %s", ( o.global_dir_layout ? 
"True" : "False" )); #endif /* HAVE_LUSTRE_LUSTREAPI */ VERBOSE(1,-1, "iterations : %d", iterations ); - VERBOSE(1,-1, "items_per_dir : "LLU"", items_per_dir ); + VERBOSE(1,-1, "items_per_dir : "LLU"", o.items_per_dir ); VERBOSE(1,-1, "last : %d", last ); - VERBOSE(1,-1, "leaf_only : %s", ( leaf_only ? "True" : "False" )); - VERBOSE(1,-1, "items : "LLU"", items ); - VERBOSE(1,-1, "nstride : %d", nstride ); - VERBOSE(1,-1, "pre_delay : %d", pre_delay ); - VERBOSE(1,-1, "remove_only : %s", ( leaf_only ? "True" : "False" )); - VERBOSE(1,-1, "random_seed : %d", random_seed ); + VERBOSE(1,-1, "leaf_only : %s", ( o.leaf_only ? "True" : "False" )); + VERBOSE(1,-1, "items : "LLU"", o.items ); + VERBOSE(1,-1, "nstride : %d", o.nstride ); + VERBOSE(1,-1, "pre_delay : %d", o.pre_delay ); + VERBOSE(1,-1, "remove_only : %s", ( o.leaf_only ? "True" : "False" )); + VERBOSE(1,-1, "random_seed : %d", o.random_seed ); VERBOSE(1,-1, "stride : %d", stride ); - VERBOSE(1,-1, "shared_file : %s", ( shared_file ? "True" : "False" )); - VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( time_unique_dir_overhead ? "True" : "False" )); - VERBOSE(1,-1, "stone_wall_timer_seconds: %d", stone_wall_timer_seconds); - VERBOSE(1,-1, "stat_only : %s", ( stat_only ? "True" : "False" )); - VERBOSE(1,-1, "unique_dir_per_task : %s", ( unique_dir_per_task ? "True" : "False" )); - VERBOSE(1,-1, "write_bytes : "LLU"", write_bytes ); - VERBOSE(1,-1, "sync_file : %s", ( sync_file ? "True" : "False" )); - VERBOSE(1,-1, "call_sync : %s", ( call_sync ? "True" : "False" )); - VERBOSE(1,-1, "depth : %d", depth ); - VERBOSE(1,-1, "make_node : %d", make_node ); + VERBOSE(1,-1, "shared_file : %s", ( o.shared_file ? "True" : "False" )); + VERBOSE(1,-1, "time_unique_dir_overhead: %s", ( o.time_unique_dir_overhead ? "True" : "False" )); + VERBOSE(1,-1, "stone_wall_timer_seconds: %d", o.stone_wall_timer_seconds); + VERBOSE(1,-1, "stat_only : %s", ( o.stat_only ? 
"True" : "False" )); + VERBOSE(1,-1, "unique_dir_per_task : %s", ( o.unique_dir_per_task ? "True" : "False" )); + VERBOSE(1,-1, "write_bytes : "LLU"", o.write_bytes ); + VERBOSE(1,-1, "sync_file : %s", ( o.sync_file ? "True" : "False" )); + VERBOSE(1,-1, "call_sync : %s", ( o.call_sync ? "True" : "False" )); + VERBOSE(1,-1, "depth : %d", o.depth ); + VERBOSE(1,-1, "make_node : %d", o.make_node ); /* setup total number of items and number of items per dir */ - if (depth <= 0) { - num_dirs_in_tree = 1; + if (o.depth <= 0) { + o.num_dirs_in_tree = 1; } else { - if (branch_factor < 1) { - num_dirs_in_tree = 1; - } else if (branch_factor == 1) { - num_dirs_in_tree = depth + 1; + if (o.branch_factor < 1) { + o.num_dirs_in_tree = 1; + } else if (o.branch_factor == 1) { + o.num_dirs_in_tree = o.depth + 1; } else { - num_dirs_in_tree = (pow(branch_factor, depth+1) - 1) / (branch_factor - 1); + o.num_dirs_in_tree = (pow(o.branch_factor, o.depth+1) - 1) / (o.branch_factor - 1); } } - if (items_per_dir > 0) { - if(items == 0){ - if (leaf_only) { - items = items_per_dir * (uint64_t) pow(branch_factor, depth); + if (o.items_per_dir > 0) { + if(o.items == 0){ + if (o.leaf_only) { + o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth); } else { - items = items_per_dir * num_dirs_in_tree; + o.items = o.items_per_dir * o.num_dirs_in_tree; } }else{ - num_dirs_in_tree_calc = num_dirs_in_tree; + o.num_dirs_in_tree_calc = o.num_dirs_in_tree; } } else { - if (leaf_only) { - if (branch_factor <= 1) { - items_per_dir = items; + if (o.leaf_only) { + if (o.branch_factor <= 1) { + o.items_per_dir = o.items; } else { - items_per_dir = (uint64_t) (items / pow(branch_factor, depth)); - items = items_per_dir * (uint64_t) pow(branch_factor, depth); + o.items_per_dir = (uint64_t) (o.items / pow(o.branch_factor, o.depth)); + o.items = o.items_per_dir * (uint64_t) pow(o.branch_factor, o.depth); } } else { - items_per_dir = items / num_dirs_in_tree; - items = items_per_dir * 
num_dirs_in_tree; + o.items_per_dir = o.items / o.num_dirs_in_tree; + o.items = o.items_per_dir * o.num_dirs_in_tree; } } /* initialize rand_array */ - if (random_seed > 0) { - srand(random_seed); + if (o.random_seed > 0) { + srand(o.random_seed); uint64_t s; - rand_array = (uint64_t *) malloc( items * sizeof(*rand_array)); + o.rand_array = (uint64_t *) malloc( o.items * sizeof(*o.rand_array)); - for (s=0; s < items; s++) { - rand_array[s] = s; + for (s=0; s < o.items; s++) { + o.rand_array[s] = s; } /* shuffle list randomly */ - uint64_t n = items; + uint64_t n = o.items; while (n>1) { n--; @@ -2161,122 +2136,122 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * * element, and the kth element to the nth element. */ - uint64_t tmp = rand_array[k]; - rand_array[k] = rand_array[n]; - rand_array[n] = tmp; + uint64_t tmp = o.rand_array[k]; + o.rand_array[k] = o.rand_array[n]; + o.rand_array[n] = tmp; } } /* allocate and initialize write buffer with # */ - if (write_bytes > 0) { - int alloc_res = posix_memalign((void**)&write_buffer, sysconf(_SC_PAGESIZE), write_bytes); + if (o.write_bytes > 0) { + int alloc_res = posix_memalign((void**)& o.write_buffer, sysconf(_SC_PAGESIZE), o.write_bytes); if (alloc_res) { FAIL("out of memory"); } - generate_memory_pattern(write_buffer, write_bytes); + generate_memory_pattern(o.write_buffer, o.write_bytes); } /* setup directory path to work in */ - if (path_count == 0) { /* special case where no directory path provided with '-d' option */ - char *ret = getcwd(testdirpath, MAX_PATHLEN); + if (o.path_count == 0) { /* special case where no directory path provided with '-d' option */ + char *ret = getcwd(o.testdirpath, MAX_PATHLEN); if (ret == NULL) { - FAIL("Unable to get current working directory on %s", testdirpath); + FAIL("Unable to get current working directory on %s", o.testdirpath); } - path_count = 1; + o.path_count = 1; } else { - strcpy(testdirpath, filenames[rank%path_count]); + 
strcpy(o.testdirpath, o.filenames[rank % o.path_count]); } /* if directory does not exist, create it */ - if ((rank < path_count) && backend->access(testdirpath, F_OK, backend_options) != 0) { - if (backend->mkdir(testdirpath, DIRMODE, backend_options) != 0) { - FAIL("Unable to create test directory path %s", testdirpath); + if ((rank < o.path_count) && o.backend->access(o.testdirpath, F_OK, o.backend_options) != 0) { + if (o.backend->mkdir(o.testdirpath, DIRMODE, o.backend_options) != 0) { + FAIL("Unable to create test directory path %s", o.testdirpath); } created_root_dir = 1; } /* display disk usage */ - VERBOSE(3,-1,"main (before display_freespace): testdirpath is '%s'", testdirpath ); + VERBOSE(3,-1,"main (before display_freespace): o.testdirpath is '%s'", o.testdirpath ); - if (rank == 0) ShowFileSystemSize(testdirpath, backend, backend_options); + if (rank == 0) ShowFileSystemSize(o.testdirpath, o.backend, o.backend_options); int tasksBlockMapping = QueryNodeMapping(testComm, true); /* set the shift to mimic IOR and shift by procs per node */ - if (nstride > 0) { + if (o.nstride > 0) { if ( numNodes > 1 && tasksBlockMapping ) { /* the user set the stride presumably to get the consumer tasks on a different node than the producer tasks however, if the mpirun scheduler placed the tasks by-slot (in a contiguous block) then we need to adjust the shift by ppn */ - nstride *= numTasksOnNode0; + o.nstride *= numTasksOnNode0; } - VERBOSE(0,5,"Shifting ranks by %d for each phase.", nstride); + VERBOSE(0,5,"Shifting ranks by %d for each phase.", o.nstride); } - VERBOSE(3,-1,"main (after display_freespace): testdirpath is '%s'", testdirpath ); + VERBOSE(3,-1,"main (after display_freespace): o.testdirpath is '%s'", o.testdirpath ); if (rank == 0) { - if (random_seed > 0) { - VERBOSE(0,-1,"random seed: %d", random_seed); + if (o.random_seed > 0) { + VERBOSE(0,-1,"random seed: %d", o.random_seed); } } - if (gethostname(hostname, MAX_PATHLEN) == -1) { + if 
(gethostname(o.hostname, MAX_PATHLEN) == -1) { perror("gethostname"); MPI_Abort(testComm, 2); } if (last == 0) { - first = size; - last = size; + first = o.size; + last = o.size; } /* setup summary table for recording results */ - summary_table = (mdtest_results_t *) malloc(iterations * sizeof(mdtest_results_t)); - memset(summary_table, 0, iterations * sizeof(mdtest_results_t)); + o.summary_table = (mdtest_results_t *) malloc(iterations * sizeof(mdtest_results_t)); + memset(o.summary_table, 0, iterations * sizeof(mdtest_results_t)); for(int i=0; i < iterations; i++){ for(int j=0; j < MDTEST_LAST_NUM; j++){ - summary_table[i].rate[j] = 0.0; - summary_table[i].time[j] = 0.0; + o.summary_table[i].rate[j] = 0.0; + o.summary_table[i].time[j] = 0.0; } } - if (summary_table == NULL) { + if (o.summary_table == NULL) { FAIL("out of memory"); } - if (unique_dir_per_task) { - sprintf(base_tree_name, "mdtest_tree.%d", rank); + if (o.unique_dir_per_task) { + sprintf(o.base_tree_name, "mdtest_tree.%d", rank); } else { - sprintf(base_tree_name, "mdtest_tree"); + sprintf(o.base_tree_name, "mdtest_tree"); } /* default use shared directory */ - strcpy(mk_name, "mdtest.shared."); - strcpy(stat_name, "mdtest.shared."); - strcpy(read_name, "mdtest.shared."); - strcpy(rm_name, "mdtest.shared."); + strcpy(o.mk_name, "mdtest.shared."); + strcpy(o.stat_name, "mdtest.shared."); + strcpy(o.read_name, "mdtest.shared."); + strcpy(o.rm_name, "mdtest.shared."); MPI_Comm_group(testComm, &worldgroup); /* Run the tests */ - for (i = first; i <= last && i <= size; i += stride) { + for (i = first; i <= last && i <= o.size; i += stride) { range.last = i - 1; MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup); MPI_Comm_create(testComm, testgroup, &testComm); if (rank == 0) { - uint64_t items_all = i * items; - if(num_dirs_in_tree_calc){ - items_all *= num_dirs_in_tree_calc; + uint64_t items_all = i * o.items; + if(o.num_dirs_in_tree_calc){ + items_all *= o.num_dirs_in_tree_calc; } - if 
(files_only && dirs_only) { + if (o.files_only && o.dirs_only) { VERBOSE(0,-1,"%d tasks, "LLU" files/directories", i, items_all); - } else if (files_only) { - if (!shared_file) { + } else if (o.files_only) { + if (! o.shared_file) { VERBOSE(0,-1,"%d tasks, "LLU" files", i, items_all); } else { VERBOSE(0,-1,"%d tasks, 1 file", i); } - } else if (dirs_only) { + } else if (o.dirs_only) { VERBOSE(0,-1,"%d tasks, "LLU" directories", i, items_all); } } @@ -2286,39 +2261,39 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * for (j = 0; j < iterations; j++) { // keep track of the current status for stonewalling - mdtest_iteration(i, j, testgroup, & summary_table[j]); + mdtest_iteration(i, j, testgroup, & o.summary_table[j]); } - if (print_rate_and_time){ + if (o.print_rate_and_time){ summarize_results(iterations, 0); summarize_results(iterations, 1); }else{ - summarize_results(iterations, print_time); + summarize_results(iterations, o.print_time); } if (i == 1 && stride > 1) { i = 0; } } - if (created_root_dir && remove_only && backend->rmdir(testdirpath, backend_options) != 0) { - FAIL("Unable to remove test directory path %s", testdirpath); + if (created_root_dir && o.remove_only && o.backend->rmdir(o.testdirpath, o.backend_options) != 0) { + FAIL("Unable to remove test directory path %s", o.testdirpath); } - if(verification_error){ + if(o.verification_error){ VERBOSE(0, -1, "\nERROR: verifying the data read! 
Take the performance values with care!\n"); } VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp()); - if (random_seed > 0) { - free(rand_array); + if (o.random_seed > 0) { + free(o.rand_array); } - if (backend->finalize){ - backend->finalize(backend_options); + if (o.backend->finalize){ + o.backend->finalize(o.backend_options); } - if (write_bytes > 0) { - free(write_buffer); + if (o.write_bytes > 0) { + free(o.write_buffer); } - return summary_table; + return o.summary_table; } From fd516543934680f05d1f65a10af0ef29a25f86e7 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 27 Nov 2020 17:51:31 +0000 Subject: [PATCH 081/154] Readd check for mdtest for now. --- src/mdtest.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mdtest.c b/src/mdtest.c index 49760f4..08b5f37 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1522,6 +1522,8 @@ void md_validate_tests() { FAIL("only specify the number of items or the number of items per directory"); }else if( o.items % o.items_per_dir != 0){ FAIL("items must be a multiple of items per directory"); + }else if( o.stone_wall_timer_seconds != 0){ + FAIL("items + items_per_dir can only be set without stonewalling"); } } /* check for using mknod */ From 5799e4ef3a8503d417ea6d9f59041cc8ce5625cc Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 27 Nov 2020 18:02:14 +0000 Subject: [PATCH 082/154] MDTest remove unnede variable. 
--- src/mdtest.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 08b5f37..7725d1a 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -149,7 +149,6 @@ typedef struct { int pre_delay; int unique_dir_per_task; int time_unique_dir_overhead; - int throttle; int collective_creates; size_t write_bytes; int stone_wall_timer_seconds; @@ -1885,7 +1884,6 @@ void mdtest_init_args(){ memset(& o, 0, sizeof(o)); o.barriers = 1; o.branch_factor = 1; - o.throttle = 1; } mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) { From ae23523a704162e708e08237eb238e1f362b9487 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sat, 28 Nov 2020 10:34:20 +0000 Subject: [PATCH 083/154] Integrate review feedback --- src/mdtest.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 7725d1a..6415344 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1046,8 +1046,7 @@ int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t){ char temp_path[MAX_PATHLEN]; - int cur_dir_loops = o.directory_loops; - for (int dir_iter = 0; dir_iter < cur_dir_loops; dir_iter ++){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { @@ -1091,6 +1090,7 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r } // reset stone wall timer to allow proper cleanup progress->stone_wall_timer_seconds = 0; + // at the moment, stonewall can be done only with one directory_loop, so we can return here safely break; } } @@ -1881,9 +1881,10 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } void mdtest_init_args(){ - memset(& o, 0, sizeof(o)); - o.barriers = 1; - o.branch_factor = 1; + o = (mdtest_options_t) { + .barriers = 
1, + .branch_factor = 1 + }; } mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out) { From 11c784c8bd25292001d27ca0858d2086df562419 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Sat, 28 Nov 2020 10:40:41 +0000 Subject: [PATCH 084/154] Integrate review feedback. --- src/mdtest.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 145fea1..ece7ab0 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1271,6 +1271,11 @@ char const * mdtest_test_name(int i){ return NULL; } +int calc_allreduce_index(int iter, int rank, int op){ + int tableSize = MDTEST_LAST_NUM; + return iter*tableSize*size + rank * tableSize + op; +} + void summarize_results(int iterations, int print_time) { char const * access; int i, j, k; @@ -1339,7 +1344,7 @@ void summarize_results(int iterations, int print_time) { } fprintf(out_logfile, "Test %s", access); for (k=0; k < size; k++) { - curr = all[j*tableSize*size + k * tableSize + i]; + curr = all[calc_allreduce_index(j, k, i)]; fprintf(out_logfile, "%c%e", (k==0 ? 
' ': ','), curr); } fprintf(out_logfile, "\n"); @@ -1355,7 +1360,7 @@ void summarize_results(int iterations, int print_time) { min = max = all[i]; for (j = 0; j < iterations; j++) { for (k=0; k < size; k++) { - curr = all[j*tableSize*size + k*tableSize + i]; + curr = all[calc_allreduce_index(j, k, i)]; if (min > curr) { min = curr; } From 41184c69e2823a4690b3cea8bcf3e4687575dc05 Mon Sep 17 00:00:00 2001 From: Richard Mohr Date: Tue, 1 Dec 2020 01:22:31 -0500 Subject: [PATCH 085/154] mdtest: fix reference to unique_dir_per_task for builds --with-lustre --- src/mdtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index 433ce7b..1a71174 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1708,7 +1708,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } #ifdef HAVE_LUSTRE_LUSTREAPI /* internal node for branching, can be non-striped for children */ - if (o.global_dir_layout && unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { + if (o.global_dir_layout && o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { FAIL("Unable to reset to global default directory layout"); } #endif /* HAVE_LUSTRE_LUSTREAPI */ From 5825dbae3598443a1b146647ad3b5dc85ccb29b5 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 1 Dec 2020 13:52:29 +0000 Subject: [PATCH 086/154] Provide new option randomPrefill for random (-z access only) that prefill the file with the specified blocksize, e.g., 2m. See issue #270. This option works so far only without stonewalling! 
--- src/ior.c | 49 +++++++++++++++++++++++++++++++++++---------- src/ior.h | 3 ++- src/parse_options.c | 1 + 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/src/ior.c b/src/ior.c index 3aaf195..a6dd412 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1588,7 +1588,10 @@ static void ValidateTests(IOR_param_t * test) ERR("block size must not be smaller than transfer size"); if (test->randomOffset && test->blockSize == test->transferSize) ERR("IOR will randomize access within a block and repeats the same pattern for all segments, therefore choose blocksize > transferSize"); - + if (! test->randomOffset && test->randomPrefillBlocksize) + ERR("Setting the randomPrefill option without using random is not useful"); + if (test->randomPrefillBlocksize && (test->blockSize % test->randomPrefillBlocksize != 0)) + ERR("The randomPrefill option must divide the blockSize"); /* specific APIs */ if ((strcasecmp(test->api, "MPIIO") == 0) && (test->blockSize < sizeof(IOR_size_t) @@ -1732,14 +1735,10 @@ IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offs return (offsetArray); } -static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, - IOR_offset_t * transferCount, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){ +static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, IOR_offset_t * transferCount, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){ IOR_offset_t amtXferred = 0; - IOR_offset_t transfer; void *buffer = ioBuffers->buffer; - - transfer = test->transferSize; if (access == WRITE) { /* fills each transfer with a unique pattern * containing the offset into the file */ @@ -1804,9 +1803,6 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, // offsetArray = GetOffsetArraySequential(test, pretendRank); - startForStonewall = GetTimeStamp(); - hitStonewall = 0; - IOR_offset_t 
offsets; IOR_offset_t * offsets_rnd; if (test->randomOffset) { @@ -1815,7 +1811,38 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, offsets = (test->blockSize / test->transferSize); } + // start timer after random offset was generated + startForStonewall = GetTimeStamp(); + hitStonewall = 0; + + if(test->randomPrefillBlocksize && test->deadlineForStonewalling == 0){ + // prefill the whole file already with an invalid pattern + int offsets = test->blockSize / test->randomPrefillBlocksize; + void * oldBuffer = ioBuffers->buffer; + ioBuffers->buffer = aligned_buffer_alloc(test->randomPrefillBlocksize); + // store invalid data into the buffer + memset(ioBuffers->buffer, -1, test->randomPrefillBlocksize); + for (i = 0; i < test->segmentCount; i++){ + for (j = 0; j < offsets; j++) { + IOR_offset_t offset = j * test->randomPrefillBlocksize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & transferCount, & errors, test, fd, ioBuffers, access); + } + } + aligned_buffer_free(ioBuffers->buffer); + ioBuffers->buffer = oldBuffer; + } + for (i = 0; i < test->segmentCount && !hitStonewall; i++) { + if(test->randomPrefillBlocksize && test->deadlineForStonewalling != 0){ + // prefill the whole segment with data + // TODO + ERR("Not supported, yet"); + } for (j = 0; j < offsets && !hitStonewall ; j++) { IOR_offset_t offset; if (test->randomOffset) { @@ -1832,7 +1859,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); } } - dataMoved += WriteOrReadSingle(offset, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); + dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); 
pairCnt++; hitStonewall = ((test->deadlineForStonewalling != 0 @@ -1888,7 +1915,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); } } - dataMoved += WriteOrReadSingle(offset, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); + dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); pairCnt++; } } diff --git a/src/ior.h b/src/ior.h index 843884d..33034c9 100755 --- a/src/ior.h +++ b/src/ior.h @@ -127,6 +127,7 @@ typedef struct IOR_offset_t blockSize; /* contiguous bytes to write per task */ IOR_offset_t transferSize; /* size of transfer in bytes */ IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */ + IOR_offset_t randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */ int summary_every_test; /* flag to print summary every test, not just at end */ int uniqueDir; /* use unique directory for each fpp */ @@ -168,7 +169,7 @@ typedef struct int hdfs_block_size; /* internal blk-size. 
(0 gets default) */ char* URI; /* "path" to target object */ - + /* RADOS variables */ rados_t rados_cluster; /* RADOS cluster handle */ rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */ diff --git a/src/parse_options.c b/src/parse_options.c index 12f8e0c..1a2ad7e 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -433,6 +433,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount}, {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, + {0, "randomPrefill", "For random -z access only: Prefill the file with this blocksize, e.g., 2m", OPTION_OPTIONAL_ARGUMENT, 'l', & params->randomPrefillBlocksize}, {0, "random-offset-seed", "The seed for -z", OPTION_OPTIONAL_ARGUMENT, 'd', & params->randomSeed}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, From 546eb05d15acd1c06a715c97d2521a85a509bc80 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 1 Dec 2020 14:45:07 +0000 Subject: [PATCH 087/154] Rudimentary support for stonewall added, addresses #261. 
--- src/ior.c | 68 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/src/ior.c b/src/ior.c index a6dd412..862c99d 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1780,6 +1780,27 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_ return amtXferred; } +static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pretendRank, aiori_fd_t *fd, IOR_io_buffers *ioBuffers, int startSegment, int endSegment){ + // prefill the whole file already with an invalid pattern + int offsets = test->blockSize / test->randomPrefillBlocksize; + void * oldBuffer = ioBuffers->buffer; + IOR_offset_t transferCount; + int errors; + ioBuffers->buffer = randomPrefillBuffer; + for (int i = startSegment; i < endSegment; i++){ + for (int j = 0; j < offsets; j++) { + IOR_offset_t offset = j * test->randomPrefillBlocksize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & transferCount, & errors, test, fd, ioBuffers, WRITE); + } + } + ioBuffers->buffer = oldBuffer; +} + /* * Write or Read data to file(s). This loops through the strides, writing * out the data to each block in transfer sizes, until the remainder left is 0. 
@@ -1811,37 +1832,34 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, offsets = (test->blockSize / test->transferSize); } + void * randomPrefillBuffer = NULL; + if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){ + randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize); + // store invalid data into the buffer + memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize); + } + // start timer after random offset was generated startForStonewall = GetTimeStamp(); hitStonewall = 0; - if(test->randomPrefillBlocksize && test->deadlineForStonewalling == 0){ - // prefill the whole file already with an invalid pattern - int offsets = test->blockSize / test->randomPrefillBlocksize; - void * oldBuffer = ioBuffers->buffer; - ioBuffers->buffer = aligned_buffer_alloc(test->randomPrefillBlocksize); - // store invalid data into the buffer - memset(ioBuffers->buffer, -1, test->randomPrefillBlocksize); - for (i = 0; i < test->segmentCount; i++){ - for (j = 0; j < offsets; j++) { - IOR_offset_t offset = j * test->randomPrefillBlocksize; - if (test->filePerProc) { - offset += i * test->blockSize; - } else { - offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); - } - WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, & transferCount, & errors, test, fd, ioBuffers, access); - } + if(randomPrefillBuffer && test->deadlineForStonewalling == 0){ + double t_start = GetTimeStamp(); + prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, 0, test->segmentCount); + if(rank == 0 && verbose > VERBOSE_1){ + fprintf(out_logfile, "Random prefill took: %fs\n", GetTimeStamp() - t_start); } - aligned_buffer_free(ioBuffers->buffer); - ioBuffers->buffer = oldBuffer; } for (i = 0; i < test->segmentCount && !hitStonewall; i++) { - if(test->randomPrefillBlocksize && test->deadlineForStonewalling != 0){ - // prefill the whole segment with data - // TODO - ERR("Not 
supported, yet"); + if(randomPrefillBuffer && test->deadlineForStonewalling != 0){ + // prefill the whole segment with data, this needs to be done collectively + double t_start = GetTimeStamp(); + MPI_Barrier(test->testComm); + prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, i, i+1); + if(rank == 0 && verbose > VERBOSE_1){ + fprintf(out_logfile, "Random: synchronizing segment count with barrier and prefill took: %fs\n", GetTimeStamp() - t_start); + } } for (j = 0; j < offsets && !hitStonewall ; j++) { IOR_offset_t offset; @@ -1929,5 +1947,9 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, if (access == WRITE && test->fsync == TRUE) { backend->fsync(fd, test->backend_options); /*fsync after all accesses */ } + if(randomPrefillBuffer){ + aligned_buffer_free(randomPrefillBuffer); + } + return (dataMoved); } From 9dcf9f79e479be278d7ba2db474dc56a9c474594 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 1 Dec 2020 14:47:57 +0000 Subject: [PATCH 088/154] Moved barriers to right location. 
--- src/ior.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index 862c99d..e217fda 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1849,14 +1849,16 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, if(rank == 0 && verbose > VERBOSE_1){ fprintf(out_logfile, "Random prefill took: %fs\n", GetTimeStamp() - t_start); } + // must synchronize processes to ensure they are not running ahead + MPI_Barrier(test->testComm); } for (i = 0; i < test->segmentCount && !hitStonewall; i++) { if(randomPrefillBuffer && test->deadlineForStonewalling != 0){ // prefill the whole segment with data, this needs to be done collectively double t_start = GetTimeStamp(); - MPI_Barrier(test->testComm); prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, i, i+1); + MPI_Barrier(test->testComm); if(rank == 0 && verbose > VERBOSE_1){ fprintf(out_logfile, "Random: synchronizing segment count with barrier and prefill took: %fs\n", GetTimeStamp() - t_start); } From da03ae5c9f5dd8df0d43f242e69cd79827d466bf Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 1 Dec 2020 18:24:09 +0000 Subject: [PATCH 089/154] IOR add warning if file exists #273. 
To stop on warnings, use existing flag --warningAsErrors --- src/ior.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/ior.c b/src/ior.c index 55733d5..674112a 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1293,6 +1293,15 @@ static void TestIoSys(IOR_test_t *test) /* loop over test iterations */ uint64_t params_saved_wearout = params->stoneWallingWearOutIterations; + + /* Check if the file exists and warn users */ + struct stat sb; + GetTestFileName(testFileName, params); + int ret = backend->stat(testFileName, & sb, params->backend_options); + if(ret == 0) { + EWARNF("The file \"%s\" exists already and will be overwritten", testFileName); + } + for (rep = 0; rep < params->repetitions; rep++) { /* Get iteration start time in seconds in task 0 and broadcast to all tasks */ From 938cf2771be8ab99cdc8781ca6e6beebcaa56aa4 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 1 Dec 2020 21:12:52 +0000 Subject: [PATCH 090/154] DFS: make use of collective open to share file handle - remove un-needed libdaos_common lib since DAOS driver is removed - use default container object class when it's not specified Signed-off-by: Mohamad Chaarawi --- configure.ac | 1 - src/aiori-DFS.c | 96 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 67 insertions(+), 30 deletions(-) diff --git a/configure.ac b/configure.ac index 8a859c8..b123064 100755 --- a/configure.ac +++ b/configure.ac @@ -252,7 +252,6 @@ AS_IF([test "x$with_daos" != xno], [ AC_CHECK_HEADERS(daos.h,, [unset DAOS]) AC_CHECK_LIB([gurt], [d_hash_murmur64],, [unset DAOS]) AC_CHECK_LIB([uuid], [uuid_generate],, [unset DAOS]) - AC_CHECK_LIB([daos_common], [daos_sgl_init],, [unset DAOS]) AC_CHECK_LIB([daos], [daos_init],, [unset DAOS]) AC_CHECK_LIB([dfs], [dfs_mkdir],, [unset DAOS]) ]) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 38e99ca..ac6cd1c 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -39,8 +39,8 @@ dfs_t *dfs; static daos_handle_t poh, coh; -static daos_oclass_id_t 
objectClass = OC_SX; -static daos_oclass_id_t dir_oclass = OC_SX; +static daos_oclass_id_t objectClass; +static daos_oclass_id_t dir_oclass; static struct d_hash_table *dir_hash; static bool dfs_init; @@ -247,8 +247,7 @@ HandleDistribute(enum handleType type) DCHECK(rc, "Failed to get global handle size"); } - MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, - MPI_COMM_WORLD), + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD), "Failed to bcast global handle buffer size"); global.iov_len = global.iov_buf_len; @@ -266,8 +265,7 @@ HandleDistribute(enum handleType type) DCHECK(rc, "Failed to create global handle"); } - MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, - MPI_COMM_WORLD), + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, MPI_COMM_WORLD), "Failed to bcast global pool handle"); if (rank != 0) { @@ -374,6 +372,45 @@ out: return rc; } +static void +share_file_handle(dfs_obj_t **file, MPI_Comm comm) +{ + d_iov_t global; + int rc; + + global.iov_buf = NULL; + global.iov_buf_len = 0; + global.iov_len = 0; + + if (rank == 0) { + rc = dfs_obj_local2global(dfs, *file, &global); + DCHECK(rc, "Failed to get global handle size"); + } + + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, testComm), + "Failed to bcast global handle buffer size"); + + global.iov_len = global.iov_buf_len; + global.iov_buf = malloc(global.iov_buf_len); + if (global.iov_buf == NULL) + ERR("Failed to allocate global handle buffer"); + + if (rank == 0) { + rc = dfs_obj_local2global(dfs, *file, &global); + DCHECK(rc, "Failed to create global handle"); + } + + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, testComm), + "Failed to bcast global pool handle"); + + if (rank != 0) { + rc = dfs_obj_global2local(dfs, 0, global, file); + DCHECK(rc, "Failed to get local handle"); + } + + free(global.iov_buf); +} + static dfs_obj_t * lookup_insert_dir(const char *name, mode_t *mode) { @@ 
-555,8 +592,8 @@ DFS_Finalize(aiori_mod_opt_t *options) o->dir_oclass = NULL; o->prefix = NULL; o->destroy = 0; - objectClass = OC_SX; - dir_oclass = OC_SX; + objectClass = 0; + dir_oclass = 0; dfs_init = false; } @@ -578,26 +615,21 @@ DFS_Create(char *testFileName, int flags, aiori_mod_opt_t *param) assert(dir_name); assert(name); - parent = lookup_insert_dir(dir_name, NULL); - if (parent == NULL) - GERR("Failed to lookup parent dir"); - mode = S_IFREG | mode; if (hints->filePerProc || rank == 0) { fd_oflag |= O_CREAT | O_RDWR | O_EXCL; + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); + rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, o->chunk_size, NULL, &obj); DCHECK(rc, "dfs_open() of %s Failed", name); } + if (!hints->filePerProc) { - MPI_Barrier(MPI_COMM_WORLD); - if (rank != 0) { - fd_oflag |= O_RDWR; - rc = dfs_open(dfs, parent, name, mode, fd_oflag, - objectClass, o->chunk_size, NULL, &obj); - DCHECK(rc, "dfs_open() of %s Failed", name); - } + share_file_handle(&obj, testComm); } if (name) @@ -629,13 +661,19 @@ DFS_Open(char *testFileName, int flags, aiori_mod_opt_t *param) assert(dir_name); assert(name); - parent = lookup_insert_dir(dir_name, NULL); - if (parent == NULL) - GERR("Failed to lookup parent dir"); + if (hints->filePerProc || rank == 0) { + parent = lookup_insert_dir(dir_name, NULL); + if (parent == NULL) + GERR("Failed to lookup parent dir"); - rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, - o->chunk_size, NULL, &obj); - DCHECK(rc, "dfs_open() of %s Failed", name); + rc = dfs_open(dfs, parent, name, mode, fd_oflag, objectClass, + o->chunk_size, NULL, &obj); + DCHECK(rc, "dfs_open() of %s Failed", name); + } + + if (!hints->filePerProc) { + share_file_handle(&obj, testComm); + } if (name) free(name); @@ -675,14 +713,14 @@ DFS_Xfer(int access, aiori_fd_t *file, IOR_size_t *buffer, IOR_offset_t length, if (access == WRITE) { rc = dfs_write(dfs, obj, &sgl, off, 
NULL); if (rc) { - fprintf(stderr, "dfs_write() failed (%d)", rc); + fprintf(stderr, "dfs_write() failed (%d)\n", rc); return -1; } ret = remaining; } else { rc = dfs_read(dfs, obj, &sgl, off, &ret, NULL); if (rc || ret == 0) - fprintf(stderr, "dfs_read() failed(%d)", rc); + fprintf(stderr, "dfs_read() failed(%d)\n", rc); } if (ret < remaining) { @@ -787,7 +825,7 @@ DFS_GetFileSize(aiori_mod_opt_t * test, char *testFileName) comm = testComm; } - if (hints->filePerProc || rank == 0) { + if (hints->filePerProc || rank == 0) { rc = dfs_lookup(dfs, testFileName, O_RDONLY, &obj, NULL, NULL); if (rc) { fprintf(stderr, "dfs_lookup() of %s Failed (%d)", testFileName, rc); @@ -805,7 +843,7 @@ DFS_GetFileSize(aiori_mod_opt_t * test, char *testFileName) if (rc) return rc; } - + return (fsize); } From f71a144b1986ee15b12184276ae974f3fce1aea3 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 2 Dec 2020 10:14:28 +0000 Subject: [PATCH 091/154] IOR raise warning for existing file once if not filePerProc. 
--- src/ior.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ior.c b/src/ior.c index 674112a..0daddff 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1295,11 +1295,13 @@ static void TestIoSys(IOR_test_t *test) uint64_t params_saved_wearout = params->stoneWallingWearOutIterations; /* Check if the file exists and warn users */ - struct stat sb; - GetTestFileName(testFileName, params); - int ret = backend->stat(testFileName, & sb, params->backend_options); - if(ret == 0) { - EWARNF("The file \"%s\" exists already and will be overwritten", testFileName); + if( params->hints.filePerProc || rank == 0){ + struct stat sb; + GetTestFileName(testFileName, params); + int ret = backend->stat(testFileName, & sb, params->backend_options); + if(ret == 0) { + EWARNF("The file \"%s\" exists already and will be overwritten", testFileName); + } } for (rep = 0; rep < params->repetitions; rep++) { From 231868505d68df59f57503b9435baf1d314df63f Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Thu, 3 Dec 2020 10:39:36 -0800 Subject: [PATCH 092/154] Do not execute functions twice in MPI_CHECKF Assigning MPI_STATUS to a local variable and then referring to the local will ensure that the same value is used in both the conditional expression and the call to MPI_Error_string. Otherwise, when MPI_STATUS is a function call, like MPI_CHECKF(fubar(), "%s", "error in fubar"); fubar() is called twice. 
If there are underlying intermittent errors, the error code/message for the first call is lost, with confusing output like this: read 2206.18 17.27 145.93 262144 131072 0.272595 291.88 0.290829 292.41 ERROR: cannot access explicit, noncollective, MPI MPI_SUCCESS: no errors, (aiori-MPIIO.c:451) --- src/aiori-debug.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/aiori-debug.h b/src/aiori-debug.h index be289e9..bb5dd71 100644 --- a/src/aiori-debug.h +++ b/src/aiori-debug.h @@ -94,12 +94,13 @@ extern int aiori_warning_as_errors; #define MPI_CHECKF(MPI_STATUS, FORMAT, ...) do { \ char resultString[MPI_MAX_ERROR_STRING]; \ int resultLength; \ + int checkf_mpi_status = MPI_STATUS; \ \ - if (MPI_STATUS != MPI_SUCCESS) { \ - MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ - fprintf(out_logfile, "ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ + if (checkf_mpi_status != MPI_SUCCESS) { \ + MPI_Error_string(checkf_mpi_status, resultString, &resultLength);\ + fprintf(out_logfile, "ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ __VA_ARGS__, resultString, __FILE__, __LINE__); \ - fflush(out_logfile); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } \ } while(0) From 073da082928f2e06d5723e911a4c8bd444b5e40c Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 3 Dec 2020 20:54:51 +0000 Subject: [PATCH 093/154] IOR: print overwrite check only if a write operation is requested. 
--- src/ior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index b2ba1a4..0a61814 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1292,7 +1292,7 @@ static void TestIoSys(IOR_test_t *test) uint64_t params_saved_wearout = params->stoneWallingWearOutIterations; /* Check if the file exists and warn users */ - if( params->hints.filePerProc || rank == 0){ + if((params->writeFile || params->checkWrite) && (params->hints.filePerProc || rank == 0)){ struct stat sb; GetTestFileName(testFileName, params); int ret = backend->stat(testFileName, & sb, params->backend_options); From 69e006dd899ba3e2d45bf3c647eff03a672d2367 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 3 Dec 2020 21:07:45 +0000 Subject: [PATCH 094/154] IOR clarify return API. --- src/ior.c | 6 +++--- src/ior.h | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ior.c b/src/ior.c index 0a61814..aa841de 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1918,14 +1918,14 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_min_data_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm), "cannot reduce pairs moved"); - MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_avg_data_accessed, + MPI_CHECK(MPI_Reduce(& data_moved_ll, &point->stonewall_total_data_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm), "cannot reduce pairs moved"); if(rank == 0){ + point->stonewall_avg_data_accessed = point->stonewall_total_data_accessed / test->numTasks; fprintf(out_logfile, "stonewalling pairs accessed min: %lld max: %zu -- min data: %.1f GiB mean data: %.1f GiB time: %.1fs\n", pairs_accessed_min, point->pairs_accessed, - point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 / test->numTasks , point->stonewall_time); - 
point->stonewall_min_data_accessed *= test->numTasks; + point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 , point->stonewall_time); } if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! diff --git a/src/ior.h b/src/ior.h index 33034c9..3009720 100755 --- a/src/ior.h +++ b/src/ior.h @@ -190,8 +190,9 @@ typedef struct { size_t pairs_accessed; // number of I/Os done, useful for deadlineForStonewalling double stonewall_time; - long long stonewall_min_data_accessed; - long long stonewall_avg_data_accessed; + long long stonewall_min_data_accessed; // of all processes + long long stonewall_avg_data_accessed; // across all processes + long long stonewall_total_data_accessed; // sum accross all processes IOR_offset_t aggFileSizeFromStat; IOR_offset_t aggFileSizeFromXfer; From 0bd52884e8015b19a8cd0331736a9ba809c0e46a Mon Sep 17 00:00:00 2001 From: Frank <34269423+Frankgad@users.noreply.github.com> Date: Fri, 11 Dec 2020 14:05:24 +0100 Subject: [PATCH 095/154] added region and location to libs3 (#295) * added region and location to libs3 --- src/aiori-S3-libs3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index b5b2f6c..489da74 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -56,6 +56,7 @@ static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_m *init_backend_options = (aiori_mod_opt_t*) o; o->bucket_prefix = "ior"; + o->bucket_prefix_cur = "b"; option_help h [] = { {0, "S3-libs3.bucket-per-file", "Use one bucket to map one file/directory, otherwise one bucket is used to store all dirs/files.", OPTION_FLAG, 'd', & o->bucket_per_file}, @@ -66,6 +67,8 @@ static option_help * S3_options(aiori_mod_opt_t ** init_backend_options, aiori_m {0, "S3-libs3.host", "The host optionally followed by:port.", OPTION_OPTIONAL_ARGUMENT, 's', & o->host}, {0, "S3-libs3.secret-key", "The secret key.", 
OPTION_OPTIONAL_ARGUMENT, 's', & o->secret_key}, {0, "S3-libs3.access-key", "The access key.", OPTION_OPTIONAL_ARGUMENT, 's', & o->access_key}, + {0, "S3-libs3.region", "The region used for the authorization signature.", OPTION_OPTIONAL_ARGUMENT, 's', & o->authRegion}, + {0, "S3-libs3.location", "The bucket geographic location.", OPTION_OPTIONAL_ARGUMENT, 's', & o->locationConstraint}, LAST_OPTION }; option_help * help = malloc(sizeof(h)); @@ -92,7 +95,7 @@ static void def_file_name(s3_options_t * o, char * out_name, char const * path){ } path++; } - *out_name = '-'; + *out_name = 'b'; out_name++; *out_name = '\0'; } From 6675cd50bf68a9ccf3117cf88aea40c04c14a1d8 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Wed, 16 Dec 2020 15:41:58 +0000 Subject: [PATCH 096/154] aiori-DFS: stat should not be fatal ior now expects stat to not be fatal. update the DFS driver to not exit if stat fails since the file can simply not exist. Signed-off-by: Mohamad Chaarawi --- src/aiori-DFS.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index ac6cd1c..32b2960 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -945,7 +945,6 @@ DFS_Stat(const char *path, struct stat *buf, aiori_mod_opt_t * param) GERR("Failed to lookup parent dir"); rc = dfs_stat(dfs, parent, name, buf); - DCHECK(rc, "dfs_stat() of Failed (%d)", rc); if (name) free(name); From 8de13884a768043b9ab0afbce494f693cf89f21b Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Wed, 23 Dec 2020 11:51:31 +0000 Subject: [PATCH 097/154] HDFS module ported to current AIORI API and improved (#302) * HDFS module ported to current AIORI API and extended * Added instructions how to build and run with a HDFS. * Replaced read with pread to support offsets. 
* Implemented HDFS MDTest API * Improved sync semantics --- src/aiori-HDFS.c | 447 ++++++++++++++++++++++-------------------- src/ior.c | 12 -- src/ior.h | 8 - testing/build-hdfs.sh | 18 ++ 4 files changed, 257 insertions(+), 228 deletions(-) create mode 100755 testing/build-hdfs.sh diff --git a/src/aiori-HDFS.c b/src/aiori-HDFS.c index 118de15..8c528ab 100755 --- a/src/aiori-HDFS.c +++ b/src/aiori-HDFS.c @@ -81,10 +81,9 @@ #include #endif */ - #include "ior.h" #include "aiori.h" -#include "iordef.h" +#include "utilities.h" #ifndef open64 /* necessary for TRU64 -- */ # define open64 open /* unlikely, but may pose */ @@ -101,15 +100,23 @@ #include "hdfs.h" /**************************** P R O T O T Y P E S *****************************/ -static void *HDFS_Create(char *, IOR_param_t *); -static void *HDFS_Open(char *, IOR_param_t *); -static IOR_offset_t HDFS_Xfer(int, void *, IOR_size_t *, - IOR_offset_t, IOR_param_t *); -static void HDFS_Close(void *, IOR_param_t *); -static void HDFS_Delete(char *, IOR_param_t *); -static void HDFS_SetVersion(IOR_param_t *); -static void HDFS_Fsync(void *, IOR_param_t *); -static IOR_offset_t HDFS_GetFileSize(IOR_param_t *, MPI_Comm, char *); +static aiori_fd_t *HDFS_Create(char *testFileName, int flags, aiori_mod_opt_t * param); +static aiori_fd_t *HDFS_Open(char *testFileName, int flags, aiori_mod_opt_t * param); +static IOR_offset_t HDFS_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, + IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param); +static void HDFS_Close(aiori_fd_t *, aiori_mod_opt_t *); +static void HDFS_Delete(char *testFileName, aiori_mod_opt_t * param); +static void HDFS_Fsync(aiori_fd_t *, aiori_mod_opt_t *); +static IOR_offset_t HDFS_GetFileSize(aiori_mod_opt_t *,char *); +static void hdfs_xfer_hints(aiori_xfer_hint_t * params); +static option_help * HDFS_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); +static int HDFS_mkdir (const char *path, mode_t mode, 
aiori_mod_opt_t * options); +static int HDFS_rmdir (const char *path, aiori_mod_opt_t * options); +static int HDFS_access (const char *path, int mode, aiori_mod_opt_t * options); +static int HDFS_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options); +static int HDFS_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options); + +static aiori_xfer_hint_t * hints = NULL; /************************** D E C L A R A T I O N S ***************************/ @@ -121,13 +128,120 @@ ior_aiori_t hdfs_aiori = { .xfer = HDFS_Xfer, .close = HDFS_Close, .delete = HDFS_Delete, - .set_version = HDFS_SetVersion, + .get_options = HDFS_options, + .get_version = aiori_get_version, + .xfer_hints = hdfs_xfer_hints, .fsync = HDFS_Fsync, .get_file_size = HDFS_GetFileSize, + .statfs = HDFS_statfs, + .mkdir = HDFS_mkdir, + .rmdir = HDFS_rmdir, + .access = HDFS_access, + .stat = HDFS_stat, + .enable_mdtest = true }; /***************************** F U N C T I O N S ******************************/ +void hdfs_xfer_hints(aiori_xfer_hint_t * params){ + hints = params; +} + +/************************** O P T I O N S *****************************/ +typedef struct { + char * user; + char * name_node; + int replicas; /* n block replicas. (0 gets default) */ + int direct_io; + IOR_offset_t block_size; /* internal blk-size. 
(0 gets default) */ + // runtime options + hdfsFS fs; /* file-system handle */ + tPort name_node_port; /* (uint16_t) */ +} hdfs_options_t; + +static void hdfs_connect( hdfs_options_t* o ); + +option_help * HDFS_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ + hdfs_options_t * o = malloc(sizeof(hdfs_options_t)); + + if (init_values != NULL){ + memcpy(o, init_values, sizeof(hdfs_options_t)); + }else{ + memset(o, 0, sizeof(hdfs_options_t)); + char *hdfs_user; + hdfs_user = getenv("USER"); + if (!hdfs_user){ + hdfs_user = ""; + } + o->user = strdup(hdfs_user); + o->name_node = "default"; + } + + *init_backend_options = (aiori_mod_opt_t*) o; + + option_help h [] = { + {0, "hdfs.odirect", "Direct I/O Mode", OPTION_FLAG, 'd', & o->direct_io}, + {0, "hdfs.user", "Username", OPTION_OPTIONAL_ARGUMENT, 's', & o->user}, + {0, "hdfs.name_node", "Namenode", OPTION_OPTIONAL_ARGUMENT, 's', & o->name_node}, + {0, "hdfs.replicas", "Number of replicas", OPTION_OPTIONAL_ARGUMENT, 'd', & o->replicas}, + {0, "hdfs.block_size", "Blocksize", OPTION_OPTIONAL_ARGUMENT, 'l', & o->block_size}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} + + +int HDFS_mkdir (const char *path, mode_t mode, aiori_mod_opt_t * options){ + hdfs_options_t * o = (hdfs_options_t*) options; + hdfs_connect(o); + return hdfsCreateDirectory(o->fs, path); +} + +int HDFS_rmdir (const char *path, aiori_mod_opt_t * options){ + hdfs_options_t * o = (hdfs_options_t*) options; + hdfs_connect(o); + return hdfsDelete(o->fs, path, 1); +} + +int HDFS_access (const char *path, int mode, aiori_mod_opt_t * options){ + hdfs_options_t * o = (hdfs_options_t*) options; + hdfs_connect(o); + return hdfsExists(o->fs, path); +} + +int HDFS_stat (const char *path, struct stat *buf, aiori_mod_opt_t * options){ + hdfsFileInfo * stat; + hdfs_options_t * o = (hdfs_options_t*) options; + hdfs_connect(o); + stat = hdfsGetPathInfo(o->fs, path); + if(stat 
== NULL){ + return 1; + } + memset(buf, 0, sizeof(struct stat)); + buf->st_atime = stat->mLastAccess; + buf->st_size = stat->mSize; + buf->st_mtime = stat->mLastMod; + buf->st_mode = stat->mPermissions; + + hdfsFreeFileInfo(stat, 1); + return 0; +} + +int HDFS_statfs (const char * path, ior_aiori_statfs_t * stat, aiori_mod_opt_t * options){ + hdfs_options_t * o = (hdfs_options_t*) options; + hdfs_connect(o); + + stat->f_bsize = hdfsGetDefaultBlockSize(o->fs); + stat->f_blocks = hdfsGetCapacity(o->fs) / hdfsGetDefaultBlockSize(o->fs); + stat->f_bfree = stat->f_blocks - hdfsGetUsed(o->fs) / hdfsGetDefaultBlockSize(o->fs); + stat->f_bavail = 1; + stat->f_files = 1; + stat->f_ffree = 1; + return 0; +} + /* This is identical to the one in aiori-POSIX.c Doesn't seem like * it would be appropriate in utilities.c. */ @@ -159,16 +273,16 @@ void hdfs_set_o_direct_flag(int *fd) * NOTE: It's okay to call this thing whenever you need to be sure the HDFS * filesystem is connected. */ -static void hdfs_connect( IOR_param_t* param ) { - if (param->verbose >= VERBOSE_4) { +void hdfs_connect( hdfs_options_t* o ) { + if (verbose >= VERBOSE_4) { printf("-> hdfs_connect [nn:\"%s\", port:%d, user:%s]\n", - param->hdfs_name_node, - param->hdfs_name_node_port, - param->hdfs_user ); + o->name_node, + o->name_node_port, + o->user ); } - if ( param->hdfs_fs ) { - if (param->verbose >= VERBOSE_4) { + if ( o->fs ) { + if (verbose >= VERBOSE_4) { printf("<- hdfs_connect [nothing to do]\n"); /* DEBUGGING */ } return; @@ -176,34 +290,35 @@ static void hdfs_connect( IOR_param_t* param ) { /* initialize a builder, holding parameters for hdfsBuilderConnect() */ struct hdfsBuilder* builder = hdfsNewBuilder(); - if ( ! builder ) - ERR_SIMPLE("couldn't create an hdfsBuilder"); + if ( ! 
builder ){ + ERR("couldn't create an hdfsBuilder"); + } hdfsBuilderSetForceNewInstance ( builder ); /* don't use cached instance */ - hdfsBuilderSetNameNode ( builder, param->hdfs_name_node ); - hdfsBuilderSetNameNodePort( builder, param->hdfs_name_node_port ); - hdfsBuilderSetUserName ( builder, param->hdfs_user ); + hdfsBuilderSetNameNode ( builder, o->name_node ); + hdfsBuilderSetNameNodePort( builder, o->name_node_port ); + hdfsBuilderSetUserName ( builder, o->user ); /* NOTE: hdfsBuilderConnect() frees the builder */ - param->hdfs_fs = hdfsBuilderConnect( builder ); - if ( ! param->hdfs_fs ) - ERR_SIMPLE("hdsfsBuilderConnect failed"); + o->fs = hdfsBuilderConnect( builder ); + if ( ! o->fs ) + ERR("hdsfsBuilderConnect failed"); - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- hdfs_connect [success]\n"); } } -static void hdfs_disconnect( IOR_param_t* param ) { - if (param->verbose >= VERBOSE_4) { +static void hdfs_disconnect( hdfs_options_t* o ) { + if (verbose >= VERBOSE_4) { printf("-> hdfs_disconnect\n"); } - if ( param->hdfs_fs ) { - hdfsDisconnect( param->hdfs_fs ); - param->hdfs_fs = NULL; + if ( o->fs ) { + hdfsDisconnect( o->fs ); + o->fs = NULL; } - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- hdfs_disconnect\n"); } } @@ -214,16 +329,17 @@ static void hdfs_disconnect( IOR_param_t* param ) { * Return an hdfsFile. */ -static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsigned char createFile ) { - if (param->verbose >= VERBOSE_4) { +static void *HDFS_Create_Or_Open( char *testFileName, int flags, aiori_mod_opt_t *param, unsigned char createFile ) { + if (verbose >= VERBOSE_4) { printf("-> HDFS_Create_Or_Open\n"); } + hdfs_options_t * o = (hdfs_options_t*) param; hdfsFile hdfs_file = NULL; int fd_oflags = 0, hdfs_return; /* initialize file-system handle, if needed */ - hdfs_connect( param ); + hdfs_connect( o ); /* * Check for unsupported flags. 
@@ -234,15 +350,15 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * The other two, we just note that they are not supported and don't do them. */ - if ( param->openFlags & IOR_RDWR ) { + if ( flags & IOR_RDWR ) { ERR( "Opening or creating a file in RDWR is not implemented in HDFS" ); } - if ( param->openFlags & IOR_EXCL ) { + if ( flags & IOR_EXCL ) { fprintf( stdout, "Opening or creating a file in Exclusive mode is not implemented in HDFS\n" ); } - if ( param->openFlags & IOR_APPEND ) { + if ( flags & IOR_APPEND ) { fprintf( stdout, "Opening or creating a file for appending is not implemented in HDFS\n" ); } @@ -254,8 +370,8 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign fd_oflags = O_CREAT; } - if ( param->openFlags & IOR_WRONLY ) { - if ( !param->filePerProc ) { + if ( flags & IOR_WRONLY ) { + if ( ! hints->filePerProc ) { // in N-1 mode, only rank 0 truncates the file if ( rank != 0 ) { @@ -279,7 +395,7 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * Now see if O_DIRECT is needed. */ - if ( param->useO_DIRECT == TRUE ) { + if ( o->direct_io == TRUE ) { hdfs_set_o_direct_flag( &fd_oflags ); } @@ -290,10 +406,7 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * truncate each other's writes */ - if (( param->openFlags & IOR_WRONLY ) && - ( !param->filePerProc ) && - ( rank != 0 )) { - + if (( flags & IOR_WRONLY ) && ( ! hints->filePerProc ) && ( rank != 0 )) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); } @@ -301,21 +414,16 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * Now rank zero can open and truncate, if necessary. 
*/ - if (param->verbose >= VERBOSE_4) { - printf("\thdfsOpenFile(0x%llx, %s, 0%o, %d, %d, %d)\n", - param->hdfs_fs, + if (verbose >= VERBOSE_4) { + printf("\thdfsOpenFile(%p, %s, 0%o, %lld, %d, %lld)\n", + o->fs, testFileName, fd_oflags, /* shown in octal to compare w/ */ - param->transferSize, - param->hdfs_replicas, - param->hdfs_block_size); + hints->transferSize, + o->replicas, + o->block_size); } - hdfs_file = hdfsOpenFile( param->hdfs_fs, - testFileName, - fd_oflags, - param->transferSize, - param->hdfs_replicas, - param->hdfs_block_size); + hdfs_file = hdfsOpenFile( o->fs, testFileName, fd_oflags, hints->transferSize, o->replicas, o->block_size); if ( ! hdfs_file ) { ERR( "Failed to open the file" ); } @@ -324,14 +432,14 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * For N-1 write, Rank 0 waits for the other ranks to open the file after it has. */ - if (( param->openFlags & IOR_WRONLY ) && - ( !param->filePerProc ) && + if (( flags & IOR_WRONLY ) && + ( !hints->filePerProc ) && ( rank == 0 )) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); } - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_Create_Or_Open\n"); } return ((void *) hdfs_file ); @@ -341,36 +449,36 @@ static void *HDFS_Create_Or_Open( char *testFileName, IOR_param_t *param, unsign * Create and open a file through the HDFS interface. */ -static void *HDFS_Create( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_4) { +static aiori_fd_t *HDFS_Create(char *testFileName, int flags, aiori_mod_opt_t * param) { + if (verbose >= VERBOSE_4) { printf("-> HDFS_Create\n"); } - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_Create\n"); } - return HDFS_Create_Or_Open( testFileName, param, TRUE ); + return HDFS_Create_Or_Open( testFileName, flags, param, TRUE ); } /* * Open a file through the HDFS interface. 
*/ -static void *HDFS_Open( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_4) { +static aiori_fd_t *HDFS_Open(char *testFileName, int flags, aiori_mod_opt_t * param) { + if (verbose >= VERBOSE_4) { printf("-> HDFS_Open\n"); } - if ( param->openFlags & IOR_CREAT ) { - if (param->verbose >= VERBOSE_4) { + if ( flags & IOR_CREAT ) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_Open( ... TRUE)\n"); } - return HDFS_Create_Or_Open( testFileName, param, TRUE ); + return HDFS_Create_Or_Open( testFileName, flags, param, TRUE ); } else { - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_Open( ... FALSE)\n"); } - return HDFS_Create_Or_Open( testFileName, param, FALSE ); + return HDFS_Create_Or_Open( testFileName, flags, param, FALSE ); } } @@ -378,19 +486,18 @@ static void *HDFS_Open( char *testFileName, IOR_param_t * param ) { * Write or read to file using the HDFS interface. */ -static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, - IOR_offset_t length, IOR_param_t * param) { - if (param->verbose >= VERBOSE_4) { - printf("-> HDFS_Xfer(acc:%d, file:0x%llx, buf:0x%llx, len:%llu, 0x%llx)\n", +static IOR_offset_t HDFS_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer, + IOR_offset_t length, IOR_offset_t offset, aiori_mod_opt_t * param) { + if (verbose >= VERBOSE_4) { + printf("-> HDFS_Xfer(acc:%d, file:%p, buf:%p, len:%llu, %p)\n", access, file, buffer, length, param); } - + hdfs_options_t * o = (hdfs_options_t*) param; int xferRetries = 0; long long remaining = (long long)length; char* ptr = (char *)buffer; long long rc; - off_t offset = param->offset; - hdfsFS hdfs_fs = param->hdfs_fs; /* (void*) */ + hdfsFS hdfs_fs = o->fs; /* (void*) */ hdfsFile hdfs_file = (hdfsFile)file; /* (void*) */ @@ -401,37 +508,34 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, if (verbose >= VERBOSE_4) { fprintf( stdout, "task %d writing to offset %lld\n", rank, - param->offset + 
length - remaining); + offset + length - remaining); } - if (param->verbose >= VERBOSE_4) { - printf("\thdfsWrite( 0x%llx, 0x%llx, 0x%llx, %lld)\n", + if (verbose >= VERBOSE_4) { + printf("\thdfsWrite( %p, %p, %p, %lld)\n", hdfs_fs, hdfs_file, ptr, remaining ); /* DEBUGGING */ } rc = hdfsWrite( hdfs_fs, hdfs_file, ptr, remaining ); if ( rc < 0 ) { ERR( "hdfsWrite() failed" ); } - offset += rc; - if ( param->fsyncPerWrite == TRUE ) { - HDFS_Fsync( hdfs_file, param ); + if ( hints->fsyncPerWrite == TRUE ) { + HDFS_Fsync( file, param ); } } else { /* READ or CHECK */ if (verbose >= VERBOSE_4) { fprintf( stdout, "task %d reading from offset %lld\n", - rank, - param->offset + length - remaining ); + rank, offset + length - remaining ); } - if (param->verbose >= VERBOSE_4) { - printf("\thdfsRead( 0x%llx, 0x%llx, 0x%llx, %lld)\n", + if (verbose >= VERBOSE_4) { + printf("\thdfsRead( %p, %p, %p, %lld)\n", hdfs_fs, hdfs_file, ptr, remaining ); /* DEBUGGING */ } - rc = hdfsRead( hdfs_fs, hdfs_file, ptr, remaining ); - + rc = hdfsPread(hdfs_fs, hdfs_file, offset, ptr, remaining); if ( rc == 0 ) { ERR( "hdfs_read() returned EOF prematurely" ); } @@ -449,9 +553,9 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, rank, access == WRITE ? 
"hdfsWrite()" : "hdfs_read()", rc, remaining, - param->offset + length - remaining ); + offset + length - remaining ); - if ( param->singleXferAttempt == TRUE ) { + if ( hints->singleXferAttempt == TRUE ) { MPI_CHECK( MPI_Abort( MPI_COMM_WORLD, -1 ), "barrier error" ); } @@ -467,7 +571,16 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, xferRetries++; } - if (param->verbose >= VERBOSE_4) { + if(access == WRITE){ + // flush user buffer, this makes the write visible to readers + // it is the expected semantics of read/writes + rc = hdfsHFlush(hdfs_fs, hdfs_file); + if(rc != 0){ + WARN("Error during flush"); + } + } + + if (verbose >= VERBOSE_4) { printf("<- HDFS_Xfer\n"); } return ( length ); @@ -476,67 +589,38 @@ static IOR_offset_t HDFS_Xfer(int access, void *file, IOR_size_t * buffer, /* * Perform hdfs_sync(). */ - -static void HDFS_Fsync( void *fd, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_4) { - printf("-> HDFS_Fsync\n"); - } - hdfsFS hdfs_fs = param->hdfs_fs; /* (void *) */ +static void HDFS_Fsync(aiori_fd_t * fd, aiori_mod_opt_t * param) { + hdfs_options_t * o = (hdfs_options_t*) param; + hdfsFS hdfs_fs = o->fs; /* (void *) */ hdfsFile hdfs_file = (hdfsFile)fd; /* (void *) */ -#if 0 - if (param->verbose >= VERBOSE_4) { - printf("\thdfsHSync(0x%llx, 0x%llx)\n", hdfs_fs, hdfs_file); + if (verbose >= VERBOSE_4) { + printf("\thdfsFlush(%p, %p)\n", hdfs_fs, hdfs_file); } if ( hdfsHSync( hdfs_fs, hdfs_file ) != 0 ) { - EWARN( "hdfsHSync() failed" ); - } -#elif 0 - if (param->verbose >= VERBOSE_4) { - printf("\thdfsHFlush(0x%llx, 0x%llx)\n", hdfs_fs, hdfs_file); - } - if ( hdfsHFlush( hdfs_fs, hdfs_file ) != 0 ) { - EWARN( "hdfsHFlush() failed" ); - } -#else - if (param->verbose >= VERBOSE_4) { - printf("\thdfsFlush(0x%llx, 0x%llx)\n", hdfs_fs, hdfs_file); - } - if ( hdfsFlush( hdfs_fs, hdfs_file ) != 0 ) { + // Hsync is implemented to flush out data with newer Hadoop versions EWARN( "hdfsFlush() failed" ); } -#endif - - if 
(param->verbose >= VERBOSE_4) { - printf("<- HDFS_Fsync\n"); - } } /* * Close a file through the HDFS interface. */ -static void HDFS_Close( void *fd, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_4) { +static void HDFS_Close(aiori_fd_t * fd, aiori_mod_opt_t * param) { + if (verbose >= VERBOSE_4) { printf("-> HDFS_Close\n"); } + hdfs_options_t * o = (hdfs_options_t*) param; - hdfsFS hdfs_fs = param->hdfs_fs; /* (void *) */ + hdfsFS hdfs_fs = o->fs; /* (void *) */ hdfsFile hdfs_file = (hdfsFile)fd; /* (void *) */ - int open_flags; - - if ( param->openFlags & IOR_WRONLY ) { - open_flags = O_CREAT | O_WRONLY; - } else { - open_flags = O_RDONLY; - } - if ( hdfsCloseFile( hdfs_fs, hdfs_file ) != 0 ) { ERR( "hdfsCloseFile() failed" ); } - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_Close\n"); } } @@ -547,119 +631,66 @@ static void HDFS_Close( void *fd, IOR_param_t * param ) { * NOTE: The signature for ior_aiori.delete doesn't include a parameter to * select recursive deletes. We'll assume that that is never needed. */ -static void HDFS_Delete( char *testFileName, IOR_param_t * param ) { - if (param->verbose >= VERBOSE_4) { +static void HDFS_Delete( char *testFileName, aiori_mod_opt_t * param ) { + if (verbose >= VERBOSE_4) { printf("-> HDFS_Delete\n"); } + hdfs_options_t * o = (hdfs_options_t*) param; char errmsg[256]; /* initialize file-system handle, if needed */ - hdfs_connect( param ); + hdfs_connect(o); - if ( ! param->hdfs_fs ) - ERR_SIMPLE( "Can't delete a file without an HDFS connection" ); + if ( ! 
o->fs ) + ERR( "Can't delete a file without an HDFS connection" ); - if ( hdfsDelete( param->hdfs_fs, testFileName, 0 ) != 0 ) { - sprintf(errmsg, - "[RANK %03d]: hdfsDelete() of file \"%s\" failed\n", + if ( hdfsDelete( o->fs, testFileName, 0 ) != 0 ) { + sprintf(errmsg, "[RANK %03d]: hdfsDelete() of file \"%s\" failed\n", rank, testFileName); EWARN( errmsg ); } - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_Delete\n"); } } -/* - * Determine api version. - */ - -static void HDFS_SetVersion( IOR_param_t * param ) { - if (param->verbose >= VERBOSE_4) { - printf("-> HDFS_SetVersion\n"); - } - - strcpy( param->apiVersion, param->api ); - if (param->verbose >= VERBOSE_4) { - printf("<- HDFS_SetVersion\n"); - } -} - /* * Use hdfsGetPathInfo() to get info about file? * Is there an fstat we can use on hdfs? * Should we just use POSIX fstat? */ -static IOR_offset_t -HDFS_GetFileSize(IOR_param_t * param, - MPI_Comm testComm, +static IOR_offset_t HDFS_GetFileSize(aiori_mod_opt_t * param, char * testFileName) { - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("-> HDFS_GetFileSize(%s)\n", testFileName); } + hdfs_options_t * o = (hdfs_options_t*) param; IOR_offset_t aggFileSizeFromStat; IOR_offset_t tmpMin, tmpMax, tmpSum; /* make sure file-system is connected */ - hdfs_connect( param ); + hdfs_connect( o ); /* file-info struct includes size in bytes */ - if (param->verbose >= VERBOSE_4) { - printf("\thdfsGetPathInfo(%s) ...", testFileName);fflush(stdout); + if (verbose >= VERBOSE_4) { + printf("\thdfsGetPathInfo(%s) ...", testFileName); + fflush(stdout); } - hdfsFileInfo* info = hdfsGetPathInfo( param->hdfs_fs, testFileName ); + hdfsFileInfo* info = hdfsGetPathInfo( o->fs, testFileName ); if ( ! 
info ) - ERR_SIMPLE( "hdfsGetPathInfo() failed" ); - if (param->verbose >= VERBOSE_4) { + ERR( "hdfsGetPathInfo() failed" ); + if (verbose >= VERBOSE_4) { printf("done.\n");fflush(stdout); } aggFileSizeFromStat = info->mSize; - if ( param->filePerProc == TRUE ) { - if (param->verbose >= VERBOSE_4) { - printf("\tall-reduce (1)\n"); - } - MPI_CHECK( - MPI_Allreduce( - &aggFileSizeFromStat, &tmpSum, 1, MPI_LONG_LONG_INT, MPI_SUM, testComm ), - "cannot total data moved" ); - - aggFileSizeFromStat = tmpSum; - } - else { - if (param->verbose >= VERBOSE_4) { - printf("\tall-reduce (2a)\n"); - } - MPI_CHECK( - MPI_Allreduce( - &aggFileSizeFromStat, &tmpMin, 1, MPI_LONG_LONG_INT, MPI_MIN, testComm ), - "cannot total data moved" ); - - if (param->verbose >= VERBOSE_4) { - printf("\tall-reduce (2b)\n"); - } - MPI_CHECK( - MPI_Allreduce( - &aggFileSizeFromStat, &tmpMax, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm ), - "cannot total data moved" ); - - if ( tmpMin != tmpMax ) { - if ( rank == 0 ) { - WARN( "inconsistent file size by different tasks" ); - } - - /* incorrect, but now consistent across tasks */ - aggFileSizeFromStat = tmpMin; - } - } - - if (param->verbose >= VERBOSE_4) { + if (verbose >= VERBOSE_4) { printf("<- HDFS_GetFileSize [%llu]\n", aggFileSizeFromStat); } return ( aggFileSizeFromStat ); diff --git a/src/ior.c b/src/ior.c index aa841de..64e3665 100755 --- a/src/ior.c +++ b/src/ior.c @@ -204,8 +204,6 @@ int ior_main(int argc, char **argv) void init_IOR_Param_t(IOR_param_t * p) { const char *default_aiori = aiori_default (); - char *hdfs_user; - assert (NULL != default_aiori); memset(p, 0, sizeof(IOR_param_t)); @@ -235,16 +233,6 @@ void init_IOR_Param_t(IOR_param_t * p) p->incompressibleSeed = 573; p->testComm = mpi_comm_world; - hdfs_user = getenv("USER"); - if (!hdfs_user) - hdfs_user = ""; - p->hdfs_user = strdup(hdfs_user); - p->hdfs_name_node = "default"; - p->hdfs_name_node_port = 0; /* ??? 
*/ - p->hdfs_fs = NULL; - p->hdfs_replicas = 0; /* invokes the default */ - p->hdfs_block_size = 0; - p->URI = NULL; } diff --git a/src/ior.h b/src/ior.h index 3009720..9073d6a 100755 --- a/src/ior.h +++ b/src/ior.h @@ -160,14 +160,6 @@ typedef struct int fsyncPerWrite; /* fsync() after each write */ int fsync; /* fsync() after write */ - /* HDFS variables */ - char * hdfs_user; /* copied from ENV, for now */ - const char* hdfs_name_node; - tPort hdfs_name_node_port; /* (uint16_t) */ - hdfsFS hdfs_fs; /* file-system handle */ - int hdfs_replicas; /* n block replicas. (0 gets default) */ - int hdfs_block_size; /* internal blk-size. (0 gets default) */ - char* URI; /* "path" to target object */ /* RADOS variables */ diff --git a/testing/build-hdfs.sh b/testing/build-hdfs.sh new file mode 100755 index 0000000..0165dfb --- /dev/null +++ b/testing/build-hdfs.sh @@ -0,0 +1,18 @@ +#!/bin/bash +mkdir build-hdfs +cd build-hdfs + +VER=hadoop-3.2.1 +if [[ ! -e $VER.tar.gz ]] ; then + wget https://www.apache.org/dyn/closer.cgi/hadoop/common/$VER/$VER.tar.gz + tar -xf $VER.tar.gz +fi + +../configure --with-hdfs CFLAGS="-I$PWD/$VER/include/ -O0 -g3" LDFLAGS="-L$PWD/$VER/lib/native -Wl,-rpath=$PWD/$VER/lib/native" +make -j + + +echo "To run execute:" +echo export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ +echo export CLASSPATH=$(find $VER/ -name "*.jar" -printf "%p:") +echo ./src/ior -a HDFS From f467903cb9ea08c9c5296977b082cca3e00f83a2 Mon Sep 17 00:00:00 2001 From: "Glenn K. 
Lockwood" Date: Wed, 23 Dec 2020 03:56:49 -0800 Subject: [PATCH 098/154] draft of a new simplified release process (#297) --- doc/sphinx/devDoc/release.rst | 131 +++++++++++++++++++++++++++------- 1 file changed, 107 insertions(+), 24 deletions(-) diff --git a/doc/sphinx/devDoc/release.rst b/doc/sphinx/devDoc/release.rst index 1e39a35..6238e6f 100644 --- a/doc/sphinx/devDoc/release.rst +++ b/doc/sphinx/devDoc/release.rst @@ -1,6 +1,22 @@ Release Process =============== +General release process +----------------------- + +The versioning for IOR is encoded in the ``META`` file in the root of the +repository. The nomenclature is + +* 3.2.0 designates a proper release +* 3.2.0rc1 designates the first release candidate in preparation for the 3.2.0 + release +* 3.2.0+dev indicates development towards 3.2.0 prior to a feature freeze +* 3.2.0rc1+dev indicates development towards 3.2.0's first release candidate + after a feature freeze + +Building a release of IOR +------------------------- + To build a new version of IOR:: $ docker run -it ubuntu bash @@ -10,29 +26,96 @@ To build a new version of IOR:: $ cd ior $ ./travis-build.sh -To create a new release candidate from RC, +Feature freezing for a new release +---------------------------------- -1. Disable the ``check-news`` option in ``AM_INIT_AUTOMAKE`` inside configure.ac -2. Append "rcX" to the ``Version:`` field in META where X is the release - candidate number -3. Build a release package as described above - -To create a new minor release of IOR, - -1. Build the rc branch as described above -2. Create a release on GitHub which creates the appropriate tag -3. Upload the source distributions generated by travis-build.sh - -To create a micro branch of IOR (e.g., if a release needs a hotfix), - -1. Check out the relevant release tagged in the rc branch (e.g., ``3.2.0``) -2. Create a branch with the major.minor name (e.g., ``3.2``) from that tag -3. Update the ``Version:`` in META -4. 
Apply hotfix(es) to that major.minor branch -5. Create the major.minor.micro release on GitHub - -To initiate a feature freeze, - -1. Merge the master branch into the rc branch +1. Branch `major.minor` from the commit at which the feature freeze should take + effect. +2. Append the "rc1+dev" designator to the Version field in the META file +3. Commit and push this new branch 2. Update the ``Version:`` field in META `of the master branch` to be the `next` - release version, not the one whose features have just been frozen + release version, not the one whose features have just been frozen. + +For example, to feature-freeze for version 3.2:: + + $ git checkout 11469ac + $ git checkout -B 3.2 + $ # update the ``Version:`` field in ``META`` to 3.2.0rc1+dev + $ git add META + $ git commit -m "Update version for feature freeze" + $ git push upstream 3.2 + $ git checkout master + $ # update the ``Version:`` field in ``META`` to 3.3.0+dev + $ git add META + $ git commit -m "Update version number" + $ git push upstream master + +Creating a new release candidate +-------------------------------- + +1. Check out the appropriate commit from the `major.minor` branch +2. Disable the ``check-news`` option in ``AM_INIT_AUTOMAKE`` inside configure.ac +3. Remove the "+dev" designator from the Version field in META +4. Build a release package as described above +5. Revert the change from #2 (it was just required to build a non-release tarball) +5. Tag and commit the updated META so one can easily recompile this rc from git +6. Update the "rcX" number and add "+dev" back to the ``Version:`` field in + META. This will allow anyone playing with the tip of this branch to see that + this the state is in preparation of the next rc, but is unreleased because of + +dev. +7. 
Commit + +For example to release 3.2.0rc1:: + + $ git checkout 3.2 + $ # edit configure.ac and remove the check-news option + $ # remove +dev from the Version field in META (Version: 3.2.0rc1) + $ # build + $ git checkout configure.ac + $ git add META + $ git commit -m "Release candidate for 3.2.0rc1" + $ git tag 3.2.0rc1 + $ # uptick rc number and re-add +dev to META (Version: 3.2.0rc2+dev) + $ git add META # should contain Version: 3.2.0rc2+dev + $ git commit -m "Uptick version after release" + $ git push --tags + +Applying patches to a new microrelease +-------------------------------------- + +If a released version 3.2.0 has bugs, cherry-pick the fixes from master into the +3.2 branch:: + + $ git checkout 3.2 + $ git cherry-pick cb40c99 + $ git cherry-pick aafdf89 + $ git push upstream 3.2 + +Once you've accumulated enough bugs, move on to issuing a new release below. + +Creating a new release +---------------------- + +1. Check out the relevant `major.minor` branch +2. Remove any "rcX" and "+dev" from the Version field in META +3. Update NEWS with the release notes +4. Build a release package as described above +5. Tag and commit the updated NEWS and META so one can easily recompile this + release from git +6. Update the Version field to the next rc version and re-add "+dev" +7. Commit +8. Create the major.minor.micro release on GitHub from the associated tag + +For example to release 3.2.0:: + + $ git checkout 3.2 + $ vim META # 3.2.0rc2+dev -> 3.2.0 + $ vim NEWS # add release notes from ``git log --oneline 3.2.0rc1..`` + $ # build + $ git add NEWS META + $ git commit -m "Release v3.2.0" + $ git tag 3.2.0 + $ vim META # 3.2.0 -> 3.2.1rc1+dev + $ git add META + $ git commit -m "Uptick version after release" + $ git push --tags From e579531970551fe6358598b4d5d52db4653d8b9e Mon Sep 17 00:00:00 2001 From: "Glenn K. 
Lockwood" Date: Wed, 23 Dec 2020 17:06:56 -0800 Subject: [PATCH 099/154] update NEWS for 3.3 release --- NEWS | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 195e645..00b98ff 100644 --- a/NEWS +++ b/NEWS @@ -8,16 +8,19 @@ New minor features: Bugfixes: -Version 3.3.0+dev +Version 3.3.0 -------------------------------------------------------------------------------- New major features: + - Add CephFS AIORI (Mark Nelson) - Add Gfarm AIORI (Osamu Tatebe) - Add DAOS AIORI (Mohamad Chaarawi) - Add DAOS DFS AIORI (Mohamad Chaarawi) +- -B option has been replaced with --posix.odirect New minor features: + - Display outlier host names (Jean-Yves Vet) - Enable global default dir layout for subdirs in Lustre (Petros Koutoupis) - Removed pound signs (#) from mdtest output file names (Julian Kunkel) @@ -30,6 +33,7 @@ New minor features: - Add support for sync to AIORIs (Julian Kunkel) General user improvements and bug fixes: + - Allocate aligned buffers to support DirectIO for BeeGFS (Sven Breuner) - Added IOPS and latency results to json output (Robert LeBlanc) - Fixed case where numTasks is not evenly divisible by tasksPerNode (J. Schwartz) @@ -40,14 +44,16 @@ General user improvements and bug fixes: - Make write verification work without read test (Jean-Yves Vet) - Documentation updates (Vaclav Hapla, Glenn Lockwood) - Add more debugging support (J. Schwartz) -- + General developer improvements: + - Fix type casting errors (Vaclav Hapla) - Add basic test infrastructure (Julian Kunkel, Glenn Lockwood) - Conform to strict C99 (Glenn Lockwood) Known issues: -- S3 AIORI may not compile with new versions of aws4c + +- S3 and HDFS backends may not compile with new versions of respective libraries Version 3.2.1 -------------------------------------------------------------------------------- From eb883f533a432881f1ae86c03510b9da1d512e2e Mon Sep 17 00:00:00 2001 From: "Glenn K. 
Lockwood" Date: Wed, 23 Dec 2020 18:17:47 -0800 Subject: [PATCH 100/154] update release docs based on 3.3.0 release experience --- doc/sphinx/devDoc/release.rst | 61 +++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 13 deletions(-) diff --git a/doc/sphinx/devDoc/release.rst b/doc/sphinx/devDoc/release.rst index 6238e6f..6fe6718 100644 --- a/doc/sphinx/devDoc/release.rst +++ b/doc/sphinx/devDoc/release.rst @@ -17,36 +17,48 @@ repository. The nomenclature is Building a release of IOR ------------------------- -To build a new version of IOR:: +To build a new version of IOR, e.g., from the 3.2 release branch:: $ docker run -it ubuntu bash $ apt-get update $ apt-get install -y git automake autoconf make gcc mpich - $ git clone -b rc https://github.com/hpc/ior + $ git clone -b 3.2 https://github.com/hpc/ior $ cd ior $ ./travis-build.sh +Alternatively you can build an an arbitrary branch in Docker using a bind mount. +This will be wrapped into a build-release Dockerfile in the future:: + + $ docker run -it --mount type=bind,source=$PWD,target=/ior ubuntu + $ apt-get update + $ apt-get install -y git automake autoconf make gcc mpich + $ ./travis-build.sh + Feature freezing for a new release ---------------------------------- 1. Branch `major.minor` from the commit at which the feature freeze should take effect. -2. Append the "rc1+dev" designator to the Version field in the META file +2. Append the "rc1+dev" designator to the Version field in the META file, and + update the NEWS file to have this new version as the topmost heading 3. Commit and push this new branch 2. Update the ``Version:`` field in META `of the master branch` to be the `next` - release version, not the one whose features have just been frozen. + release version, not the one whose features have just been frozen, and update + the NEWS file as you did in step 2. 
For example, to feature-freeze for version 3.2:: $ git checkout 11469ac $ git checkout -B 3.2 - $ # update the ``Version:`` field in ``META`` to 3.2.0rc1+dev - $ git add META + $ vim META # update the ``Version:`` field to 3.2.0rc1+dev + $ vim NEWS # update the topmost version number to 3.2.0rc1+dev + $ git add NEWS META $ git commit -m "Update version for feature freeze" $ git push upstream 3.2 $ git checkout master - $ # update the ``Version:`` field in ``META`` to 3.3.0+dev - $ git add META + $ vim META # update the ``Version:`` field to 3.3.0+dev + $ vim NEWS # update the topmost version number to 3.3.0+dev + $ git add NEWS META $ git commit -m "Update version number" $ git push upstream master @@ -78,7 +90,7 @@ For example to release 3.2.0rc1:: $ # uptick rc number and re-add +dev to META (Version: 3.2.0rc2+dev) $ git add META # should contain Version: 3.2.0rc2+dev $ git commit -m "Uptick version after release" - $ git push --tags + $ git push && git push --tags Applying patches to a new microrelease -------------------------------------- @@ -96,9 +108,20 @@ Once you've accumulated enough bugs, move on to issuing a new release below. Creating a new release ---------------------- +This is a two-phase process because we need to ensure that NEWS in master +contains a full history of releases, and we achieve this by always merging +changes from master into a release branch. + +1. Check out master +2. Ensure that the latest release notes for this release are reflected in NEWS +3. Commit that to master + +Then work on the release branch: + 1. Check out the relevant `major.minor` branch 2. Remove any "rcX" and "+dev" from the Version field in META -3. Update NEWS with the release notes +3. Cherry-pick your NEWS update commit from master into this release branch. + Resolve conflicts and get rid of news that reflect future releases. 4. Build a release package as described above 5. 
Tag and commit the updated NEWS and META so one can easily recompile this release from git @@ -108,14 +131,26 @@ Creating a new release For example to release 3.2.0:: + $ git checkout master + $ vim NEWS # add release notes from ``git log --oneline 3.2.0rc1..`` + $ git commit + +Let's say the above generated commit abc345e on master. Then:: + $ git checkout 3.2 $ vim META # 3.2.0rc2+dev -> 3.2.0 - $ vim NEWS # add release notes from ``git log --oneline 3.2.0rc1..`` + $ git cherry-pick abc345e + $ vim NEWS # resolve conflicts, delete stuff for e.g., 3.4 $ # build $ git add NEWS META $ git commit -m "Release v3.2.0" $ git tag 3.2.0 $ vim META # 3.2.0 -> 3.2.1rc1+dev - $ git add META + # vim NEWS # add a placeholder for 3.2.1rc2+dev so automake is happy + $ git add NEWS META $ git commit -m "Uptick version after release" - $ git push --tags + +Then push your master and your release branch and also push tags:: + + $ git checkout master && git push && git push --tags + $ git checkout 3.2 && git push && git push --tags From 21bf5a5a12f52fe5a4304536f8eeaada728d549f Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 25 Dec 2020 16:48:34 +0000 Subject: [PATCH 101/154] NCMPI ported to current ADIO. Fixed autoconf #303. 
--- configure.ac | 6 +- src/aiori-NCMPI.c | 198 ++++++++++++++++++++++++++-------------------- src/ior.h | 3 - 3 files changed, 117 insertions(+), 90 deletions(-) diff --git a/configure.ac b/configure.ac index b123064..a9d106a 100755 --- a/configure.ac +++ b/configure.ac @@ -160,8 +160,10 @@ AC_ARG_WITH([ncmpi], [], [with_ncmpi=no]) AM_CONDITIONAL([USE_NCMPI_AIORI], [test x$with_ncmpi = xyes]) -AM_COND_IF([USE_NCMPI_AIORI],[ - AC_DEFINE([USE_NCMPI_AIORI], [], [Build NCMPI backend AIORI]) +AS_IF([test "x$with_ncmpi" = xyes ], [ + AC_CHECK_HEADERS([pnetcdf.h], [AC_DEFINE([USE_NCMPI_AIORI], [], [PNetCDF available])], [ + AC_MSG_FAILURE([--with-ncmpi was given but pnetcdf.h not found]) + ]) ]) # MMAP IO support diff --git a/src/aiori-NCMPI.c b/src/aiori-NCMPI.c index 5fc1375..b6ab84b 100755 --- a/src/aiori-NCMPI.c +++ b/src/aiori-NCMPI.c @@ -45,20 +45,57 @@ /**************************** P R O T O T Y P E S *****************************/ -static int GetFileMode(IOR_param_t *); +static int GetFileMode(int flags); -static void *NCMPI_Create(char *, IOR_param_t *); -static void *NCMPI_Open(char *, IOR_param_t *); -static IOR_offset_t NCMPI_Xfer(int, void *, IOR_size_t *, - IOR_offset_t, IOR_param_t *); -static void NCMPI_Close(void *, IOR_param_t *); -static void NCMPI_Delete(char *, IOR_param_t *); +static aiori_fd_t *NCMPI_Create(char *, int iorflags, aiori_mod_opt_t *); +static aiori_fd_t *NCMPI_Open(char *, int iorflags, aiori_mod_opt_t *); +static IOR_offset_t NCMPI_Xfer(int, aiori_fd_t *, IOR_size_t *, + IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); +static void NCMPI_Close(aiori_fd_t *, aiori_mod_opt_t *); +static void NCMPI_Delete(char *, aiori_mod_opt_t *); static char *NCMPI_GetVersion(); -static void NCMPI_Fsync(void *, IOR_param_t *); -static IOR_offset_t NCMPI_GetFileSize(IOR_param_t *, MPI_Comm, char *); -static int NCMPI_Access(const char *, int, IOR_param_t *); +static void NCMPI_Fsync(aiori_fd_t *, aiori_mod_opt_t *); +static IOR_offset_t 
NCMPI_GetFileSize(aiori_mod_opt_t *, char *); +static int NCMPI_Access(const char *, int, aiori_mod_opt_t *); /************************** D E C L A R A T I O N S ***************************/ +static aiori_xfer_hint_t * hints = NULL; + +static void NCMPI_xfer_hints(aiori_xfer_hint_t * params){ + hints = params; + + MPIIO_xfer_hints(params); +} + +typedef struct { + int showHints; /* show hints */ + char * hintsFileName; /* full name for hints file */ + + /* runtime variables */ + int var_id; /* variable id handle for data set */ + int firstReadCheck; + int startDataSet; +} ncmpi_options_t; + + +static option_help * NCMPI_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values){ + ncmpi_options_t * o = malloc(sizeof(ncmpi_options_t)); + if (init_values != NULL){ + memcpy(o, init_values, sizeof(ncmpi_options_t)); + }else{ + memset(o, 0, sizeof(ncmpi_options_t)); + } + *init_backend_options = (aiori_mod_opt_t*) o; + + option_help h [] = { + {0, "mpiio.hintsFileName","Full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & o->hintsFileName}, + {0, "mpiio.showHints", "Show MPI hints", OPTION_FLAG, 'd', & o->showHints}, + LAST_OPTION + }; + option_help * help = malloc(sizeof(h)); + memcpy(help, h, sizeof(h)); + return help; +} ior_aiori_t ncmpi_aiori = { .name = "NCMPI", @@ -76,6 +113,8 @@ ior_aiori_t ncmpi_aiori = { .rmdir = aiori_posix_rmdir, .access = NCMPI_Access, .stat = aiori_posix_stat, + .get_options = NCMPI_options, + .xfer_hints = NCMPI_xfer_hints, }; /***************************** F U N C T I O N S ******************************/ @@ -83,15 +122,16 @@ ior_aiori_t ncmpi_aiori = { /* * Create and open a file through the NCMPI interface. 
*/ -static void *NCMPI_Create(char *testFileName, IOR_param_t * param) +static aiori_fd_t *NCMPI_Create(char *testFileName, int iorflags, aiori_mod_opt_t * param) { int *fd; int fd_mode; MPI_Info mpiHints = MPI_INFO_NULL; + ncmpi_options_t * o = (ncmpi_options_t*) param; /* read and set MPI file hints from hintsFile */ - SetHints(&mpiHints, param->hintsFileName); - if (rank == 0 && param->showHints) { + SetHints(&mpiHints, o->hintsFileName); + if (rank == 0 && o->showHints) { fprintf(stdout, "\nhints passed to MPI_File_open() {\n"); ShowHints(&mpiHints); fprintf(stdout, "}\n"); @@ -101,7 +141,7 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) if (fd == NULL) ERR("malloc() failed"); - fd_mode = GetFileMode(param); + fd_mode = GetFileMode(iorflags); NCMPI_CHECK(ncmpi_create(testComm, testFileName, fd_mode, mpiHints, fd), "cannot create file"); @@ -111,7 +151,7 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) #if defined(PNETCDF_VERSION_MAJOR) && (PNETCDF_VERSION_MAJOR > 1 || PNETCDF_VERSION_MINOR >= 2) /* ncmpi_get_file_info is first available in 1.2.0 */ - if (rank == 0 && param->showHints) { + if (rank == 0 && o->showHints) { MPI_Info info_used; MPI_CHECK(ncmpi_get_file_info(*fd, &info_used), "cannot inquire file info"); @@ -123,21 +163,22 @@ static void *NCMPI_Create(char *testFileName, IOR_param_t * param) } #endif - return (fd); + return (aiori_fd_t*)(fd); } /* * Open a file through the NCMPI interface. 
*/ -static void *NCMPI_Open(char *testFileName, IOR_param_t * param) +static aiori_fd_t *NCMPI_Open(char *testFileName, int iorflags, aiori_mod_opt_t * param) { int *fd; int fd_mode; MPI_Info mpiHints = MPI_INFO_NULL; + ncmpi_options_t * o = (ncmpi_options_t*) param; /* read and set MPI file hints from hintsFile */ - SetHints(&mpiHints, param->hintsFileName); - if (rank == 0 && param->showHints) { + SetHints(&mpiHints, o->hintsFileName); + if (rank == 0 && o->showHints) { fprintf(stdout, "\nhints passed to MPI_File_open() {\n"); ShowHints(&mpiHints); fprintf(stdout, "}\n"); @@ -147,7 +188,7 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) if (fd == NULL) ERR("malloc() failed"); - fd_mode = GetFileMode(param); + fd_mode = GetFileMode(iorflags); NCMPI_CHECK(ncmpi_open(testComm, testFileName, fd_mode, mpiHints, fd), "cannot open file"); @@ -157,7 +198,7 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) #if defined(PNETCDF_VERSION_MAJOR) && (PNETCDF_VERSION_MAJOR > 1 || PNETCDF_VERSION_MINOR >= 2) /* ncmpi_get_file_info is first available in 1.2.0 */ - if (rank == 0 && param->showHints) { + if (rank == 0 && o->showHints) { MPI_Info info_used; MPI_CHECK(ncmpi_get_file_info(*fd, &info_used), "cannot inquire file info"); @@ -169,51 +210,43 @@ static void *NCMPI_Open(char *testFileName, IOR_param_t * param) } #endif - return (fd); + return (aiori_fd_t*)(fd); } /* * Write or read access to file using the NCMPI interface. 
*/ -static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, - IOR_offset_t length, IOR_param_t * param) +static IOR_offset_t NCMPI_Xfer(int access, aiori_fd_t *fd, IOR_size_t * buffer, IOR_offset_t transferSize, IOR_offset_t offset, aiori_mod_opt_t * param) { signed char *bufferPtr = (signed char *)buffer; - static int firstReadCheck = FALSE, startDataSet; + ncmpi_options_t * o = (ncmpi_options_t*) param; int var_id, dim_id[NUM_DIMS]; - MPI_Offset bufSize[NUM_DIMS], offset[NUM_DIMS]; + MPI_Offset bufSize[NUM_DIMS], offsets[NUM_DIMS]; IOR_offset_t segmentPosition; int segmentNum, transferNum; /* determine by offset if need to start data set */ - if (param->filePerProc == TRUE) { + if (hints->filePerProc == TRUE) { segmentPosition = (IOR_offset_t) 0; } else { - segmentPosition = - (IOR_offset_t) ((rank + rankOffset) % param->numTasks) - * param->blockSize; + segmentPosition = (IOR_offset_t) ((rank + rankOffset) % hints->numTasks) * hints->blockSize; } - if ((int)(param->offset - segmentPosition) == 0) { - startDataSet = TRUE; + if ((int)(offset - segmentPosition) == 0) { + o->startDataSet = TRUE; /* * this toggle is for the read check operation, which passes through * this function twice; note that this function will open a data set * only on the first read check and close only on the second */ if (access == READCHECK) { - if (firstReadCheck == TRUE) { - firstReadCheck = FALSE; - } else { - firstReadCheck = TRUE; - } + o->firstReadCheck = ! 
o->firstReadCheck; } } - if (startDataSet == TRUE && - (access != READCHECK || firstReadCheck == TRUE)) { + if (o->startDataSet == TRUE && + (access != READCHECK || o->firstReadCheck == TRUE)) { if (access == WRITE) { - int numTransfers = - param->blockSize / param->transferSize; + int numTransfers = hints->blockSize / hints->transferSize; /* reshape 1D array to 3D array: [segmentCount*numTasks][numTransfers][transferSize] @@ -229,7 +262,7 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, "cannot define data set dimensions"); NCMPI_CHECK(ncmpi_def_dim (*(int *)fd, "transfer_size", - param->transferSize, &dim_id[2]), + hints->transferSize, &dim_id[2]), "cannot define data set dimensions"); NCMPI_CHECK(ncmpi_def_var (*(int *)fd, "data_var", NC_BYTE, NUM_DIMS, @@ -244,77 +277,72 @@ static IOR_offset_t NCMPI_Xfer(int access, void *fd, IOR_size_t * buffer, "cannot retrieve data set variable"); } - if (param->collective == FALSE) { + if (hints->collective == FALSE) { NCMPI_CHECK(ncmpi_begin_indep_data(*(int *)fd), "cannot enable independent data mode"); } - param->var_id = var_id; - startDataSet = FALSE; + o->var_id = var_id; + o->startDataSet = FALSE; } - var_id = param->var_id; + var_id = o->var_id; /* calculate the segment number */ - segmentNum = param->offset / (param->numTasks * param->blockSize); + segmentNum = offset / (hints->numTasks * hints->blockSize); /* calculate the transfer number in each block */ - transferNum = param->offset % param->blockSize / param->transferSize; + transferNum = offset % hints->blockSize / hints->transferSize; /* read/write the 3rd dim of the dataset, each is of amount param->transferSize */ bufSize[0] = 1; bufSize[1] = 1; - bufSize[2] = param->transferSize; + bufSize[2] = transferSize; - offset[0] = segmentNum * param->numTasks + rank; - offset[1] = transferNum; - offset[2] = 0; + offsets[0] = segmentNum * hints->numTasks + rank; + offsets[1] = transferNum; + offsets[2] = 0; /* access the file */ if 
(access == WRITE) { /* WRITE */ - if (param->collective) { + if (hints->collective) { NCMPI_CHECK(ncmpi_put_vara_schar_all - (*(int *)fd, var_id, offset, bufSize, - bufferPtr), + (*(int *)fd, var_id, offsets, bufSize, bufferPtr), "cannot write to data set"); } else { NCMPI_CHECK(ncmpi_put_vara_schar - (*(int *)fd, var_id, offset, bufSize, - bufferPtr), + (*(int *)fd, var_id, offsets, bufSize, bufferPtr), "cannot write to data set"); } } else { /* READ or CHECK */ - if (param->collective == TRUE) { + if (hints->collective == TRUE) { NCMPI_CHECK(ncmpi_get_vara_schar_all - (*(int *)fd, var_id, offset, bufSize, - bufferPtr), + (*(int *)fd, var_id, offsets, bufSize, bufferPtr), "cannot read from data set"); } else { NCMPI_CHECK(ncmpi_get_vara_schar - (*(int *)fd, var_id, offset, bufSize, - bufferPtr), + (*(int *)fd, var_id, offsets, bufSize, bufferPtr), "cannot read from data set"); } } - return (length); + return (transferSize); } /* * Perform fsync(). */ -static void NCMPI_Fsync(void *fd, IOR_param_t * param) +static void NCMPI_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) { - ; } /* * Close a file through the NCMPI interface. */ -static void NCMPI_Close(void *fd, IOR_param_t * param) +static void NCMPI_Close(aiori_fd_t *fd, aiori_mod_opt_t * param) { - if (param->collective == FALSE) { + if (hints->collective == FALSE) { NCMPI_CHECK(ncmpi_end_indep_data(*(int *)fd), "cannot disable independent data mode"); } @@ -325,7 +353,7 @@ static void NCMPI_Close(void *fd, IOR_param_t * param) /* * Delete a file through the NCMPI interface. */ -static void NCMPI_Delete(char *testFileName, IOR_param_t * param) +static void NCMPI_Delete(char *testFileName, aiori_mod_opt_t * param) { return(MPIIO_Delete(testFileName, param)); } @@ -341,35 +369,35 @@ static char* NCMPI_GetVersion() /* * Return the correct file mode for NCMPI. 
*/ -static int GetFileMode(IOR_param_t * param) +static int GetFileMode(int flags) { int fd_mode = 0; /* set IOR file flags to NCMPI flags */ /* -- file open flags -- */ - if (param->openFlags & IOR_RDONLY) { + if (flags & IOR_RDONLY) { fd_mode |= NC_NOWRITE; } - if (param->openFlags & IOR_WRONLY) { - fprintf(stdout, "File write only not implemented in NCMPI\n"); + if (flags & IOR_WRONLY) { + WARN("File write only not implemented in NCMPI"); } - if (param->openFlags & IOR_RDWR) { + if (flags & IOR_RDWR) { fd_mode |= NC_WRITE; } - if (param->openFlags & IOR_APPEND) { - fprintf(stdout, "File append not implemented in NCMPI\n"); + if (flags & IOR_APPEND) { + WARN("File append not implemented in NCMPI"); } - if (param->openFlags & IOR_CREAT) { + if (flags & IOR_CREAT) { fd_mode |= NC_CLOBBER; } - if (param->openFlags & IOR_EXCL) { - fprintf(stdout, "Exclusive access not implemented in NCMPI\n"); + if (flags & IOR_EXCL) { + WARN("Exclusive access not implemented in NCMPI"); } - if (param->openFlags & IOR_TRUNC) { - fprintf(stdout, "File truncation not implemented in NCMPI\n"); + if (flags & IOR_TRUNC) { + WARN("File truncation not implemented in NCMPI"); } - if (param->openFlags & IOR_DIRECT) { - fprintf(stdout, "O_DIRECT not implemented in NCMPI\n"); + if (flags & IOR_DIRECT) { + WARN("O_DIRECT not implemented in NCMPI"); } /* to enable > 4GB file size */ @@ -381,16 +409,16 @@ static int GetFileMode(IOR_param_t * param) /* * Use MPIIO call to get file size. */ -static IOR_offset_t NCMPI_GetFileSize(IOR_param_t * test, MPI_Comm testComm, +static IOR_offset_t NCMPI_GetFileSize(aiori_mod_opt_t * opt, char *testFileName) { - return(MPIIO_GetFileSize(test, testComm, testFileName)); + return(MPIIO_GetFileSize(opt, testFileName)); } /* * Use MPIIO call to check for access. 
*/ -static int NCMPI_Access(const char *path, int mode, IOR_param_t *param) +static int NCMPI_Access(const char *path, int mode, aiori_mod_opt_t *param) { return(MPIIO_Access(path, mode, param)); } diff --git a/src/ior.h b/src/ior.h index 9073d6a..87940a0 100755 --- a/src/ior.h +++ b/src/ior.h @@ -166,9 +166,6 @@ typedef struct rados_t rados_cluster; /* RADOS cluster handle */ rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */ - /* NCMPI variables */ - int var_id; /* variable id handle for data set */ - int id; /* test's unique ID */ int intraTestBarriers; /* barriers between open/op and op/close */ int warningAsErrors; /* treat any warning as an error */ From d339caa501a146449a45ab876079dc37f513fc43 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Mon, 4 Jan 2021 18:50:38 +0000 Subject: [PATCH 102/154] Updated test (illustration of an empty test) and distclean for make distcheck #304 (#305) Resolves #304 --- Makefile.am | 3 ++- src/test/example.c | 16 ++++------------ 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/Makefile.am b/Makefile.am index d874a90..d6465a8 100755 --- a/Makefile.am +++ b/Makefile.am @@ -10,4 +10,5 @@ ACLOCAL_AMFLAGS = -I config # `make dist` and `make test` for simple test binaries that do not require any # special environment. 
#TESTS = testing/basic-tests.sh -#DISTCLEANFILES = -r test test_out + +DISTCLEANFILES = ./src/build.conf diff --git a/src/test/example.c b/src/test/example.c index 5bb4b2b..3b31066 100644 --- a/src/test/example.c +++ b/src/test/example.c @@ -1,8 +1,10 @@ #include -#include -#include +#include "../ior.h" +#include "../ior-internal.h" +// Run all tests via: +// make distcheck // build a single test via, e.g., mpicc example.c -I ../src/ ../src/libaiori.a -lm int main(){ @@ -16,16 +18,6 @@ int main(){ // having an individual file test.filePerProc = 1; - IOR_offset_t * offsets; - offsets = GetOffsetArraySequential(& test, 0); - assert(offsets[0] == 0); - assert(offsets[1] == 10); - assert(offsets[2] == 20); - assert(offsets[3] == 30); - assert(offsets[4] == 40); - // for(int i = 0; i < test.segmentCount; i++){ - // printf("%lld\n", (long long int) offsets[i]); - // } printf("OK\n"); return 0; } From 4a964369284ff842146339c657362c58bef17e33 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 14 Jan 2021 16:41:56 +0000 Subject: [PATCH 103/154] MDTest convert FATAL to WARN (#307) --- src/mdtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index 1a71174..0887da4 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1648,7 +1648,7 @@ void create_remove_directory_tree(int create, if (!create) { VERBOSE(2,5,"Remove directory '%s'", dir); if (-1 == o.backend->rmdir(dir, o.backend_options)) { - FAIL("Unable to remove directory %s", dir); + EWARNF("Unable to remove directory %s", dir); } } } else if (currDepth <= o.depth) { From 33b70ecbc8b061fc2370192f6340434efa5a4c80 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 14 Jan 2021 17:38:11 +0000 Subject: [PATCH 104/154] Fix MDTest multidir option to work with stonewall file. 
(#308) --- src/mdtest.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 0887da4..4a3b31f 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1119,8 +1119,13 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* The number of items depends on the stonewalling file */ expected_items = ReadStoneWallingIterations(o.stoneWallingStatusFile); if(expected_items >= 0){ - o.items = expected_items; - progress->items_per_dir = o.items; + if(o.directory_loops > 1){ + o.directory_loops = expected_items / o.items_per_dir; + o.items = o.items_per_dir; + }else{ + o.items = expected_items; + progress->items_per_dir = o.items; + } } if (rank == 0) { if(expected_items == -1){ @@ -1537,8 +1542,6 @@ void md_validate_tests() { FAIL("only specify the number of items or the number of items per directory"); }else if( o.items % o.items_per_dir != 0){ FAIL("items must be a multiple of items per directory"); - }else if( o.stone_wall_timer_seconds != 0){ - FAIL("items + items_per_dir can only be set without stonewalling"); } } /* check for using mknod */ From b2089514e3bf3663b003f8e89bbcfb065acd3bc6 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Mon, 18 Jan 2021 21:30:16 +0000 Subject: [PATCH 105/154] MDTest calculate min/max/mean across iterations #300. (#312) The calculation per iteration first computes the value of the slowest process, i.e., highest time or lowest rate. This is then the value for the iteration. Secondly, calculate the min/max/mean across iterations. For tree operations, the value is identical to previous as only Rank 0 is involved. 
--- src/mdtest.c | 59 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 4a3b31f..5fe4263 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1298,7 +1298,8 @@ void summarize_results(int iterations, int print_time) { char const * access; int i, j, k; int start, stop, tableSize = MDTEST_LAST_NUM; - double min, max, mean, sd, sum = 0, var = 0, curr = 0; + double min, max, mean, sd, sum, var, curr = 0; + double imin, imax, isum, icur; // calculation per iteration double all[iterations * o.size * tableSize]; @@ -1371,22 +1372,42 @@ void summarize_results(int iterations, int print_time) { } VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); - VERBOSE(0,-1," Operation Max Min Mean Std Dev"); - VERBOSE(0,-1," --------- --- --- ---- -------"); + VERBOSE(0,-1," Operation per Rank: Max Min Mean Std Dev per Iteration: Max Min Mean"); + VERBOSE(0,-1," --------- --- --- ---- ------- --- --- ----"); for (i = start; i < stop; i++) { min = max = all[i]; - for (k=0; k < o.size; k++) { - for (j = 0; j < iterations; j++) { + sum = var = 0; + imin = 1e308; + isum = imax = 0; + for (j = 0; j < iterations; j++) { + icur = print_time ? 
0 : 1e308; + for (k=0; k < o.size; k++) { curr = all[calc_allreduce_index(j, k, i)]; if (min > curr) { min = curr; } if (max < curr) { - max = curr; + max = curr; + } + if(print_time){ + if(icur < curr){ + icur = curr; + } + }else{ + if(icur > curr){ + icur = curr; + } } sum += curr; } + if(icur > imax){ + imax = icur; + } + if(icur < imin){ + imin = icur; + } + isum += icur; } mean = sum / (iterations * o.size); for (k=0; k < o.size; k++) { @@ -1403,10 +1424,12 @@ void summarize_results(int iterations, int print_time) { fprintf(out_logfile, "%14.3f ", max); fprintf(out_logfile, "%14.3f ", min); fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%14.3f\n", sd); + fprintf(out_logfile, "%14.3f ", sd); + fprintf(out_logfile, "%18.3f ", imax); + fprintf(out_logfile, "%14.3f ", imin); + fprintf(out_logfile, "%14.3f\n", isum / iterations); fflush(out_logfile); } - sum = var = 0; } // TODO generalize once more stonewall timers are supported @@ -1426,20 +1449,28 @@ void summarize_results(int iterations, int print_time) { /* calculate tree create/remove rates, applies only to Rank 0 */ for (i = 8; i < tableSize; i++) { min = max = all[i]; + sum = var = 0; + imin = imax = all[i]; + isum = 0; for (j = 0; j < iterations; j++) { if(print_time){ curr = o.summary_table[j].time[i]; }else{ curr = o.summary_table[j].rate[i]; } - if (min > curr) { - min = curr; + min = curr; } if (max < curr) { - max = curr; + max = curr; } sum += curr; + if(curr > imax){ + imax = curr; + } + if(curr < imin){ + imin = curr; + } } mean = sum / (iterations); for (j = 0; j < iterations; j++) { @@ -1458,9 +1489,11 @@ void summarize_results(int iterations, int print_time) { fprintf(out_logfile, "%14.3f ", max); fprintf(out_logfile, "%14.3f ", min); fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%14.3f\n", sd); + fprintf(out_logfile, "%14.3f ", sd); + fprintf(out_logfile, "%18.3f ", imax); + fprintf(out_logfile, "%14.3f ", imin); + fprintf(out_logfile, "%14.3f\n", sum / 
iterations); fflush(out_logfile); - sum = var = 0; } } From fa316d5d24ada43e84e3f50f440a9301873c0bb5 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Tue, 19 Jan 2021 07:43:01 -0600 Subject: [PATCH 106/154] dfs: conditionally compile out svcl setting (#313) The SVCL argument is being removed from the DAOS API, so conditionally compile out while maintaining backwards compatibility to versions where it's still required. --- src/aiori-DFS.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 32b2960..686075f 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -59,7 +59,9 @@ enum handleType { /************************** O P T I O N S *****************************/ typedef struct { char *pool; +#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1 char *svcl; +#endif char *group; char *cont; int chunk_size; @@ -85,7 +87,9 @@ static option_help * DFS_options(aiori_mod_opt_t ** init_backend_options, option_help h [] = { {0, "dfs.pool", "pool uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o->pool}, +#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1 {0, "dfs.svcl", "pool SVCL", OPTION_OPTIONAL_ARGUMENT, 's', &o->svcl}, +#endif {0, "dfs.group", "server group", OPTION_OPTIONAL_ARGUMENT, 's', &o->group}, {0, "dfs.cont", "DFS container uuid", OPTION_OPTIONAL_ARGUMENT, 's', &o->cont}, {0, "dfs.chunk_size", "chunk size", OPTION_OPTIONAL_ARGUMENT, 'd', &o->chunk_size}, @@ -188,9 +192,13 @@ void DFS_init_xfer_options(aiori_xfer_hint_t * params) static int DFS_check_params(aiori_mod_opt_t * options){ DFS_options_t *o = (DFS_options_t *) options; - if (o->pool == NULL || o->svcl == NULL || o->cont == NULL) + if (o->pool == NULL || o->cont == NULL) ERR("Invalid pool or container options\n"); +#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1 + if (o->svcl == NULL) + ERR("Invalid SVCL\n"); +#endif return 0; } @@ -455,9 +463,14 @@ 
DFS_Init(aiori_mod_opt_t * options) return; /** shouldn't be fatal since it can be called with POSIX backend selection */ - if (o->pool == NULL || o->svcl == NULL || o->cont == NULL) + if (o->pool == NULL || o->cont == NULL) return; +#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1 + if (o->svcl == NULL) + return; +#endif + rc = daos_init(); DCHECK(rc, "Failed to initialize daos"); @@ -478,7 +491,6 @@ DFS_Init(aiori_mod_opt_t * options) if (rank == 0) { uuid_t pool_uuid, co_uuid; - d_rank_list_t *svcl = NULL; daos_pool_info_t pool_info; daos_cont_info_t co_info; @@ -488,17 +500,25 @@ DFS_Init(aiori_mod_opt_t * options) rc = uuid_parse(o->cont, co_uuid); DCHECK(rc, "Failed to parse 'Cont uuid': %s", o->cont); + INFO(VERBOSE_1, "Pool uuid = %s", o->pool); + INFO(VERBOSE_1, "DFS Container namespace uuid = %s", o->cont); + +#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1 + d_rank_list_t *svcl = NULL; + svcl = daos_rank_list_parse(o->svcl, ":"); if (svcl == NULL) ERR("Failed to allocate svcl"); - - INFO(VERBOSE_1, "Pool uuid = %s, SVCL = %s\n", o->pool, o->svcl); - INFO(VERBOSE_1, "DFS Container namespace uuid = %s\n", o->cont); + INFO(VERBOSE_1, "Pool svcl = %s", o->svcl); /** Connect to DAOS pool */ rc = daos_pool_connect(pool_uuid, o->group, svcl, DAOS_PC_RW, &poh, &pool_info, NULL); d_rank_list_free(svcl); +#else + rc = daos_pool_connect(pool_uuid, o->group, DAOS_PC_RW, + &poh, &pool_info, NULL); +#endif DCHECK(rc, "Failed to connect to pool"); rc = daos_cont_open(poh, co_uuid, DAOS_COO_RW, &coh, &co_info, @@ -584,8 +604,10 @@ DFS_Finalize(aiori_mod_opt_t *options) /** reset tunables */ o->pool = NULL; +#if !defined(DAOS_API_VERSION_MAJOR) || DAOS_API_VERSION_MAJOR < 1 o->svcl = NULL; - o->group = NULL; +#endif + o->group = NULL; o->cont = NULL; o->chunk_size = 1048576; o->oclass = NULL; From 3daf7a2d0f48c7fb1a098cfc47f7739f43b0509d Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Tue, 19 Jan 2021 21:23:30 +0000 Subject: [PATCH 
107/154] Bugfix: valgrind memory issues (#314) --- src/md-workbench.c | 4 +++- src/mdtest.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index 869b4fd..7c130b1 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "md-workbench.h" #include "config.h" @@ -343,7 +344,8 @@ static int compare_floats(time_result_t * x, time_result_t * y){ } static double runtime_quantile(int repeats, time_result_t * times, float quantile){ - int pos = round(quantile * repeats + 0.49); + int pos = round(quantile * (repeats - 1) + 0.49); + assert(pos < repeats); return times[pos].runtime; } diff --git a/src/mdtest.c b/src/mdtest.c index 5fe4263..6d610df 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -668,6 +668,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { if (alloc_res) { FAIL("out of memory"); } + memset(read_buffer, -1, o.read_bytes); } uint64_t stop_items = o.items; From 19ad73568a5cc1fc5ca782584aba55813e751def Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Tue, 19 Jan 2021 22:28:34 +0000 Subject: [PATCH 108/154] Bugfix ior stonewalling case. (#316) * Bugfix ior stonewalling case. 
--- src/ior.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ior.c b/src/ior.c index 64e3665..8175ce2 100755 --- a/src/ior.c +++ b/src/ior.c @@ -363,9 +363,11 @@ static void CheckFileSize(IOR_test_t *test, char * testFilename, IOR_offset_t da static size_t CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_param_t *test, IOR_offset_t offset, int fillrank, int access) { + assert(access == WRITECHECK || access == READCHECK); + char testFileName[MAX_PATHLEN]; - char bufferLabel1[MAX_STR]; - char bufferLabel2[MAX_STR]; + char * bufferLabel1 = "Expected: "; + char * bufferLabel2 = "Actual: "; size_t i, j, length; size_t errorCount = 0; @@ -379,13 +381,6 @@ CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_p unsigned long long *testbuf = (unsigned long long *)expectedBuffer; - if (access == WRITECHECK || access == READCHECK) { - strcpy(bufferLabel1, "Expected: "); - strcpy(bufferLabel2, "Actual: "); - } else { - ERR("incorrect argument for CompareData()"); - } - length = size / sizeof(IOR_size_t); if (verbose >= VERBOSE_3) { fprintf(out_logfile, @@ -442,6 +437,7 @@ CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_p }else if(verbose >= VERBOSE_2){ fprintf(out_logfile, "[%d] comparison successful during transfer %lld offset %lld\n", rank, transferCount, offset); } + return (errorCount); } @@ -1916,9 +1912,13 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, point->stonewall_min_data_accessed /1024.0 / 1024 / 1024, point->stonewall_avg_data_accessed / 1024.0 / 1024 / 1024 , point->stonewall_time); } if(pairCnt != point->pairs_accessed){ - // some work needs still to be done ! + // some work needs still to be done, complete the current block ! 
+ i--; + if(j == offsets){ + j = 0; // current block is completed + } for ( ; pairCnt < point->pairs_accessed; i++) { - for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) { + for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) { IOR_offset_t offset; if (test->randomOffset) { if(test->filePerProc){ @@ -1937,6 +1937,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); pairCnt++; } + j = 0; } } }else{ From 58fbefbd337622d2a5d2e38c11c3d9330e51eab2 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Tue, 19 Jan 2021 23:19:01 +0000 Subject: [PATCH 109/154] Bugfix IOR offsetting (fix before was incomplete). (#317) --- src/ior.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ior.c b/src/ior.c index 8175ce2..8a349a0 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1916,6 +1916,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, i--; if(j == offsets){ j = 0; // current block is completed + i++; } for ( ; pairCnt < point->pairs_accessed; i++) { for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) { From e4120d600d63ba94cd2af575e54f8afaa34ff6ba Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Wed, 20 Jan 2021 14:06:05 +0000 Subject: [PATCH 110/154] Reduce the dependency to global MPI Communicator --- src/ior.c | 4 ++-- src/md-workbench.c | 8 ++++---- src/mdtest.c | 4 ++-- src/utilities.c | 22 +++++++++++----------- src/utilities.h | 4 ++-- 5 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/ior.c b/src/ior.c index 8a349a0..f30594a 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1008,7 +1008,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) tests = tests->next; } - init_clock(); + init_clock(com); } /* @@ -1415,7 +1415,7 @@ static void TestIoSys(IOR_test_t *test) if ((params->readFile || params->checkRead ) && !test_time_elapsed(params, startTime)) { /* check for stonewall */ if(params->stoneWallingStatusFile){ - params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile); + params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile, params->testComm); if(params->stoneWallingWearOutIterations == -1 && rank == 0){ WARN("Could not read back the stonewalling status from the file!"); params->stoneWallingWearOutIterations = 0; diff --git a/src/md-workbench.c b/src/md-workbench.c index 7c130b1..b9b1b23 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -853,8 +853,8 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c int ret; int printhelp = 0; char * limit_memory_P = NULL; - init_options(); + init_clock(world_com); o.com = world_com; o.logfile = out_logfile; @@ -935,8 +935,8 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c // MPI_Abort(o.com, 1); //} - double bench_start; - bench_start = GetTimeStamp(); + double t_bench_start; + t_bench_start = GetTimeStamp(); phase_stat_t phase_stats; size_t result_count = (2 + o.iterations) * (o.adaptive_waiting_mode ? 
7 : 1); o.results = malloc(sizeof(mdworkbench_results_t) + sizeof(mdworkbench_result_t) * result_count); @@ -1006,7 +1006,7 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c store_position(current_index); } - double t_all = GetTimeStamp(); + double t_all = GetTimeStamp() - t_bench_start; if(o.backend->finalize){ o.backend->finalize(o.backend_options); } diff --git a/src/mdtest.c b/src/mdtest.c index 6d610df..5386739 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1118,7 +1118,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (o.stoneWallingStatusFile){ int64_t expected_items; /* The number of items depends on the stonewalling file */ - expected_items = ReadStoneWallingIterations(o.stoneWallingStatusFile); + expected_items = ReadStoneWallingIterations(o.stoneWallingStatusFile, testComm); if(expected_items >= 0){ if(o.directory_loops > 1){ o.directory_loops = expected_items / o.items_per_dir; @@ -1946,7 +1946,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_resultfile = world_out; mpi_comm_world = world_com; - init_clock(); + init_clock(world_com); mdtest_init_args(); int i, j; diff --git a/src/utilities.c b/src/utilities.c index 0ec2390..cb2deae 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -706,34 +706,34 @@ double GetTimeStamp(void) /* * Determine any spread (range) between node times. 
*/ -static double TimeDeviation(void) +static double TimeDeviation(MPI_Comm com) { double timestamp; double min = 0; double max = 0; double roottimestamp; - MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); + MPI_CHECK(MPI_Barrier(com), "barrier error"); timestamp = GetTimeStamp(); MPI_CHECK(MPI_Reduce(&timestamp, &min, 1, MPI_DOUBLE, - MPI_MIN, 0, mpi_comm_world), + MPI_MIN, 0, com), "cannot reduce tasks' times"); MPI_CHECK(MPI_Reduce(&timestamp, &max, 1, MPI_DOUBLE, - MPI_MAX, 0, mpi_comm_world), + MPI_MAX, 0, com), "cannot reduce tasks' times"); /* delta between individual nodes' time and root node's time */ roottimestamp = timestamp; - MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, mpi_comm_world), + MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, com), "cannot broadcast root's time"); wall_clock_delta = timestamp - roottimestamp; return max - min; } -void init_clock(){ +void init_clock(MPI_Comm com){ /* check for skew between tasks' start times */ - wall_clock_deviation = TimeDeviation(); + wall_clock_deviation = TimeDeviation(com); } char * PrintTimestamp() { @@ -751,16 +751,16 @@ char * PrintTimestamp() { return datestring; } -int64_t ReadStoneWallingIterations(char * const filename){ +int64_t ReadStoneWallingIterations(char * const filename, MPI_Comm com){ long long data; if(rank != 0){ - MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); + MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com); return data; }else{ FILE * out = fopen(filename, "r"); if (out == NULL){ data = -1; - MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); + MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com); return data; } int ret = fscanf(out, "%lld", & data); @@ -768,7 +768,7 @@ int64_t ReadStoneWallingIterations(char * const filename){ return -1; } fclose(out); - MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world); + MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, com); return data; } } diff --git a/src/utilities.h b/src/utilities.h index 
83563c5..dd6d16f 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -52,10 +52,10 @@ void updateParsedOptions(IOR_param_t * options, options_all_t * global_options); size_t NodeMemoryStringToBytes(char *size_str); /* Returns -1, if cannot be read */ -int64_t ReadStoneWallingIterations(char * const filename); +int64_t ReadStoneWallingIterations(char * const filename, MPI_Comm com); void StoreStoneWallingIterations(char * const filename, int64_t count); -void init_clock(void); +void init_clock(MPI_Comm com); double GetTimeStamp(void); char * PrintTimestamp(); // TODO remove this function unsigned long GetProcessorAndCore(int *chip, int *core); From 40c6d97e72afb1f1ca062ac37fa1b2a8915deb3e Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 20 Jan 2021 14:35:06 +0000 Subject: [PATCH 111/154] Replaced MPI_COMM_WORLD where needed with testComm. --- src/aiori-DFS.c | 14 +++++++------- src/aiori-S3-4c.c | 4 ++-- src/ior.c | 8 +++----- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/aiori-DFS.c b/src/aiori-DFS.c index 686075f..8e6b2a7 100755 --- a/src/aiori-DFS.c +++ b/src/aiori-DFS.c @@ -255,7 +255,7 @@ HandleDistribute(enum handleType type) DCHECK(rc, "Failed to get global handle size"); } - MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD), + MPI_CHECK(MPI_Bcast(&global.iov_buf_len, 1, MPI_UINT64_T, 0, testComm), "Failed to bcast global handle buffer size"); global.iov_len = global.iov_buf_len; @@ -273,7 +273,7 @@ HandleDistribute(enum handleType type) DCHECK(rc, "Failed to create global handle"); } - MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, MPI_COMM_WORLD), + MPI_CHECK(MPI_Bcast(global.iov_buf, global.iov_buf_len, MPI_BYTE, 0, testComm), "Failed to bcast global pool handle"); if (rank != 0) { @@ -555,16 +555,16 @@ DFS_Finalize(aiori_mod_opt_t *options) DFS_options_t *o = (DFS_options_t *)options; int rc; - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(testComm); 
d_hash_table_destroy(dir_hash, true /* force */); rc = dfs_umount(dfs); DCHECK(rc, "Failed to umount DFS namespace"); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(testComm); rc = daos_cont_close(coh, NULL); DCHECK(rc, "Failed to close container %s (%d)", o->cont, rc); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(testComm); if (o->destroy) { if (rank == 0) { @@ -580,7 +580,7 @@ DFS_Finalize(aiori_mod_opt_t *options) INFO(VERBOSE_1, "Container Destroy time = %f secs", t2-t1); } - MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&rc, 1, MPI_INT, 0, testComm); if (rc) { if (rank == 0) DCHECK(rc, "Failed to destroy container %s (%d)", o->cont, rc); @@ -594,7 +594,7 @@ DFS_Finalize(aiori_mod_opt_t *options) rc = daos_pool_disconnect(poh, NULL); DCHECK(rc, "Failed to disconnect from pool"); - MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD), "barrier error"); + MPI_CHECK(MPI_Barrier(testComm), "barrier error"); if (rank == 0) INFO(VERBOSE_1, "Finalizing DAOS..\n"); diff --git a/src/aiori-S3-4c.c b/src/aiori-S3-4c.c index f34fadb..6155ceb 100755 --- a/src/aiori-S3-4c.c +++ b/src/aiori-S3-4c.c @@ -1076,7 +1076,7 @@ static void S3_Close_internal(aiori_fd_t* fd, s3_options_t* param, int multi_pa MPI_Abort(testComm, 1); } MPI_Gather(etag_data, etag_data_size, MPI_BYTE, - etag_vec, etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD); + etag_vec, etag_data_size, MPI_BYTE, 0, testComm); // --- debugging: show the gathered etag data // (This shows the raw concatenated etag-data from each node.) 
@@ -1196,7 +1196,7 @@ static void S3_Close_internal(aiori_fd_t* fd, s3_options_t* param, int multi_pa aws_iobuf_append_str(xml, "\n"); } else { MPI_Gather(etag_data, etag_data_size, MPI_BYTE, - NULL, etag_data_size, MPI_BYTE, 0, MPI_COMM_WORLD); + NULL, etag_data_size, MPI_BYTE, 0, testComm); } } else { /* N:N */ diff --git a/src/ior.c b/src/ior.c index f30594a..3509cc5 100755 --- a/src/ior.c +++ b/src/ior.c @@ -589,11 +589,9 @@ void DistributeHints(void) } } - MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, - 0, MPI_COMM_WORLD), "cannot broadcast hints"); + MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, 0, testComm), "cannot broadcast hints"); for (i = 0; i < hintCount; i++) { - MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, - 0, MPI_COMM_WORLD), + MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, 0, testComm), "cannot broadcast hints"); strcpy(fullHint, hint[i]); strcpy(hintVariable, strtok(fullHint, "=")); @@ -1884,7 +1882,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, if ( test->collective && test->deadlineForStonewalling ) { // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop // it absolutely must be an 'all or none': - MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); + MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, testComm), "hitStonewall broadcast failed"); } } } From 970c5ef13910941e592e04d5929c51024f2176d5 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 20 Jan 2021 14:57:21 +0000 Subject: [PATCH 112/154] Adjust complex tests for changed -z behavior. 
--- src/ior.c | 44 +++++++++++++++++++--------------------- src/ior.h | 5 +++-- src/mdtest.c | 1 - src/parse_options.c | 6 +++--- src/parse_options.h | 4 +--- src/utilities.c | 1 - src/utilities.h | 1 - testing/complex-tests.sh | 10 ++++----- 8 files changed, 33 insertions(+), 39 deletions(-) diff --git a/src/ior.c b/src/ior.c index 3509cc5..3cf435b 100755 --- a/src/ior.c +++ b/src/ior.c @@ -53,7 +53,7 @@ static char *PrependDir(IOR_param_t *, char *); static char **ParseFileName(char *, int *); static void InitTests(IOR_test_t * , MPI_Comm); static void TestIoSys(IOR_test_t *); -static void ValidateTests(IOR_param_t *); +static void ValidateTests(IOR_param_t * params, MPI_Comm com); static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, aiori_fd_t *fd, const int access, IOR_io_buffers *ioBuffers); @@ -107,12 +107,11 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out IOR_test_t *tptr; out_logfile = world_out; out_resultfile = world_out; - mpi_comm_world = world_com; - MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); + MPI_CHECK(MPI_Comm_rank(world_com, &rank), "cannot get rank"); /* setup tests, and validate parameters */ - tests_head = ParseCommandLine(argc, argv); + tests_head = ParseCommandLine(argc, argv, world_com); InitTests(tests_head, world_com); PrintHeader(argc, argv); @@ -147,20 +146,19 @@ int ior_main(int argc, char **argv) /* * check -h option from commandline without starting MPI; */ - tests_head = ParseCommandLine(argc, argv); + tests_head = ParseCommandLine(argc, argv, MPI_COMM_WORLD); /* start the MPI code */ MPI_CHECK(MPI_Init(&argc, &argv), "cannot initialize MPI"); - mpi_comm_world = MPI_COMM_WORLD; - MPI_CHECK(MPI_Comm_rank(mpi_comm_world, &rank), "cannot get rank"); + MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &rank), "cannot get rank"); /* set error-handling */ /*MPI_CHECK(MPI_Errhandler_set(mpi_comm_world, MPI_ERRORS_RETURN), "cannot set errhandler"); */ /* setup tests, and 
validate parameters */ - InitTests(tests_head, mpi_comm_world); + InitTests(tests_head, MPI_COMM_WORLD); PrintHeader(argc, argv); @@ -201,7 +199,7 @@ int ior_main(int argc, char **argv) /* * Initialize an IOR_param_t structure to the defaults */ -void init_IOR_Param_t(IOR_param_t * p) +void init_IOR_Param_t(IOR_param_t * p, MPI_Comm com) { const char *default_aiori = aiori_default (); assert (NULL != default_aiori); @@ -231,7 +229,8 @@ void init_IOR_Param_t(IOR_param_t * p) p->transferSize = 262144; p->randomSeed = -1; p->incompressibleSeed = 573; - p->testComm = mpi_comm_world; + p->testComm = com; // this com might change for smaller tests + p->mpi_comm_world = com; p->URI = NULL; } @@ -567,7 +566,7 @@ static void DestroyTests(IOR_test_t *tests_head) /* * Distribute IOR_HINTs to all tasks' environments. */ -void DistributeHints(void) +static void DistributeHints(MPI_Comm com) { char hint[MAX_HINTS][MAX_STR], fullHint[MAX_STR], hintVariable[MAX_STR]; int hintCount = 0, i; @@ -589,9 +588,9 @@ void DistributeHints(void) } } - MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, 0, testComm), "cannot broadcast hints"); + MPI_CHECK(MPI_Bcast(&hintCount, sizeof(hintCount), MPI_BYTE, 0, com), "cannot broadcast hints"); for (i = 0; i < hintCount; i++) { - MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, 0, testComm), + MPI_CHECK(MPI_Bcast(&hint[i], MAX_STR, MPI_BYTE, 0, com), "cannot broadcast hints"); strcpy(fullHint, hint[i]); strcpy(hintVariable, strtok(fullHint, "=")); @@ -973,7 +972,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) * task 0 has the environment settings for the hints, pass * the hint=value pair to everyone else in mpi_comm_world */ - DistributeHints(); + DistributeHints(com); /* check validity of tests and create test queue */ while (tests != NULL) { @@ -1002,7 +1001,7 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) params->expectedAggFileSize = params->blockSize * params->segmentCount * params->numTasks; - 
ValidateTests(&tests->params); + ValidateTests(&tests->params, com); tests = tests->next; } @@ -1069,7 +1068,7 @@ static void file_hits_histogram(IOR_param_t *params) } MPI_CHECK(MPI_Gather(&rankOffset, 1, MPI_INT, rankoffs, - 1, MPI_INT, 0, mpi_comm_world), + 1, MPI_INT, 0, params->testComm), "MPI_Gather error"); if (rank != 0) @@ -1225,21 +1224,21 @@ static void TestIoSys(IOR_test_t *test) IOR_io_buffers ioBuffers; /* set up communicator for test */ - MPI_CHECK(MPI_Comm_group(mpi_comm_world, &orig_group), + MPI_CHECK(MPI_Comm_group(params->mpi_comm_world, &orig_group), "MPI_Comm_group() error"); range[0] = 0; /* first rank */ range[1] = params->numTasks - 1; /* last rank */ range[2] = 1; /* stride */ MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group), "MPI_Group_range_incl() error"); - MPI_CHECK(MPI_Comm_create(mpi_comm_world, new_group, &testComm), + MPI_CHECK(MPI_Comm_create(params->mpi_comm_world, new_group, &testComm), "MPI_Comm_create() error"); MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error"); MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error"); params->testComm = testComm; if (testComm == MPI_COMM_NULL) { /* tasks not in the group do not participate in this test */ - MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); + MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); return; } if (rank == 0 && verbose >= VERBOSE_1) { @@ -1536,17 +1535,16 @@ static void TestIoSys(IOR_test_t *test) free(hog_buf); /* Sync with the tasks that did not participate in this test */ - MPI_CHECK(MPI_Barrier(mpi_comm_world), "barrier error"); - + MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); } /* * Determine if valid tests from parameters. 
*/ -static void ValidateTests(IOR_param_t * test) +static void ValidateTests(IOR_param_t * test, MPI_Comm com) { IOR_param_t defaults; - init_IOR_Param_t(&defaults); + init_IOR_Param_t(&defaults, com); if (test->repetitions <= 0) WARN_RESET("too few test repetitions", diff --git a/src/ior.h b/src/ior.h index 87940a0..6252f78 100755 --- a/src/ior.h +++ b/src/ior.h @@ -98,7 +98,8 @@ typedef struct char * options; /* options string */ // intermediate options int collective; /* collective I/O */ - MPI_Comm testComm; /* MPI communicator */ + MPI_Comm testComm; /* Current MPI communicator */ + MPI_Comm mpi_comm_world; /* The global MPI communicator */ int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ int dualMount; /* dual mount points */ int numTasks; /* number of tasks for test */ @@ -205,7 +206,7 @@ IOR_test_t *CreateTest(IOR_param_t *init_params, int test_num); void AllocResults(IOR_test_t *test); char * GetPlatformName(void); -void init_IOR_Param_t(IOR_param_t *p); +void init_IOR_Param_t(IOR_param_t *p, MPI_Comm global_com); /* * This function runs IOR given by command line, useful for testing diff --git a/src/mdtest.c b/src/mdtest.c index 5386739..74cb297 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1944,7 +1944,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * testComm = world_com; out_logfile = world_out; out_resultfile = world_out; - mpi_comm_world = world_com; init_clock(world_com); diff --git a/src/parse_options.c b/src/parse_options.c index 1a2ad7e..05fa78f 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -32,7 +32,7 @@ #include "option.h" #include "aiori.h" -IOR_param_t initialTestParams; +static IOR_param_t initialTestParams; option_help * createGlobalOptions(IOR_param_t * params); @@ -451,9 +451,9 @@ option_help * createGlobalOptions(IOR_param_t * params){ /* * Parse Commandline. 
*/ -IOR_test_t *ParseCommandLine(int argc, char **argv) +IOR_test_t *ParseCommandLine(int argc, char **argv, MPI_Comm com) { - init_IOR_Param_t(& initialTestParams); + init_IOR_Param_t(& initialTestParams, com); IOR_test_t *tests = NULL; diff --git a/src/parse_options.h b/src/parse_options.h index 45b93ca..b12dd78 100755 --- a/src/parse_options.h +++ b/src/parse_options.h @@ -13,8 +13,6 @@ #include "ior.h" -extern IOR_param_t initialTestParams; - -IOR_test_t *ParseCommandLine(int argc, char **argv); +IOR_test_t *ParseCommandLine(int argc, char **argv, MPI_Comm com); #endif /* !_PARSE_OPTIONS_H */ diff --git a/src/utilities.c b/src/utilities.c index cb2deae..cf79e00 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -65,7 +65,6 @@ int rank = 0; int rankOffset = 0; int verbose = VERBOSE_0; /* verbose output */ MPI_Comm testComm; -MPI_Comm mpi_comm_world; FILE * out_logfile = NULL; FILE * out_resultfile = NULL; enum OutputFormat_t outputFormat; diff --git a/src/utilities.h b/src/utilities.h index dd6d16f..1eff2d2 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -22,7 +22,6 @@ extern int rank; extern int rankOffset; extern int verbose; extern MPI_Comm testComm; -extern MPI_Comm mpi_comm_world; extern FILE * out_resultfile; extern enum OutputFormat_t outputFormat; /* format of the output */ diff --git a/testing/complex-tests.sh b/testing/complex-tests.sh index c314cf9..a04c14d 100755 --- a/testing/complex-tests.sh +++ b/testing/complex-tests.sh @@ -19,13 +19,13 @@ MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -T -v MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -D -v #shared tests -IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -r -z-k -e -i1 -m -t 100k -b 100k +IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -r -z-k -e -i1 -m -t 100k -b 200k #test mutually exclusive options -IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 100k -IOR 2 -a 
POSIX -w -z -k -e -i1 -m -t 100k -b 100k +IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -w -z -k -e -i1 -m -t 100k -b 200k IOR 2 -a POSIX -w -Z -i1 -m -t 100k -b 100k -d 0.1 # Now set the num tasks per node to 1: From f345a7804686fa08f1961b6ce54c322fe09eccd2 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 20 Jan 2021 15:00:33 +0000 Subject: [PATCH 113/154] Removed dependency. --- src/ior.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ior.c b/src/ior.c index 3cf435b..b8f1129 100755 --- a/src/ior.c +++ b/src/ior.c @@ -51,7 +51,7 @@ static const ior_aiori_t *backend; static void DestroyTests(IOR_test_t *tests_head); static char *PrependDir(IOR_param_t *, char *); static char **ParseFileName(char *, int *); -static void InitTests(IOR_test_t * , MPI_Comm); +static void InitTests(IOR_test_t *); static void TestIoSys(IOR_test_t *); static void ValidateTests(IOR_param_t * params, MPI_Comm com); static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, @@ -112,7 +112,7 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out /* setup tests, and validate parameters */ tests_head = ParseCommandLine(argc, argv, world_com); - InitTests(tests_head, world_com); + InitTests(tests_head); PrintHeader(argc, argv); @@ -158,7 +158,7 @@ int ior_main(int argc, char **argv) "cannot set errhandler"); */ /* setup tests, and validate parameters */ - InitTests(tests_head, MPI_COMM_WORLD); + InitTests(tests_head); PrintHeader(argc, argv); @@ -950,8 +950,12 @@ static void RemoveFile(char *testFileName, int filePerProc, IOR_param_t * test) * Setup tests by parsing commandline and creating test script. * Perform a sanity-check on the configured parameters. 
*/ -static void InitTests(IOR_test_t *tests, MPI_Comm com) +static void InitTests(IOR_test_t *tests) { + if(tests == NULL){ + return; + } + MPI_Comm com = tests->params.mpi_comm_world; int mpiNumNodes = 0; int mpiNumTasks = 0; int mpiNumTasksOnNode0 = 0; From 9e0a8c44d56eb31a25b5b4d74b56c4ef14450aae Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Wed, 20 Jan 2021 15:06:45 +0000 Subject: [PATCH 114/154] IOR: move verbose output of detailed errors to verbosity level 1. (#315) IOR by default outputs the numbers of errors. Reason: The amount of error messages can be overwhelming, particularly in a parallel program (Gigabytes...). One -v increases the verbosity level to provide the extra details without adding too many other messages. --- src/ior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index 8a349a0..f814ea2 100755 --- a/src/ior.c +++ b/src/ior.c @@ -430,7 +430,7 @@ CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_p fprintf(out_logfile, "\n"); } } - if (errorCount > 0) { + if (errorCount > 0 && verbose >= VERBOSE_1) { GetTestFileName(testFileName, test); EWARNF("[%d] FAILED comparison of buffer in file %s during transfer %lld offset %lld containing %d-byte ints (%zd errors)", rank, testFileName, transferCount, offset, (int)sizeof(unsigned long long int),errorCount); From eb111f427976503ee183df128a3428529364b4ac Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 20 Jan 2021 15:19:13 +0000 Subject: [PATCH 115/154] MDTest replace fatals with warnings making the API more robust while it resolves #309. 
Example execution (can be done multiple times without cleanup): $ mpiexec -np 2 ./src/mdtest -d test@test -u -n 100 -C --- src/mdtest.c | 63 ++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 6d610df..829ca36 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -257,10 +257,7 @@ void parse_dirpath(char *dirpath_arg) { } // prevent changes to the original dirpath_arg dirpath_arg = strdup(dirpath_arg); - o.filenames = (char **)malloc(o.path_count * sizeof(char **)); - if (o.filenames == NULL || dirpath_arg == NULL) { - FAIL("out of memory"); - } + o.filenames = (char **) safeMalloc(o.path_count * sizeof(char **)); token = strtok(dirpath_arg, delimiter_string); while (token != NULL) { @@ -327,11 +324,11 @@ static void create_remove_dirs (const char *path, bool create, uint64_t itemNum) if (create) { if (o.backend->mkdir(curr_item, DIRMODE, o.backend_options) == -1) { - FAIL("unable to create directory %s", curr_item); + EWARNF("unable to create directory %s", curr_item); } } else { if (o.backend->rmdir(curr_item, o.backend_options) == -1) { - FAIL("unable to remove directory %s", curr_item); + EWARNF("unable to remove directory %s", curr_item); } } } @@ -386,15 +383,17 @@ static void create_file (const char *path, uint64_t itemNum) { ret = o.backend->mknod (curr_item); if (ret != 0) - FAIL("unable to mknode file %s", curr_item); + EWARNF("unable to mknode file %s", curr_item); return; } else if (o.collective_creates) { VERBOSE(3,5,"create_remove_items_helper (collective): open..." 
); aiori_fh = o.backend->open (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); - if (NULL == aiori_fh) - FAIL("unable to open file %s", curr_item); + if (NULL == aiori_fh){ + EWARNF("unable to open file %s", curr_item); + return; + } /* * !collective_creates @@ -404,8 +403,10 @@ static void create_file (const char *path, uint64_t itemNum) { VERBOSE(3,5,"create_remove_items_helper (non-collective, shared): open..." ); aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); - if (NULL == aiori_fh) - FAIL("unable to create file %s", curr_item); + if (NULL == aiori_fh){ + EWARNF("unable to create file %s", curr_item); + return; + } } if (o.write_bytes > 0) { @@ -422,13 +423,13 @@ static void create_file (const char *path, uint64_t itemNum) { o.write_buffer[0] = (char) itemNum; } if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { - FAIL("unable to write file %s", curr_item); + EWARNF("unable to write file %s", curr_item); } if (o.verify_write) { o.write_buffer[0] = 42; if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { - FAIL("unable to verify write (read/back) file %s", curr_item); + EWARNF("unable to verify write (read/back) file %s", curr_item); } mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes); } @@ -484,10 +485,10 @@ void collective_helper(const int dirs, const int create, const char* path, uint6 //create files aiori_fh = o.backend->create (curr_item, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL == aiori_fh) { - FAIL("unable to create file %s", curr_item); + EWARNF("unable to create file %s", curr_item); + }else{ + o.backend->close (aiori_fh, o.backend_options); } - - o.backend->close (aiori_fh, o.backend_options); } else if (!(o.shared_file && rank != 0)) { //remove files o.backend->delete (curr_item, o.backend_options); @@ -648,7 +649,7 @@ void 
mdtest_stat(const int random, const int dirs, const long dir_iter, const ch /* below temp used to be hiername */ VERBOSE(3,5,"mdtest_stat %4s: %s", (dirs ? "dir" : "file"), item); if (-1 == o.backend->stat (item, &buf, o.backend_options)) { - FAIL("unable to stat %s %s", dirs ? "directory" : "file", item); + EWARNF("unable to stat %s %s", dirs ? "directory" : "file", item); } } } @@ -739,14 +740,16 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { /* open file for reading */ aiori_fh = o.backend->open (item, O_RDONLY, o.backend_options); if (NULL == aiori_fh) { - FAIL("unable to open file %s", item); + EWARNF("unable to open file %s", item); + continue; } /* read file */ if (o.read_bytes > 0) { read_buffer[0] = 42; if (o.read_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) read_buffer, o.read_bytes, 0, o.backend_options)) { - FAIL("unable to read file %s", item); + EWARNF("unable to read file %s", item); + continue; } if(o.verify_read){ mdtest_verify_data(item_num, read_buffer, o.read_bytes); @@ -1667,7 +1670,7 @@ void create_remove_directory_tree(int create, if (create) { VERBOSE(2,5,"Making directory '%s'", dir); if (-1 == o.backend->mkdir (dir, DIRMODE, o.backend_options)) { - fprintf(out_logfile, "error could not create directory '%s'\n", dir); + EWARNF("unable to create tree directory '%s'\n", dir); } #ifdef HAVE_LUSTRE_LUSTREAPI /* internal node for branching, can be non-striped for children */ @@ -1701,7 +1704,7 @@ void create_remove_directory_tree(int create, if (create) { VERBOSE(2,5,"Making directory '%s'", temp_path); if (-1 == o.backend->mkdir(temp_path, DIRMODE, o.backend_options)) { - FAIL("Unable to create directory %s", temp_path); + EWARNF("Unable to create directory %s", temp_path); } } @@ -1712,7 +1715,7 @@ void create_remove_directory_tree(int create, if (!create) { VERBOSE(2,5,"Remove directory '%s'", temp_path); if (-1 == o.backend->rmdir(temp_path, o.backend_options)) { - FAIL("Unable to remove 
directory %s", temp_path); + EWARNF("Unable to remove directory %s", temp_path); } } @@ -1741,12 +1744,12 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir ); if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) != 0) { if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) { - FAIL("Unable to create test directory %s", o.testdir); + EWARNF("Unable to create test directory %s", o.testdir); } #ifdef HAVE_LUSTRE_LUSTREAPI /* internal node for branching, can be non-striped for children */ if (o.global_dir_layout && o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { - FAIL("Unable to reset to global default directory layout"); + EWARNF("Unable to reset to global default directory layout"); } #endif /* HAVE_LUSTRE_LUSTREAPI */ } @@ -1924,7 +1927,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) == 0) { //if (( rank == 0 ) && access(o.testdir, F_OK) == 0) { if (o.backend->rmdir(o.testdir, o.backend_options) == -1) { - FAIL("unable to remove directory %s", o.testdir); + EWARNF("unable to remove directory %s", o.testdir); } } } @@ -2164,7 +2167,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * uint64_t s; - o.rand_array = (uint64_t *) malloc( o.items * sizeof(*o.rand_array)); + o.rand_array = (uint64_t *) safeMalloc( o.items * sizeof(*o.rand_array)); for (s=0; s < o.items; s++) { o.rand_array[s] = s; @@ -2219,7 +2222,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* if directory does not exist, create it */ if ((rank < o.path_count) && o.backend->access(o.testdirpath, F_OK, o.backend_options) != 0) { if (o.backend->mkdir(o.testdirpath, DIRMODE, o.backend_options) != 0) { - 
FAIL("Unable to create test directory path %s", o.testdirpath); + EWARNF("Unable to create test directory path %s", o.testdirpath); } created_root_dir = 1; } @@ -2259,7 +2262,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } /* setup summary table for recording results */ - o.summary_table = (mdtest_results_t *) malloc(iterations * sizeof(mdtest_results_t)); + o.summary_table = (mdtest_results_t *) safeMalloc(iterations * sizeof(mdtest_results_t)); memset(o.summary_table, 0, iterations * sizeof(mdtest_results_t)); for(int i=0; i < iterations; i++){ for(int j=0; j < MDTEST_LAST_NUM; j++){ @@ -2268,10 +2271,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } } - if (o.summary_table == NULL) { - FAIL("out of memory"); - } - if (o.unique_dir_per_task) { sprintf(o.base_tree_name, "mdtest_tree.%d", rank); } else { From 4edb27b41a9f85f23b64b61537cb50fb1ad14f4e Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 20 Jan 2021 19:38:54 +0000 Subject: [PATCH 116/154] Remove MPI timer in favor of gettimeofday() to prevent MPI issues. Remove time adjustment as measurements are relative anyway. 
--- src/ior-output.c | 17 ----------------- src/ior.c | 2 +- src/utilities.c | 19 +++---------------- src/utilities.h | 2 -- 4 files changed, 4 insertions(+), 36 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index 8cfaf12..d60cbdb 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -304,23 +304,6 @@ void PrintHeader(int argc, char **argv) } PrintKeyValEnd(); } - -#ifdef _NO_MPI_TIMER - if (verbose >= VERBOSE_2) - fprintf(out_logfile, "Using unsynchronized POSIX timer\n"); -#else /* not _NO_MPI_TIMER */ - if (MPI_WTIME_IS_GLOBAL) { - if (verbose >= VERBOSE_2) - fprintf(out_logfile, "Using synchronized MPI timer\n"); - } else { - if (verbose >= VERBOSE_2) - fprintf(out_logfile, "Using unsynchronized MPI timer\n"); - } -#endif /* _NO_MPI_TIMER */ - if (verbose >= VERBOSE_1) { - fprintf(out_logfile, "Start time skew across all tasks: %.02f sec\n", - wall_clock_deviation); - } if (verbose >= VERBOSE_3) { /* show env */ fprintf(out_logfile, "STARTING ENVIRON LOOP\n"); for (i = 0; environ[i] != NULL; i++) { diff --git a/src/ior.c b/src/ior.c index f36532f..4f28c19 100755 --- a/src/ior.c +++ b/src/ior.c @@ -244,7 +244,7 @@ DisplayOutliers(int numTasks, double sum, mean, sqrDiff, var, sd; /* for local timerVal, don't compensate for wall clock delta */ - timerVal += wall_clock_delta; + //timerVal += wall_clock_delta; MPI_CHECK(MPI_Allreduce (&timerVal, &sum, 1, MPI_DOUBLE, MPI_SUM, testComm), diff --git a/src/utilities.c b/src/utilities.c index cf79e00..6b0871f 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -673,10 +673,6 @@ int uname(struct utsname *name) } #endif /* _WIN32 */ - -double wall_clock_deviation; -double wall_clock_delta = 0; - /* * Get time stamp. Use MPI_Timer() unless _NO_MPI_TIMER is defined, * in which case use gettimeofday(). 
@@ -684,26 +680,18 @@ double wall_clock_delta = 0; double GetTimeStamp(void) { double timeVal; -#ifdef _NO_MPI_TIMER struct timeval timer; if (gettimeofday(&timer, (struct timezone *)NULL) != 0) ERR("cannot use gettimeofday()"); timeVal = (double)timer.tv_sec + ((double)timer.tv_usec / 1000000); -#else /* not _NO_MPI_TIMER */ - timeVal = MPI_Wtime(); /* no MPI_CHECK(), just check return value */ - if (timeVal < 0) - ERR("cannot use MPI_Wtime()"); -#endif /* _NO_MPI_TIMER */ - - /* wall_clock_delta is difference from root node's time */ - timeVal -= wall_clock_delta; return (timeVal); } /* * Determine any spread (range) between node times. + * Obsolete */ static double TimeDeviation(MPI_Comm com) { @@ -725,14 +713,13 @@ static double TimeDeviation(MPI_Comm com) roottimestamp = timestamp; MPI_CHECK(MPI_Bcast(&roottimestamp, 1, MPI_DOUBLE, 0, com), "cannot broadcast root's time"); - wall_clock_delta = timestamp - roottimestamp; + // wall_clock_delta = timestamp - roottimestamp; return max - min; } void init_clock(MPI_Comm com){ - /* check for skew between tasks' start times */ - wall_clock_deviation = TimeDeviation(com); + } char * PrintTimestamp() { diff --git a/src/utilities.h b/src/utilities.h index 1eff2d2..b0be545 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -59,6 +59,4 @@ double GetTimeStamp(void); char * PrintTimestamp(); // TODO remove this function unsigned long GetProcessorAndCore(int *chip, int *core); -extern double wall_clock_deviation; -extern double wall_clock_delta; #endif /* !_UTILITIES_H */ From 852ee3e40e8e32abf354cc68738e5e3f9e43ae23 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 21 Jan 2021 11:06:13 +0000 Subject: [PATCH 117/154] AIORI POSIX use internal debug macros, include errno in msg. 
--- src/aiori-POSIX.c | 40 +++++++++++++++------------------------- src/aiori-debug.h | 12 ++++++++++++ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 8beaa09..fc99c61 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -378,9 +378,10 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) MPI_CHECK(MPI_Barrier(testComm), "barrier error"); fd_oflag |= O_RDWR; *fd = open64(testFileName, fd_oflag, mode); - if (*fd < 0) - ERRF("open64(\"%s\", %d, %#o) failed", - testFileName, fd_oflag, mode); + if (*fd < 0){ + ERRF("open64(\"%s\", %d, %#o) failed. Error: %s", + testFileName, fd_oflag, mode, strerror(errno)); + } } else { struct lov_user_md opts = { 0 }; @@ -396,19 +397,14 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; *fd = open64(testFileName, fd_oflag, mode); if (*fd < 0) { - fprintf(stdout, "\nUnable to open '%s': %s\n", + ERRF("Unable to open '%s': %s\n", testFileName, strerror(errno)); - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), - "MPI_Abort() error"); } else if (ioctl(*fd, LL_IOC_LOV_SETSTRIPE, &opts)) { char *errmsg = "stripe already set"; if (errno != EEXIST && errno != EALREADY) errmsg = strerror(errno); - fprintf(stdout, - "\nError on ioctl for '%s' (%d): %s\n", + ERRF("Error on ioctl for '%s' (%d): %s\n", testFileName, *fd, errmsg); - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), - "MPI_Abort() error"); } if (!hints->filePerProc) MPI_CHECK(MPI_Barrier(testComm), @@ -435,9 +431,10 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) #endif /* HAVE_BEEGFS_BEEGFS_H */ *fd = open64(testFileName, fd_oflag, mode); - if (*fd < 0) - ERRF("open64(\"%s\", %d, %#o) failed", - testFileName, fd_oflag, mode); + if (*fd < 0){ + ERRF("open64(\"%s\", %d, %#o) failed. 
Error: %s", + testFileName, fd_oflag, mode, strerror(errno)); + } #ifdef HAVE_LUSTRE_USER } @@ -503,8 +500,7 @@ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) if (o->lustre_ignore_locks) { int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; if (verbose >= VERBOSE_1) { - fprintf(stdout, - "** Disabling lustre range locking **\n"); + EINFO("** Disabling lustre range locking **\n"); } if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); @@ -552,8 +548,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer /* write/read file */ if (access == WRITE) { /* WRITE */ if (verbose >= VERBOSE_4) { - fprintf(stdout, - "task %d writing to offset %lld\n", + EINFO("task %d writing to offset %lld\n", rank, offset + length - remaining); } @@ -566,8 +561,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer } } else { /* READ or CHECK */ if (verbose >= VERBOSE_4) { - fprintf(stdout, - "task %d reading from offset %lld\n", + EINFO("task %d reading from offset %lld\n", rank, offset + length - remaining); } @@ -580,16 +574,12 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer fd, (void*)ptr, remaining); } if (rc < remaining) { - fprintf(stdout, - "WARNING: Task %d, partial %s, %lld of %lld bytes at offset %lld\n", + EWARNF("task %d, partial %s, %lld of %lld bytes at offset %lld\n", rank, access == WRITE ? 
"write()" : "read()", rc, remaining, offset + length - remaining); - if (hints->singleXferAttempt == TRUE) - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), - "barrier error"); - if (xferRetries > MAX_RETRY) + if (xferRetries > MAX_RETRY || hints->singleXferAttempt) ERR("too many retries -- aborting"); } assert(rc >= 0); diff --git a/src/aiori-debug.h b/src/aiori-debug.h index bb5dd71..32db28f 100644 --- a/src/aiori-debug.h +++ b/src/aiori-debug.h @@ -61,6 +61,18 @@ extern int aiori_warning_as_errors; } while (0) +/* warning with format string and errno printed */ +#define EINFO(FORMAT, ...) do { \ + if (verbose > VERBOSE_2) { \ + fprintf(out_logfile, "INFO: " FORMAT ", (%s:%d).\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ + } else { \ + fprintf(out_logfile, "INFO: " FORMAT "\n", \ + __VA_ARGS__); \ + } \ + fflush(out_logfile); \ +} while (0) + /* display error message with format string and terminate execution */ #define ERRF(FORMAT, ...) do { \ fprintf(out_logfile, "ERROR: " FORMAT ", (%s:%d)\n", \ From 310fd374273db158063fa5881c4e2d6f6a56a41b Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 21 Jan 2021 14:10:23 +0000 Subject: [PATCH 118/154] Feature mdtest dir rename #306 (#311) * MDTest: Support for directory renaming. * Refactored some MDTest variables from hardcoded numbers to ENUM symbols. 
--- src/aiori-DUMMY.c | 6 + src/aiori-POSIX.c | 12 ++ src/aiori-POSIX.h | 1 + src/aiori.h | 1 + src/mdtest.c | 351 +++++++++++++++++++++++++++++----------------- src/mdtest.h | 17 +-- 6 files changed, 255 insertions(+), 133 deletions(-) diff --git a/src/aiori-DUMMY.c b/src/aiori-DUMMY.c index 17656bb..4769de0 100755 --- a/src/aiori-DUMMY.c +++ b/src/aiori-DUMMY.c @@ -156,6 +156,11 @@ static int DUMMY_stat (const char *path, struct stat *buf, aiori_mod_opt_t * opt return 0; } +static int DUMMY_rename (const char *path, const char *path2, aiori_mod_opt_t * options){ + return 0; +} + + static int DUMMY_check_params(aiori_mod_opt_t * options){ return 0; } @@ -188,6 +193,7 @@ ior_aiori_t dummy_aiori = { .statfs = DUMMY_statfs, .mkdir = DUMMY_mkdir, .rmdir = DUMMY_rmdir, + .rename = DUMMY_rename, .access = DUMMY_access, .stat = DUMMY_stat, .initialize = DUMMY_init, diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index fc99c61..5f6261a 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -133,6 +133,7 @@ ior_aiori_t posix_aiori = { .statfs = aiori_posix_statfs, .mkdir = aiori_posix_mkdir, .rmdir = aiori_posix_rmdir, + .rename = POSIX_Rename, .access = aiori_posix_access, .stat = aiori_posix_stat, .get_options = POSIX_options, @@ -636,6 +637,17 @@ void POSIX_Delete(char *testFileName, aiori_mod_opt_t * param) } } +int POSIX_Rename(const char * oldfile, const char * newfile, aiori_mod_opt_t * module_options){ + if(hints->dryRun) + return 0; + + if(rename(oldfile, newfile) != 0){ + EWARNF("[RANK %03d]: rename() of file \"%s\" to \"%s\" failed", rank, oldfile, newfile); + return -1; + } + return 0; +} + /* * Use POSIX stat() to return aggregate file size. 
*/ diff --git a/src/aiori-POSIX.h b/src/aiori-POSIX.h index 1780cf7..8884a30 100644 --- a/src/aiori-POSIX.h +++ b/src/aiori-POSIX.h @@ -34,6 +34,7 @@ int POSIX_Mknod(char *testFileName); aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * module_options); IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName); void POSIX_Delete(char *testFileName, aiori_mod_opt_t * module_options); +int POSIX_Rename(const char *oldfile, const char *newfile, aiori_mod_opt_t * module_options); void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * module_options); option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t * init_values); void POSIX_xfer_hints(aiori_xfer_hint_t * params); diff --git a/src/aiori.h b/src/aiori.h index ba84b60..6f78e5f 100755 --- a/src/aiori.h +++ b/src/aiori.h @@ -108,6 +108,7 @@ typedef struct ior_aiori { int (*stat) (const char *path, struct stat *buf, aiori_mod_opt_t * module_options); void (*initialize)(aiori_mod_opt_t * options); /* called once per program before MPI is started */ void (*finalize)(aiori_mod_opt_t * options); /* called once per program after MPI is shutdown */ + int (*rename) (const char *oldpath, const char *newpath, aiori_mod_opt_t * module_options); option_help * (*get_options)(aiori_mod_opt_t ** init_backend_options, aiori_mod_opt_t* init_values); /* initializes the backend options as well and returns the pointer to the option help structure */ int (*check_params)(aiori_mod_opt_t *); /* check if the provided module_optionseters for the given test and the module options are correct, if they aren't print a message and exit(1) or return 1*/ void (*sync)(aiori_mod_opt_t * ); /* synchronize every pending operation for this storage */ diff --git a/src/mdtest.c b/src/mdtest.c index b35d55b..48ca7b8 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -120,6 +120,7 @@ typedef struct { int verify_write; int verification_error; int remove_only; + int rename_dirs; int leaf_only; 
unsigned branch_factor; int depth; @@ -847,10 +848,82 @@ void collective_create_remove(const int create, const int dirs, const int ntasks } } +void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank_progress_t * progress) { + uint64_t parent_dir, item_num = 0; + char item[MAX_PATHLEN], temp[MAX_PATHLEN]; + char item_last[MAX_PATHLEN]; + + if(o.backend->rename == NULL){ + WARN("Backend doesn't support rename\n"); + return; + } + + VERBOSE(1,-1,"Entering mdtest_rename on %s", path ); + + uint64_t stop_items = o.items; + + if( o.directory_loops != 1 ){ + stop_items = o.items_per_dir; + } + + if(stop_items == 1) return; + + /* iterate over all of the item IDs */ + char first_item_name[MAX_PATHLEN]; + for (uint64_t i = 0 ; i < stop_items; ++i) { + item_num = i; + /* make adjustments if in leaf only mode*/ + if (o.leaf_only) { + item_num += o.items_per_dir * (o.num_dirs_in_tree - (uint64_t) pow( o.branch_factor, o.depth )); + } + + /* create name of file/dir to stat */ + if (dirs) { + sprintf(item, "dir.%s"LLU"", o.stat_name, item_num); + } else { + sprintf(item, "file.%s"LLU"", o.stat_name, item_num); + } + + /* determine the path to the file/dir to be stat'ed */ + parent_dir = item_num / o.items_per_dir; + + if (parent_dir > 0) { //item is not in tree's root directory + /* prepend parent directory to item's path */ + sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); + strcpy(item, temp); + + //still not at the tree's root dir + while (parent_dir > o.branch_factor) { + parent_dir = (uint64_t) ((parent_dir-1) / o.branch_factor); + sprintf(temp, "%s."LLU"/%s", o.base_tree_name, parent_dir, item); + strcpy(item, temp); + } + } + + /* Now get item to have the full path */ + sprintf( temp, "%s/%s", path, item ); + strcpy( item, temp ); + + VERBOSE(3,5,"mdtest_rename %4s: %s", (dirs ? 
"dir" : "file"), item); + if(i == 0){ + sprintf(first_item_name, "%s-XX", item); + strcpy(item_last, first_item_name); + }else if(i == stop_items - 1){ + strcpy(item, first_item_name); + } + if (-1 == o.backend->rename(item, item_last, o.backend_options)) { + EWARNF("unable to rename %s %s", dirs ? "directory" : "file", item); + } + + strcpy(item_last, item); + } +} + void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { int size; - double t[5] = {0}; + double t[6] = {0}; char temp_path[MAX_PATHLEN]; + mdtest_results_t * res = & o.summary_table[iteration]; MPI_Comm_size(testComm, &size); @@ -942,9 +1015,35 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } } - phase_end(); + t[3] = GetTimeStamp(); + if(o.rename_dirs){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ + prep_testdir(iteration, dir_iter); + if (o.unique_dir_per_task) { + unique_dir_access(STAT_SUB_DIR, temp_path); + if (! 
o.time_unique_dir_overhead) { + offset_timers(t, 1); + } + } else { + sprintf( temp_path, "%s/%s", o.testdir, path ); + } + + VERBOSE(3,5,"rename path is '%s'", temp_path ); + + rename_dir_test(1, dir_iter, temp_path, progress); + } + } + phase_end(); + + t[4] = GetTimeStamp(); + if (o.rename_dirs && o.items > 1) { // moved close to execution + res->rate[MDTEST_DIR_RENAME_NUM] = o.items*size/(t[4] - t[3]); + res->time[MDTEST_DIR_RENAME_NUM] = t[4] - t[3]; + res->items[MDTEST_DIR_RENAME_NUM] = o.items*size; + res->stonewall_last_item[MDTEST_DIR_RENAME_NUM] = o.items*size; + } if (o.remove_only) { for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ @@ -972,7 +1071,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } phase_end(); - t[4] = GetTimeStamp(); + t[5] = GetTimeStamp(); if (o.remove_only) { if (o.unique_dir_per_task) { @@ -985,41 +1084,38 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } if (o.unique_dir_per_task && ! 
o.time_unique_dir_overhead) { - offset_timers(t, 4); + offset_timers(t, 5); } /* calculate times */ if (o.create_only) { - o.summary_table[iteration].rate[0] = o.items*size/(t[1] - t[0]); - o.summary_table[iteration].time[0] = t[1] - t[0]; - o.summary_table[iteration].items[0] = o.items*size; - o.summary_table[iteration].stonewall_last_item[0] = o.items; + res->rate[MDTEST_DIR_CREATE_NUM] = o.items*size/(t[1] - t[0]); + res->time[MDTEST_DIR_CREATE_NUM] = t[1] - t[0]; + res->items[MDTEST_DIR_CREATE_NUM] = o.items*size; + res->stonewall_last_item[MDTEST_DIR_CREATE_NUM] = o.items; } if (o.stat_only) { - o.summary_table[iteration].rate[1] = o.items*size/(t[2] - t[1]); - o.summary_table[iteration].time[1] = t[2] - t[1]; - o.summary_table[iteration].items[1] = o.items*size; - o.summary_table[iteration].stonewall_last_item[1] = o.items; + res->rate[MDTEST_DIR_STAT_NUM] = o.items*size/(t[2] - t[1]); + res->time[MDTEST_DIR_STAT_NUM] = t[2] - t[1]; + res->items[MDTEST_DIR_STAT_NUM] = o.items*size; + res->stonewall_last_item[MDTEST_DIR_STAT_NUM] = o.items; } if (o.read_only) { - o.summary_table[iteration].rate[2] = o.items*size/(t[3] - t[2]); - o.summary_table[iteration].time[2] = t[3] - t[2]; - o.summary_table[iteration].items[2] = o.items*size; - o.summary_table[iteration].stonewall_last_item[2] = o.items; + res->rate[MDTEST_DIR_READ_NUM] = o.items*size/(t[3] - t[2]); + res->time[MDTEST_DIR_READ_NUM] = t[3] - t[2]; + res->items[MDTEST_DIR_READ_NUM] = o.items*size; + res->stonewall_last_item[MDTEST_DIR_READ_NUM] = o.items; } if (o.remove_only) { - o.summary_table[iteration].rate[3] = o.items*size/(t[4] - t[3]); - o.summary_table[iteration].time[3] = t[4] - t[3]; - o.summary_table[iteration].items[3] = o.items*size; - o.summary_table[iteration].stonewall_last_item[3] = o.items; + res->rate[MDTEST_DIR_REMOVE_NUM] = o.items*size/(t[5] - t[4]); + res->time[MDTEST_DIR_REMOVE_NUM] = t[5] - t[4]; + res->items[MDTEST_DIR_REMOVE_NUM] = o.items*size; + 
res->stonewall_last_item[MDTEST_DIR_REMOVE_NUM] = o.items; } - VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[0]); VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[1]); - /* N/A - VERBOSE(1,-1," Directory read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], o.summary_table[iteration].rate[2]); - */ - VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[3]); + VERBOSE(1,-1," Directory rename : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[MDTEST_DIR_RENAME_NUM]); + VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[5] - t[4], o.summary_table[iteration].rate[4]); } /* Returns if the stonewall was hit */ @@ -1241,30 +1337,31 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro o.items *= o.num_dirs_in_tree_calc; } + mdtest_results_t * res = & o.summary_table[iteration]; /* calculate times */ if (o.create_only) { - o.summary_table[iteration].rate[4] = o.items*size/(t[1] - t[0]); - o.summary_table[iteration].time[4] = t[1] - t[0]; - o.summary_table[iteration].items[4] = o.items*o.size; - o.summary_table[iteration].stonewall_last_item[4] = o.items; + res->rate[MDTEST_FILE_CREATE_NUM] = o.items*size/(t[1] - t[0]); + res->time[MDTEST_FILE_CREATE_NUM] = t[1] - t[0]; + res->items[MDTEST_FILE_CREATE_NUM] = o.items*o.size; + res->stonewall_last_item[MDTEST_FILE_CREATE_NUM] = o.items; } if (o.stat_only) { - o.summary_table[iteration].rate[5] = o.items*size/(t[2] - t[1]); - o.summary_table[iteration].time[5] = t[2] - t[1]; - o.summary_table[iteration].items[5] = o.items*o.size; - o.summary_table[iteration].stonewall_last_item[5] = o.items; + res->rate[MDTEST_FILE_STAT_NUM] = o.items*size/(t[2] - t[1]); + res->time[MDTEST_FILE_STAT_NUM] = t[2] - t[1]; + res->items[MDTEST_FILE_STAT_NUM] = o.items*o.size; + 
res->stonewall_last_item[MDTEST_FILE_STAT_NUM] = o.items; } if (o.read_only) { - o.summary_table[iteration].rate[6] = o.items*o.size/(t[3] - t[2]); - o.summary_table[iteration].time[6] = t[3] - t[2]; - o.summary_table[iteration].items[6] = o.items*o.size; - o.summary_table[iteration].stonewall_last_item[6] = o.items; + res->rate[MDTEST_FILE_READ_NUM] = o.items*o.size/(t[3] - t[2]); + res->time[MDTEST_FILE_READ_NUM] = t[3] - t[2]; + res->items[MDTEST_FILE_READ_NUM] = o.items*o.size; + res->stonewall_last_item[MDTEST_FILE_READ_NUM] = o.items; } if (o.remove_only) { - o.summary_table[iteration].rate[7] = o.items*o.size/(t[4] - t[3]); - o.summary_table[iteration].time[7] = t[4] - t[3]; - o.summary_table[iteration].items[7] = o.items*o.size; - o.summary_table[iteration].stonewall_last_item[7] = o.items; + res->rate[MDTEST_FILE_REMOVE_NUM] = o.items*o.size/(t[4] - t[3]); + res->time[MDTEST_FILE_REMOVE_NUM] = t[4] - t[3]; + res->items[MDTEST_FILE_REMOVE_NUM] = o.items*o.size; + res->stonewall_last_item[MDTEST_FILE_REMOVE_NUM] = o.items; } VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[4]); @@ -1278,17 +1375,18 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro char const * mdtest_test_name(int i){ switch (i) { - case 0: return "Directory creation :"; - case 1: return "Directory stat :"; - case 2: return NULL; - case 3: return "Directory removal :"; - case 4: return "File creation :"; - case 5: return "File stat :"; - case 6: return "File read :"; - case 7: return "File removal :"; - case 8: return "Tree creation :"; - case 9: return "Tree removal :"; - default: return "ERR INVALID TESTNAME :"; + case MDTEST_DIR_CREATE_NUM: return "Directory creation :"; + case MDTEST_DIR_STAT_NUM: return "Directory stat :"; + case MDTEST_DIR_READ_NUM: return NULL; + case MDTEST_DIR_REMOVE_NUM: return "Directory removal :"; + case MDTEST_DIR_RENAME_NUM: return "Directory rename :"; + case 
MDTEST_FILE_CREATE_NUM: return "File creation :"; + case MDTEST_FILE_STAT_NUM: return "File stat :"; + case MDTEST_FILE_READ_NUM: return "File read :"; + case MDTEST_FILE_REMOVE_NUM: return "File removal :"; + case MDTEST_TREE_CREATE_NUM: return "Tree creation :"; + case MDTEST_TREE_REMOVE_NUM: return "Tree removal :"; + default: return "ERR INVALID TESTNAME :"; } return NULL; } @@ -1339,16 +1437,16 @@ void summarize_results(int iterations, int print_time) { /* if files only access, skip entries 0-3 (the dir tests) */ if (o.files_only && ! o.dirs_only) { - start = 4; + start = MDTEST_FILE_CREATE_NUM; } else { start = 0; } /* if directories only access, skip entries 4-7 (the file tests) */ if (o.dirs_only && !o.files_only) { - stop = 4; + stop = MDTEST_FILE_CREATE_NUM; } else { - stop = 8; + stop = MDTEST_TREE_CREATE_NUM; } /* special case: if no directory or file tests, skip all */ @@ -1451,7 +1549,7 @@ void summarize_results(int iterations, int print_time) { } /* calculate tree create/remove rates, applies only to Rank 0 */ - for (i = 8; i < tableSize; i++) { + for (i = MDTEST_TREE_CREATE_NUM; i < tableSize; i++) { min = max = all[i]; sum = var = 0; imin = imax = all[i]; @@ -1508,8 +1606,8 @@ void md_validate_tests() { FAIL( "Error, stone wall timer does only work with a branch factor <= 1 (current is %d) and with barriers\n", o.branch_factor); } - if (!o.create_only && ! o.stat_only && ! o.read_only && !o.remove_only) { - o.create_only = o.stat_only = o.read_only = o.remove_only = 1; + if (!o.create_only && ! o.stat_only && ! 
o.read_only && !o.remove_only && !o.rename_dirs) { + o.create_only = o.stat_only = o.read_only = o.remove_only = o.rename_dirs = 1; VERBOSE(1,-1,"main: Setting create/stat/read/remove_only to True" ); } @@ -1670,7 +1768,7 @@ void create_remove_directory_tree(int create, if (create) { VERBOSE(2,5,"Making directory '%s'", dir); if (-1 == o.backend->mkdir (dir, DIRMODE, o.backend_options)) { - EWARNF("unable to create tree directory '%s'\n", dir); + EWARNF("unable to create tree directory '%s'", dir); } #ifdef HAVE_LUSTRE_LUSTREAPI /* internal node for branching, can be non-striped for children */ @@ -1738,78 +1836,81 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t VERBOSE(1,-1,"main: * iteration %d *", j+1); - for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(j, dir_iter); + if(o.create_only){ + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ + if (rank >= o.path_count) { + continue; + } + prep_testdir(j, dir_iter); - VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir ); - if ((rank < o.path_count) && o.backend->access(o.testdir, F_OK, o.backend_options) != 0) { - if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) { - EWARNF("Unable to create test directory %s", o.testdir); - } + VERBOSE(2,5,"main (for j loop): making o.testdir, '%s'", o.testdir ); + if (o.backend->access(o.testdir, F_OK, o.backend_options) != 0) { + if (o.backend->mkdir(o.testdir, DIRMODE, o.backend_options) != 0) { + EWARNF("Unable to create test directory %s", o.testdir); + } #ifdef HAVE_LUSTRE_LUSTREAPI - /* internal node for branching, can be non-striped for children */ - if (o.global_dir_layout && o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { - EWARNF("Unable to reset to global default directory layout"); - } + /* internal node for branching, can be non-striped for children */ + if (o.global_dir_layout && 
o.unique_dir_per_task && llapi_dir_set_default_lmv_stripe(o.testdir, -1, 0, LMV_HASH_TYPE_FNV_1A_64, NULL) == -1) { + EWARNF("Unable to reset to global default directory layout"); + } #endif /* HAVE_LUSTRE_LUSTREAPI */ + } } - } - if (o.create_only) { - /* create hierarchical directory structure */ - MPI_Barrier(testComm); + /* create hierarchical directory structure */ + MPI_Barrier(testComm); - startCreate = GetTimeStamp(); - for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ - prep_testdir(j, dir_iter); + startCreate = GetTimeStamp(); + for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ + prep_testdir(j, dir_iter); - if (o.unique_dir_per_task) { - if (o.collective_creates && (rank == 0)) { - /* - * This is inside two loops, one of which already uses "i" and the other uses "j". - * I don't know how this ever worked. I'm changing this loop to use "k". - */ - for (k=0; k < o.size; k++) { - sprintf(o.base_tree_name, "mdtest_tree.%d", k); + if (o.unique_dir_per_task) { + if (o.collective_creates && (rank == 0)) { + /* + * This is inside two loops, one of which already uses "i" and the other uses "j". + * I don't know how this ever worked. I'm changing this loop to use "k". + */ + for (k=0; k < o.size; k++) { + sprintf(o.base_tree_name, "mdtest_tree.%d", k); - VERBOSE(3,5,"main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", o.testdir ); - /* - * Let's pass in the path to the directory we most recently made so that we can use - * full paths in the other calls. - */ - create_remove_directory_tree(1, 0, o.testdir, 0, progress); - if(CHECK_STONE_WALL(progress)){ - o.size = k; - break; - } - } - } else if (! o.collective_creates) { - VERBOSE(3,5,"main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '%s'", o.testdir ); - /* - * Let's pass in the path to the directory we most recently made so that we can use - * full paths in the other calls. 
- */ - create_remove_directory_tree(1, 0, o.testdir, 0, progress); + VERBOSE(3,5,"main (create hierarchical directory loop-collective): Calling create_remove_directory_tree with '%s'", o.testdir ); + /* + * Let's pass in the path to the directory we most recently made so that we can use + * full paths in the other calls. + */ + create_remove_directory_tree(1, 0, o.testdir, 0, progress); + if(CHECK_STONE_WALL(progress)){ + o.size = k; + break; } - } else { - if (rank == 0) { - VERBOSE(3,5,"main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '%s'", o.testdir ); + } + } else if (! o.collective_creates) { + VERBOSE(3,5,"main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '%s'", o.testdir ); + /* + * Let's pass in the path to the directory we most recently made so that we can use + * full paths in the other calls. + */ + create_remove_directory_tree(1, 0, o.testdir, 0, progress); + } + } else { + if (rank == 0) { + VERBOSE(3,5,"main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '%s'", o.testdir ); - /* - * Let's pass in the path to the directory we most recently made so that we can use - * full paths in the other calls. - */ - create_remove_directory_tree(1, 0 , o.testdir, 0, progress); - } + /* + * Let's pass in the path to the directory we most recently made so that we can use + * full paths in the other calls. 
+ */ + create_remove_directory_tree(1, 0 , o.testdir, 0, progress); } } - MPI_Barrier(testComm); - endCreate = GetTimeStamp(); - summary_table->rate[8] = o.num_dirs_in_tree / (endCreate - startCreate); - summary_table->time[8] = (endCreate - startCreate); - summary_table->items[8] = o.num_dirs_in_tree; - summary_table->stonewall_last_item[8] = o.num_dirs_in_tree; - VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[8]); + } + MPI_Barrier(testComm); + endCreate = GetTimeStamp(); + summary_table->rate[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate); + summary_table->time[MDTEST_TREE_CREATE_NUM] = (endCreate - startCreate); + summary_table->items[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree; + summary_table->stonewall_last_item[MDTEST_TREE_CREATE_NUM] = o.num_dirs_in_tree; + VERBOSE(1,-1,"V-1: main: Tree creation : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[MDTEST_TREE_CREATE_NUM]); } sprintf(o.unique_mk_dir, "%s.0", o.base_tree_name); @@ -1915,11 +2016,11 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t MPI_Barrier(testComm); endCreate = GetTimeStamp(); - summary_table->rate[9] = o.num_dirs_in_tree / (endCreate - startCreate); - summary_table->time[9] = endCreate - startCreate; - summary_table->items[9] = o.num_dirs_in_tree; - summary_table->stonewall_last_item[8] = o.num_dirs_in_tree; - VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), summary_table->rate[9]); + summary_table->rate[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree / (endCreate - startCreate); + summary_table->time[MDTEST_TREE_REMOVE_NUM] = endCreate - startCreate; + summary_table->items[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree; + summary_table->stonewall_last_item[MDTEST_TREE_REMOVE_NUM] = o.num_dirs_in_tree; + VERBOSE(1,-1,"main Tree removal : %14.3f sec, %14.3f ops/sec", (endCreate - startCreate), 
summary_table->rate[MDTEST_TREE_REMOVE_NUM]); VERBOSE(2,-1,"main (at end of for j loop): Removing o.testdir of '%s'\n", o.testdir ); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ @@ -1932,7 +2033,7 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t } } } else { - summary_table->rate[9] = 0; + summary_table->rate[MDTEST_TREE_REMOVE_NUM] = 0; } } diff --git a/src/mdtest.h b/src/mdtest.h index 6267282..32d37fe 100644 --- a/src/mdtest.h +++ b/src/mdtest.h @@ -8,14 +8,15 @@ typedef enum { MDTEST_DIR_CREATE_NUM = 0, MDTEST_DIR_STAT_NUM = 1, - MDTEST_DIR_READ_NUM = 1, - MDTEST_DIR_REMOVE_NUM = 3, - MDTEST_FILE_CREATE_NUM = 4, - MDTEST_FILE_STAT_NUM = 5, - MDTEST_FILE_READ_NUM = 6, - MDTEST_FILE_REMOVE_NUM = 7, - MDTEST_TREE_CREATE_NUM = 8, - MDTEST_TREE_REMOVE_NUM = 9, + MDTEST_DIR_READ_NUM = 2, + MDTEST_DIR_RENAME_NUM = 3, + MDTEST_DIR_REMOVE_NUM = 4, + MDTEST_FILE_CREATE_NUM = 5, + MDTEST_FILE_STAT_NUM = 6, + MDTEST_FILE_READ_NUM = 7, + MDTEST_FILE_REMOVE_NUM = 8, + MDTEST_TREE_CREATE_NUM = 9, + MDTEST_TREE_REMOVE_NUM = 10, MDTEST_LAST_NUM } mdtest_test_num_t; From 3af915aae1c9ce52b71640512c83393774aeb641 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 22 Jan 2021 12:10:09 +0000 Subject: [PATCH 119/154] MDTest data verification. Fixed bug: added missing reduce. Add rank into I/O buffer to make individual files unique. --- src/mdtest.c | 52 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 48ca7b8..58d644d 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -219,10 +219,16 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ... 
} } -void generate_memory_pattern(char * buffer, size_t bytes){ - // the first byte is set to the item number - for(int i=1; i < bytes; i++){ - buffer[i] = i + 1; +void generate_memory_pattern(char * buf, size_t bytes){ + uint64_t * buffi = (uint64_t*) buf; + // first half of 64 bits use the rank + uint64_t ranki = (uint64_t)(rank + 1) << 32; + // the first 8 bytes are set to item number + for(size_t i=1; i < bytes/8; i++){ + buffi[i] = (i + 1) + ranki; + } + for(size_t i=(bytes/8)*8; i < bytes; i++){ + buf[i] = (char) i; } } @@ -349,19 +355,31 @@ static void remove_file (const char *path, uint64_t itemNum) { } } -void mdtest_verify_data(int item, char * buffer, size_t bytes){ +void mdtest_verify_data(int item, char * buffer, size_t bytes, int pretendRank){ if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){ VERBOSE(2, -1, "Error verifying first element for item: %d", item); o.verification_error++; + return; } - size_t i = bytes < 8 ? 1 : 8; // the first byte - - for( ; i < bytes; i++){ - if(buffer[i] != (char) (i + 1)){ + uint64_t * buffi = (uint64_t*) buffer; + // first half of 64 bits use the rank, here need to apply rank shifting + uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32; + // the first 8 bytes are set to item number + for(size_t i=1; i < bytes/8; i++){ + uint64_t exp = (i + 1) + rank_mod; + if(buffi[i] != exp){ + VERBOSE(5, -1, "Error verifying offset %zu for item %d", i*8, item); + printf("%lld != %lld\n", exp, buffi[i]); + o.verification_error++; + return; + } + } + for(size_t i=(bytes/8)*8; i < bytes; i++){ + if(buffer[i] != (char) i){ VERBOSE(5, -1, "Error verifying byte %zu for item %d", i, item); o.verification_error++; - break; + return; } } } @@ -432,7 +450,7 @@ static void create_file (const char *path, uint64_t itemNum) { if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { EWARNF("unable to verify write 
(read/back) file %s", curr_item); } - mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes); + mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes, rank); } } @@ -753,7 +771,11 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { continue; } if(o.verify_read){ - mdtest_verify_data(item_num, read_buffer, o.read_bytes); + int pretend_rank = (2 * o.nstride + rank) % o.size; + if (o.shared_file) { + pretend_rank = rank; + } + mdtest_verify_data(item_num, read_buffer, o.read_bytes, pretend_rank); }else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){ // do a lightweight check, which cost is neglectable o.verification_error++; @@ -2431,8 +2453,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * FAIL("Unable to remove test directory path %s", o.testdirpath); } - if(o.verification_error){ - VERBOSE(0, -1, "\nERROR: verifying the data read! Take the performance values with care!\n"); + int total_errors; + MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm); + if(total_errors){ + VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors); } VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp()); From effcb4131c5d15a35b1f5d1a601c3f5df6c17ea3 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 22 Jan 2021 12:24:33 +0000 Subject: [PATCH 120/154] MDTest: add randomness to buffers to defend dedup efforts. Allow to set random offset externally. 
--- src/mdtest.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 58d644d..27e4d7d 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -124,6 +124,7 @@ typedef struct { int leaf_only; unsigned branch_factor; int depth; + int random_buffer_offset; /* user settable value, otherwise random */ /* * This is likely a small value, but it's sometimes computed by @@ -222,7 +223,7 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ... void generate_memory_pattern(char * buf, size_t bytes){ uint64_t * buffi = (uint64_t*) buf; // first half of 64 bits use the rank - uint64_t ranki = (uint64_t)(rank + 1) << 32; + uint64_t ranki = (uint64_t)(rank + 1) << 32 + o.random_buffer_offset; // the first 8 bytes are set to item number for(size_t i=1; i < bytes/8; i++){ buffi[i] = (i + 1) + ranki; @@ -364,13 +365,12 @@ void mdtest_verify_data(int item, char * buffer, size_t bytes, int pretendRank){ uint64_t * buffi = (uint64_t*) buffer; // first half of 64 bits use the rank, here need to apply rank shifting - uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32; + uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32 + o.random_buffer_offset; // the first 8 bytes are set to item number for(size_t i=1; i < bytes/8; i++){ uint64_t exp = (i + 1) + rank_mod; if(buffi[i] != exp){ VERBOSE(5, -1, "Error verifying offset %zu for item %d", i*8, item); - printf("%lld != %lld\n", exp, buffi[i]); o.verification_error++; return; } @@ -2062,7 +2062,8 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t void mdtest_init_args(){ o = (mdtest_options_t) { .barriers = 1, - .branch_factor = 1 + .branch_factor = 1, + .random_buffer_offset = -1 }; } @@ -2116,6 +2117,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * #ifdef HAVE_LUSTRE_LUSTREAPI {'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", 
OPTION_FLAG, 'd', & o.global_dir_layout}, #endif /* HAVE_LUSTRE_LUSTREAPI */ + {'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset}, {'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations}, {'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items_per_dir}, {'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & o.make_node}, @@ -2202,6 +2204,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } o.random_seed += rank; } + if( o.random_buffer_offset == -1 ){ + o.random_buffer_offset = time(NULL); + MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, testComm); + } if ((o.items > 0) && (o.items_per_dir > 0) && (! o.unique_dir_per_task)) { o.directory_loops = o.items / o.items_per_dir; }else{ From 7061b60ed8d4853411a8a9b1d8ceff1c8035e8d0 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 22 Jan 2021 14:05:58 +0000 Subject: [PATCH 121/154] Extracted memory pattern comparison, added mem check option to md-workbench. 
--- src/md-workbench.c | 45 +++++++++++++++++++++++++------------- src/mdtest.c | 54 +++++----------------------------------------- src/utilities.c | 47 ++++++++++++++++++++++++++++++++++++++++ src/utilities.h | 5 +++++ 4 files changed, 87 insertions(+), 64 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index b9b1b23..e7213b9 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -115,6 +115,7 @@ struct benchmark_options{ int ignore_precreate_errors; int rank; int size; + int verify_read; float relative_waiting_factor; int adaptive_waiting_mode; @@ -549,7 +550,7 @@ void run_precreate(phase_stat_t * s, int current_index){ } char * buf = malloc(o.file_size); - memset(buf, o.rank % 256, o.file_size); + generate_memory_pattern(buf, o.file_size, 0, o.rank); double op_timer; // timer for individual operations size_t pos = -1; // position inside the individual measurement array double op_time; @@ -565,6 +566,7 @@ void run_precreate(phase_stat_t * s, int current_index){ if (NULL == aiori_fh){ FAIL("Unable to open file %s", obj_name); } + update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, 0, o.rank); if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; }else{ @@ -643,11 +645,19 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if (NULL == aiori_fh){ FAIL("Unable to open file %s", obj_name); } - if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { - s->obj_read.suc++; + if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) { + if(o.verify_read){ + if(verify_memory_pattern(f * o.dset_count + d, buf, o.file_size, 0, readRank) == 0){ + s->obj_read.suc++; + }else{ + s->obj_read.err++; + } + }else{ + s->obj_read.suc++; + } }else{ s->obj_read.err++; - ERRF("%d: Error while reading the obj: %s\n", o.rank, obj_name); + 
EWARNF("%d: Error while reading the obj: %s", o.rank, obj_name); } o.backend->close(aiori_fh, o.backend_options); @@ -676,19 +686,23 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ op_timer = GetTimeStamp(); aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); - if (NULL == aiori_fh){ - FAIL("Unable to open file %s", obj_name); - } - if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { - s->obj_create.suc++; - }else{ - s->obj_create.err++; - if (! o.ignore_precreate_errors){ - ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name); + if (NULL != aiori_fh){ + if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { + s->obj_create.suc++; + }else{ + s->obj_create.err++; + if (! o.ignore_precreate_errors){ + ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name); + } } + o.backend->close(aiori_fh, o.backend_options); + }else{ + if (! 
o.ignore_precreate_errors){ + ERRF("Unable to open file %s", obj_name); + } + EWARNF("Unable to open file %s", obj_name); + s->obj_create.err++; } - o.backend->close(aiori_fh, o.backend_options); - bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if(o.relative_waiting_factor > 1e-9) { mdw_wait(op_time); @@ -800,6 +814,7 @@ static option_help options [] = { {'3', "run-cleanup", "Run cleanup phase (only run explicit phases)", OPTION_FLAG, 'd', & o.phase_cleanup}, {'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer}, {'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out}, + {'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read}, {0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number}, {0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats}, {0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only}, diff --git a/src/mdtest.c b/src/mdtest.c index 27e4d7d..ac9bdc4 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -220,24 +220,10 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ... 
} } -void generate_memory_pattern(char * buf, size_t bytes){ - uint64_t * buffi = (uint64_t*) buf; - // first half of 64 bits use the rank - uint64_t ranki = (uint64_t)(rank + 1) << 32 + o.random_buffer_offset; - // the first 8 bytes are set to item number - for(size_t i=1; i < bytes/8; i++){ - buffi[i] = (i + 1) + ranki; - } - for(size_t i=(bytes/8)*8; i < bytes; i++){ - buf[i] = (char) i; - } -} - void offset_timers(double * t, int tcount) { double toffset; int i; - VERBOSE(1,-1,"V-1: Entering offset_timers..." ); toffset = GetTimeStamp() - t[tcount]; @@ -356,33 +342,6 @@ static void remove_file (const char *path, uint64_t itemNum) { } } -void mdtest_verify_data(int item, char * buffer, size_t bytes, int pretendRank){ - if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){ - VERBOSE(2, -1, "Error verifying first element for item: %d", item); - o.verification_error++; - return; - } - - uint64_t * buffi = (uint64_t*) buffer; - // first half of 64 bits use the rank, here need to apply rank shifting - uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32 + o.random_buffer_offset; - // the first 8 bytes are set to item number - for(size_t i=1; i < bytes/8; i++){ - uint64_t exp = (i + 1) + rank_mod; - if(buffi[i] != exp){ - VERBOSE(5, -1, "Error verifying offset %zu for item %d", i*8, item); - o.verification_error++; - return; - } - } - for(size_t i=(bytes/8)*8; i < bytes; i++){ - if(buffer[i] != (char) i){ - VERBOSE(5, -1, "Error verifying byte %zu for item %d", i, item); - o.verification_error++; - return; - } - } -} static void create_file (const char *path, uint64_t itemNum) { char curr_item[MAX_PATHLEN]; @@ -436,11 +395,8 @@ static void create_file (const char *path, uint64_t itemNum) { * offset 0 (zero). 
*/ o.hints.fsyncPerWrite = o.sync_file; - if(o.write_bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible - ((uint64_t*) o.write_buffer)[0] = itemNum; - }else{ - o.write_buffer[0] = (char) itemNum; - } + update_write_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); + if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { EWARNF("unable to write file %s", curr_item); } @@ -450,7 +406,7 @@ static void create_file (const char *path, uint64_t itemNum) { if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { EWARNF("unable to verify write (read/back) file %s", curr_item); } - mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes, rank); + o.verification_error += verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); } } @@ -775,7 +731,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { if (o.shared_file) { pretend_rank = rank; } - mdtest_verify_data(item_num, read_buffer, o.read_bytes, pretend_rank); + o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank); }else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){ // do a lightweight check, which cost is neglectable o.verification_error++; @@ -2333,7 +2289,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if (alloc_res) { FAIL("out of memory"); } - generate_memory_pattern(o.write_buffer, o.write_bytes); + generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); } /* setup directory path to work in */ diff --git a/src/utilities.c b/src/utilities.c index 6b0871f..16a31b0 100755 --- a/src/utilities.c +++ 
b/src/utilities.c @@ -71,6 +71,53 @@ enum OutputFormat_t outputFormat; /***************************** F U N C T I O N S ******************************/ +void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank){ + if(bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible + ((uint64_t*) buf)[0] = item; + }else{ + buf[0] = (char) item; + } +} + +void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank){ + uint64_t * buffi = (uint64_t*) buf; + // first half of 64 bits use the rank + const uint64_t ranki = (uint64_t)(rank + 1) << 32 + buff_offset; + const size_t size = bytes / 8; + // the first 8 bytes are set to item number + for(size_t i=1; i < size; i++){ + buffi[i] = (i + 1) + ranki; + } + for(size_t i=(bytes/8)*8; i < bytes; i++){ + buf[i] = (char) i; + } +} + +int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank){ + int error = 0; + // always read all data to ensure that performance numbers stay the same + if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){ + error = 1; + } + + uint64_t * buffi = (uint64_t*) buffer; + // first half of 64 bits use the rank, here need to apply rank shifting + uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32 + buff_offset; + // the first 8 bytes are set to item number + for(size_t i=1; i < bytes/8; i++){ + uint64_t exp = (i + 1) + rank_mod; + if(buffi[i] != exp){ + error = 1; + } + } + for(size_t i=(bytes/8)*8; i < bytes; i++){ + if(buffer[i] != (char) i){ + error = 1; + } + } + return error; +} + void* safeMalloc(uint64_t size){ void * d = malloc(size); if (d == NULL){ diff --git a/src/utilities.h b/src/utilities.h index b0be545..202bcad 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -35,6 +35,11 @@ extern enum OutputFormat_t outputFormat; /* format of the output */ void* safeMalloc(uint64_t size); void 
set_o_direct_flag(int *fd); +void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank); +void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank); +/* check a data buffer, @return 0 if all is correct, otherwise 1 */ +int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank); + char *CurrentTimeString(void); int Regex(char *, char *); void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options); From 0a066d8285fbbe8f0843a1b98745c58c280d5465 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 22 Jan 2021 14:38:36 +0000 Subject: [PATCH 122/154] MD-Worbench: add -G option to set parameter. --- src/md-workbench.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index e7213b9..672a73a 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -116,6 +116,7 @@ struct benchmark_options{ int rank; int size; int verify_read; + int random_buffer_offset; float relative_waiting_factor; int adaptive_waiting_mode; @@ -134,16 +135,17 @@ static void def_obj_name(char * out_name, int n, int d, int i){ } void init_options(){ - memset(& o, 0, sizeof(o)); - o.interface = "POSIX"; - o.prefix = "./out"; - o.num = 1000; - o.precreate = 3000; - o.dset_count = 10; - o.offset = 1; - o.iterations = 3; - o.file_size = 3901; - o.run_info_file = "md-workbench.status"; + o = (struct benchmark_options){ + .interface = "POSIX", + .prefix = "./out", + .num = 1000, + .random_buffer_offset = -1, + .precreate = 3000, + .dset_count = 10, + .offset = 1, + .iterations = 3, + .file_size = 3901, + .run_info_file = "md-workbench.status"}; } static void mdw_wait(double runtime){ @@ -550,7 +552,7 @@ void run_precreate(phase_stat_t * s, int current_index){ } char * buf = malloc(o.file_size); - generate_memory_pattern(buf, o.file_size, 0, o.rank); + 
generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, o.rank); double op_timer; // timer for individual operations size_t pos = -1; // position inside the individual measurement array double op_time; @@ -566,7 +568,7 @@ void run_precreate(phase_stat_t * s, int current_index){ if (NULL == aiori_fh){ FAIL("Unable to open file %s", obj_name); } - update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, 0, o.rank); + update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, o.rank); if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; }else{ @@ -647,7 +649,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) { if(o.verify_read){ - if(verify_memory_pattern(f * o.dset_count + d, buf, o.file_size, 0, readRank) == 0){ + if(verify_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, readRank) == 0){ s->obj_read.suc++; }else{ s->obj_read.err++; @@ -801,6 +803,7 @@ static option_help options [] = { {0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all}, {'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate}, {'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count}, + {'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset}, {'o', NULL, "Output directory", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, {'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output}, //{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory}, @@ 
-906,6 +909,10 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c ERR("Invalid options, if running only the benchmark phase using -2 with stonewall option then use stonewall wear-out"); exit(1); } + if( o.random_buffer_offset == -1 ){ + o.random_buffer_offset = time(NULL); + MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, o.com); + } if(o.backend->xfer_hints){ o.backend->xfer_hints(& o.hints); From 351fd4dfb93c8533ba0ce0b05bffcc54b1cf2167 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 29 Jan 2021 09:08:54 +0000 Subject: [PATCH 123/154] Move initialization of TestComm before aiori->initialize() --- src/ior.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/ior.c b/src/ior.c index 4f28c19..79f48d6 100755 --- a/src/ior.c +++ b/src/ior.c @@ -81,8 +81,28 @@ static void ior_set_xfer_hints(IOR_param_t * p){ int aiori_warning_as_errors = 0; static void test_initialize(IOR_test_t * test){ + int range[3]; + IOR_param_t *params = &test->params; + MPI_Group orig_group, new_group; + + /* set up communicator for test */ + MPI_CHECK(MPI_Comm_group(params->mpi_comm_world, &orig_group), + "MPI_Comm_group() error"); + range[0] = 0; /* first rank */ + range[1] = params->numTasks - 1; /* last rank */ + range[2] = 1; /* stride */ + MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group), + "MPI_Group_range_incl() error"); + MPI_CHECK(MPI_Comm_create(params->mpi_comm_world, new_group, & params->testComm), + "MPI_Comm_create() error"); + MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error"); + MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error"); + + /* Setup global variables */ + testComm = params->testComm; verbose = test->params.verbose; backend = test->params.backend; + if(backend->initialize){ backend->initialize(test->params.backend_options); } @@ -1221,26 +1241,11 @@ static void TestIoSys(IOR_test_t *test) int pretendRank; int rep; 
aiori_fd_t *fd; - MPI_Group orig_group, new_group; - int range[3]; IOR_offset_t dataMoved; /* for data rate calculation */ void *hog_buf; IOR_io_buffers ioBuffers; - /* set up communicator for test */ - MPI_CHECK(MPI_Comm_group(params->mpi_comm_world, &orig_group), - "MPI_Comm_group() error"); - range[0] = 0; /* first rank */ - range[1] = params->numTasks - 1; /* last rank */ - range[2] = 1; /* stride */ - MPI_CHECK(MPI_Group_range_incl(orig_group, 1, &range, &new_group), - "MPI_Group_range_incl() error"); - MPI_CHECK(MPI_Comm_create(params->mpi_comm_world, new_group, &testComm), - "MPI_Comm_create() error"); - MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error"); - MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error"); - params->testComm = testComm; - if (testComm == MPI_COMM_NULL) { + if (params->testComm == MPI_COMM_NULL) { /* tasks not in the group do not participate in this test */ MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); return; From 7b69fc1500ff45a6b80212358238b50d628a2366 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 29 Jan 2021 14:00:17 +0000 Subject: [PATCH 124/154] Free testcom after finalize. 
--- src/ior.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/ior.c b/src/ior.c index 79f48d6..7b46298 100755 --- a/src/ior.c +++ b/src/ior.c @@ -80,7 +80,10 @@ static void ior_set_xfer_hints(IOR_param_t * p){ int aiori_warning_as_errors = 0; -static void test_initialize(IOR_test_t * test){ +/* + Returns 1 if the process participates in the test + */ +static int test_initialize(IOR_test_t * test){ int range[3]; IOR_param_t *params = &test->params; MPI_Group orig_group, new_group; @@ -98,6 +101,13 @@ static void test_initialize(IOR_test_t * test){ MPI_CHECK(MPI_Group_free(&orig_group), "MPI_Group_Free() error"); MPI_CHECK(MPI_Group_free(&new_group), "MPI_Group_Free() error"); + + if (params->testComm == MPI_COMM_NULL) { + /* tasks not in the group do not participate in this test */ + MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); + return 0; + } + /* Setup global variables */ testComm = params->testComm; verbose = test->params.verbose; @@ -112,6 +122,7 @@ static void test_initialize(IOR_test_t * test){ if (rank == 0 && verbose >= VERBOSE_0) { ShowTestStart(& test->params); } + return 1; } static void test_finalize(IOR_test_t * test){ @@ -119,6 +130,7 @@ static void test_finalize(IOR_test_t * test){ if(backend->finalize){ backend->finalize(test->params.backend_options); } + MPI_CHECK(MPI_Comm_free(& testComm), "MPI_Comm_free() error"); } @@ -138,7 +150,8 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out /* perform each test */ for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { - test_initialize(tptr); + int participate = test_initialize(tptr); + if( ! 
participate ) continue; totalErrorCount = 0; TestIoSys(tptr); tptr->results->errors = totalErrorCount; @@ -184,7 +197,8 @@ int ior_main(int argc, char **argv) /* perform each test */ for (tptr = tests_head; tptr != NULL; tptr = tptr->next) { - test_initialize(tptr); + int participate = test_initialize(tptr); + if( ! participate ) continue; // This is useful for trapping a running MPI process. While // this is sleeping, run the script 'testing/hdfs/gdb.attach' @@ -1245,11 +1259,6 @@ static void TestIoSys(IOR_test_t *test) void *hog_buf; IOR_io_buffers ioBuffers; - if (params->testComm == MPI_COMM_NULL) { - /* tasks not in the group do not participate in this test */ - MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); - return; - } if (rank == 0 && verbose >= VERBOSE_1) { fprintf(out_logfile, "Participating tasks : %d\n", params->numTasks); fflush(out_logfile); @@ -1529,8 +1538,6 @@ static void TestIoSys(IOR_test_t *test) } PrintRepeatEnd(); - MPI_CHECK(MPI_Comm_free(&testComm), "MPI_Comm_free() error"); - if (params->summary_every_test) { PrintLongSummaryHeader(); PrintLongSummaryOneTest(test); @@ -1542,9 +1549,6 @@ static void TestIoSys(IOR_test_t *test) if (hog_buf != NULL) free(hog_buf); - - /* Sync with the tasks that did not participate in this test */ - MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); } /* From 265cdb2a42b9bbd71ed926b9dbdaa1bb78bc8b24 Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Fri, 5 Feb 2021 10:49:35 +0000 Subject: [PATCH 125/154] MDWorkbench: Fix new verification option --- src/md-workbench.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/md-workbench.c b/src/md-workbench.c index 672a73a..7f08611 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -582,7 +582,7 @@ void run_precreate(phase_stat_t * s, int current_index){ add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time); if (o.verbosity >= 2){ - oprintf("%d: write %s:%s (%d)\n", o.rank, dset, obj_name, ret); + oprintf("%d: write %s:%s (%d) pretend: %d\n", o.rank, dset, obj_name, ret, o.rank); } } } @@ -639,7 +639,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ s->obj_stat.suc++; if (o.verbosity >= 2){ - oprintf("%d: read %s \n", o.rank, obj_name); + oprintf("%d: read %s pretend: %d\n", o.rank, obj_name, readRank); } op_timer = GetTimeStamp(); @@ -649,7 +649,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) { if(o.verify_read){ - if(verify_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, readRank) == 0){ + if(verify_memory_pattern(prevFile * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, readRank) == 0){ s->obj_read.suc++; }else{ s->obj_read.err++; @@ -684,11 +684,15 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ s->obj_delete.suc++; int writeRank = (o.rank + o.offset * (d+1)) % o.size; - def_obj_name(obj_name, writeRank, d, o.precreate + prevFile); + const int newFileIndex = o.precreate + prevFile; + def_obj_name(obj_name, writeRank, d, newFileIndex); op_timer = GetTimeStamp(); aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL != aiori_fh){ + generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, writeRank); + 
update_write_memory_pattern(newFileIndex * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, writeRank); + if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; }else{ @@ -711,7 +715,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if (o.verbosity >= 2){ - oprintf("%d: write %s (%d)\n", o.rank, obj_name, ret); + oprintf("%d: write %s (%d) pretend: %d\n", o.rank, obj_name, ret, writeRank); } } // end loop From b4db470459bca48b5608c772bb4f81c09a5ff8d2 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 5 Feb 2021 11:15:19 +0000 Subject: [PATCH 126/154] Testing improvements. Uploaded new mdtest patterns. Make them invariant to line nrs. Added md-workbench tests. Added --oversubscribe option if MPI understands it (needed for some systems) --- testing/basic-tests.sh | 12 +- testing/mdtest-patterns/advanced/3.txt | 187 ++++++++++++------------- testing/mdtest-patterns/advanced/4.txt | 101 +++++++------ testing/mdtest-patterns/advanced/5.txt | 172 +++++++++++++---------- testing/mdtest-patterns/basic/0.txt | 52 ++++--- testing/mdtest-patterns/basic/1.txt | 52 ++++--- testing/mdtest-patterns/basic/2.txt | 55 ++++---- testing/mdtest-patterns/basic/3.txt | 65 ++++----- testing/test-lib.sh | 25 +++- 9 files changed, 377 insertions(+), 344 deletions(-) diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index cf09082..4377511 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -26,7 +26,7 @@ IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 200k IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 200k IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 200k IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 200k -IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 200k +IOR 3 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 200k IOR 2 -f "$ROOT/test_comments.ior" @@ -34,4 +34,14 @@ IOR 2 -f 
"$ROOT/test_comments.ior" IOR 2 -a DUMMY -e -F -t 1m -b 1m -A 328883 -O summaryFormat=JSON -O summaryFile=OUT.json python -mjson.tool OUT.json >/dev/null && echo "JSON OK" +# MDWB +MDWB 3 -a POSIX -O=1 -D=1 -G=10 -P=1 -I=1 -R=2 -X +MDWB 3 -a POSIX -O=1 -D=4 -G=10 -P=4 -I=1 -R=2 -X -t=0.001 -L=latency.txt +MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -R=2 -X -W -w 1 +MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -1 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats +MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats +MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --read-only --run-info-file=mdw.tst --print-detailed-stats +MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --read-only --run-info-file=mdw.tst --print-detailed-stats +MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -3 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats + END diff --git a/testing/mdtest-patterns/advanced/3.txt b/testing/mdtest-patterns/advanced/3.txt index 4c45941..cff653e 100644 --- a/testing/mdtest-patterns/advanced/3.txt +++ b/testing/mdtest-patterns/advanced/3.txt @@ -1,95 +1,92 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
-V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' -V-3: Rank 0 Line 288 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' -V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 -V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 -V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0' -V-3: Rank 0 Line 348 create_remove_items_helper 
(non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... 
-V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... 
-V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... 
-V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 will file_test on mdtest_tree.0 +V-3: Rank 0 Entering file_test on mdtest_tree.0 +V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... 
+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... 
+V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... 
+V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/advanced/4.txt b/testing/mdtest-patterns/advanced/4.txt index 5d3b7da..62548ae 100644 --- a/testing/mdtest-patterns/advanced/4.txt +++ b/testing/mdtest-patterns/advanced/4.txt @@ -1,52 +1,49 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
-V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -V-3: Rank 0 Line 588 mdtest_stat dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 -V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 -V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 -V-3: Rank 0 Line 588 mdtest_stat file: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 -V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 mdtest_stat dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 will file_test on mdtest_tree.0 +V-3: Rank 0 Entering file_test on mdtest_tree.0 +V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.0 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.1 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.2 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.3 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.4 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.5 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.6 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.7 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.8 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.9 +V-3: Rank 0 mdtest_stat file: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.10 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.11 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.12 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.13 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.14 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.15 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.16 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.17 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.18 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/file.mdtest.0.19 +V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/advanced/5.txt b/testing/mdtest-patterns/advanced/5.txt index e87ae0a..7192c35 100644 --- a/testing/mdtest-patterns/advanced/5.txt +++ b/testing/mdtest-patterns/advanced/5.txt @@ -1,77 +1,95 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
-V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' -V-3: Rank 0 Line 288 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' -V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 -V-3: Rank 0 Line 588 mdtest_stat dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 -V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' -V-3: Rank 0 Line 288 
create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' -V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 
create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 mdtest_stat dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 rename path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17 +V-3: Rank 0 mdtest_rename dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18 +V-3: Rank 0 mdtest_rename dir: 
/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19 +V-3: Rank 0 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.0' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.1' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.2' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.3' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.4' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.5' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.6' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.7' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.8' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.9' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.10' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.11' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.12' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.13' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.14' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.15' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.16' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.17' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.18' +V-3: Rank 0 create_remove_items_helper (dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0/dir.mdtest.0.19' +V-3: Rank 0 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/0.txt b/testing/mdtest-patterns/basic/0.txt index ebe0f14..4c816c5 100644 --- a/testing/mdtest-patterns/basic/0.txt +++ b/testing/mdtest-patterns/basic/0.txt @@ -1,27 +1,25 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
-V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 -V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 -V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' -V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 rename path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 will file_test on mdtest_tree.0 +V-3: Rank 0 Entering file_test on mdtest_tree.0 +V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: rm 
directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: rm unique directories path is 'mdtest_tree.0' +V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/1.txt b/testing/mdtest-patterns/basic/1.txt index ebe0f14..4c816c5 100644 --- a/testing/mdtest-patterns/basic/1.txt +++ b/testing/mdtest-patterns/basic/1.txt @@ -1,27 +1,25 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... -V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1656 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1683 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir -V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 862 directory_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 890 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is 
'/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 915 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0 -V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0 -V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1104 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' -V-3: Rank 0 Line 1158 file_test: rm unique directories path is 'mdtest_tree.0' -V-3: Rank 0 Line 1723 main: Using testdir, '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1764 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (create hierarchical directory loop-!unque_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main: Using unique_mk_dir, 'mdtest_tree.0' +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0', to topdir +V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 directory_test: read path 
is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 rename path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 directory_test: remove directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 directory_test: remove unique directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 will file_test on mdtest_tree.0 +V-3: Rank 0 Entering file_test on mdtest_tree.0 +V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: read path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0 +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0' +V-3: Rank 0 file_test: rm unique directories path is 'mdtest_tree.0' +V-3: Rank 0 main: Using o.testdir, '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 V-3: main (remove hierarchical directory loop-!unique_dir_per_task): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/2.txt b/testing/mdtest-patterns/basic/2.txt index 77f5c78..099b265 100644 --- a/testing/mdtest-patterns/basic/2.txt +++ b/testing/mdtest-patterns/basic/2.txt @@ -1,29 +1,26 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... 
-V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1694 i 1 nstride 0 -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir -V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 -V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 -V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... 
-V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 -V-3: Rank 0 Line 1134 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 1141 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0 -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 310 create_remove_items_helper (non-dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 1158 file_test: rm unique directories path is '/dev/shm/mdest/test-dir.0-0/' -V-3: Rank 0 Line 1754 main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 i 1 nstride 0 +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir +V-3: Rank 0 will file_test on mdtest_tree.0.0 +V-3: Rank 0 Entering file_test on mdtest_tree.0.0 +V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: 
Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 +V-3: Rank 0 file_test: rm directories path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 gonna create /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0 +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 create_remove_items_helper (non-dirs remove): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 file_test: rm unique directories path is '/dev/shm/mdest/test-dir.0-0/' +V-3: Rank 0 main (remove hierarchical directory loop-!collective): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' diff --git a/testing/mdtest-patterns/basic/3.txt b/testing/mdtest-patterns/basic/3.txt index eafadc1..cf925f8 100644 
--- a/testing/mdtest-patterns/basic/3.txt +++ b/testing/mdtest-patterns/basic/3.txt @@ -1,34 +1,31 @@ -V-3: Rank 0 Line 2082 main (before display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1506 Entering display_freespace on /dev/shm/mdest... -V-3: Rank 0 Line 1525 Before show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 1527 After show_file_system_size, dirpath is '/dev/shm' -V-3: Rank 0 Line 2097 main (after display_freespace): testdirpath is '/dev/shm/mdest' -V-3: Rank 0 Line 1647 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' -V-3: Rank 0 Line 1694 i 1 nstride 0 -V-3: Rank 0 Line 1704 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir -V-3: Rank 0 Line 801 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 288 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 833 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 -V-3: Rank 0 Line 588 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 -V-3: Rank 0 Line 1716 will file_test on mdtest_tree.0.0 
-V-3: Rank 0 Line 990 Entering file_test on mdtest_tree.0.0 -V-3: Rank 0 Line 1012 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... -V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 483 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 326 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' -V-3: Rank 0 Line 348 create_remove_items_helper (non-collective, shared): open... -V-3: Rank 0 Line 373 create_remove_items_helper: close... 
-V-3: Rank 0 Line 457 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' -V-3: Rank 0 Line 1079 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 -V-3: Rank 0 Line 588 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 +V-3: Rank 0 main (before display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (after display_freespace): o.testdirpath is '/dev/shm/mdest' +V-3: Rank 0 main (create hierarchical directory loop-!collective_creates): Calling create_remove_directory_tree with '/dev/shm/mdest/test-dir.0-0' +V-3: Rank 0 i 1 nstride 0 +V-3: Rank 0 V-3: main: Copied unique_mk_dir, 'mdtest_tree.0.0', to topdir +V-3: Rank 0 directory_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 create_remove_items_helper (dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//dir.mdtest.0.1' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/dir.mdtest.0.0 +V-3: Rank 0 mdtest_stat dir: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/dir.mdtest.0.1 +V-3: Rank 0 will file_test on mdtest_tree.0.0 +V-3: Rank 0 Entering file_test on 
mdtest_tree.0.0 +V-3: Rank 0 file_test: create path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 create_remove_items (for loop): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 create_remove_items_helper (non-dirs create): curr_item is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1//file.mdtest.0.1' +V-3: Rank 0 create_remove_items_helper (non-collective, shared): open... +V-3: Rank 0 create_remove_items_helper: close... +V-3: Rank 0 create_remove_items (start): temp_path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/' +V-3: Rank 0 file_test: stat path is '/dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0' +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/file.mdtest.0.0 +V-3: Rank 0 mdtest_stat file: /dev/shm/mdest/test-dir.0-0/mdtest_tree.0.0/mdtest_tree.0.1/file.mdtest.0.1 diff --git a/testing/test-lib.sh b/testing/test-lib.sh index e35b245..88db1e2 100644 --- a/testing/test-lib.sh +++ b/testing/test-lib.sh @@ -7,12 +7,17 @@ # Example: export IOR_EXTRA="-v -v -v" IOR_MPIRUN=${IOR_MPIRUN:-mpiexec -np} +if ${IOR_MPIRUN} 1 --oversubscribe true ; then + IOR_MPIRUN="mpiexec --oversubscribe -np" +fi IOR_BIN_DIR=${IOR_BIN_DIR:-./src} IOR_OUT=${IOR_OUT:-./test_logs} IOR_TMP=${IOR_TMP:-/dev/shm} IOR_EXTRA=${IOR_EXTRA:-} # Add global options like verbosity MDTEST_EXTRA=${MDTEST_EXTRA:-} MDTEST_TEST_PATTERNS=${MDTEST_TEST_PATTERNS:-../testing/mdtest-patterns/$TYPE} +MDWB_EXTRA=${MDWB_EXTRA:-} + 
################################################################################ mkdir -p ${IOR_OUT} @@ -63,8 +68,8 @@ function MDTEST(){ ERRORS=$(($ERRORS + 1)) else # compare basic pattern + grep "V-3" "${IOR_OUT}/test_out.$I" | sed "s/Line *[0-9]*//" > "${IOR_OUT}/tmp" if [[ -r ${MDTEST_TEST_PATTERNS}/$I.txt ]] ; then - grep "V-3" "${IOR_OUT}/test_out.$I" > "${IOR_OUT}/tmp" cmp -s "${IOR_OUT}/tmp" ${MDTEST_TEST_PATTERNS}/$I.txt if [[ $? != 0 ]]; then mv "${IOR_OUT}/tmp" ${IOR_OUT}/tmp.$I @@ -74,7 +79,7 @@ function MDTEST(){ if [[ ! -e ${MDTEST_TEST_PATTERNS} ]] ; then mkdir -p ${MDTEST_TEST_PATTERNS} fi - grep "V-3" "${IOR_OUT}/test_out.$I" > ${MDTEST_TEST_PATTERNS}/$I.txt + mv "${IOR_OUT}/tmp" ${MDTEST_TEST_PATTERNS}/$I.txt fi echo -n "OK " fi @@ -82,6 +87,22 @@ function MDTEST(){ I=$((${I}+1)) } +function MDWB(){ + RANKS=$1 + shift + WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/md-workbench ${@} -o ${IOR_TMP}/md-workbench ${MDWB_EXTRA}" + LOG="${IOR_OUT}/test_out.$I" + $WHAT 1>"$LOG" 2>&1 + if [[ $? != 0 ]] || grep '!!!' "$LOG" ; then + echo -n "ERR" + ERRORS=$(($ERRORS + 1)) + else + echo -n "OK " + fi + echo " $WHAT" + I=$((${I}+1)) +} + function END(){ if [[ ${ERRORS} == 0 ]] ; then echo "PASSED" From bdde24bd9142fb51b48d1a337c8255efe95b4614 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 5 Feb 2021 11:24:53 +0000 Subject: [PATCH 127/154] Save testlogs into directory per type. 
--- testing/test-lib.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test-lib.sh b/testing/test-lib.sh index 88db1e2..b331eda 100644 --- a/testing/test-lib.sh +++ b/testing/test-lib.sh @@ -11,7 +11,7 @@ if ${IOR_MPIRUN} 1 --oversubscribe true ; then IOR_MPIRUN="mpiexec --oversubscribe -np" fi IOR_BIN_DIR=${IOR_BIN_DIR:-./src} -IOR_OUT=${IOR_OUT:-./test_logs} +IOR_OUT=${IOR_OUT:-./test_logs/$TYPE} IOR_TMP=${IOR_TMP:-/dev/shm} IOR_EXTRA=${IOR_EXTRA:-} # Add global options like verbosity MDTEST_EXTRA=${MDTEST_EXTRA:-} From fa5b24f2aa6c5be717161de077c249bca9b3c753 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 8 Feb 2021 13:28:13 +0000 Subject: [PATCH 128/154] Bugfix user docu for ranks. Bugfix missmatching barriers. --- doc/USER_GUIDE | 2 +- src/ior.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/USER_GUIDE b/doc/USER_GUIDE index 2962753..c68aeca 100755 --- a/doc/USER_GUIDE +++ b/doc/USER_GUIDE @@ -164,7 +164,7 @@ GENERAL: * numTasks - number of tasks that should participate in the test [0] - NOTE: 0 denotes all tasks + NOTE: -1 denotes all tasks * interTestDelay - this is the time in seconds to delay before beginning a write or read in a series of tests [0] diff --git a/src/ior.c b/src/ior.c index 7b46298..986f3de 100755 --- a/src/ior.c +++ b/src/ior.c @@ -103,7 +103,7 @@ static int test_initialize(IOR_test_t * test){ if (params->testComm == MPI_COMM_NULL) { - /* tasks not in the group do not participate in this test */ + /* tasks not in the group do not participate in this test, this matches the proceses in test_finalize() that participate */ MPI_CHECK(MPI_Barrier(params->mpi_comm_world), "barrier error"); return 0; } @@ -130,6 +130,7 @@ static void test_finalize(IOR_test_t * test){ if(backend->finalize){ backend->finalize(test->params.backend_options); } + MPI_CHECK(MPI_Barrier(test->params.mpi_comm_world), "barrier error"); MPI_CHECK(MPI_Comm_free(& testComm), "MPI_Comm_free() error"); } From 
b5963380ae9ee4939c2c0e632371b64f0b796461 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Tue, 9 Feb 2021 17:54:14 +0000 Subject: [PATCH 129/154] Feature: IOR rank details in CSV file (#334) * IOR: Store individual rank results into a CSV file #333 Example usage: ior -O saveRankPerformanceDetailsCSV=test.csv --- src/ior.c | 63 ++++++++++++++++++++++++++++++++++++--------- src/ior.h | 1 + src/parse_options.c | 16 ++++++++++++ 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/src/ior.c b/src/ior.c index 986f3de..dd0f048 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1243,6 +1243,55 @@ WriteTimes(IOR_param_t *test, const double *timer, const int iteration, timerName); } } + +static void StoreRankInformation(IOR_test_t *test, double *timer, const int rep, const int access){ + IOR_param_t *params = &test->params; + double totalTime = timer[5] - timer[0]; + double accessTime = timer[3] - timer[2]; + double times[] = {totalTime, accessTime}; + + if(rank == 0){ + FILE* fd = fopen(params->saveRankDetailsCSV, "a"); + if (fd == NULL){ + FAIL("Cannot open saveRankPerformanceDetailsCSV file for writes!"); + } + int size; + MPI_Comm_size(params->testComm, & size); + double *all_times = malloc(2* size * sizeof(double)); + MPI_Gather(times, 2, MPI_DOUBLE, all_times, 2, MPI_DOUBLE, 0, params->testComm); + IOR_point_t *point = (access == WRITE) ? &test->results[rep].write : &test->results[rep].read; + double file_size = ((double) point->aggFileSizeForBW) / size; + + for(int i=0; i < size; i++){ + char buff[1024]; + sprintf(buff, "%s,%d,%.10e,%.10e,%.10e,%.10e\n", access==WRITE ? 
"write" : "read", i, all_times[i*2], all_times[i*2+1], file_size/all_times[i*2], file_size/all_times[i*2+1] ); + int ret = fwrite(buff, strlen(buff), 1, fd); + if(ret != 1){ + WARN("Couln't append to saveRankPerformanceDetailsCSV file\n"); + break; + } + } + fclose(fd); + }else{ + MPI_Gather(& times, 2, MPI_DOUBLE, NULL, 2, MPI_DOUBLE, 0, testComm); + } +} + +static void ProcessIterResults(IOR_test_t *test, double *timer, const int rep, const int access){ + IOR_param_t *params = &test->params; + + if (verbose >= VERBOSE_3) + WriteTimes(params, timer, rep, access); + ReduceIterResults(test, timer, rep, access); + if (params->outlierThreshold) { + CheckForOutliers(params, timer, access); + } + + if(params->saveRankDetailsCSV){ + StoreRankInformation(test, timer, rep, access); + } +} + /* * Using the test parameters, run iteration(s) of single test. */ @@ -1383,12 +1432,7 @@ static void TestIoSys(IOR_test_t *test) use actual amount of byte moved */ CheckFileSize(test, testFileName, dataMoved, rep, WRITE); - if (verbose >= VERBOSE_3) - WriteTimes(params, timer, rep, WRITE); - ReduceIterResults(test, timer, rep, WRITE); - if (params->outlierThreshold) { - CheckForOutliers(params, timer, WRITE); - } + ProcessIterResults(test, timer, rep, WRITE); /* check if in this round we run write with stonewalling */ if(params->deadlineForStonewalling > 0){ @@ -1513,12 +1557,7 @@ static void TestIoSys(IOR_test_t *test) use actual amount of byte moved */ CheckFileSize(test, testFileName, dataMoved, rep, READ); - if (verbose >= VERBOSE_3) - WriteTimes(params, timer, rep, READ); - ReduceIterResults(test, timer, rep, READ); - if (params->outlierThreshold) { - CheckForOutliers(params, timer, READ); - } + ProcessIterResults(test, timer, rep, READ); } if (!params->keepFile diff --git a/src/ior.h b/src/ior.h index 6252f78..e4663db 100755 --- a/src/ior.h +++ b/src/ior.h @@ -130,6 +130,7 @@ typedef struct IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */ IOR_offset_t 
randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */ + char * saveRankDetailsCSV; /* save the details about the performance to a file */ int summary_every_test; /* flag to print summary every test, not just at end */ int uniqueDir; /* use unique directory for each fpp */ int useExistingTestFile; /* do not delete test file before access */ diff --git a/src/parse_options.c b/src/parse_options.c index 05fa78f..82fab98 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -103,6 +103,21 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt } printf("Writing output to %s\n", value); } + } else if (strcasecmp(option, "saveRankPerformanceDetailsCSV") == 0){ + if (rank == 0){ + // check that the file is writeable, truncate it and add header + FILE* fd = fopen(value, "w"); + if (fd == NULL){ + FAIL("Cannot open saveRankPerformanceDetailsCSV file for write!"); + } + char buff[] = "access,rank,runtime-with-openclose,runtime,throughput-withopenclose,throughput\n"; + int ret = fwrite(buff, strlen(buff), 1, fd); + if(ret != 1){ + FAIL("Cannot write header to saveRankPerformanceDetailsCSV file"); + } + fclose(fd); + } + params->saveRankDetailsCSV = strdup(value); } else if (strcasecmp(option, "summaryFormat") == 0) { if(strcasecmp(value, "default") == 0){ outputFormat = OUTPUT_DEFAULT; @@ -439,6 +454,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputting the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O saveRankPerformanceDetailsCSV= -- store the performance of each rank into the named CSV file.", .arg = OPTION_OPTIONAL_ARGUMENT}, {0, "dryRun", "do not perform any I/Os just run evtl. 
inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun}, LAST_OPTION, }; From 106eebecb3ad637ae4e6981bb70fb799a90a7ad6 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 11 Feb 2021 09:57:22 +0000 Subject: [PATCH 130/154] MDTest allow storing information per rank for later analysis (#335) * MDTest allow storing information per rank for later analysis when using the --saveRankPerformanceDetails= option * MDTest: refactored calculation of results, added time_before_executing a barrier. --- src/mdtest.c | 168 ++++++++++++++++++++++++++++++++++++--------------- src/mdtest.h | 1 + 2 files changed, 119 insertions(+), 50 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index ac9bdc4..cfd221a 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -163,6 +163,7 @@ typedef struct { #ifdef HAVE_LUSTRE_LUSTREAPI int global_dir_layout; #endif /* HAVE_LUSTRE_LUSTREAPI */ + char * saveRankDetailsCSV; /* save the details about the performance to a file */ mdtest_results_t * summary_table; pid_t pid; @@ -897,9 +898,20 @@ void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank } } +static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, int t, double * times, double * tBefore){ + res->rate[test] = item_count/(times[t] - times[t-1]); + res->time[test] = times[t] - times[t-1]; + if(tBefore){ + res->time_before_barrier[test] = tBefore[t] - times[t-1]; + } + res->items[test] = item_count; + res->stonewall_last_item[test] = o.items; +} + void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { int size; double t[6] = {0}; + double tBefore[6] = {0}; char temp_path[MAX_PATHLEN]; mdtest_results_t * res = & o.summary_table[iteration]; @@ -907,6 +919,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran VERBOSE(1,-1,"Entering directory_test on %s", path ); + tBefore[0] = GetTimeStamp(); MPI_Barrier(testComm); t[0] = GetTimeStamp(); @@ 
-941,6 +954,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran progress->stone_wall_timer_seconds = 0; } + tBefore[1] = GetTimeStamp(); phase_end(); t[1] = GetTimeStamp(); @@ -967,6 +981,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } } + tBefore[2] = GetTimeStamp(); phase_end(); t[2] = GetTimeStamp(); @@ -993,6 +1008,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } } + tBefore[3] = GetTimeStamp(); phase_end(); t[3] = GetTimeStamp(); @@ -1013,6 +1029,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran rename_dir_test(1, dir_iter, temp_path, progress); } } + tBefore[4] = GetTimeStamp(); phase_end(); t[4] = GetTimeStamp(); @@ -1048,6 +1065,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } + tBefore[5] = GetTimeStamp(); phase_end(); t[5] = GetTimeStamp(); @@ -1067,28 +1085,16 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* calculate times */ if (o.create_only) { - res->rate[MDTEST_DIR_CREATE_NUM] = o.items*size/(t[1] - t[0]); - res->time[MDTEST_DIR_CREATE_NUM] = t[1] - t[0]; - res->items[MDTEST_DIR_CREATE_NUM] = o.items*size; - res->stonewall_last_item[MDTEST_DIR_CREATE_NUM] = o.items; + updateResult(res, MDTEST_DIR_CREATE_NUM, o.items*size, 1, t, tBefore); } if (o.stat_only) { - res->rate[MDTEST_DIR_STAT_NUM] = o.items*size/(t[2] - t[1]); - res->time[MDTEST_DIR_STAT_NUM] = t[2] - t[1]; - res->items[MDTEST_DIR_STAT_NUM] = o.items*size; - res->stonewall_last_item[MDTEST_DIR_STAT_NUM] = o.items; + updateResult(res, MDTEST_DIR_STAT_NUM, o.items*size, 2, t, tBefore); } if (o.read_only) { - res->rate[MDTEST_DIR_READ_NUM] = o.items*size/(t[3] - t[2]); - res->time[MDTEST_DIR_READ_NUM] = t[3] - t[2]; - res->items[MDTEST_DIR_READ_NUM] = o.items*size; - res->stonewall_last_item[MDTEST_DIR_READ_NUM] = o.items; + updateResult(res, MDTEST_DIR_READ_NUM, 
o.items*size, 3, t, tBefore); } if (o.remove_only) { - res->rate[MDTEST_DIR_REMOVE_NUM] = o.items*size/(t[5] - t[4]); - res->time[MDTEST_DIR_REMOVE_NUM] = t[5] - t[4]; - res->items[MDTEST_DIR_REMOVE_NUM] = o.items*size; - res->stonewall_last_item[MDTEST_DIR_REMOVE_NUM] = o.items; + updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items*size, 5, t, tBefore); } VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[0]); VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[1]); @@ -1177,11 +1183,13 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { int size; double t[5] = {0}; + double tBefore[5] = {0}; char temp_path[MAX_PATHLEN]; MPI_Comm_size(testComm, &size); VERBOSE(3,5,"Entering file_test on %s", path); + tBefore[0] = GetTimeStamp(); MPI_Barrier(testComm); t[0] = GetTimeStamp(); @@ -1215,6 +1223,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } + tBefore[1] = GetTimeStamp(); phase_end(); t[1] = GetTimeStamp(); @@ -1238,6 +1247,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } + tBefore[2] = GetTimeStamp(); phase_end(); t[2] = GetTimeStamp(); @@ -1265,6 +1275,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } + tBefore[3] = GetTimeStamp(); phase_end(); t[3] = GetTimeStamp(); @@ -1295,6 +1306,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } + tBefore[4] = GetTimeStamp(); phase_end(); t[4] = GetTimeStamp(); if (o.remove_only) { @@ -1318,28 +1330,16 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro mdtest_results_t * res = & o.summary_table[iteration]; /* calculate times */ if (o.create_only) { - res->rate[MDTEST_FILE_CREATE_NUM] = 
o.items*size/(t[1] - t[0]); - res->time[MDTEST_FILE_CREATE_NUM] = t[1] - t[0]; - res->items[MDTEST_FILE_CREATE_NUM] = o.items*o.size; - res->stonewall_last_item[MDTEST_FILE_CREATE_NUM] = o.items; + updateResult(res, MDTEST_FILE_CREATE_NUM, o.items*size, 1, t, tBefore); } if (o.stat_only) { - res->rate[MDTEST_FILE_STAT_NUM] = o.items*size/(t[2] - t[1]); - res->time[MDTEST_FILE_STAT_NUM] = t[2] - t[1]; - res->items[MDTEST_FILE_STAT_NUM] = o.items*o.size; - res->stonewall_last_item[MDTEST_FILE_STAT_NUM] = o.items; + updateResult(res, MDTEST_FILE_STAT_NUM, o.items*size, 2, t, tBefore); } if (o.read_only) { - res->rate[MDTEST_FILE_READ_NUM] = o.items*o.size/(t[3] - t[2]); - res->time[MDTEST_FILE_READ_NUM] = t[3] - t[2]; - res->items[MDTEST_FILE_READ_NUM] = o.items*o.size; - res->stonewall_last_item[MDTEST_FILE_READ_NUM] = o.items; + updateResult(res, MDTEST_FILE_READ_NUM, o.items*size, 3, t, tBefore); } if (o.remove_only) { - res->rate[MDTEST_FILE_REMOVE_NUM] = o.items*o.size/(t[4] - t[3]); - res->time[MDTEST_FILE_REMOVE_NUM] = t[4] - t[3]; - res->items[MDTEST_FILE_REMOVE_NUM] = o.items*o.size; - res->stonewall_last_item[MDTEST_FILE_REMOVE_NUM] = o.items; + updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items*size, 4, t, tBefore); } VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[4]); @@ -1353,17 +1353,17 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro char const * mdtest_test_name(int i){ switch (i) { - case MDTEST_DIR_CREATE_NUM: return "Directory creation :"; - case MDTEST_DIR_STAT_NUM: return "Directory stat :"; - case MDTEST_DIR_READ_NUM: return NULL; - case MDTEST_DIR_REMOVE_NUM: return "Directory removal :"; - case MDTEST_DIR_RENAME_NUM: return "Directory rename :"; - case MDTEST_FILE_CREATE_NUM: return "File creation :"; - case MDTEST_FILE_STAT_NUM: return "File stat :"; - case MDTEST_FILE_READ_NUM: return "File read :"; - case MDTEST_FILE_REMOVE_NUM: return "File removal 
:"; - case MDTEST_TREE_CREATE_NUM: return "Tree creation :"; - case MDTEST_TREE_REMOVE_NUM: return "Tree removal :"; + case MDTEST_DIR_CREATE_NUM: return "Directory creation"; + case MDTEST_DIR_STAT_NUM: return "Directory stat"; + case MDTEST_DIR_READ_NUM: return "Directory read"; + case MDTEST_DIR_REMOVE_NUM: return "Directory removal"; + case MDTEST_DIR_RENAME_NUM: return "Directory rename"; + case MDTEST_FILE_CREATE_NUM: return "File creation"; + case MDTEST_FILE_STAT_NUM: return "File stat"; + case MDTEST_FILE_READ_NUM: return "File read"; + case MDTEST_FILE_REMOVE_NUM: return "File removal"; + case MDTEST_TREE_CREATE_NUM: return "Tree creation"; + case MDTEST_TREE_REMOVE_NUM: return "Tree removal"; default: return "ERR INVALID TESTNAME :"; } return NULL; @@ -1374,6 +1374,48 @@ int calc_allreduce_index(int iter, int rank, int op){ return iter * tableSize * o.size + rank * tableSize + op; } +/* + * Store the results of each process in a file + */ +static void StoreRankInformation(int iterations){ + const size_t size = sizeof(mdtest_results_t) * iterations; + if(rank == 0){ + FILE* fd = fopen(o.saveRankDetailsCSV, "a"); + if (fd == NULL){ + FAIL("Cannot open saveRankPerformanceDetails file for writes!"); + } + + mdtest_results_t * results = malloc(size * o.size); + MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, results, size / sizeof(double), MPI_DOUBLE, 0, testComm); + + for(int iter = 0; iter < iterations; iter++){ + for(int i=0; i < o.size; i++){ + mdtest_results_t * cur = & results[i * iterations + iter]; + char buff[4096]; + char * cpos = buff; + cpos += sprintf(cpos, "%d,%llu", i, (long long unsigned) o.items); + for(int e = 0; (e < MDTEST_TREE_CREATE_NUM) || (i == 0 && e < MDTEST_LAST_NUM); e++){ + if(cur->items[e] == 0){ + cpos += sprintf(cpos, ",,"); + }else{ + cpos += sprintf(cpos, ",%.10e,%.10e", cur->items[e] / cur->time_before_barrier[e], cur->time_before_barrier[e]); + } + } + cpos += sprintf(cpos, "\n"); + int ret = fwrite(buff, 
cpos - buff, 1, fd); + if(ret != 1){ + WARN("Couln't append to saveRankPerformanceDetailsCSV file\n"); + break; + } + } + } + fclose(fd); + }else{ + /* this is a hack for now assuming all datatypes in the structure are double */ + MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm); + } +} + void summarize_results(int iterations, int print_time) { char const * access; int i, j, k; @@ -1404,7 +1446,7 @@ void summarize_results(int iterations, int print_time) { continue; } curr = o.summary_table[j].rate[i]; - fprintf(out_logfile, "Rank %d Iter %d Test %s Rate: %e\n", rank, j, access, curr); + fprintf(out_logfile, "Rank %d Iter %d Test %-20s Rate: %e\n", rank, j, access, curr); } } } @@ -1452,8 +1494,8 @@ void summarize_results(int iterations, int print_time) { } VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); - VERBOSE(0,-1," Operation per Rank: Max Min Mean Std Dev per Iteration: Max Min Mean"); - VERBOSE(0,-1," --------- --- --- ---- ------- --- --- ----"); + VERBOSE(0,-1," Operation per Rank: Max Min Mean Std Dev per Iteration: Max Min Mean"); + VERBOSE(0,-1," --------- --- --- ---- ------- --- --- ----"); for (i = start; i < stop; i++) { min = max = all[i]; @@ -1500,7 +1542,7 @@ void summarize_results(int iterations, int print_time) { sd = sqrt(var); access = mdtest_test_name(i); if (i != 2) { - fprintf(out_logfile, " %s ", access); + fprintf(out_logfile, " %-22s ", access); fprintf(out_logfile, "%14.3f ", max); fprintf(out_logfile, "%14.3f ", min); fprintf(out_logfile, "%14.3f ", mean); @@ -1522,8 +1564,8 @@ void summarize_results(int iterations, int print_time) { } } if(stonewall_items != 0){ - fprintf(out_logfile, " File create (stonewall) : "); - fprintf(out_logfile, "%14s %14s %14.3f %14s\n", "NA", "NA", print_time ? 
stonewall_time : stonewall_items / stonewall_time, "NA"); + fprintf(out_logfile, " File create (stonewall) "); + fprintf(out_logfile, "%13s %14s %14.3f %14s\n", "NA", "NA", print_time ? stonewall_time : stonewall_items / stonewall_time, "NA"); } /* calculate tree create/remove rates, applies only to Rank 0 */ @@ -1565,7 +1607,7 @@ void summarize_results(int iterations, int print_time) { var = var / (iterations); sd = sqrt(var); access = mdtest_test_name(i); - fprintf(out_logfile, " %s ", access); + fprintf(out_logfile, " %-22s ", access); fprintf(out_logfile, "%14.3f ", max); fprintf(out_logfile, "%14.3f ", min); fprintf(out_logfile, "%14.3f ", mean); @@ -1673,6 +1715,28 @@ void md_validate_tests() { if(o.create_only && o.read_only && o.read_bytes > o.write_bytes) FAIL("When writing and reading files, read bytes must be smaller than write bytes"); + + if (rank == 0 && o.saveRankDetailsCSV){ + // check that the file is writeable, truncate it and add header + FILE* fd = fopen(o.saveRankDetailsCSV, "w"); + if (fd == NULL){ + FAIL("Cannot open saveRankPerformanceDetails file for write!"); + } + char * head = "rank,items"; + int ret = fwrite(head, strlen(head), 1, fd); + for(int e = 0; e < MDTEST_LAST_NUM; e++){ + char buf[1024]; + const char * str = mdtest_test_name(e); + + sprintf(buf, ",rate-%s,time-%s", str, str); + ret = fwrite(buf, strlen(buf), 1, fd); + if(ret != 1){ + FAIL("Cannot write header to saveRankPerformanceDetails file"); + } + } + fwrite("\n", 1, 1, fd); + fclose(fd); + } } void show_file_system_size(char *file_system) { @@ -2103,6 +2167,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, + {0, "saveRankPerformanceDetails", "Save the 
individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV}, LAST_OPTION }; options_all_t * global_options = airoi_create_all_module_options(options); @@ -2406,6 +2471,9 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * }else{ summarize_results(iterations, o.print_time); } + if(o.saveRankDetailsCSV){ + StoreRankInformation(iterations); + } if (i == 1 && stride > 1) { i = 0; } diff --git a/src/mdtest.h b/src/mdtest.h index 32d37fe..dbafccb 100644 --- a/src/mdtest.h +++ b/src/mdtest.h @@ -24,6 +24,7 @@ typedef struct { double rate[MDTEST_LAST_NUM]; /* Calculated throughput */ double time[MDTEST_LAST_NUM]; /* Time */ + double time_before_barrier[MDTEST_TREE_CREATE_NUM]; /* individual time before executing the barrier */ uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done */ /* Statistics when hitting the stonewall */ From a198b0404cd1b88eebc1820fd72f5e383f32956a Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 11 Feb 2021 15:50:48 +0000 Subject: [PATCH 131/154] Bugfix MDTest: only rank 0 shall print errors. --- src/mdtest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index cfd221a..d780fba 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2483,9 +2483,9 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * FAIL("Unable to remove test directory path %s", o.testdirpath); } - int total_errors; + int total_errors = 0; MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm); - if(total_errors){ + if(rank == 0 && total_errors){ VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors); } VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp()); From 4038ebdb21f498133f3bf53bc599bc89cb73e622 Mon Sep 17 00:00:00 2001 From: "Glenn K. 
Lockwood" Date: Wed, 17 Feb 2021 12:36:20 -0800 Subject: [PATCH 132/154] fix residual references to old main branch name --- README.md | 2 +- doc/sphinx/devDoc/release.rst | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 8f1c0c8..081752b 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=master)](https://travis-ci.org/hpc/ior) +# HPC IO Benchmark Repository [![Build Status](https://travis-ci.org/hpc/ior.svg?branch=main)](https://travis-ci.org/hpc/ior) This repository contains the IOR and mdtest parallel I/O benchmarks. The [official IOR/mdtest documentation][] can be found in the `docs/` subdirectory diff --git a/doc/sphinx/devDoc/release.rst b/doc/sphinx/devDoc/release.rst index 6fe6718..7a49687 100644 --- a/doc/sphinx/devDoc/release.rst +++ b/doc/sphinx/devDoc/release.rst @@ -42,7 +42,7 @@ Feature freezing for a new release 2. Append the "rc1+dev" designator to the Version field in the META file, and update the NEWS file to have this new version as the topmost heading 3. Commit and push this new branch -2. Update the ``Version:`` field in META `of the master branch` to be the `next` +2. Update the ``Version:`` field in META `of the main branch` to be the `next` release version, not the one whose features have just been frozen, and update the NEWS file as you did in step 2. 
@@ -55,12 +55,12 @@ For example, to feature-freeze for version 3.2:: $ git add NEWS META $ git commit -m "Update version for feature freeze" $ git push upstream 3.2 - $ git checkout master + $ git checkout main $ vim META # update the ``Version:`` field to 3.3.0+dev $ vim NEWS # update the topmost version number to 3.3.0+dev $ git add NEWS META $ git commit -m "Update version number" - $ git push upstream master + $ git push upstream main Creating a new release candidate -------------------------------- @@ -95,7 +95,7 @@ For example to release 3.2.0rc1:: Applying patches to a new microrelease -------------------------------------- -If a released version 3.2.0 has bugs, cherry-pick the fixes from master into the +If a released version 3.2.0 has bugs, cherry-pick the fixes from main into the 3.2 branch:: $ git checkout 3.2 @@ -108,19 +108,19 @@ Once you've accumulated enough bugs, move on to issuing a new release below. Creating a new release ---------------------- -This is a two-phase process because we need to ensure that NEWS in master +This is a two-phase process because we need to ensure that NEWS in main contains a full history of releases, and we achieve this by always merging -changes from master into a release branch. +changes from main into a release branch. -1. Check out master +1. Check out main 2. Ensure that the latest release notes for this release are reflected in NEWS -3. Commit that to master +3. Commit that to main Then work on the release branch: 1. Check out the relevant `major.minor` branch 2. Remove any "rcX" and "+dev" from the Version field in META -3. Cherry-pick your NEWS update commit from master into this release branch. +3. Cherry-pick your NEWS update commit from main into this release branch. Resolve conflicts and get rid of news that reflect future releases. 4. Build a release package as described above 5. 
Tag and commit the updated NEWS and META so one can easily recompile this @@ -131,11 +131,11 @@ Then work on the release branch: For example to release 3.2.0:: - $ git checkout master + $ git checkout main $ vim NEWS # add release notes from ``git log --oneline 3.2.0rc1..`` $ git commit -Let's say the above generated commit abc345e on master. Then:: +Let's say the above generated commit abc345e on main. Then:: $ git checkout 3.2 $ vim META # 3.2.0rc2+dev -> 3.2.0 @@ -150,7 +150,7 @@ Let's say the above generated commit abc345e on master. Then:: $ git add NEWS META $ git commit -m "Uptick version after release" -Then push your master and your release branch and also push tags:: +Then push your main and your release branch and also push tags:: - $ git checkout master && git push && git push --tags + $ git checkout main && git push && git push --tags $ git checkout 3.2 && git push && git push --tags From e78613d62d2e6ac6c1cf8ee3722cd2fd79ad6a14 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 18 Feb 2021 09:53:07 +0000 Subject: [PATCH 133/154] Improve and fix the reporting in MDTest. (#336) * Improve and fix the reporting in MDTest. Reporting per rank now outputs the performance of the individual rank (before the barrier), the iteration throughput includes the barrier time (if not -B=0 is set). Before, it was the time after the barrier. * Clarify the computation of the results. * MDTest improve CSV output to include aggregated result. 
--- src/mdtest.c | 532 ++++++++++++++++++++++++++++----------------------- src/mdtest.h | 13 +- 2 files changed, 304 insertions(+), 241 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index d780fba..3394675 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -899,11 +899,14 @@ void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank } static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, int t, double * times, double * tBefore){ - res->rate[test] = item_count/(times[t] - times[t-1]); res->time[test] = times[t] - times[t-1]; if(tBefore){ res->time_before_barrier[test] = tBefore[t] - times[t-1]; + }else{ + res->time_before_barrier[test] = res->time[test]; } + res->rate[test] = item_count/res->time[test]; + res->rate_before_barrier[test] = item_count/res->time_before_barrier[test]; res->items[test] = item_count; res->stonewall_last_item[test] = o.items; } @@ -984,7 +987,9 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran tBefore[2] = GetTimeStamp(); phase_end(); t[2] = GetTimeStamp(); - + if (o.rename_dirs && o.items > 1) { // moved close to execution + updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, 4, t, tBefore); + } /* read phase */ if (o.read_only) { for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ @@ -1034,10 +1039,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran t[4] = GetTimeStamp(); if (o.rename_dirs && o.items > 1) { // moved close to execution - res->rate[MDTEST_DIR_RENAME_NUM] = o.items*size/(t[4] - t[3]); - res->time[MDTEST_DIR_RENAME_NUM] = t[4] - t[3]; - res->items[MDTEST_DIR_RENAME_NUM] = o.items*size; - res->stonewall_last_item[MDTEST_DIR_RENAME_NUM] = o.items*size; + updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, 4, t, tBefore); } if (o.remove_only) { @@ -1085,16 +1087,16 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* calculate times */ if 
(o.create_only) { - updateResult(res, MDTEST_DIR_CREATE_NUM, o.items*size, 1, t, tBefore); + updateResult(res, MDTEST_DIR_CREATE_NUM, o.items, 1, t, tBefore); } if (o.stat_only) { - updateResult(res, MDTEST_DIR_STAT_NUM, o.items*size, 2, t, tBefore); + updateResult(res, MDTEST_DIR_STAT_NUM, o.items, 2, t, tBefore); } if (o.read_only) { - updateResult(res, MDTEST_DIR_READ_NUM, o.items*size, 3, t, tBefore); + updateResult(res, MDTEST_DIR_READ_NUM, o.items, 3, t, tBefore); } if (o.remove_only) { - updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items*size, 5, t, tBefore); + updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items, 5, t, tBefore); } VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[0]); VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[1]); @@ -1110,6 +1112,7 @@ int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, VERBOSE(1,1,"stonewall hit with %lld items", (long long) items_done ); MPI_Allreduce(& items_done, & max_iter, 1, MPI_LONG_LONG_INT, MPI_MAX, testComm); o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = GetTimeStamp() - tstart; + o.summary_table[iteration].stonewall_last_item[MDTEST_FILE_CREATE_NUM] = items_done; *out_max_iter = max_iter; // continue to the maximum... 
@@ -1330,16 +1333,16 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro mdtest_results_t * res = & o.summary_table[iteration]; /* calculate times */ if (o.create_only) { - updateResult(res, MDTEST_FILE_CREATE_NUM, o.items*size, 1, t, tBefore); + updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, 1, t, tBefore); } if (o.stat_only) { - updateResult(res, MDTEST_FILE_STAT_NUM, o.items*size, 2, t, tBefore); + updateResult(res, MDTEST_FILE_STAT_NUM, o.items, 2, t, tBefore); } if (o.read_only) { - updateResult(res, MDTEST_FILE_READ_NUM, o.items*size, 3, t, tBefore); + updateResult(res, MDTEST_FILE_READ_NUM, o.items, 3, t, tBefore); } if (o.remove_only) { - updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items*size, 4, t, tBefore); + updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, 4, t, tBefore); } VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[4]); @@ -1369,15 +1372,10 @@ char const * mdtest_test_name(int i){ return NULL; } -int calc_allreduce_index(int iter, int rank, int op){ - int tableSize = MDTEST_LAST_NUM; - return iter * tableSize * o.size + rank * tableSize + op; -} - /* * Store the results of each process in a file */ -static void StoreRankInformation(int iterations){ +static void StoreRankInformation(int iterations, mdtest_results_t * agg){ const size_t size = sizeof(mdtest_results_t) * iterations; if(rank == 0){ FILE* fd = fopen(o.saveRankDetailsCSV, "a"); @@ -1385,16 +1383,28 @@ static void StoreRankInformation(int iterations){ FAIL("Cannot open saveRankPerformanceDetails file for writes!"); } - mdtest_results_t * results = malloc(size * o.size); + mdtest_results_t * results = safeMalloc(size * o.size); MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, results, size / sizeof(double), MPI_DOUBLE, 0, testComm); + char buff[4096]; + char * cpos = buff; + cpos += sprintf(cpos, "all,%llu", (long long unsigned) o.items); + for(int e = 0; e < MDTEST_LAST_NUM; e++){ 
+ if(agg->items[e] == 0){ + cpos += sprintf(cpos, ",,"); + }else{ + cpos += sprintf(cpos, ",%.10e,%.10e", agg->items[e] / agg->time[e], agg->time[e]); + } + } + cpos += sprintf(cpos, "\n"); + int ret = fwrite(buff, cpos - buff, 1, fd); + for(int iter = 0; iter < iterations; iter++){ for(int i=0; i < o.size; i++){ mdtest_results_t * cur = & results[i * iterations + iter]; - char buff[4096]; - char * cpos = buff; - cpos += sprintf(cpos, "%d,%llu", i, (long long unsigned) o.items); - for(int e = 0; (e < MDTEST_TREE_CREATE_NUM) || (i == 0 && e < MDTEST_LAST_NUM); e++){ + cpos = buff; + cpos += sprintf(cpos, "%d,", i); + for(int e = 0; e < MDTEST_TREE_CREATE_NUM; e++){ if(cur->items[e] == 0){ cpos += sprintf(cpos, ",,"); }else{ @@ -1402,7 +1412,7 @@ static void StoreRankInformation(int iterations){ } } cpos += sprintf(cpos, "\n"); - int ret = fwrite(buff, cpos - buff, 1, fd); + ret = fwrite(buff, cpos - buff, 1, fd); if(ret != 1){ WARN("Couln't append to saveRankPerformanceDetailsCSV file\n"); break; @@ -1410,213 +1420,273 @@ static void StoreRankInformation(int iterations){ } } fclose(fd); + free(results); }else{ /* this is a hack for now assuming all datatypes in the structure are double */ MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm); } } -void summarize_results(int iterations, int print_time) { - char const * access; - int i, j, k; - int start, stop, tableSize = MDTEST_LAST_NUM; - double min, max, mean, sd, sum, var, curr = 0; - double imin, imax, isum, icur; // calculation per iteration +static mdtest_results_t* get_result_index(mdtest_results_t* all_results, int proc, int iter, int interation_count){ + return & all_results[proc * interation_count + iter]; +} - double all[iterations * o.size * tableSize]; +static void summarize_results_rank0(int iterations, mdtest_results_t * all_results, int print_time) { + int start, stop; + double min, max, mean, sd, sum, var, curr = 0; + double imin, imax, 
imean, isum, icur; // calculation per iteration + char const * access; + /* if files only access, skip entries 0-3 (the dir tests) */ + if (o.files_only && ! o.dirs_only) { + start = MDTEST_FILE_CREATE_NUM; + } else { + start = 0; + } + /* if directories only access, skip entries 4-7 (the file tests) */ + if (o.dirs_only && !o.files_only) { + stop = MDTEST_FILE_CREATE_NUM; + } else { + stop = MDTEST_TREE_CREATE_NUM; + } - VERBOSE(1,-1,"Entering summarize_results..." ); + /* special case: if no directory or file tests, skip all */ + if (!o.dirs_only && !o.files_only) { + start = stop = 0; + } - MPI_Barrier(testComm); - for(int i=0; i < iterations; i++){ - if(print_time){ - MPI_Gather(& o.summary_table[i].time[0], tableSize, MPI_DOUBLE, & all[i*tableSize * o.size], tableSize, MPI_DOUBLE, 0, testComm); - }else{ - MPI_Gather(& o.summary_table[i].rate[0], tableSize, MPI_DOUBLE, & all[i*tableSize * o.size], tableSize, MPI_DOUBLE, 0, testComm); - } - } - - if(o.print_all_proc && 0){ - // This code prints the result table for debugging - for (i = 0; i < tableSize; i++) { - for (j = 0; j < iterations; j++) { - access = mdtest_test_name(i); - if(access == NULL){ - continue; - } - curr = o.summary_table[j].rate[i]; - fprintf(out_logfile, "Rank %d Iter %d Test %-20s Rate: %e\n", rank, j, access, curr); - } - } - } - - if (rank != 0) { - return; - } - - /* if files only access, skip entries 0-3 (the dir tests) */ - if (o.files_only && ! o.dirs_only) { - start = MDTEST_FILE_CREATE_NUM; - } else { - start = 0; - } - - /* if directories only access, skip entries 4-7 (the file tests) */ - if (o.dirs_only && !o.files_only) { - stop = MDTEST_FILE_CREATE_NUM; - } else { - stop = MDTEST_TREE_CREATE_NUM; - } - - /* special case: if no directory or file tests, skip all */ - if (!o.dirs_only && !o.files_only) { - start = stop = 0; - } - - if(o.print_all_proc){ - fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? 
"time" : "rate"); - for (j = 0; j < iterations; j++) { - fprintf(out_logfile, "iteration: %d\n", j); - for (i = start; i < tableSize; i++) { - access = mdtest_test_name(i); - if(access == NULL){ - continue; - } - fprintf(out_logfile, "Test %s", access); - for (k=0; k < o.size; k++) { - curr = all[calc_allreduce_index(j, k, i)]; - fprintf(out_logfile, "%c%e", (k==0 ? ' ': ','), curr); - } - fprintf(out_logfile, "\n"); - } - } - } - - VERBOSE(0,-1,"\nSUMMARY %s: (of %d iterations)", print_time ? "time": "rate", iterations); - VERBOSE(0,-1," Operation per Rank: Max Min Mean Std Dev per Iteration: Max Min Mean"); - VERBOSE(0,-1," --------- --- --- ---- ------- --- --- ----"); - - for (i = start; i < stop; i++) { - min = max = all[i]; - sum = var = 0; - imin = 1e308; - isum = imax = 0; - for (j = 0; j < iterations; j++) { - icur = print_time ? 0 : 1e308; - for (k=0; k < o.size; k++) { - curr = all[calc_allreduce_index(j, k, i)]; - if (min > curr) { - min = curr; - } - if (max < curr) { - max = curr; - } - if(print_time){ - if(icur < curr){ - icur = curr; - } - }else{ - if(icur > curr){ - icur = curr; - } - } - sum += curr; - } - if(icur > imax){ - imax = icur; - } - if(icur < imin){ - imin = icur; - } - isum += icur; - } - mean = sum / (iterations * o.size); - for (k=0; k < o.size; k++) { - for (j = 0; j < iterations; j++) { - var += pow((mean - all[(k*tableSize*iterations) - + (j*tableSize) + i]), 2); - } - } - var = var / (iterations * o.size); - sd = sqrt(var); - access = mdtest_test_name(i); - if (i != 2) { - fprintf(out_logfile, " %-22s ", access); - fprintf(out_logfile, "%14.3f ", max); - fprintf(out_logfile, "%14.3f ", min); - fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%14.3f ", sd); - fprintf(out_logfile, "%18.3f ", imax); - fprintf(out_logfile, "%14.3f ", imin); - fprintf(out_logfile, "%14.3f\n", isum / iterations); - fflush(out_logfile); - } - } - - // TODO generalize once more stonewall timers are supported - double stonewall_time = 0; - 
uint64_t stonewall_items = 0; - for(int i=0; i < iterations; i++){ - if(o.summary_table[i].stonewall_time[MDTEST_FILE_CREATE_NUM]){ - stonewall_time += o.summary_table[i].stonewall_time[MDTEST_FILE_CREATE_NUM]; - stonewall_items += o.summary_table[i].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]; - } - } - if(stonewall_items != 0){ - fprintf(out_logfile, " File create (stonewall) "); - fprintf(out_logfile, "%13s %14s %14.3f %14s\n", "NA", "NA", print_time ? stonewall_time : stonewall_items / stonewall_time, "NA"); - } - - /* calculate tree create/remove rates, applies only to Rank 0 */ - for (i = MDTEST_TREE_CREATE_NUM; i < tableSize; i++) { - min = max = all[i]; - sum = var = 0; - imin = imax = all[i]; - isum = 0; - for (j = 0; j < iterations; j++) { - if(print_time){ - curr = o.summary_table[j].time[i]; - }else{ - curr = o.summary_table[j].rate[i]; - } - if (min > curr) { - min = curr; - } - if (max < curr) { - max = curr; - } - sum += curr; - if(curr > imax){ - imax = curr; - } - if(curr < imin){ - imin = curr; - } - } - mean = sum / (iterations); - for (j = 0; j < iterations; j++) { - if(print_time){ - curr = o.summary_table[j].time[i]; - }else{ - curr = o.summary_table[j].rate[i]; - } - - var += pow((mean - curr), 2); - } - var = var / (iterations); - sd = sqrt(var); + if(o.print_all_proc){ + fprintf(out_logfile, "\nPer process result (%s):\n", print_time ? 
"time" : "rate"); + for (int j = 0; j < iterations; j++) { + fprintf(out_logfile, "iteration: %d\n", j); + for (int i = start; i < MDTEST_LAST_NUM; i++) { access = mdtest_test_name(i); - fprintf(out_logfile, " %-22s ", access); - fprintf(out_logfile, "%14.3f ", max); - fprintf(out_logfile, "%14.3f ", min); - fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%14.3f ", sd); - fprintf(out_logfile, "%18.3f ", imax); - fprintf(out_logfile, "%14.3f ", imin); - fprintf(out_logfile, "%14.3f\n", sum / iterations); - fflush(out_logfile); + if(access == NULL){ + continue; + } + fprintf(out_logfile, "Test %s", access); + for (int k=0; k < o.size; k++) { + mdtest_results_t * cur = get_result_index(all_results, k, j, iterations); + if(print_time){ + curr = cur->time_before_barrier[i]; + }else{ + curr = cur->rate_before_barrier[i]; + } + fprintf(out_logfile, "%c%e", (k==0 ? ' ': ','), curr); + } + fprintf(out_logfile, "\n"); + } } + } + + VERBOSE(0, -1, "\nSUMMARY %s: (of %d iterations)", print_time ? "time" : "rate", iterations); + VERBOSE(0, -1, + " Operation per Rank: Max Min Mean " + " per Iteration: Max Min Mean Std Dev"); + VERBOSE(0, -1, + " --------- --- --- ---- " + " --- --- ---- -------"); + for (int i = start; i < stop; i++) { + min = 1e308; + max = 0; + sum = var = 0; + imin = 1e308; + isum = imax = 0; + double iter_result[iterations]; + for (int j = 0; j < iterations; j++) { + icur = print_time ? 
0 : 1e308; + for (int k = 0; k < o.size; k++) { + mdtest_results_t * cur = get_result_index(all_results, k, j, iterations); + if(print_time){ + curr = cur->time_before_barrier[i]; + }else{ + curr = cur->rate_before_barrier[i]; + } + if (min > curr) { + min = curr; + } + if (max < curr) { + max = curr; + } + sum += curr; + + if (print_time) { + curr = cur->time[i]; + if (icur < curr) { + icur = curr; + } + } else { + curr = cur->rate[i]; + if (icur > curr) { + icur = curr; + } + } + } + + if (icur > imax) { + imax = icur; + } + if (icur < imin) { + imin = icur; + } + isum += icur; + if(print_time){ + iter_result[j] = icur; + }else{ + iter_result[j] = icur * o.size; + } + } + mean = sum / iterations / o.size; + imean = isum / iterations; + if(! print_time){ + imax *= o.size; + imin *= o.size; + isum *= o.size; + imean *= o.size; + } + for (int j = 0; j < iterations; j++) { + var += (imean - iter_result[j]) * (imean - iter_result[j]); + } + var = var / (iterations - 1); + sd = sqrt(var); + access = mdtest_test_name(i); + if (i != 2) { + fprintf(out_logfile, " %-22s ", access); + fprintf(out_logfile, "%14.3f ", max); + fprintf(out_logfile, "%14.3f ", min); + fprintf(out_logfile, "%14.3f ", mean); + fprintf(out_logfile, "%18.3f ", imax); + fprintf(out_logfile, "%14.3f ", imin); + fprintf(out_logfile, "%14.3f ", imean); + fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 
0 : sd); + fflush(out_logfile); + } + } + + /* calculate tree create/remove rates, applies only to Rank 0 */ + for (int i = MDTEST_TREE_CREATE_NUM; i < MDTEST_LAST_NUM; i++) { + min = imin = 1e308; + max = imax = 0; + sum = var = 0; + for (int j = 0; j < iterations; j++) { + if(print_time){ + curr = o.summary_table[j].time[i]; + }else{ + curr = o.summary_table[j].rate[i]; + } + if (min > curr) { + min = curr; + } + if (max < curr) { + max = curr; + } + sum += curr; + if(curr > imax){ + imax = curr; + } + if(curr < imin){ + imin = curr; + } + } + + mean = sum / (iterations); + + for (int j = 0; j < iterations; j++) { + if(print_time){ + curr = o.summary_table[j].time[i]; + }else{ + curr = o.summary_table[j].rate[i]; + } + var += (mean - curr)*(mean - curr); + } + var = var / (iterations - 1); + sd = sqrt(var); + access = mdtest_test_name(i); + fprintf(out_logfile, " %-22s ", access); + fprintf(out_logfile, "%14.3f ", max); + fprintf(out_logfile, "%14.3f ", min); + fprintf(out_logfile, "%14.3f ", mean); + fprintf(out_logfile, "%18.3f ", imax); + fprintf(out_logfile, "%14.3f ", imin); + fprintf(out_logfile, "%14.3f ", sum / iterations); + fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 
0 : sd); + fflush(out_logfile); + } +} + +/* + Output the results and summarize them into rank 0's o.summary_table + */ +void summarize_results(int iterations, mdtest_results_t * results) { + const size_t size = sizeof(mdtest_results_t) * iterations; + mdtest_results_t * all_results = NULL; + if(rank == 0){ + all_results = safeMalloc(size * o.size); + memset(all_results, 0, size * o.size); + MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, all_results, size / sizeof(double), MPI_DOUBLE, 0, testComm); + // calculate the aggregated values for all processes + for(int j=0; j < iterations; j++){ + for(int i=0; i < MDTEST_LAST_NUM; i++){ + //double sum_rate = 0; + double max_time = 0; + double max_stonewall_time = 0; + uint64_t sum_items = 0; + + // reduce over the processes + for(int p=0; p < o.size; p++){ + mdtest_results_t * cur = get_result_index(all_results, p, j, iterations); + //sum_rate += all_results[p + j*p]->rate[i]; + double t = cur->time[i]; + max_time = max_time < t ? t : max_time; + + sum_items += cur->items[i]; + + t = cur->stonewall_time[i]; + max_stonewall_time = max_stonewall_time < t ? 
t : max_stonewall_time; + } + + results[j].items[i] = sum_items; + results[j].time[i] = max_time; + results[j].stonewall_time[i] = max_stonewall_time; + if(sum_items == 0){ + results[j].rate[i] = 0.0; + }else{ + results[j].rate[i] = sum_items / max_time; + } + + /* These results have already been reduced to Rank 0 */ + results[j].stonewall_item_sum[i] = o.summary_table[j].stonewall_item_sum[i]; + results[j].stonewall_item_min[i] = o.summary_table[j].stonewall_item_min[i]; + results[j].stonewall_time[i] = o.summary_table[j].stonewall_time[i]; + } + } + }else{ + MPI_Gather(o.summary_table, size / sizeof(double), MPI_DOUBLE, NULL, size / sizeof(double), MPI_DOUBLE, 0, testComm); + } + + /* share global results across processes as these are returned by the API */ + MPI_Bcast(results, size / sizeof(double), MPI_DOUBLE, 0, testComm); + + /* update relevant result values with local values as these are returned by the API */ + for(int j=0; j < iterations; j++){ + for(int i=0; i < MDTEST_LAST_NUM; i++){ + results[j].time_before_barrier[i] = o.summary_table[j].time_before_barrier[i]; + results[j].stonewall_last_item[i] = o.summary_table[j].stonewall_last_item[i]; + } + } + + if(rank != 0){ + return; + } + + if (o.print_rate_and_time){ + summarize_results_rank0(iterations, all_results, 0); + summarize_results_rank0(iterations, all_results, 1); + }else{ + summarize_results_rank0(iterations, all_results, o.print_time); + } + + free(all_results); } /* Checks to see if the test setup is valid. If it isn't, fail. 
*/ @@ -2413,12 +2483,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* setup summary table for recording results */ o.summary_table = (mdtest_results_t *) safeMalloc(iterations * sizeof(mdtest_results_t)); memset(o.summary_table, 0, iterations * sizeof(mdtest_results_t)); - for(int i=0; i < iterations; i++){ - for(int j=0; j < MDTEST_LAST_NUM; j++){ - o.summary_table[i].rate[j] = 0.0; - o.summary_table[i].time[j] = 0.0; - } - } if (o.unique_dir_per_task) { sprintf(o.base_tree_name, "mdtest_tree.%d", rank); @@ -2426,6 +2490,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * sprintf(o.base_tree_name, "mdtest_tree"); } + mdtest_results_t * aggregated_results = safeMalloc(iterations * sizeof(mdtest_results_t)); + /* default use shared directory */ strcpy(o.mk_name, "mdtest.shared."); strcpy(o.stat_name, "mdtest.shared."); @@ -2465,14 +2531,9 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * // keep track of the current status for stonewalling mdtest_iteration(i, j, testgroup, & o.summary_table[j]); } - if (o.print_rate_and_time){ - summarize_results(iterations, 0); - summarize_results(iterations, 1); - }else{ - summarize_results(iterations, o.print_time); - } + summarize_results(iterations, aggregated_results); if(o.saveRankDetailsCSV){ - StoreRankInformation(iterations); + StoreRankInformation(iterations, aggregated_results); } if (i == 1 && stride > 1) { i = 0; @@ -2501,6 +2562,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if (o.write_bytes > 0) { free(o.write_buffer); } + free(o.summary_table); - return o.summary_table; + return aggregated_results; } diff --git a/src/mdtest.h b/src/mdtest.h index dbafccb..09f14be 100644 --- a/src/mdtest.h +++ b/src/mdtest.h @@ -22,16 +22,17 @@ typedef enum { typedef struct { - double rate[MDTEST_LAST_NUM]; /* Calculated throughput */ + double rate[MDTEST_LAST_NUM]; /* Calculated throughput 
after the barrier */ + double rate_before_barrier[MDTEST_LAST_NUM]; /* Calculated throughput before the barrier */ double time[MDTEST_LAST_NUM]; /* Time */ double time_before_barrier[MDTEST_TREE_CREATE_NUM]; /* individual time before executing the barrier */ - uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done */ + uint64_t items[MDTEST_LAST_NUM]; /* Number of operations done in this process*/ /* Statistics when hitting the stonewall */ - double stonewall_time[MDTEST_LAST_NUM]; /* runtime until completion / hit of the stonewall */ - uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* Max number of items a process has accessed */ - uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items a process has accessed */ - uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed until stonewall */ + double stonewall_time[MDTEST_LAST_NUM]; /* Max runtime of any process until completion / hit of the stonewall */ + uint64_t stonewall_last_item[MDTEST_LAST_NUM]; /* The number of items a process has accessed */ + uint64_t stonewall_item_min[MDTEST_LAST_NUM]; /* Min number of items any process has accessed */ + uint64_t stonewall_item_sum[MDTEST_LAST_NUM]; /* Total number of items accessed by all processes until stonewall */ } mdtest_results_t; mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * out_logfile); From 74df77430fbaaaadcd2ac0adb1f997fe9af84a9d Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 18 Feb 2021 10:40:42 +0000 Subject: [PATCH 134/154] Feature ior gpu #284 (#323) * Basic support for memory allocation on GPU using CUDA unified memory. Partially addressing #284. IOR support completed. * Support for GPU alloc in MDTest and MD-Workbench * Option: support repeated parsing of same option (allows option sharing across modules). * Checks for gpuDirect * Integrate gpuDirect options and basic hooks, more testing to be done. 
* POSIX: basic gpuDirect implementation working with fake-gpudirect library. * CUDA allow setting of DeviceID for IOR (not yet MDTest). * CUDA/GPUDirect Support --with-X= * Bugfix in option parser for flags that are part of an argument for an option, e.g., -O=1, if 1 is a flag it is wrongly assumed to be a flag. --- configure.ac | 46 ++++++++++++ src/Makefile.am | 4 + src/aiori-POSIX.c | 179 +++++++++++++++++++++++++++++++++----------- src/aiori-POSIX.h | 2 +- src/ior.c | 57 ++++---------- src/ior.h | 10 ++- src/md-workbench.c | 26 ++++--- src/mdtest.c | 16 ++-- src/option.c | 12 ++- src/parse_options.c | 25 ++++++- src/utilities.c | 77 ++++++++++++++++++- src/utilities.h | 3 +- 12 files changed, 338 insertions(+), 119 deletions(-) diff --git a/configure.ac b/configure.ac index a9d106a..b758f5b 100755 --- a/configure.ac +++ b/configure.ac @@ -73,6 +73,52 @@ AS_IF([test "$ac_cv_header_gpfs_h" = "yes" -o "$ac_cv_header_gpfs_fcntl_h" = "ye ]) ]) +# Check for CUDA +AC_ARG_WITH([cuda], + [AS_HELP_STRING([--with-cuda], + [support configurable CUDA @<:@default=check@:>@])], + [], [with_cuda=check]) + +AS_IF([test "x$with_cuda" != xno], [ + LDFLAGS="$LDFLAGS -L$with_cuda/lib64 -Wl,--enable-new-dtags -Wl,-rpath=$with_cuda/lib64" + CPPFLAGS="$CPPFLAGS -I$with_cuda/include" + + AC_CHECK_HEADERS([cuda_runtime.h], [AC_DEFINE([HAVE_CUDA], [], [CUDA GPU API found])], [ + if test "x$with_cuda" != xcheck; then + AC_MSG_FAILURE([--with-cuda was given, not found]) + fi + ]) +AS_IF([test "$ac_cv_header_cuda_runtime_h" = "yes"], [ + AC_SEARCH_LIBS([cudaMalloc], [cudart cudart_static], [], + [AC_MSG_ERROR([Library containing cudaMalloc symbol not found])]) + ]) +]) +AM_CONDITIONAL([USE_CUDA], [test x$with_cuda = xyes]) + +# Check for GPUDirect +AC_ARG_WITH([gpuDirect], + [AS_HELP_STRING([--with-gpuDirect], + [support configurable GPUDirect @<:@default=check@:>@])], + [], [with_gpuDirect=check]) + +AS_IF([test "x$with_gpuDirect" != xno], [ + LDFLAGS="$LDFLAGS -L$with_gpuDirect/lib64 
-Wl,--enable-new-dtags -Wl,-rpath=$with_gpuDirect/lib64" + CPPFLAGS="$CPPFLAGS -I$with_gpuDirect/include" + + AC_CHECK_HEADERS([cufile.h], [AC_DEFINE([HAVE_GPU_DIRECT], [], [GPUDirect API found])], [ + if test "x$with_gpuDirect" != xcheck; then + AC_MSG_FAILURE([--with-gpuDirect was given, not found]) + fi + ]) +AS_IF([test "$ac_cv_header_cufile_h" = "yes"], [ + AC_SEARCH_LIBS([cuFileDriverOpen], [cufile], [], + [AC_MSG_ERROR([Library containing cuFileDriverOpen symbol not found])]) + ]) +]) +AM_CONDITIONAL([HAVE_GPU_DIRECT], [test x$with_gpuDirect = xyes]) + + + # Check for system capabilities AC_SYS_LARGEFILE diff --git a/src/Makefile.am b/src/Makefile.am index 52461a1..fdf746f 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,6 +41,10 @@ extraLDFLAGS += -L/opt/hadoop-2.2.0/lib/native extraLDADD += -lhdfs endif +if USE_CUDA +extraLDADD += -lcudart +endif + if USE_HDF5_AIORI extraSOURCES += aiori-HDF5.c extraLDADD += -lhdf5 -lz diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 5f6261a..5040a53 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -57,6 +57,20 @@ #include "aiori-POSIX.h" +#ifdef HAVE_GPU_DIRECT +typedef long long loff_t; +#include +#include +#endif + +typedef struct { + int fd; +#ifdef HAVE_GPU_DIRECT + CUfileHandle_t cf_handle; +#endif +} posix_fd; + + #ifndef open64 /* necessary for TRU64 -- */ # define open64 open /* unlikely, but may pose */ #endif /* not open64 */ /* conflicting prototypes */ @@ -69,7 +83,30 @@ # define O_BINARY 0 #endif +#ifdef HAVE_GPU_DIRECT +static const char* cuFileGetErrorString(CUfileError_t status){ + if(IS_CUDA_ERR(status)){ + return cudaGetErrorString(status.err); + } + return strerror(status.err); +} + +static void init_cufile(posix_fd * pfd){ + CUfileDescr_t cf_descr = (CUfileDescr_t){ + .handle.fd = pfd->fd, + .type = CU_FILE_HANDLE_TYPE_OPAQUE_FD + }; + CUfileError_t status = cuFileHandleRegister(& pfd->cf_handle, & cf_descr); + if(status.err != CU_FILE_SUCCESS){ + EWARNF("Could not register 
handle %s", cuFileGetErrorString(status)); + } +} +#endif + /**************************** P R O T O T Y P E S *****************************/ +static void POSIX_Initialize(aiori_mod_opt_t * options); +static void POSIX_Finalize(aiori_mod_opt_t * options); + static IOR_offset_t POSIX_Xfer(int, aiori_fd_t *, IOR_size_t *, IOR_offset_t, IOR_offset_t, aiori_mod_opt_t *); @@ -105,6 +142,9 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o {0, "posix.lustre.stripesize", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_stripe_size}, {0, "posix.lustre.startost", "", OPTION_OPTIONAL_ARGUMENT, 'd', & o->lustre_start_ost}, {0, "posix.lustre.ignorelocks", "", OPTION_FLAG, 'd', & o->lustre_ignore_locks}, +#endif +#ifdef HAVE_GPU_DIRECT + {0, "gpuDirect", "allocate I/O buffers on the GPU", OPTION_FLAG, 'd', & o->gpuDirect}, #endif LAST_OPTION }; @@ -120,6 +160,8 @@ option_help * POSIX_options(aiori_mod_opt_t ** init_backend_options, aiori_mod_o ior_aiori_t posix_aiori = { .name = "POSIX", .name_legacy = NULL, + .initialize = POSIX_Initialize, + .finalize = POSIX_Finalize, .create = POSIX_Create, .mknod = POSIX_Mknod, .open = POSIX_Open, @@ -156,6 +198,14 @@ int POSIX_check_params(aiori_mod_opt_t * param){ ERR("beegfsChunkSize must be a power of two and >64k"); if(o->lustre_stripe_count != -1 || o->lustre_stripe_size != 0) o->lustre_set_striping = 1; + if(o->gpuDirect && ! 
o->direct_io){ + ERR("GPUDirect required direct I/O to be used!"); + } +#ifndef HAVE_GPU_DIRECT + if(o->gpuDirect){ + ERR("GPUDirect support is not compiled"); + } +#endif return 0; } @@ -352,14 +402,10 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) { int fd_oflag = O_BINARY; int mode = 0664; - int *fd; - - fd = (int *)malloc(sizeof(int)); - if (fd == NULL) - ERR("Unable to malloc file descriptor"); + posix_fd * pfd = safeMalloc(sizeof(posix_fd)); posix_options_t * o = (posix_options_t*) param; if (o->direct_io == TRUE){ - set_o_direct_flag(&fd_oflag); + set_o_direct_flag(& fd_oflag); } if(hints->dryRun) @@ -378,8 +424,8 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) if (!hints->filePerProc && rank != 0) { MPI_CHECK(MPI_Barrier(testComm), "barrier error"); fd_oflag |= O_RDWR; - *fd = open64(testFileName, fd_oflag, mode); - if (*fd < 0){ + pfd->fd = open64(testFileName, fd_oflag, mode); + if (pfd->fd < 0){ ERRF("open64(\"%s\", %d, %#o) failed. 
Error: %s", testFileName, fd_oflag, mode, strerror(errno)); } @@ -396,16 +442,16 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) * Lustre striping information on a pre-existing file.*/ fd_oflag |= O_CREAT | O_EXCL | O_RDWR | O_LOV_DELAY_CREATE; - *fd = open64(testFileName, fd_oflag, mode); - if (*fd < 0) { + pfd->fd = open64(testFileName, fd_oflag, mode); + if (pfd->fd < 0) { ERRF("Unable to open '%s': %s\n", testFileName, strerror(errno)); - } else if (ioctl(*fd, LL_IOC_LOV_SETSTRIPE, &opts)) { + } else if (ioctl(pfd->fd, LL_IOC_LOV_SETSTRIPE, &opts)) { char *errmsg = "stripe already set"; if (errno != EEXIST && errno != EALREADY) errmsg = strerror(errno); ERRF("Error on ioctl for '%s' (%d): %s\n", - testFileName, *fd, errmsg); + testFileName, pfd->fd, errmsg); } if (!hints->filePerProc) MPI_CHECK(MPI_Barrier(testComm), @@ -431,8 +477,8 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) } #endif /* HAVE_BEEGFS_BEEGFS_H */ - *fd = open64(testFileName, fd_oflag, mode); - if (*fd < 0){ + pfd->fd = open64(testFileName, fd_oflag, mode); + if (pfd->fd < 0){ ERRF("open64(\"%s\", %d, %#o) failed. Error: %s", testFileName, fd_oflag, mode, strerror(errno)); } @@ -442,8 +488,8 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) if (o->lustre_ignore_locks) { int lustre_ioctl_flags = LL_FILE_IGNORE_LOCK; - if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) - ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); + if (ioctl(pfd->fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) + ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) 
failed", pfd->fd); } #endif /* HAVE_LUSTRE_USER */ @@ -452,10 +498,15 @@ aiori_fd_t *POSIX_Create(char *testFileName, int flags, aiori_mod_opt_t * param) * the intent that we can avoid some byte range lock revocation: * everyone will be writing/reading from individual regions */ if (o->gpfs_release_token ) { - gpfs_free_all_locks(*fd); + gpfs_free_all_locks(pfd->fd); } #endif - return (aiori_fd_t*) fd; +#ifdef HAVE_GPU_DIRECT + if(o->gpuDirect){ + init_cufile(pfd); + } +#endif + return (aiori_fd_t*) pfd; } /* @@ -477,24 +528,18 @@ int POSIX_Mknod(char *testFileName) */ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) { - int fd_oflag = O_BINARY; - int *fd; - - fd = (int *)malloc(sizeof(int)); - if (fd == NULL) - ERR("Unable to malloc file descriptor"); - + int fd_oflag = O_BINARY | O_RDWR; + posix_fd * pfd = safeMalloc(sizeof(posix_fd)); posix_options_t * o = (posix_options_t*) param; - if (o->direct_io == TRUE) + if (o->direct_io == TRUE){ set_o_direct_flag(&fd_oflag); - - fd_oflag |= O_RDWR; + } if(hints->dryRun) return (aiori_fd_t*) 0; - *fd = open64(testFileName, fd_oflag); - if (*fd < 0) + pfd->fd = open64(testFileName, fd_oflag); + if (pfd->fd < 0) ERRF("open64(\"%s\", %d) failed: %s", testFileName, fd_oflag, strerror(errno)); #ifdef HAVE_LUSTRE_USER @@ -503,17 +548,22 @@ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) if (verbose >= VERBOSE_1) { EINFO("** Disabling lustre range locking **\n"); } - if (ioctl(*fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) - ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) failed", *fd); + if (ioctl(pfd->fd, LL_IOC_SETFLAGS, &lustre_ioctl_flags) == -1) + ERRF("ioctl(%d, LL_IOC_SETFLAGS, ...) 
failed", pfd->fd); } #endif /* HAVE_LUSTRE_USER */ #ifdef HAVE_GPFS_FCNTL_H if(o->gpfs_release_token) { - gpfs_free_all_locks(*fd); + gpfs_free_all_locks(pfd->fd); } #endif - return (aiori_fd_t*) fd; +#ifdef HAVE_GPU_DIRECT + if(o->gpuDirect){ + init_cufile(pfd); + } +#endif + return (aiori_fd_t*) pfd; } /* @@ -532,7 +582,8 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer if(hints->dryRun) return length; - fd = *(int *)file; + posix_fd * pfd = (posix_fd *) file; + fd = pfd->fd; #ifdef HAVE_GPFS_FCNTL_H if (o->gpfs_hint_access) { @@ -544,7 +595,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer /* seek to offset */ if (lseek64(fd, offset, SEEK_SET) == -1) ERRF("lseek64(%d, %lld, SEEK_SET) failed", fd, offset); - + off_t mem_offset = 0; while (remaining > 0) { /* write/read file */ if (access == WRITE) { /* WRITE */ @@ -553,7 +604,15 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer rank, offset + length - remaining); } - rc = write(fd, ptr, remaining); +#ifdef HAVE_GPU_DIRECT + if(o->gpuDirect){ + rc = cuFileWrite(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset); + }else{ +#endif + rc = write(fd, ptr, remaining); +#ifdef HAVE_GPU_DIRECT + } +#endif if (rc == -1) ERRF("write(%d, %p, %lld) failed", fd, (void*)ptr, remaining); @@ -566,7 +625,15 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer rank, offset + length - remaining); } - rc = read(fd, ptr, remaining); +#ifdef HAVE_GPU_DIRECT + if(o->gpuDirect){ + rc = cuFileRead(pfd->cf_handle, ptr, remaining, offset + mem_offset, mem_offset); + }else{ +#endif + rc = read(fd, ptr, remaining); +#ifdef HAVE_GPU_DIRECT + } +#endif if (rc == 0) ERRF("read(%d, %p, %lld) returned EOF prematurely", fd, (void*)ptr, remaining); @@ -587,6 +654,7 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer assert(rc <= remaining); remaining -= rc; ptr += rc; + 
mem_offset += rc; xferRetries++; } #ifdef HAVE_GPFS_FCNTL_H @@ -597,10 +665,11 @@ static IOR_offset_t POSIX_Xfer(int access, aiori_fd_t *file, IOR_size_t * buffer return (length); } -void POSIX_Fsync(aiori_fd_t *fd, aiori_mod_opt_t * param) +void POSIX_Fsync(aiori_fd_t *afd, aiori_mod_opt_t * param) { - if (fsync(*(int *)fd) != 0) - EWARNF("fsync(%d) failed", *(int *)fd); + int fd = ((posix_fd*) afd)->fd; + if (fsync(fd) != 0) + EWARNF("fsync(%d) failed", fd); } @@ -616,13 +685,21 @@ void POSIX_Sync(aiori_mod_opt_t * param) /* * Close a file through the POSIX interface. */ -void POSIX_Close(aiori_fd_t *fd, aiori_mod_opt_t * param) +void POSIX_Close(aiori_fd_t *afd, aiori_mod_opt_t * param) { if(hints->dryRun) return; - if (close(*(int *)fd) != 0) - ERRF("close(%d) failed", *(int *)fd); - free(fd); + posix_options_t * o = (posix_options_t*) param; + int fd = ((posix_fd*) afd)->fd; +#ifdef HAVE_GPU_DIRECT + if(o->gpuDirect){ + cuFileHandleDeregister(((posix_fd*) afd)->cf_handle); + } +#endif + if (close(fd) != 0){ + ERRF("close(%d) failed", fd); + } + free(afd); } /* @@ -665,3 +742,15 @@ IOR_offset_t POSIX_GetFileSize(aiori_mod_opt_t * test, char *testFileName) return (aggFileSizeFromStat); } + +void POSIX_Initialize(aiori_mod_opt_t * options){ +#ifdef HAVE_GPU_DIRECT + CUfileError_t err = cuFileDriverOpen(); +#endif +} + +void POSIX_Finalize(aiori_mod_opt_t * options){ +#ifdef HAVE_GPU_DIRECT + CUfileError_t err = cuFileDriverClose(); +#endif +} diff --git a/src/aiori-POSIX.h b/src/aiori-POSIX.h index 8884a30..b2f556a 100644 --- a/src/aiori-POSIX.h +++ b/src/aiori-POSIX.h @@ -22,7 +22,7 @@ typedef struct{ /* beegfs variables */ int beegfs_numTargets; /* number storage targets to use */ int beegfs_chunkSize; /* srtipe pattern for new files */ - + int gpuDirect; } posix_options_t; void POSIX_Sync(aiori_mod_opt_t * param); diff --git a/src/ior.c b/src/ior.c index dd0f048..a591b18 100755 --- a/src/ior.c +++ b/src/ior.c @@ -33,6 +33,10 @@ # include /* uname() */ #endif 
+#ifdef HAVE_CUDA +#include +#endif + #include #include "ior.h" @@ -113,6 +117,13 @@ static int test_initialize(IOR_test_t * test){ verbose = test->params.verbose; backend = test->params.backend; +#ifdef HAVE_CUDA + cudaError_t cret = cudaSetDevice(test->params.gpuID); + if(cret != cudaSuccess){ + EWARNF("cudaSetDevice(%d) error: %s", test->params.gpuID, cudaGetErrorString(cret)); + } +#endif + if(backend->initialize){ backend->initialize(test->params.backend_options); } @@ -507,44 +518,6 @@ static int CountErrors(IOR_param_t * test, int access, int errors) return (allErrors); } -/* - * Allocate a page-aligned (required by O_DIRECT) buffer. - */ -static void *aligned_buffer_alloc(size_t size) -{ - size_t pageMask; - char *buf, *tmp; - char *aligned; - -#ifdef HAVE_SYSCONF - long pageSize = sysconf(_SC_PAGESIZE); -#else - size_t pageSize = getpagesize(); -#endif - - pageMask = pageSize - 1; - buf = malloc(size + pageSize + sizeof(void *)); - if (buf == NULL) - ERR("out of memory"); - /* find the alinged buffer */ - tmp = buf + sizeof(char *); - aligned = tmp + pageSize - ((size_t) tmp & pageMask); - /* write a pointer to the original malloc()ed buffer into the bytes - preceding "aligned", so that the aligned buffer can later be free()ed */ - tmp = aligned - sizeof(void *); - *(void **)tmp = buf; - - return (void *)aligned; -} - -/* - * Free a buffer allocated by aligned_buffer_alloc(). 
- */ -static void aligned_buffer_free(void *buf) -{ - free(*(void **)((char *)buf - sizeof(char *))); -} - void AllocResults(IOR_test_t *test) { int reps; @@ -1053,7 +1026,7 @@ static void InitTests(IOR_test_t *tests) static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, int pretendRank) { - ioBuffers->buffer = aligned_buffer_alloc(test->transferSize); + ioBuffers->buffer = aligned_buffer_alloc(test->transferSize, test->gpuMemoryFlags); } /* @@ -1062,7 +1035,7 @@ static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, static void XferBuffersFree(IOR_io_buffers* ioBuffers, IOR_param_t* test) { - aligned_buffer_free(ioBuffers->buffer); + aligned_buffer_free(ioBuffers->buffer, test->gpuMemoryFlags); } @@ -1878,7 +1851,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, void * randomPrefillBuffer = NULL; if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){ - randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize); + randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize, test->gpuMemoryFlags); // store invalid data into the buffer memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize); } @@ -2000,7 +1973,7 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, backend->fsync(fd, test->backend_options); /*fsync after all accesses */ } if(randomPrefillBuffer){ - aligned_buffer_free(randomPrefillBuffer); + aligned_buffer_free(randomPrefillBuffer, test->gpuMemoryFlags); } return (dataMoved); diff --git a/src/ior.h b/src/ior.h index e4663db..c58b198 100755 --- a/src/ior.h +++ b/src/ior.h @@ -58,6 +58,11 @@ enum PACKET_TYPE }; +typedef enum{ + IOR_MEMORY_TYPE_CPU = 0, + IOR_MEMORY_TYPE_GPU_MANAGED = 1, + IOR_MEMORY_TYPE_GPU_DEVICE_ONLY = 2, +} ior_memory_flags; /***************** IOR_BUFFERS *************************************************/ @@ -101,7 +106,10 @@ typedef struct MPI_Comm testComm; /* Current MPI 
communicator */ MPI_Comm mpi_comm_world; /* The global MPI communicator */ int dryRun; /* do not perform any I/Os just run evtl. inputs print dummy output */ - int dualMount; /* dual mount points */ + int dualMount; /* dual mount points */ + ior_memory_flags gpuMemoryFlags; /* use the GPU to store the data */ + int gpuDirect; /* use gpuDirect, this influences gpuMemoryFlags as well */ + int gpuID; /* the GPU to use for gpuDirect or memory options */ int numTasks; /* number of tasks for test */ int numNodes; /* number of nodes for test */ int numTasksOnNode0; /* number of tasks on node 0 (usually all the same, but don't have to be, use with caution) */ diff --git a/src/md-workbench.c b/src/md-workbench.c index 7f08611..fc51800 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -92,6 +92,7 @@ struct benchmark_options{ int read_only; int stonewall_timer; int stonewall_timer_wear_out; + int gpu_memory_flags; /* use the GPU to store the data */ char * latency_file_prefix; int latency_keep_all; @@ -381,7 +382,7 @@ static void compute_histogram(const char * name, time_result_t * times, time_sta sprintf(file, "%s-%.2f-%d-%s.csv", o.latency_file_prefix, o.relative_waiting_factor, o.global_iteration, name); FILE * f = fopen(file, "w+"); if(f == NULL){ - ERRF("%d: Error writing to latency file: %s\n", o.rank, file); + ERRF("%d: Error writing to latency file: %s", o.rank, file); return; } fprintf(f, "time,runtime\n"); @@ -546,12 +547,12 @@ void run_precreate(phase_stat_t * s, int current_index){ }else{ s->dset_create.err++; if (! 
o.ignore_precreate_errors){ - ERRF("%d: Error while creating the dset: %s\n", o.rank, dset); + ERRF("%d: Error while creating the dset: %s", o.rank, dset); } } } - char * buf = malloc(o.file_size); + char * buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags); generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, o.rank); double op_timer; // timer for individual operations size_t pos = -1; // position inside the individual measurement array @@ -574,7 +575,7 @@ void run_precreate(phase_stat_t * s, int current_index){ }else{ s->obj_create.err++; if (! o.ignore_precreate_errors){ - ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name); + ERRF("%d: Error while creating the obj: %s", o.rank, obj_name); } } o.backend->close(aiori_fh, o.backend_options); @@ -586,14 +587,14 @@ void run_precreate(phase_stat_t * s, int current_index){ } } } - free(buf); + aligned_buffer_free(buf, o.gpu_memory_flags); } /* FIFO: create a new file, write to it. Then read from the first created file, delete it... */ void run_benchmark(phase_stat_t * s, int * current_index_p){ char obj_name[MAX_PATHLEN]; int ret; - char * buf = malloc(o.file_size); + char * buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags); memset(buf, o.rank % 256, o.file_size); double op_timer; // timer for individual operations size_t pos = -1; // position inside the individual measurement array @@ -632,7 +633,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ if(ret != 0){ if (o.verbosity) - ERRF("%d: Error while stating the obj: %s\n", o.rank, obj_name); + ERRF("%d: Error while stating the obj: %s", o.rank, obj_name); s->obj_stat.err++; continue; } @@ -704,7 +705,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ o.backend->close(aiori_fh, o.backend_options); }else{ if (! 
o.ignore_precreate_errors){ - ERRF("Unable to open file %s", obj_name); + ERRF("%d: Error while creating the obj: %s", o.rank, obj_name); } EWARNF("Unable to open file %s", obj_name); s->obj_create.err++; @@ -761,7 +762,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ *current_index_p += f; } s->repeats = pos + 1; - free(buf); + aligned_buffer_free(buf, o.gpu_memory_flags); } void run_cleanup(phase_stat_t * s, int start_index){ @@ -822,6 +823,7 @@ static option_help options [] = { {'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer}, {'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out}, {'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read}, + {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number}, {0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats}, {0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only}, @@ -844,12 +846,12 @@ static int return_position(){ if( o.rank == 0){ FILE * f = fopen(o.run_info_file, "r"); if(! 
f){ - ERRF("[ERROR] Could not open %s for restart\n", o.run_info_file); + ERRF("[ERROR] Could not open %s for restart", o.run_info_file); exit(1); } ret = fscanf(f, "pos: %d", & position); if (ret != 1){ - ERRF("Could not read from %s for restart\n", o.run_info_file); + ERRF("Could not read from %s for restart", o.run_info_file); exit(1); } fclose(f); @@ -864,7 +866,7 @@ static void store_position(int position){ } FILE * f = fopen(o.run_info_file, "w"); if(! f){ - ERRF("[ERROR] Could not open %s for saving data\n", o.run_info_file); + ERRF("[ERROR] Could not open %s for saving data", o.run_info_file); exit(1); } fprintf(f, "pos: %d\n", position); diff --git a/src/mdtest.c b/src/mdtest.c index 3394675..558bbf4 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -110,6 +110,7 @@ typedef struct { char unique_rm_uni_dir[MAX_PATHLEN]; char *write_buffer; char *stoneWallingStatusFile; + int gpu_memory_flags; int barriers; @@ -641,10 +642,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { /* allocate read buffer */ if (o.read_bytes > 0) { - int alloc_res = posix_memalign((void**)&read_buffer, sysconf(_SC_PAGESIZE), o.read_bytes); - if (alloc_res) { - FAIL("out of memory"); - } + read_buffer = aligned_buffer_alloc(o.read_bytes, o.gpu_memory_flags); memset(read_buffer, -1, o.read_bytes); } @@ -743,7 +741,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { o.backend->close (aiori_fh, o.backend_options); } if(o.read_bytes){ - free(read_buffer); + aligned_buffer_free(read_buffer, o.gpu_memory_flags); } } @@ -2236,6 +2234,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & 
o.print_time}, + {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, {0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV}, LAST_OPTION @@ -2420,10 +2419,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* allocate and initialize write buffer with # */ if (o.write_bytes > 0) { - int alloc_res = posix_memalign((void**)& o.write_buffer, sysconf(_SC_PAGESIZE), o.write_bytes); - if (alloc_res) { - FAIL("out of memory"); - } + o.write_buffer = aligned_buffer_alloc(o.write_bytes, o.gpu_memory_flags); generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); } @@ -2560,7 +2556,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } if (o.write_bytes > 0) { - free(o.write_buffer); + aligned_buffer_free(o.write_buffer, o.gpu_memory_flags); } free(o.summary_table); diff --git a/src/option.c b/src/option.c index c44dc9b..7be3df6 100644 --- a/src/option.c +++ b/src/option.c @@ -264,11 +264,13 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi return; } txt++; - + int parsed = 0; + + // printf("Parsing: %s : %s\n", txt, arg); // support groups of multiple flags like -vvv or -vq for(int flag_index = 0; flag_index < strlen(txt); ++flag_index){ // don't loop looking for multiple flags if we already processed a long option - if(txt[0] == '-' && flag_index > 0) + if(txt[flag_index] == '=' || (txt[0] == '-' && flag_index > 0)) break; for(int m = 0; m < opt_all->module_count; m++ ){ @@ -281,6 +283,7 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi continue; } if ( (o->shortVar == txt[flag_index]) || (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, 
o->longVar) == 0)){ + // printf("Found %s %c=%c? %d %d\n", o->help, o->shortVar, txt[flag_index], (o->shortVar == txt[flag_index]), (strlen(txt) > 2 && txt[0] == '-' && o->longVar != NULL && strcmp(txt + 1, o->longVar) == 0)); // now process the option. switch(o->arg){ case (OPTION_FLAG):{ @@ -370,12 +373,13 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi (*requiredArgsSeen)++; } - return; + parsed = 1; } } } } - + if(parsed) return; + if(strcmp(txt, "h") == 0 || strcmp(txt, "-help") == 0){ *print_help = 1; }else{ diff --git a/src/parse_options.c b/src/parse_options.c index 82fab98..605de91 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -62,7 +62,17 @@ static void CheckRunSettings(IOR_test_t *tests) } if(params->dualMount && !params->filePerProc) { - MPI_CHECK(MPI_Abort(MPI_COMM_WORLD, -1), "Dual Mount can only be used with File Per Process"); + ERR("Dual Mount can only be used with File Per Process"); + } + + if(params->gpuDirect){ + if(params->gpuMemoryFlags == IOR_MEMORY_TYPE_GPU_MANAGED){ + ERR("GPUDirect cannot be used with managed memory"); + } + params->gpuMemoryFlags = IOR_MEMORY_TYPE_GPU_DEVICE_ONLY; + if(params->checkRead || params->checkWrite){ + ERR("GPUDirect data cannot yet be checked"); + } } } } @@ -138,6 +148,12 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt params->testFileName = strdup(value); } else if (strcasecmp(option, "dualmount") == 0){ params->dualMount = atoi(value); + } else if (strcasecmp(option, "allocateBufferOnGPU") == 0) { + params->gpuMemoryFlags = atoi(value); + } else if (strcasecmp(option, "GPUid") == 0) { + params->gpuID = atoi(value); + } else if (strcasecmp(option, "GPUDirect") == 0) { + params->gpuDirect = atoi(value); } else if (strcasecmp(option, "deadlineforstonewalling") == 0) { params->deadlineForStonewalling = atoi(value); } else if (strcasecmp(option, "stoneWallingWearOut") == 0) { @@ -413,6 +429,13 @@ option_help * 
createGlobalOptions(IOR_param_t * params){ {.help=" -O stoneWallingWearOut=1 -- once the stonewalling timeout is over, all process finish to access the amount of data", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read", .arg = OPTION_OPTIONAL_ARGUMENT}, +#ifdef HAVE_CUDA + {.help=" -O allocateBufferOnGPU=X -- allocate I/O buffers on the GPU: X=1 uses managed memory, X=2 device memory.", .arg = OPTION_OPTIONAL_ARGUMENT}, + {.help=" -O GPUid=X -- select the GPU to use.", .arg = OPTION_OPTIONAL_ARGUMENT}, +#ifdef HAVE_GPU_DIRECT + {0, "gpuDirect", "allocate I/O buffers on the GPU and use gpuDirect to store data; this option is incompatible with any option requiring CPU access to data.", OPTION_FLAG, 'd', & params->gpuDirect}, +#endif +#endif {'e', NULL, "fsync -- perform a fsync() operation at the end of each read/write phase", OPTION_FLAG, 'd', & params->fsync}, {'E', NULL, "useExistingTestFile -- do not remove test file before write access", OPTION_FLAG, 'd', & params->useExistingTestFile}, {'f', NULL, "scriptFile -- test script name", OPTION_OPTIONAL_ARGUMENT, 's', & params->testscripts}, diff --git a/src/utilities.c b/src/utilities.c index 16a31b0..9ab5432 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -37,6 +37,10 @@ #include #include +#ifdef HAVE_CUDA +#include +#endif + #ifndef _WIN32 # include # ifdef __sun /* SunOS does not support statfs(), instead uses statvfs() */ @@ -210,7 +214,7 @@ void updateParsedOptions(IOR_param_t * options, options_all_t * global_options){ /* Used in aiori-POSIX.c and aiori-PLFS.c */ -void set_o_direct_flag(int *fd) +void set_o_direct_flag(int *flag) { /* note that TRU64 needs O_DIRECTIO, SunOS uses 
directio(), and everyone else needs O_DIRECT */ @@ -223,7 +227,7 @@ void set_o_direct_flag(int *fd) # endif /* not O_DIRECTIO */ #endif /* not O_DIRECT */ - *fd |= O_DIRECT; + *flag |= O_DIRECT; } @@ -911,3 +915,72 @@ unsigned long GetProcessorAndCore(int *chip, int *core){ return 1; } #endif + + + +/* + * Allocate a page-aligned (required by O_DIRECT) buffer. + */ +void *aligned_buffer_alloc(size_t size, ior_memory_flags type) +{ + size_t pageMask; + char *buf, *tmp; + char *aligned; + + if(type == IOR_MEMORY_TYPE_GPU_MANAGED){ +#ifdef HAVE_CUDA + // use unified memory here to allow drop-in-replacement + if (cudaMallocManaged((void**) & buf, size, cudaMemAttachGlobal) != cudaSuccess){ + ERR("Cannot allocate buffer on GPU"); + } + return buf; +#else + ERR("No CUDA supported, cannot allocate on the GPU"); +#endif + }else if(type == IOR_MEMORY_TYPE_GPU_DEVICE_ONLY){ +#ifdef HAVE_GPU_DIRECT + if (cudaMalloc((void**) & buf, size) != cudaSuccess){ + ERR("Cannot allocate buffer on GPU"); + } + return buf; +#else + ERR("No GPUDirect supported, cannot allocate on the GPU"); +#endif + } + +#ifdef HAVE_SYSCONF + long pageSize = sysconf(_SC_PAGESIZE); +#else + size_t pageSize = getpagesize(); +#endif + + pageMask = pageSize - 1; + buf = safeMalloc(size + pageSize + sizeof(void *)); + /* find the alinged buffer */ + tmp = buf + sizeof(char *); + aligned = tmp + pageSize - ((size_t) tmp & pageMask); + /* write a pointer to the original malloc()ed buffer into the bytes + preceding "aligned", so that the aligned buffer can later be free()ed */ + tmp = aligned - sizeof(void *); + *(void **)tmp = buf; + + return (void *)aligned; +} + +/* + * Free a buffer allocated by aligned_buffer_alloc(). 
+ */ +void aligned_buffer_free(void *buf, ior_memory_flags gpu) +{ + if(gpu){ +#ifdef HAVE_CUDA + if (cudaFree(buf) != cudaSuccess){ + WARN("Cannot free buffer on GPU"); + } + return; +#else + ERR("No CUDA supported, cannot free on the GPU"); +#endif + } + free(*(void **)((char *)buf - sizeof(char *))); +} diff --git a/src/utilities.h b/src/utilities.h index 202bcad..97dc2c0 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -63,5 +63,6 @@ void init_clock(MPI_Comm com); double GetTimeStamp(void); char * PrintTimestamp(); // TODO remove this function unsigned long GetProcessorAndCore(int *chip, int *core); - +void *aligned_buffer_alloc(size_t size, ior_memory_flags type); +void aligned_buffer_free(void *buf, ior_memory_flags type); #endif /* !_UTILITIES_H */ From 0829ac5a1818f1d5eb380aada7d8d36efad6a377 Mon Sep 17 00:00:00 2001 From: Jean-Yves VET Date: Fri, 19 Feb 2021 13:30:21 +0100 Subject: [PATCH 135/154] Fix detection of option with empty value (#338) Context: Options with values are not properly parsed. If the value is not set, a bug makes appear that 0 was provided. This patch fixes options parsing (issue #337). An error is reported if 'backend.option=' argument is provided. --- src/option.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/option.c b/src/option.c index 7be3df6..80463d4 100644 --- a/src/option.c +++ b/src/option.c @@ -253,8 +253,10 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi int i = 0; if(arg != NULL){ arg[0] = 0; - arg++; replaced_equal = 1; + + // Check empty value + arg = (arg[1] == 0) ? 
NULL : arg + 1; } *flag_parsed_next = 0; @@ -299,7 +301,7 @@ static void option_parse_token(char ** argv, int * flag_parsed_next, int * requi case (OPTION_OPTIONAL_ARGUMENT): case (OPTION_REQUIRED_ARGUMENT):{ // check if next is an argument - if(arg == NULL){ + if(arg == NULL && replaced_equal != 1){ if(o->shortVar == txt[0] && txt[1] != 0){ arg = & txt[1]; }else{ From ad3bf97304ff1d623c83661521ef80e19e0d9c23 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Sun, 28 Feb 2021 11:45:32 +0000 Subject: [PATCH 136/154] Fixed some issues in build system detected using Clang. (#339) --- configure.ac | 5 +++-- src/Makefile.am | 8 ++++++-- src/utilities.c | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index b758f5b..7f64855 100755 --- a/configure.ac +++ b/configure.ac @@ -93,7 +93,8 @@ AS_IF([test "$ac_cv_header_cuda_runtime_h" = "yes"], [ [AC_MSG_ERROR([Library containing cudaMalloc symbol not found])]) ]) ]) -AM_CONDITIONAL([USE_CUDA], [test x$with_cuda = xyes]) +AM_CONDITIONAL([HAVE_CUDA], [test x$with_cuda = xyes]) +AM_COND_IF([HAVE_CUDA],[AC_DEFINE([HAVE_CUDA], [], [CUDA GPU API found])]) # Check for GPUDirect AC_ARG_WITH([gpuDirect], @@ -116,7 +117,7 @@ AS_IF([test "$ac_cv_header_cufile_h" = "yes"], [ ]) ]) AM_CONDITIONAL([HAVE_GPU_DIRECT], [test x$with_gpuDirect = xyes]) - +AM_COND_IF([HAVE_GPU_DIRECT],[AC_DEFINE([HAVE_GPU_DIRECT], [], [GPUDirect API found])]) # Check for system capabilities diff --git a/src/Makefile.am b/src/Makefile.am index fdf746f..037433c 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,10 +41,14 @@ extraLDFLAGS += -L/opt/hadoop-2.2.0/lib/native extraLDADD += -lhdfs endif -if USE_CUDA +if HAVE_CUDA extraLDADD += -lcudart endif +if HAVE_GPU_DIRECT +extraLDADD += -lcufile +endif + if USE_HDF5_AIORI extraSOURCES += aiori-HDF5.c extraLDADD += -lhdf5 -lz @@ -164,5 +168,5 @@ libaiori_a_CPPFLAGS = $(extraCPPFLAGS) .PHONY: build.conf all-local: build.conf build.conf: - @echo 
LDFLAGS=$(LDFLAGS) $(extraLDFLAGS) $(extraLDADD) > build.conf + @echo LDFLAGS=$(LDFLAGS) $(extraLDFLAGS) $(extraLDADD) $(LIBS) > build.conf @echo CFLAGS=$(CFLAGS) $(extraCPPFLAGS) >> build.conf diff --git a/src/utilities.c b/src/utilities.c index 9ab5432..5972b27 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -86,7 +86,7 @@ void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int bu void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank){ uint64_t * buffi = (uint64_t*) buf; // first half of 64 bits use the rank - const uint64_t ranki = (uint64_t)(rank + 1) << 32 + buff_offset; + const uint64_t ranki = ((uint64_t)(rank + 1) << 32) + buff_offset; const size_t size = bytes / 8; // the first 8 bytes are set to item number for(size_t i=1; i < size; i++){ @@ -106,7 +106,7 @@ int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset uint64_t * buffi = (uint64_t*) buffer; // first half of 64 bits use the rank, here need to apply rank shifting - uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32 + buff_offset; + uint64_t rank_mod = ((uint64_t)(pretendRank + 1) << 32) + buff_offset; // the first 8 bytes are set to item number for(size_t i=1; i < bytes/8; i++){ uint64_t exp = (i + 1) + rank_mod; From 4bad282932deef9fb242450c39d57d3fd9fed259 Mon Sep 17 00:00:00 2001 From: Rob Latham Date: Sat, 6 Mar 2021 03:53:39 -0600 Subject: [PATCH 137/154] Make noncontiguous I/O work again (#341) --- src/aiori-MPIIO.c | 11 +++++++---- src/ior.c | 4 +++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index 2ed0c6d..a457ee0 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -72,7 +72,7 @@ static option_help * MPIIO_options(aiori_mod_opt_t ** init_backend_options, aior {0, "mpiio.hintsFileName","Full name for hints file", OPTION_OPTIONAL_ARGUMENT, 's', & o->hintsFileName}, {0, "mpiio.showHints", "Show MPI hints", OPTION_FLAG, 'd', & o->showHints}, {0, 
"mpiio.preallocate", "Preallocate file size", OPTION_FLAG, 'd', & o->preallocate}, - {0, "mpiio.useStridedDatatype", "put strided access into datatype [not working]", OPTION_FLAG, 'd', & o->useStridedDatatype}, + {0, "mpiio.useStridedDatatype", "put strided access into datatype", OPTION_FLAG, 'd', & o->useStridedDatatype}, //{'P', NULL, "useSharedFilePointer -- use shared file pointer [not working]", OPTION_FLAG, 'd', & params->useSharedFilePointer}, {0, "mpiio.useFileView", "Use MPI_File_set_view", OPTION_FLAG, 'd', & o->useFileView}, LAST_OPTION @@ -120,8 +120,6 @@ static int MPIIO_check_params(aiori_mod_opt_t * module_options){ ERR("segment size must be < 2GiB"); if (param->useSharedFilePointer) ERR("shared file pointer not implemented"); - if (param->useStridedDatatype) - ERR("strided datatype not implemented"); if (param->useStridedDatatype && (hints->blockSize < sizeof(IOR_size_t) || hints->transferSize < sizeof(IOR_size_t))) @@ -414,7 +412,12 @@ static IOR_offset_t MPIIO_Xfer(int access, aiori_fd_t * fdp, IOR_size_t * buffer mfd->transferType, &status), "cannot access noncollective"); } - length *= hints->transferSize; /* for return value in bytes */ + /* MPI-IO driver does "nontcontiguous" by transfering + * 'segment' regions of 'transfersize' bytes, but + * our caller WriteOrReadSingle does not know how to + * deal with us reporting that we wrote N times more + * data than requested. 
*/ + length = hints->transferSize; } } else { /* diff --git a/src/ior.c b/src/ior.c index a591b18..38ae88f 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1026,7 +1026,9 @@ static void InitTests(IOR_test_t *tests) static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, int pretendRank) { - ioBuffers->buffer = aligned_buffer_alloc(test->transferSize, test->gpuMemoryFlags); + /* MPI-IO driver when doing noncontiguous I/O will construct an access + * pattern that describes the entire strided access in a single go */ + ioBuffers->buffer = aligned_buffer_alloc(test->transferSize*test->segmentCount, test->gpuMemoryFlags); } /* From f239b74d8357e57bcfccc97a260f2a9c8e608780 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Mon, 8 Mar 2021 12:46:58 -0600 Subject: [PATCH 138/154] free MPI comm and group for each iteration they are created in. (#342) Signed-off-by: Mohamad Chaarawi --- src/mdtest.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 558bbf4..cae0ea9 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2534,17 +2534,23 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * if (i == 1 && stride > 1) { i = 0; } + + int total_errors = 0; + MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm); + if(rank == 0 && total_errors){ + VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors); + } + + MPI_Comm_free(&testComm); + MPI_Group_free(&testgroup); } + MPI_Group_free(&worldgroup); + if (created_root_dir && o.remove_only && o.backend->rmdir(o.testdirpath, o.backend_options) != 0) { FAIL("Unable to remove test directory path %s", o.testdirpath); } - int total_errors = 0; - MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm); - if(rank == 0 && total_errors){ - VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! 
Take the performance values with care!\n", total_errors); - } VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp()); if (o.random_seed > 0) { From 61f36f0c48808555be4fd2161cac0f27848f72d8 Mon Sep 17 00:00:00 2001 From: Peter Steinbach Date: Tue, 9 Mar 2021 18:07:19 +0100 Subject: [PATCH 139/154] converting to sphinx code snippets (#343) * converting to sphinx code snippets * more rigorous code highlighting --- doc/sphinx/userDoc/install.rst | 14 +-- doc/sphinx/userDoc/tutorial.rst | 148 +++++++++++++++++--------------- 2 files changed, 87 insertions(+), 75 deletions(-) diff --git a/doc/sphinx/userDoc/install.rst b/doc/sphinx/userDoc/install.rst index 4bfa684..9b000c8 100644 --- a/doc/sphinx/userDoc/install.rst +++ b/doc/sphinx/userDoc/install.rst @@ -6,19 +6,19 @@ Install Building -------- -0. If "configure" is missing from the top level directory, you +0. If ``configure`` is missing from the top level directory, you probably retrieved this code directly from the repository. - Run "./bootstrap". + Run ``./bootstrap``. If your versions of the autotools are not new enough to run this script, download and official tarball in which the configure script is already provided. -1. Run "./configure" +1. Run ``./configure`` - See "./configure --help" for configuration options. + See ``./configure --help`` for configuration options. -2. Run "make" +2. Run ``make`` -3. Optionally, run "make install". The installation prefix - can be changed as an option to the "configure" script. +3. Optionally, run ``make install``. The installation prefix + can be changed as an option to the ``configure`` script. diff --git a/doc/sphinx/userDoc/tutorial.rst b/doc/sphinx/userDoc/tutorial.rst index 449d980..70d4aa3 100644 --- a/doc/sphinx/userDoc/tutorial.rst +++ b/doc/sphinx/userDoc/tutorial.rst @@ -11,23 +11,24 @@ Running IOR ----------- There are two ways of running IOR: - 1) Command line with arguments -- executable followed by command line - options. 
+ 1) Command line with arguments -- executable followed by command line options. - :: - $ ./IOR -w -r -o filename + .. code-block:: shell - This performs a write and a read to the file 'filename'. + $ ./IOR -w -r -o filename + + This performs a write and a read to the file 'filename'. 2) Command line with scripts -- any arguments on the command line will - establish the default for the test run, but a script may be used in - conjunction with this for varying specific tests during an execution of - the code. Only arguments before the script will be used! + establish the default for the test run, but a script may be used in + conjunction with this for varying specific tests during an execution of + the code. Only arguments before the script will be used! - :: - $ ./IOR -W -f script + .. code-block:: shell - This defaults all tests in 'script' to use write data checking. + $ ./IOR -W -f script + + This defaults all tests in 'script' to use write data checking. In this tutorial the first one is used as it is much easier to toy around with @@ -40,10 +41,10 @@ Getting Started with IOR IOR writes data sequentially with the following parameters: - * blockSize (-b) - * transferSize (-t) - * segmentCount (-s) - * numTasks (-n) + * ``blockSize`` (``-b``) + * ``transferSize`` (``-t``) + * ``segmentCount`` (``-s``) + * ``numTasks`` (``-n``) which are best illustrated with a diagram: @@ -52,30 +53,34 @@ which are best illustrated with a diagram: These four parameters are all you need to get started with IOR. However, naively running IOR usually gives disappointing results. For example, if we run -a four-node IOR test that writes a total of 16 GiB:: +a four-node IOR test that writes a total of 16 GiB: - $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 - ... 
- access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter - ------ --------- ---------- --------- -------- -------- -------- -------- ---- - write 427.36 16384 1024.00 0.107961 38.34 32.48 38.34 2 - read 239.08 16384 1024.00 0.005789 68.53 65.53 68.53 2 - remove - - - - - - 0.534400 2 +.. code-block:: shell + + $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 + ... + access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter + ------ --------- ---------- --------- -------- -------- -------- -------- ---- + write 427.36 16384 1024.00 0.107961 38.34 32.48 38.34 2 + read 239.08 16384 1024.00 0.005789 68.53 65.53 68.53 2 + remove - - - - - - 0.534400 2 we can only get a couple hundred megabytes per second out of a Lustre file system that should be capable of a lot more. Switching from writing to a single-shared file to one file per process using the --F (filePerProcess=1) option changes the performance dramatically:: +``-F`` (``filePerProcess=1``) option changes the performance dramatically: - $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 -F - ... - access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter - ------ --------- ---------- --------- -------- -------- -------- -------- ---- - write 33645 16384 1024.00 0.007693 0.486249 0.195494 0.486972 1 - read 149473 16384 1024.00 0.004936 0.108627 0.016479 0.109612 1 - remove - - - - - - 6.08 1 +.. code-block:: shell + + $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 -F + ... 
+ access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter + ------ --------- ---------- --------- -------- -------- -------- -------- ---- + write 33645 16384 1024.00 0.007693 0.486249 0.195494 0.486972 1 + read 149473 16384 1024.00 0.004936 0.108627 0.016479 0.109612 1 + remove - - - - - - 6.08 1 This is in large part because letting each MPI process work on its own file cuts @@ -123,7 +128,7 @@ There are a couple of ways to measure the read performance of the underlying Lustre file system. The most crude way is to simply write more data than will fit into the total page cache so that by the time the write phase has completed, the beginning of the file has already been evicted from cache. For example, -increasing the number of segments (-s) to write more data reveals the point at +increasing the number of segments (``-s``) to write more data reveals the point at which the nodes' page cache on my test system runs over very clearly: .. image:: tutorial-ior-overflowing-cache.png @@ -142,17 +147,19 @@ written by node N-1. Since page cache is not shared between compute nodes, shifting tasks this way ensures that each MPI process is reading data it did not write. -IOR provides the -C option (reorderTasks) to do this, and it forces each MPI +IOR provides the ``-C`` option (``reorderTasks``) to do this, and it forces each MPI process to read the data written by its neighboring node. Running IOR with -this option gives much more credible read performance:: +this option gives much more credible read performance: - $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 -F -C - ... - access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter - ------ --------- ---------- --------- -------- -------- -------- -------- ---- - write 41326 16384 1024.00 0.005756 0.395859 0.095360 0.396453 0 - read 3310.00 16384 1024.00 0.011786 4.95 4.20 4.95 1 - remove - - - - - - 0.237291 1 +.. code-block:: shell + + $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 -F -C + ... 
+ access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter + ------ --------- ---------- --------- -------- -------- -------- -------- ---- + write 41326 16384 1024.00 0.005756 0.395859 0.095360 0.396453 0 + read 3310.00 16384 1024.00 0.011786 4.95 4.20 4.95 1 + remove - - - - - - 0.237291 1 But now it should seem obvious that the write performance is also ridiculously @@ -166,16 +173,18 @@ pages we just wrote to flush out to Lustre. Including the time it takes for fsync() to finish gives us a measure of how long it takes for our data to write to the page cache and for the page cache to write back to Lustre. -IOR provides another convenient option, -e (fsync), to do just this. And, once -again, using this option changes our performance measurement quite a bit:: +IOR provides another convenient option, ``-e`` (fsync), to do just this. And, once +again, using this option changes our performance measurement quite a bit: - $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 -F -C -e - ... - access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter - ------ --------- ---------- --------- -------- -------- -------- -------- ---- - write 2937.89 16384 1024.00 0.011841 5.56 4.93 5.58 0 - read 2712.55 16384 1024.00 0.005214 6.04 5.08 6.04 3 - remove - - - - - - 0.037706 0 +.. code-block:: shell + + $ mpirun -n 64 ./ior -t 1m -b 16m -s 16 -F -C -e + ... + access bw(MiB/s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter + ------ --------- ---------- --------- -------- -------- -------- -------- ---- + write 2937.89 16384 1024.00 0.011841 5.56 4.93 5.58 0 + read 2712.55 16384 1024.00 0.005214 6.04 5.08 6.04 3 + remove - - - - - - 0.037706 0 and we finally have a believable bandwidth measurement for our file system. @@ -192,16 +201,17 @@ the best choice. There are several ways in which we can get clever and defeat page cache in a more general sense to get meaningful performance numbers. 
When measuring write performance, bypassing page cache is actually quite simple; -opening a file with the O_DIRECT flag going directly to disk. In addition, -the fsync() call can be inserted into applications, as is done with IOR's -e +opening a file with the ``O_DIRECT`` flag going directly to disk. In addition, +the ``fsync()`` call can be inserted into applications, as is done with IOR's ``-e`` option. Measuring read performance is a lot trickier. If you are fortunate enough to have root access on a test system, you can force the Linux kernel to empty out its page cache by doing -:: - # echo 1 > /proc/sys/vm/drop_caches +.. code-block:: shell + + # echo 1 > /proc/sys/vm/drop_caches and in fact, this is often good practice before running any benchmark (e.g., Linpack) because it ensures that you aren't losing performance to the @@ -210,23 +220,25 @@ memory for its own use. Unfortunately, many of us do not have root on our systems, so we have to get even more clever. As it turns out, there is a way to pass a hint to the kernel -that a file is no longer needed in page cache:: +that a file is no longer needed in page cache: - #define _XOPEN_SOURCE 600 - #include - #include - int main(int argc, char *argv[]) { - int fd; - fd = open(argv[1], O_RDONLY); - fdatasync(fd); - posix_fadvise(fd, 0,0,POSIX_FADV_DONTNEED); - close(fd); - return 0; - } +.. code-block:: c -The effect of passing POSIX_FADV_DONTNEED using posix_fadvise() is usually that + #define _XOPEN_SOURCE 600 + #include + #include + int main(int argc, char *argv[]) { + int fd; + fd = open(argv[1], O_RDONLY); + fdatasync(fd); + posix_fadvise(fd, 0,0,POSIX_FADV_DONTNEED); + close(fd); + return 0; + } + +The effect of passing POSIX_FADV_DONTNEED using ``posix_fadvise()`` is usually that all pages belonging to that file are evicted from page cache in Linux. 
However, -this is just a hint--not a guarantee--and the kernel evicts these pages +this is just a hint --not a guarantee-- and the kernel evicts these pages asynchronously, so it may take a second or two for pages to actually leave page cache. Fortunately, Linux also provides a way to probe pages in a file to see if they are resident in memory. From df5fa556c884d58011ebc9da630ad6b5bc27dd40 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Sat, 13 Mar 2021 11:29:40 -0600 Subject: [PATCH 140/154] reset testcomm to world_comm since it can be used in finalize callbacks. (#344) Signed-off-by: Mohamad Chaarawi --- src/mdtest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mdtest.c b/src/mdtest.c index cae0ea9..72985c4 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2546,6 +2546,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } MPI_Group_free(&worldgroup); + testComm = world_com; if (created_root_dir && o.remove_only && o.backend->rmdir(o.testdirpath, o.backend_options) != 0) { FAIL("Unable to remove test directory path %s", o.testdirpath); From 3be3cfb27421915667dcab31c12d804d9d3dae9f Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Wed, 17 Mar 2021 00:24:23 +0100 Subject: [PATCH 141/154] Fix MPI-IO strided access. (#347) * Remove MPI-IO malloc. * Bugfix for MPI-IO segmented/view. 
--- src/aiori-MPIIO.c | 117 +++++++++++++++++++++++++++------------------- src/ior.c | 4 +- 2 files changed, 70 insertions(+), 51 deletions(-) diff --git a/src/aiori-MPIIO.c b/src/aiori-MPIIO.c index a457ee0..070cff0 100755 --- a/src/aiori-MPIIO.c +++ b/src/aiori-MPIIO.c @@ -47,6 +47,7 @@ static int MPIIO_check_params(aiori_mod_opt_t * options); typedef struct{ MPI_File fd; MPI_Datatype transferType; /* datatype for transfer */ + MPI_Datatype contigType; /* elem datatype */ MPI_Datatype fileType; /* filetype for file view */ } mpiio_fd_t; @@ -182,9 +183,7 @@ static aiori_fd_t *MPIIO_Open(char *testFileName, int flags, aiori_mod_opt_t * m offsetFactor, tasksPerFile, transfersPerBlock = hints->blockSize / hints->transferSize; - struct fileTypeStruct { - int globalSizes[2], localSizes[2], startIndices[2]; - } fileTypeStruct; + mpiio_fd_t * mfd = malloc(sizeof(mpiio_fd_t)); memset(mfd, 0, sizeof(mpiio_fd_t)); @@ -269,15 +268,18 @@ static aiori_fd_t *MPIIO_Open(char *testFileName, int flags, aiori_mod_opt_t * m hints->numTasks)), "cannot preallocate file"); } + + /* create file view */ if (param->useFileView) { + /* Create in-memory datatype */ + MPI_CHECK(MPI_Type_contiguous (hints->transferSize / sizeof(IOR_size_t), MPI_LONG_LONG_INT, & mfd->contigType), "cannot create contiguous datatype"); + MPI_CHECK(MPI_Type_create_resized( mfd->contigType, 0, 0, & mfd->transferType), "cannot create resized type"); + MPI_CHECK(MPI_Type_commit(& mfd->contigType), "cannot commit datatype"); + MPI_CHECK(MPI_Type_commit(& mfd->transferType), "cannot commit datatype"); + /* create contiguous transfer datatype */ - MPI_CHECK(MPI_Type_contiguous - (hints->transferSize / sizeof(IOR_size_t), - MPI_LONG_LONG_INT, & mfd->transferType), - "cannot create contiguous datatype"); - MPI_CHECK(MPI_Type_commit(& mfd->transferType), - "cannot commit datatype"); + if (hints->filePerProc) { offsetFactor = 0; tasksPerFile = 1; @@ -286,33 +288,39 @@ static aiori_fd_t *MPIIO_Open(char *testFileName, int 
flags, aiori_mod_opt_t * m tasksPerFile = hints->numTasks; } - /* - * create file type using subarray - */ - fileTypeStruct.globalSizes[0] = 1; - fileTypeStruct.globalSizes[1] = - transfersPerBlock * tasksPerFile; - fileTypeStruct.localSizes[0] = 1; - fileTypeStruct.localSizes[1] = transfersPerBlock; - fileTypeStruct.startIndices[0] = 0; - fileTypeStruct.startIndices[1] = - transfersPerBlock * offsetFactor; + if(! hints->dryRun) { + if(! param->useStridedDatatype){ + struct fileTypeStruct { + int globalSizes[2], localSizes[2], startIndices[2]; + } fileTypeStruct; - MPI_CHECK(MPI_Type_create_subarray - (2, fileTypeStruct.globalSizes, - fileTypeStruct.localSizes, - fileTypeStruct.startIndices, MPI_ORDER_C, - mfd->transferType, & mfd->fileType), - "cannot create subarray"); - MPI_CHECK(MPI_Type_commit(& mfd->fileType), - "cannot commit datatype"); - - if(! hints->dryRun){ - MPI_CHECK(MPI_File_set_view(mfd->fd, (MPI_Offset) 0, - mfd->transferType, - mfd->fileType, "native", + /* + * create file type using subarray + */ + fileTypeStruct.globalSizes[0] = 1; + fileTypeStruct.globalSizes[1] = transfersPerBlock * tasksPerFile; + fileTypeStruct.localSizes[0] = 1; + fileTypeStruct.localSizes[1] = transfersPerBlock; + fileTypeStruct.startIndices[0] = 0; + fileTypeStruct.startIndices[1] = transfersPerBlock * offsetFactor; + + MPI_CHECK(MPI_Type_create_subarray + (2, fileTypeStruct.globalSizes, + fileTypeStruct.localSizes, + fileTypeStruct.startIndices, MPI_ORDER_C, + mfd->contigType, & mfd->fileType), + "cannot create subarray"); + MPI_CHECK(MPI_Type_commit(& mfd->fileType), "cannot commit datatype"); + MPI_CHECK(MPI_File_set_view(mfd->fd, 0, + mfd->contigType, + mfd->fileType, + "native", (MPI_Info) MPI_INFO_NULL), "cannot set file view"); + }else{ + MPI_CHECK(MPI_Type_create_resized(mfd->contigType, 0, tasksPerFile * hints->blockSize, & mfd->fileType), "cannot create MPI_Type_create_hvector"); + MPI_CHECK(MPI_Type_commit(& mfd->fileType), "cannot commit datatype"); + } } } if 
(mpiHints != MPI_INFO_NULL) @@ -377,7 +385,7 @@ static IOR_offset_t MPIIO_Xfer(int access, aiori_fd_t * fdp, IOR_size_t * buffer * Access_ordered = MPI_File_read_ordered; */ } - + /* * 'useFileView' uses derived datatypes and individual file pointers */ @@ -388,16 +396,28 @@ static IOR_offset_t MPIIO_Xfer(int access, aiori_fd_t * fdp, IOR_size_t * buffer /* if unsuccessful */ length = -1; } else { + /* - * 'useStridedDatatype' fits multi-strided pattern into a datatype; - * must use 'length' to determine repetitions (fix this for - * multi-segments someday, WEL): - * e.g., 'IOR -s 2 -b 32K -t 32K -a MPIIO -S' - */ + * 'useStridedDatatype' fits multi-strided pattern into a datatype; + * must use 'length' to determine repetitions (fix this for + * multi-segments someday, WEL): + * e.g., 'IOR -s 2 -b 32K -t 32K -a MPIIO --mpiio.useStridedDatatype --mpiio.useFileView' + */ if (param->useStridedDatatype) { - length = hints->segmentCount; - } else { - length = 1; + if(offset >= (rank+1) * hints->blockSize){ + /* we shall write only once per transferSize */ + /* printf("FAKE access %d %lld\n", rank, offset); */ + return hints->transferSize; + } + length = hints->segmentCount; + MPI_CHECK(MPI_File_set_view(mfd->fd, offset, + mfd->contigType, + mfd->fileType, + "native", + (MPI_Info) MPI_INFO_NULL), "cannot set file view"); + /* printf("ACCESS %d %lld -> %lld\n", rank, offset, length); */ + }else{ + length = 1; } if (hints->collective) { /* individual, collective call */ @@ -458,7 +478,7 @@ static IOR_offset_t MPIIO_Xfer(int access, aiori_fd_t * fdp, IOR_size_t * buffer } } } - return (length); + return hints->transferSize; } /* @@ -485,11 +505,12 @@ static void MPIIO_Close(aiori_fd_t *fdp, aiori_mod_opt_t * module_options) MPI_CHECK(MPI_File_close(& mfd->fd), "cannot close file"); } if (param->useFileView == TRUE) { - /* - * need to free the datatype, so done in the close process - */ - MPI_CHECK(MPI_Type_free(& mfd->fileType), "cannot free MPI file datatype"); - 
MPI_CHECK(MPI_Type_free(& mfd->transferType), "cannot free MPI transfer datatype"); + /* + * need to free the datatype, so done in the close process + */ + MPI_CHECK(MPI_Type_free(& mfd->fileType), "cannot free MPI file datatype"); + MPI_CHECK(MPI_Type_free(& mfd->transferType), "cannot free MPI transfer datatype"); + MPI_CHECK(MPI_Type_free(& mfd->contigType), "cannot free type"); } free(fdp); } diff --git a/src/ior.c b/src/ior.c index 38ae88f..a591b18 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1026,9 +1026,7 @@ static void InitTests(IOR_test_t *tests) static void XferBuffersSetup(IOR_io_buffers* ioBuffers, IOR_param_t* test, int pretendRank) { - /* MPI-IO driver when doing noncontiguous I/O will construct an access - * pattern that describes the entire strided access in a single go */ - ioBuffers->buffer = aligned_buffer_alloc(test->transferSize*test->segmentCount, test->gpuMemoryFlags); + ioBuffers->buffer = aligned_buffer_alloc(test->transferSize, test->gpuMemoryFlags); } /* From a436395570ad8f162d62ee7f0e31a11649ec99fc Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Thu, 18 Mar 2021 21:42:50 +0100 Subject: [PATCH 142/154] Support random data generation for memory pattern in utilities. (#348) * Support random data generation in utilities. Update first 8 byte element in each 4k block on updates to defy dedup. * Incorporate different packet types into mdtest/md-workbench. * Integrated utilities memory pattern tools into IOR. Now all tools use the same patterns. * Added IOR long option for compatibility between IOR and other tools. * Added new tests for random buffers. 
--- src/ior-output.c | 2 +- src/ior.c | 146 ++--------------------------------------- src/ior.h | 14 +--- src/iordef.h | 6 ++ src/md-workbench.c | 30 +++++---- src/mdtest.c | 23 ++++--- src/parse_options.c | 6 +- src/utilities.c | 102 +++++++++++++++++----------- src/utilities.h | 7 +- testing/basic-tests.sh | 5 ++ testing/test-lib.sh | 3 + 11 files changed, 124 insertions(+), 220 deletions(-) diff --git a/src/ior-output.c b/src/ior-output.c index d60cbdb..d1c842e 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -370,7 +370,7 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("randomOffset", test->randomOffset); PrintKeyValInt("checkWrite", test->checkWrite); PrintKeyValInt("checkRead", test->checkRead); - PrintKeyValInt("storeFileOffset", test->storeFileOffset); + PrintKeyValInt("dataPacketType", test->dataPacketType); PrintKeyValInt("keepFile", test->keepFile); PrintKeyValInt("keepFileWithError", test->keepFileWithError); PrintKeyValInt("warningAsErrors", test->warningAsErrors); diff --git a/src/ior.c b/src/ior.c index a591b18..60fb517 100755 --- a/src/ior.c +++ b/src/ior.c @@ -409,81 +409,7 @@ static size_t CompareData(void *expectedBuffer, size_t size, IOR_offset_t transferCount, IOR_param_t *test, IOR_offset_t offset, int fillrank, int access) { assert(access == WRITECHECK || access == READCHECK); - - char testFileName[MAX_PATHLEN]; - char * bufferLabel1 = "Expected: "; - char * bufferLabel2 = "Actual: "; - size_t i, j, length; - size_t errorCount = 0; - - IOR_offset_t offsetSignature = 0; - unsigned long long hi, lo, val; // for data verification - hi = ((unsigned long long)fillrank) << 32; - lo = (unsigned long long)test->timeStampSignatureValue; - if (test->storeFileOffset){ - offsetSignature = offset; - } - - unsigned long long *testbuf = (unsigned long long *)expectedBuffer; - - length = size / sizeof(IOR_size_t); - if (verbose >= VERBOSE_3) { - fprintf(out_logfile, - "[%d] At file byte offset %lld, comparing %llu-byte transfer\n", - rank, 
(long long) offset, (long long)size); - } - - int incompressibleSeed = test->setTimeStampSignature + fillrank; - for (i = 0; i < length; i++) { - if(test->dataPacketType == incompressible ) { - /* same logic as in FillIncompressibleBuffer() */ - /* WARNING: make sure that both functions are changed at the same time */ - hi = ((unsigned long long) rand_r(& incompressibleSeed) << 32); - lo = (unsigned long long) rand_r(& incompressibleSeed); - val = hi | lo; - }else{ - if ((i % 2) == 0) { - /* evens contain MPI rank and time in seconds */ - val = hi | lo; - } else { - /* odds contain offset */ - val = offsetSignature + (i * sizeof(unsigned long long)); - } - } - if (testbuf[i] != val) { - errorCount++; - if (verbose >= VERBOSE_2) { - fprintf(out_logfile, - "[%d] At transfer buffer #%lld, index #%lld (file byte offset %lld):\n", - rank, transferCount - 1, (long long)i, - (long long) offset + - (IOR_size_t) (i * sizeof(IOR_size_t))); - fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel1); - fprintf(out_logfile, "%016llx\n", val); - fprintf(out_logfile, "[%d] %s0x", rank, bufferLabel2); - fprintf(out_logfile, "%016llx\n", testbuf[i]); - } - - } else if (verbose >= VERBOSE_5) { - fprintf(out_logfile, - "[%d] PASSED offset = %llu bytes, transfer %lld\n", - rank, ((i * sizeof(unsigned long long)) + offset), transferCount); - fprintf(out_logfile, "[%d] GOOD %s0x", rank, bufferLabel1); - fprintf(out_logfile, "%016llx ", val); - fprintf(out_logfile, "\n[%d] GOOD %s0x", rank, bufferLabel2); - fprintf(out_logfile, "%016llx ", testbuf[i]); - fprintf(out_logfile, "\n"); - } - } - if (errorCount > 0 && verbose >= VERBOSE_1) { - GetTestFileName(testFileName, test); - EWARNF("[%d] FAILED comparison of buffer in file %s during transfer %lld offset %lld containing %d-byte ints (%zd errors)", - rank, testFileName, transferCount, offset, (int)sizeof(unsigned long long int),errorCount); - }else if(verbose >= VERBOSE_2){ - fprintf(out_logfile, "[%d] comparison successful during transfer 
%lld offset %lld\n", rank, transferCount, offset); - } - - return (errorCount); + return verify_memory_pattern(offset, expectedBuffer, transferCount, test->setTimeStampSignature, fillrank, test->dataPacketType); } /* @@ -610,61 +536,6 @@ static void DistributeHints(MPI_Comm com) } } -/* - * Fill buffer, which is transfer size bytes long, with known 8-byte long long - * int values. In even-numbered 8-byte long long ints, store MPI task in high - * bits and timestamp signature in low bits. In odd-numbered 8-byte long long - * ints, store transfer offset. If storeFileOffset option is used, the file - * (not transfer) offset is stored instead. - */ -static unsigned int reseed_incompressible_prng = TRUE; - -static void -FillIncompressibleBuffer(void* buffer, IOR_param_t * test) -{ - size_t i; - unsigned long long hi, lo; - unsigned long long *buf = (unsigned long long *)buffer; - - /* In order for write checks to work, we have to restart the pseudo random sequence */ - /* This function has the same logic as CompareData() */ - /* WARNING: make sure that both functions are changed at the same time */ - if(reseed_incompressible_prng == TRUE) { - test->incompressibleSeed = test->setTimeStampSignature + rank; /* We copied seed into timestampSignature at initialization, also add the rank to add randomness between processes */ - reseed_incompressible_prng = FALSE; - } - for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) { - hi = ((unsigned long long) rand_r(&test->incompressibleSeed) << 32); - lo = (unsigned long long) rand_r(&test->incompressibleSeed); - buf[i] = hi | lo; - } -} - -static void -FillBuffer(void *buffer, - IOR_param_t * test, unsigned long long offset, int fillrank) -{ - size_t i; - unsigned long long hi, lo; - unsigned long long *buf = (unsigned long long *)buffer; - - if(test->dataPacketType == incompressible ) { /* Make for some non compressible buffers with randomish data */ - FillIncompressibleBuffer(buffer, test); - } else { - hi = 
((unsigned long long)fillrank) << 32; - lo = (unsigned long long)test->timeStampSignatureValue; - for (i = 0; i < test->transferSize / sizeof(unsigned long long); i++) { - if ((i % 2) == 0) { - /* evens contain MPI rank and time in seconds */ - buf[i] = hi | lo; - } else { - /* odds contain offset */ - buf[i] = offset + (i * sizeof(unsigned long long)); - } - } - } -} - /* * Return string describing machine name and type. */ @@ -1305,8 +1176,7 @@ static void TestIoSys(IOR_test_t *test) params->timeStampSignatureValue = (unsigned int) params->setTimeStampSignature; } XferBuffersSetup(&ioBuffers, params, pretendRank); - reseed_incompressible_prng = TRUE; // reset pseudo random generator, necessary to guarantee the next call to FillBuffer produces the same value as it is right now - + /* Initial time stamp */ startTime = GetTimeStamp(); @@ -1349,7 +1219,8 @@ static void TestIoSys(IOR_test_t *test) (¶ms->timeStampSignatureValue, 1, MPI_UNSIGNED, 0, testComm), "cannot broadcast start time value"); - FillBuffer(ioBuffers.buffer, params, 0, pretendRank); + generate_memory_pattern((char*) ioBuffers.buffer, params->transferSize, params->setTimeStampSignature, pretendRank, params->dataPacketType); + /* use repetition count for number of multiple files */ if (params->multiFile) params->repCounter = rep; @@ -1432,8 +1303,7 @@ static void TestIoSys(IOR_test_t *test) } rankOffset = (2 * shift) % params->numTasks; } - reseed_incompressible_prng = TRUE; /* Re-Seed the PRNG to get same sequence back, if random */ - + GetTestFileName(testFileName, params); params->open = WRITECHECK; fd = backend->open(testFileName, IOR_RDONLY, params->backend_options); @@ -1643,7 +1513,7 @@ static void ValidateTests(IOR_param_t * test, MPI_Comm com) ERR("random offset and constant reorder tasks specified with single-shared-file. 
Choose one and resubmit"); if (test->randomOffset && test->checkRead && test->randomSeed == -1) ERR("random offset with read check option requires to set the random seed"); - if (test->randomOffset && test->storeFileOffset) + if (test->randomOffset && test->dataPacketType == DATA_OFFSET) ERR("random offset not available with store file offset option)"); if ((strcasecmp(test->api, "HDF5") == 0) && test->randomOffset) ERR("random offset not available with HDF5"); @@ -1759,9 +1629,7 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_ if (access == WRITE) { /* fills each transfer with a unique pattern * containing the offset into the file */ - if (test->storeFileOffset == TRUE) { - FillBuffer(buffer, test, offset, pretendRank); - } + update_write_memory_pattern(offset, ioBuffers->buffer, transfer, test->setTimeStampSignature, pretendRank, test->dataPacketType); amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); if (amtXferred != transfer) ERR("cannot write to file"); diff --git a/src/ior.h b/src/ior.h index c58b198..2effa9a 100755 --- a/src/ior.h +++ b/src/ior.h @@ -46,17 +46,6 @@ #endif /* not MPI_FILE_NULL */ #define ISPOWEROFTWO(x) ((x != 0) && !(x & (x - 1))) -/******************** DATA Packet Type ***************************************/ -/* Holds the types of data packets: generic, offset, timestamp, incompressible */ - -enum PACKET_TYPE -{ - generic = 0, /* No packet type specified */ - timestamp=1, /* Timestamp packet set with -l */ - offset=2, /* Offset packet set with -l */ - incompressible=3 /* Incompressible packet set with -l */ - -}; typedef enum{ IOR_MEMORY_TYPE_CPU = 0, @@ -142,7 +131,6 @@ typedef struct int summary_every_test; /* flag to print summary every test, not just at end */ int uniqueDir; /* use unique directory for each fpp */ int useExistingTestFile; /* do not delete test file before access */ - int storeFileOffset; /* use file offset as stored signature */ int 
deadlineForStonewalling; /* max time in seconds to run any test phase */ int stoneWallingWearOut; /* wear out the stonewalling, once the timeout is over, each process has to write the same amount */ uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */ @@ -161,7 +149,7 @@ typedef struct char * memoryPerNodeStr; /* for parsing */ char * testscripts; /* for parsing */ char * buffer_type; /* for parsing */ - enum PACKET_TYPE dataPacketType; /* The type of data packet. */ + ior_dataPacketType_e dataPacketType; /* The type of data packet. */ void * backend_options; /* Backend-specific options */ diff --git a/src/iordef.h b/src/iordef.h index eb10306..79f98f1 100755 --- a/src/iordef.h +++ b/src/iordef.h @@ -19,6 +19,12 @@ #include #include +typedef enum { + DATA_TIMESTAMP, /* Will not include any offset, hence each buffer will be the same */ + DATA_OFFSET, + DATA_INCOMPRESSIBLE /* Will include the offset as well */ +} ior_dataPacketType_e; + #ifdef _WIN32 # define _CRT_SECURE_NO_WARNINGS # define _CRT_RAND_S diff --git a/src/md-workbench.c b/src/md-workbench.c index fc51800..4b3372d 100644 --- a/src/md-workbench.c +++ b/src/md-workbench.c @@ -85,6 +85,8 @@ struct benchmark_options{ mdworkbench_results_t * results; // the results + ior_dataPacketType_e dataPacketType; + char * packetTypeStr; int offset; int iterations; int global_iteration; @@ -117,7 +119,7 @@ struct benchmark_options{ int rank; int size; int verify_read; - int random_buffer_offset; + int random_seed; float relative_waiting_factor; int adaptive_waiting_mode; @@ -140,12 +142,13 @@ void init_options(){ .interface = "POSIX", .prefix = "./out", .num = 1000, - .random_buffer_offset = -1, + .random_seed = -1, .precreate = 3000, .dset_count = 10, .offset = 1, .iterations = 3, .file_size = 3901, + .packetTypeStr = "t", .run_info_file = "md-workbench.status"}; } @@ -553,7 +556,7 @@ void run_precreate(phase_stat_t * s, int current_index){ } char * 
buf = aligned_buffer_alloc(o.file_size, o.gpu_memory_flags); - generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, o.rank); + generate_memory_pattern(buf, o.file_size, o.random_seed, o.rank, o.dataPacketType); double op_timer; // timer for individual operations size_t pos = -1; // position inside the individual measurement array double op_time; @@ -569,7 +572,7 @@ void run_precreate(phase_stat_t * s, int current_index){ if (NULL == aiori_fh){ FAIL("Unable to open file %s", obj_name); } - update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, o.rank); + update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_seed, o.rank, o.dataPacketType); if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; }else{ @@ -650,7 +653,7 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ } if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) { if(o.verify_read){ - if(verify_memory_pattern(prevFile * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, readRank) == 0){ + if(verify_memory_pattern(prevFile * o.dset_count + d, buf, o.file_size, o.random_seed, readRank, o.dataPacketType) == 0){ s->obj_read.suc++; }else{ s->obj_read.err++; @@ -691,9 +694,9 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){ op_timer = GetTimeStamp(); aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options); if (NULL != aiori_fh){ - generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, writeRank); - update_write_memory_pattern(newFileIndex * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, writeRank); - + generate_memory_pattern(buf, o.file_size, o.random_seed, writeRank, o.dataPacketType); + update_write_memory_pattern(newFileIndex * o.dset_count + d, buf, o.file_size, o.random_seed, writeRank, o.dataPacketType); + 
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) { s->obj_create.suc++; }else{ @@ -808,7 +811,7 @@ static option_help options [] = { {0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all}, {'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate}, {'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count}, - {'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset}, + {'G', NULL, "Timestamp/Random seed for access pattern, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_seed}, {'o', NULL, "Output directory", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix}, {'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output}, //{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory}, @@ -823,6 +826,7 @@ static option_help options [] = { {'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer}, {'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out}, {'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read}, + {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & o.packetTypeStr}, {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "start-item", "The iteration number of the item to start 
with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number}, {0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats}, @@ -905,6 +909,8 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c ERR("Backend doesn't support MDWorbench"); } o.backend_options = airoi_update_module_options(o.backend, global_options); + + o.dataPacketType = parsePacketType(o.packetTypeStr[0]); if (!(o.phase_cleanup || o.phase_precreate || o.phase_benchmark)){ // enable all phases @@ -915,9 +921,9 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c ERR("Invalid options, if running only the benchmark phase using -2 with stonewall option then use stonewall wear-out"); exit(1); } - if( o.random_buffer_offset == -1 ){ - o.random_buffer_offset = time(NULL); - MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, o.com); + if( o.random_seed == -1 ){ + o.random_seed = time(NULL); + MPI_Bcast(& o.random_seed, 1, MPI_INT, 0, o.com); } if(o.backend->xfer_hints){ diff --git a/src/mdtest.c b/src/mdtest.c index 72985c4..7ea2667 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -145,6 +145,7 @@ typedef struct { int print_time; int print_rate_and_time; int print_all_proc; + ior_dataPacketType_e dataPacketType; int random_seed; int shared_file; int files_only; @@ -392,12 +393,8 @@ static void create_file (const char *path, uint64_t itemNum) { if (o.write_bytes > 0) { VERBOSE(3,5,"create_remove_items_helper: write..." ); - /* - * According to Bill Loewe, writes are only done one time, so they are always at - * offset 0 (zero). 
- */ o.hints.fsyncPerWrite = o.sync_file; - update_write_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); + update_write_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { EWARNF("unable to write file %s", curr_item); @@ -408,7 +405,7 @@ static void create_file (const char *path, uint64_t itemNum) { if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { EWARNF("unable to verify write (read/back) file %s", curr_item); } - o.verification_error += verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); + o.verification_error += verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); } } @@ -725,15 +722,12 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { EWARNF("unable to read file %s", item); continue; } + int pretend_rank = (2 * o.nstride + rank) % o.size; if(o.verify_read){ - int pretend_rank = (2 * o.nstride + rank) % o.size; if (o.shared_file) { pretend_rank = rank; } - o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank); - }else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){ - // do a lightweight check, which cost is neglectable - o.verification_error++; + o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank, o.dataPacketType); } } @@ -2188,6 +2182,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * char apiStr[1024]; sprintf(apiStr, "API for I/O [%s]", APIs); memset(& o.hints, 0, sizeof(o.hints)); + + 
char * packetType = "t"; option_help options [] = { {'a', NULL, apiStr, OPTION_OPTIONAL_ARGUMENT, 's', & o.api}, @@ -2234,6 +2230,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time}, + {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & packetType}, {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, {0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV}, @@ -2250,6 +2247,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * free(global_options->modules); free(global_options); + + o.dataPacketType = parsePacketType(packetType[0]); MPI_Comm_rank(testComm, &rank); MPI_Comm_size(testComm, &o.size); @@ -2420,7 +2419,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * /* allocate and initialize write buffer with # */ if (o.write_bytes > 0) { o.write_buffer = aligned_buffer_alloc(o.write_bytes, o.gpu_memory_flags); - generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank); + generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); } /* setup directory path to work in */ diff --git a/src/parse_options.c b/src/parse_options.c index 605de91..9168778 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -222,8 +222,8 @@ void 
DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt params->verbose = atoi(value); } else if (strcasecmp(option, "settimestampsignature") == 0) { params->setTimeStampSignature = atoi(value); - } else if (strcasecmp(option, "storefileoffset") == 0) { - params->storeFileOffset = atoi(value); + } else if (strcasecmp(option, "dataPacketType") == 0) { + params->dataPacketType = parsePacketType(value[0]); } else if (strcasecmp(option, "uniqueDir") == 0) { params->uniqueDir = atoi(value); } else if (strcasecmp(option, "useexistingtestfile") == 0) { @@ -450,7 +450,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'j', NULL, "outlierThreshold -- warn on outlier N seconds from mean", OPTION_OPTIONAL_ARGUMENT, 'd', & params->outlierThreshold}, {'k', NULL, "keepFile -- don't remove the test file(s) on program exit", OPTION_FLAG, 'd', & params->keepFile}, {'K', NULL, "keepFileWithError -- keep error-filled file(s) after data-checking", OPTION_FLAG, 'd', & params->keepFileWithError}, - {'l', NULL, "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & params->buffer_type}, + {'l', "dataPacketType", "datapacket type-- type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & params->buffer_type}, {'m', NULL, "multiFile -- use number of reps (-i) for multiple file count", OPTION_FLAG, 'd', & params->multiFile}, {'M', NULL, "memoryPerNode -- hog memory on the node (e.g.: 2g, 75%)", OPTION_OPTIONAL_ARGUMENT, 's', & params->memoryPerNodeStr}, {'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, diff --git a/src/utilities.c b/src/utilities.c index 5972b27..c2ec6c9 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -75,46 +75,74 @@ enum OutputFormat_t outputFormat; /***************************** F U N C T I O N S 
******************************/ -void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank){ - if(bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible - ((uint64_t*) buf)[0] = item; - }else{ - buf[0] = (char) item; +void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType){ + if(dataPacketType == DATA_TIMESTAMP || bytes < 8) return; + int k=1; + uint64_t * buffi = (uint64_t*) buf; + for(size_t i=0; i < bytes/sizeof(uint64_t); i+=512, k++){ + buffi[i] = ((uint32_t) item * k) | ((uint64_t) pretendRank) << 32; } } -void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank){ +void generate_memory_pattern(char * buf, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType){ uint64_t * buffi = (uint64_t*) buf; // first half of 64 bits use the rank - const uint64_t ranki = ((uint64_t)(rank + 1) << 32) + buff_offset; const size_t size = bytes / 8; - // the first 8 bytes are set to item number - for(size_t i=1; i < size; i++){ - buffi[i] = (i + 1) + ranki; + // the first 8 bytes of each 4k block are updated at runtime + unsigned seed = rand_seed + pretendRank; + for(size_t i=0; i < size; i++){ + switch(dataPacketType){ + case(DATA_INCOMPRESSIBLE):{ + uint64_t hi = ((uint64_t) rand_r(& seed) << 32); + uint64_t lo = (uint64_t) rand_r(& seed); + buffi[i] = hi | lo; + break; + }case(DATA_OFFSET):{ + }case(DATA_TIMESTAMP):{ + buffi[i] = ((uint64_t) pretendRank) << 32 | rand_seed + i; + break; + } + } } - for(size_t i=(bytes/8)*8; i < bytes; i++){ + + for(size_t i=size*8; i < bytes; i++){ buf[i] = (char) i; } } -int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank){ +int verify_memory_pattern(uint64_t item, char * buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType){ int error = 
0; // always read all data to ensure that performance numbers stay the same - if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){ - error = 1; - } - uint64_t * buffi = (uint64_t*) buffer; - // first half of 64 bits use the rank, here need to apply rank shifting - uint64_t rank_mod = ((uint64_t)(pretendRank + 1) << 32) + buff_offset; + // the first 8 bytes are set to item number - for(size_t i=1; i < bytes/8; i++){ - uint64_t exp = (i + 1) + rank_mod; + int k=1; + unsigned seed = rand_seed + pretendRank; + const size_t size = bytes / 8; + for(size_t i=0; i < size; i++){ + uint64_t exp; + + switch(dataPacketType){ + case(DATA_INCOMPRESSIBLE):{ + uint64_t hi = ((uint64_t) rand_r(& seed) << 32); + uint64_t lo = (uint64_t) rand_r(& seed); + exp = hi | lo; + break; + }case(DATA_OFFSET):{ + }case(DATA_TIMESTAMP):{ + exp = ((uint64_t) pretendRank) << 32 | rand_seed + i; + break; + } + } + if(i % 512 == 0 && dataPacketType != DATA_TIMESTAMP){ + exp = ((uint32_t) item * k) | ((uint64_t) pretendRank) << 32; + k++; + } if(buffi[i] != exp){ error = 1; } } - for(size_t i=(bytes/8)*8; i < bytes; i++){ + for(size_t i=size*8; i < bytes; i++){ if(buffer[i] != (char) i){ error = 1; } @@ -175,28 +203,28 @@ size_t NodeMemoryStringToBytes(char *size_str) return mem / 100 * percent; } +ior_dataPacketType_e parsePacketType(char t){ + switch(t) { + case '\0': return DATA_TIMESTAMP; + case 'i': /* Incompressible */ + return DATA_INCOMPRESSIBLE; + case 't': /* timestamp */ + return DATA_TIMESTAMP; + case 'o': /* offset packet */ + return DATA_OFFSET; + default: + ERRF("Unknown packet type \"%c\"; generic assumed\n", t); + return DATA_OFFSET; + } +} + void updateParsedOptions(IOR_param_t * options, options_all_t * global_options){ if (options->setTimeStampSignature){ options->incompressibleSeed = options->setTimeStampSignature; } if (options->buffer_type && options->buffer_type[0] != 0){ - switch(options->buffer_type[0]) { - case 'i': /* 
Incompressible */ - options->dataPacketType = incompressible; - break; - case 't': /* timestamp */ - options->dataPacketType = timestamp; - break; - case 'o': /* offset packet */ - options->storeFileOffset = TRUE; - options->dataPacketType = offset; - break; - default: - fprintf(out_logfile, - "Unknown argument for -l %s; generic assumed\n", options->buffer_type); - break; - } + options->dataPacketType = parsePacketType(options->buffer_type[0]); } if (options->memoryPerNodeStr){ options->memoryPerNode = NodeMemoryStringToBytes(options->memoryPerNodeStr); diff --git a/src/utilities.h b/src/utilities.h index 97dc2c0..7e9f704 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -35,10 +35,11 @@ extern enum OutputFormat_t outputFormat; /* format of the output */ void* safeMalloc(uint64_t size); void set_o_direct_flag(int *fd); -void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank); -void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank); +ior_dataPacketType_e parsePacketType(char t); +void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int rand_seed, int rank, ior_dataPacketType_e dataPacketType); +void generate_memory_pattern(char * buf, size_t bytes, int rand_seed, int rank, ior_dataPacketType_e dataPacketType); /* check a data buffer, @return 0 if all is correct, otherwise 1 */ -int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank); +int verify_memory_pattern(uint64_t item, char * buffer, size_t bytes, int rand_seed, int pretendRank, ior_dataPacketType_e dataPacketType); char *CurrentTimeString(void); int Regex(char *, char *); diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index 4377511..78663f3 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -44,4 +44,9 @@ MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 --read-only --run-info-file MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -2 -W -w 1 
--read-only --run-info-file=mdw.tst --print-detailed-stats MDWB 3 -a POSIX -O=1 -D=2 -G=10 -P=4 -I=3 -3 -W -w 1 --run-info-file=mdw.tst --print-detailed-stats +MDWB 2 -a POSIX -O=1 -D=1 -G=3 -P=2 -I=2 -R=2 -X -S 772 --dataPacketType=t +DELETE=0 +MDWB 2 -a POSIX -D=1 -P=2 -I=2 -R=2 -X -G=2252 -S 772 --dataPacketType=i -1 +MDWB 2 -a POSIX -D=1 -P=2 -I=2 -R=2 -X -G=2252 -S 772 --dataPacketType=i -2 +MDWB 2 -a POSIX -D=1 -P=2 -I=2 -R=2 -X -G=2252 -S 772 --dataPacketType=i -3 END diff --git a/testing/test-lib.sh b/testing/test-lib.sh index b331eda..a7e23fb 100644 --- a/testing/test-lib.sh +++ b/testing/test-lib.sh @@ -90,6 +90,9 @@ function MDTEST(){ function MDWB(){ RANKS=$1 shift + if [[ "$DELETE" != "0" ]] ; then + rm -rf "${IOR_TMP}/md-workbench" + fi WHAT="${IOR_MPIRUN} $RANKS ${IOR_BIN_DIR}/md-workbench ${@} -o ${IOR_TMP}/md-workbench ${MDWB_EXTRA}" LOG="${IOR_OUT}/test_out.$I" $WHAT 1>"$LOG" 2>&1 From 84a169c7c2caa2af2716afc2e80e3c00ebf54464 Mon Sep 17 00:00:00 2001 From: GUAN Xin Date: Sat, 20 Mar 2021 23:34:43 +0800 Subject: [PATCH 143/154] Clean up timer manipulation --- src/mdtest.c | 198 ++++++++++++++++++++------------------------------- 1 file changed, 78 insertions(+), 120 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 7ea2667..187a532 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -223,18 +223,6 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ... } } -void offset_timers(double * t, int tcount) { - double toffset; - int i; - - VERBOSE(1,-1,"V-1: Entering offset_timers..." 
); - - toffset = GetTimeStamp() - t[tcount]; - for (i = 0; i < tcount+1; i++) { - t[i] += toffset; - } -} - void parse_dirpath(char *dirpath_arg) { char * tmp, * token; char delimiter_string[3] = { '@', '\n', '\0' }; @@ -890,10 +878,10 @@ void rename_dir_test(const int dirs, const long dir_iter, const char *path, rank } } -static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, int t, double * times, double * tBefore){ - res->time[test] = times[t] - times[t-1]; - if(tBefore){ - res->time_before_barrier[test] = tBefore[t] - times[t-1]; +static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_t item_count, double t_start, double t_end, double t_end_before_barrier){ + res->time[test] = t_end - t_start; + if(isfinite(t_end_before_barrier)){ + res->time_before_barrier[test] = t_end_before_barrier - t_start; }else{ res->time_before_barrier[test] = res->time[test]; } @@ -905,8 +893,7 @@ static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_ void directory_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { int size; - double t[6] = {0}; - double tBefore[6] = {0}; + double t_start, t_end, t_end_before_barrier; char temp_path[MAX_PATHLEN]; mdtest_results_t * res = & o.summary_table[iteration]; @@ -914,12 +901,11 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran VERBOSE(1,-1,"Entering directory_test on %s", path ); - tBefore[0] = GetTimeStamp(); MPI_Barrier(testComm); - t[0] = GetTimeStamp(); /* create phase */ if(o.create_only) { + t_start = GetTimeStamp(); progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; progress->items_done = 0; progress->start_time = GetTimeStamp(); @@ -928,7 +914,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran if (o.unique_dir_per_task) { unique_dir_access(MK_UNI_DIR, temp_path); if (! 
o.time_unique_dir_overhead) { - offset_timers(t, 0); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -947,20 +933,21 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran } } progress->stone_wall_timer_seconds = 0; + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_DIR_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[1] = GetTimeStamp(); - phase_end(); - t[1] = GetTimeStamp(); - /* stat phase */ if (o.stat_only) { + t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { unique_dir_access(STAT_SUB_DIR, temp_path); if (! o.time_unique_dir_overhead) { - offset_timers(t, 1); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -975,21 +962,21 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran mdtest_stat(0, 1, dir_iter, temp_path, progress); } } + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_DIR_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[2] = GetTimeStamp(); - phase_end(); - t[2] = GetTimeStamp(); - if (o.rename_dirs && o.items > 1) { // moved close to execution - updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, 4, t, tBefore); - } + /* read phase */ if (o.read_only) { + t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { unique_dir_access(READ_SUB_DIR, temp_path); if (! 
o.time_unique_dir_overhead) { - offset_timers(t, 2); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1004,18 +991,21 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran ; /* N/A */ } } + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_DIR_READ_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[3] = GetTimeStamp(); - phase_end(); - t[3] = GetTimeStamp(); - if(o.rename_dirs){ + /* rename phase */ + if(o.rename_dirs && o.items > 1){ + t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { unique_dir_access(STAT_SUB_DIR, temp_path); if (! o.time_unique_dir_overhead) { - offset_timers(t, 1); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1025,22 +1015,21 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran rename_dir_test(1, dir_iter, temp_path, progress); } - } - tBefore[4] = GetTimeStamp(); - phase_end(); - - t[4] = GetTimeStamp(); - if (o.rename_dirs && o.items > 1) { // moved close to execution - updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, 4, t, tBefore); + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_DIR_RENAME_NUM, o.items, t_start, t_end, t_end_before_barrier); } + /* remove phase */ if (o.remove_only) { + t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { unique_dir_access(RM_SUB_DIR, temp_path); if (!o.time_unique_dir_overhead) { - offset_timers(t, 3); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1057,12 +1046,12 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran create_remove_items(0, 1, 0, 0, 
temp_path, 0, progress); } } + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[5] = GetTimeStamp(); - phase_end(); - t[5] = GetTimeStamp(); - if (o.remove_only) { if (o.unique_dir_per_task) { unique_dir_access(RM_UNI_DIR, temp_path); @@ -1073,27 +1062,10 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran VERBOSE(3,5,"directory_test: remove unique directories path is '%s'\n", temp_path ); } - if (o.unique_dir_per_task && ! o.time_unique_dir_overhead) { - offset_timers(t, 5); - } - - /* calculate times */ - if (o.create_only) { - updateResult(res, MDTEST_DIR_CREATE_NUM, o.items, 1, t, tBefore); - } - if (o.stat_only) { - updateResult(res, MDTEST_DIR_STAT_NUM, o.items, 2, t, tBefore); - } - if (o.read_only) { - updateResult(res, MDTEST_DIR_READ_NUM, o.items, 3, t, tBefore); - } - if (o.remove_only) { - updateResult(res, MDTEST_DIR_REMOVE_NUM, o.items, 5, t, tBefore); - } - VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[0]); - VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[1]); - VERBOSE(1,-1," Directory rename : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[MDTEST_DIR_RENAME_NUM]); - VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", t[5] - t[4], o.summary_table[iteration].rate[4]); + VERBOSE(1,-1," Directory creation: %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_CREATE_NUM], o.summary_table[iteration].rate[MDTEST_DIR_CREATE_NUM]); + VERBOSE(1,-1," Directory stat : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_STAT_NUM], o.summary_table[iteration].rate[MDTEST_DIR_STAT_NUM]); + VERBOSE(1,-1," Directory rename : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_RENAME_NUM], o.summary_table[iteration].rate[MDTEST_DIR_RENAME_NUM]); + 
VERBOSE(1,-1," Directory removal : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_DIR_REMOVE_NUM], o.summary_table[iteration].rate[MDTEST_DIR_REMOVE_NUM]); } /* Returns if the stonewall was hit */ @@ -1123,7 +1095,7 @@ int updateStoneWallIterations(int iteration, uint64_t items_done, double tstart, return hit; } -void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t){ +void file_test_create(const int iteration, const int ntasks, const char *path, rank_progress_t * progress, double *t_start){ char temp_path[MAX_PATHLEN]; for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -1132,7 +1104,7 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r unique_dir_access(MK_UNI_DIR, temp_path); VERBOSE(5,5,"operating on %s", temp_path); if (! o.time_unique_dir_overhead) { - offset_timers(t, 0); + *t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1153,7 +1125,7 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r // hit the stonewall uint64_t max_iter = 0; uint64_t items_done = progress->items_done + dir_iter * o.items_per_dir; - int hit = updateStoneWallIterations(iteration, items_done, t[0], & max_iter); + int hit = updateStoneWallIterations(iteration, items_done, *t_start, & max_iter); progress->items_start = items_done; progress->items_per_dir = max_iter; if (hit){ @@ -1177,23 +1149,27 @@ void file_test_create(const int iteration, const int ntasks, const char *path, r void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) { int size; - double t[5] = {0}; - double tBefore[5] = {0}; + double t_start, t_end, t_end_before_barrier; char temp_path[MAX_PATHLEN]; + mdtest_results_t * res = & o.summary_table[iteration]; + MPI_Comm_size(testComm, &size); VERBOSE(3,5,"Entering file_test on %s", path); - tBefore[0] = 
GetTimeStamp(); MPI_Barrier(testComm); - t[0] = GetTimeStamp(); /* create phase */ if (o.create_only ) { + t_start = GetTimeStamp(); progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; progress->items_done = 0; progress->start_time = GetTimeStamp(); - file_test_create(iteration, ntasks, path, progress, t); + file_test_create(iteration, ntasks, path, progress, &t_start); + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, t_start, t_end, t_end_before_barrier); }else{ if (o.stoneWallingStatusFile){ int64_t expected_items; @@ -1218,18 +1194,15 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } } - tBefore[1] = GetTimeStamp(); - phase_end(); - t[1] = GetTimeStamp(); - /* stat phase */ if (o.stat_only ) { + t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { unique_dir_access(STAT_SUB_DIR, temp_path); if (!o.time_unique_dir_overhead) { - offset_timers(t, 1); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1240,20 +1213,21 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* stat files */ mdtest_stat((o.random_seed > 0 ? 1 : 0), 0, dir_iter, temp_path, progress); } + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_FILE_STAT_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[2] = GetTimeStamp(); - phase_end(); - t[2] = GetTimeStamp(); - /* read phase */ if (o.read_only ) { + t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); if (o.unique_dir_per_task) { unique_dir_access(READ_SUB_DIR, temp_path); if (! 
o.time_unique_dir_overhead) { - offset_timers(t, 2); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1268,13 +1242,15 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro mdtest_read(0,0, dir_iter, temp_path); } } + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_FILE_READ_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[3] = GetTimeStamp(); - phase_end(); - t[3] = GetTimeStamp(); - + /* remove phase */ if (o.remove_only) { + t_start = GetTimeStamp(); progress->items_start = 0; for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ @@ -1282,7 +1258,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro if (o.unique_dir_per_task) { unique_dir_access(RM_SUB_DIR, temp_path); if (! o.time_unique_dir_overhead) { - offset_timers(t, 3); + t_start = GetTimeStamp(); } } else { sprintf( temp_path, "%s/%s", o.testdir, path ); @@ -1299,11 +1275,12 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro create_remove_items(0, 0, 0, 0, temp_path, 0, progress); } } + t_end_before_barrier = GetTimeStamp(); + phase_end(); + t_end = GetTimeStamp(); + updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, t_start, t_end, t_end_before_barrier); } - tBefore[4] = GetTimeStamp(); - phase_end(); - t[4] = GetTimeStamp(); if (o.remove_only) { if (o.unique_dir_per_task) { unique_dir_access(RM_UNI_DIR, temp_path); @@ -1314,36 +1291,17 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro VERBOSE(3,5,"file_test: rm unique directories path is '%s'", temp_path ); } - if (o.unique_dir_per_task && ! 
o.time_unique_dir_overhead) { - offset_timers(t, 4); - } - if(o.num_dirs_in_tree_calc){ /* this is temporary fix needed when using -n and -i together */ o.items *= o.num_dirs_in_tree_calc; } - mdtest_results_t * res = & o.summary_table[iteration]; - /* calculate times */ - if (o.create_only) { - updateResult(res, MDTEST_FILE_CREATE_NUM, o.items, 1, t, tBefore); - } - if (o.stat_only) { - updateResult(res, MDTEST_FILE_STAT_NUM, o.items, 2, t, tBefore); - } - if (o.read_only) { - updateResult(res, MDTEST_FILE_READ_NUM, o.items, 3, t, tBefore); - } - if (o.remove_only) { - updateResult(res, MDTEST_FILE_REMOVE_NUM, o.items, 4, t, tBefore); - } - - VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", t[1] - t[0], o.summary_table[iteration].rate[4]); + VERBOSE(1,-1," File creation : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].rate[4]); if(o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM]){ VERBOSE(1,-1," File creation (stonewall): %14.3f sec, %14.3f ops/sec", o.summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM], o.summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM]); } - VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", t[2] - t[1], o.summary_table[iteration].rate[5]); - VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", t[3] - t[2], o.summary_table[iteration].rate[6]); - VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", t[4] - t[3], o.summary_table[iteration].rate[7]); + VERBOSE(1,-1," File stat : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_STAT_NUM], o.summary_table[iteration].rate[5]); + VERBOSE(1,-1," File read : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_READ_NUM], o.summary_table[iteration].rate[6]); + VERBOSE(1,-1," File removal : %14.3f sec, %14.3f ops/sec", res->time[MDTEST_FILE_REMOVE_NUM], o.summary_table[iteration].rate[7]); } char const * mdtest_test_name(int i){ From 2250e6c5f9360f93cc9b58352832138c706e6b52 Mon Sep 17 
00:00:00 2001 From: GUAN Xin Date: Sat, 20 Mar 2021 23:50:04 +0800 Subject: [PATCH 144/154] Prologue and epilogue support for benchmark phases --- src/mdtest.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/src/mdtest.c b/src/mdtest.c index 187a532..14c2063 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -166,6 +166,8 @@ typedef struct { int global_dir_layout; #endif /* HAVE_LUSTRE_LUSTREAPI */ char * saveRankDetailsCSV; /* save the details about the performance to a file */ + const char *prologue; + const char *epilogue; mdtest_results_t * summary_table; pid_t pid; @@ -261,6 +263,16 @@ static void prep_testdir(int j, int dir_iter){ pos += sprintf(& o.testdir[pos], ".%d-%d", j, dir_iter); } +static void phase_prepare(){ + if (*o.prologue){ + VERBOSE(0,5,"calling prologue: \"%s\"", o.prologue); + system(o.prologue); + if (o.barriers) { + MPI_Barrier(testComm); + } + } +} + static void phase_end(){ if (o.call_sync){ if(! o.backend->sync){ @@ -268,6 +280,10 @@ static void phase_end(){ } o.backend->sync(o.backend_options); } + if (*o.epilogue){ + VERBOSE(0,5,"calling epilogue: \"%s\"", o.epilogue); + system(o.epilogue); + } if (o.barriers) { MPI_Barrier(testComm); @@ -905,6 +921,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* create phase */ if(o.create_only) { + phase_prepare(); t_start = GetTimeStamp(); progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; progress->items_done = 0; @@ -941,6 +958,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* stat phase */ if (o.stat_only) { + phase_prepare(); t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -970,6 +988,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* read phase */ if (o.read_only) { + phase_prepare(); t_start = GetTimeStamp(); for (int dir_iter = 0; 
dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -999,6 +1018,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* rename phase */ if(o.rename_dirs && o.items > 1){ + phase_prepare(); t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -1023,6 +1043,7 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran /* remove phase */ if (o.remove_only) { + phase_prepare(); t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -1161,6 +1182,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* create phase */ if (o.create_only ) { + phase_prepare(); t_start = GetTimeStamp(); progress->stone_wall_timer_seconds = o.stone_wall_timer_seconds; progress->items_done = 0; @@ -1196,6 +1218,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* stat phase */ if (o.stat_only ) { + phase_prepare(); t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -1221,6 +1244,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* read phase */ if (o.read_only ) { + phase_prepare(); t_start = GetTimeStamp(); for (int dir_iter = 0; dir_iter < o.directory_loops; dir_iter ++){ prep_testdir(iteration, dir_iter); @@ -1250,6 +1274,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro /* remove phase */ if (o.remove_only) { + phase_prepare(); t_start = GetTimeStamp(); progress->items_start = 0; @@ -2103,7 +2128,9 @@ void mdtest_init_args(){ o = (mdtest_options_t) { .barriers = 1, .branch_factor = 1, - .random_buffer_offset = -1 + .random_buffer_offset = -1, + .prologue = "", + .epilogue = "", }; } @@ -2188,6 +2215,8 @@ mdtest_results_t * 
mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time}, + {'^', NULL, "call this external command before each phase (excluded from the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.prologue}, + {'$', NULL, "call this external command after each phase (included in the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.epilogue}, {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & packetType}, {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, From 1b77361e094a5e46d65982a3dce9880895ca4787 Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Sun, 21 Mar 2021 18:40:26 +0100 Subject: [PATCH 145/154] Change short option to long option --- src/mdtest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index 14c2063..d32407d 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -2215,8 +2215,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & o.call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & o.depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & o.print_time}, - {'^', NULL, "call this external command before each phase (excluded from the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.prologue}, - {'$', NULL, "call 
this external command after each phase (included in the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.epilogue}, + {0, "run-cmd-before-phase", "call this external command before each phase (excluded from the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.prologue}, + {0, "run-cmd-after-phase", "call this external command after each phase (included in the timing)", OPTION_OPTIONAL_ARGUMENT, 's', & o.epilogue}, {0, "dataPacketType", "type of packet that will be created [offset|incompressible|timestamp|o|i|t]", OPTION_OPTIONAL_ARGUMENT, 's', & packetType}, {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, From a490ac8ba698c39a303e70bef584e98e79b49d3a Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 29 Mar 2021 20:11:10 +0200 Subject: [PATCH 146/154] Bugfix IOR writecheck #354 --- src/ior.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ior.c b/src/ior.c index 60fb517..011e7e3 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1652,7 +1652,6 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_ amtXferred = backend->xfer(access, fd, buffer, transfer, offset, test->backend_options); if (amtXferred != transfer) ERR("cannot read from file write check"); - (*transferCount)++; *errors += CompareData(buffer, transfer, *transferCount, test, offset, pretendRank, WRITECHECK); } else if (access == READCHECK) { ((long long int*) buffer)[0] = ~((long long int*) buffer)[0]; // changes the buffer, no memset to reduce the memory pressure From 8fc7ab5c9c5718b8c08eeb8496798edc30009063 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Thu, 1 Apr 2021 17:23:10 +0200 Subject: [PATCH 147/154] MDTest, support flag to show per rank performance: #357 Also fix a potential issue for the calculation if a test is not run (number of items = 0). 
--- src/mdtest.c | 59 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 17 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index d32407d..be22035 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -145,6 +145,7 @@ typedef struct { int print_time; int print_rate_and_time; int print_all_proc; + int show_perrank_statistics; ior_dataPacketType_e dataPacketType; int random_seed; int shared_file; @@ -200,6 +201,8 @@ typedef struct{ /* for making/removing unique directory && stating/deleting subdirectory */ enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR}; +#define PRINT(...) fprintf(out_logfile, __VA_ARGS__); + /* a helper function for passing debug and verbose messages. use the MACRO as it will insert __LINE__ for you. Pass the verbose level for root to print, then the verbose level for anyone to print. @@ -901,8 +904,13 @@ static void updateResult(mdtest_results_t * res, mdtest_test_num_t test, uint64_ }else{ res->time_before_barrier[test] = res->time[test]; } - res->rate[test] = item_count/res->time[test]; - res->rate_before_barrier[test] = item_count/res->time_before_barrier[test]; + if(item_count == 0){ + res->rate[test] = 0.0; + res->rate_before_barrier[test] = 0.0; + }else{ + res->rate[test] = item_count/res->time[test]; + res->rate_before_barrier[test] = item_count/res->time_before_barrier[test]; + } res->items[test] = item_count; res->stonewall_last_item[test] = o.items; } @@ -1455,12 +1463,19 @@ static void summarize_results_rank0(int iterations, mdtest_results_t * all_resu } VERBOSE(0, -1, "\nSUMMARY %s: (of %d iterations)", print_time ? 
"time" : "rate", iterations); - VERBOSE(0, -1, - " Operation per Rank: Max Min Mean " - " per Iteration: Max Min Mean Std Dev"); - VERBOSE(0, -1, - " --------- --- --- ---- " - " --- --- ---- -------"); + PRINT(" Operation "); + if(o.show_perrank_statistics){ + PRINT("per Rank: Max Min Mean per Iteration:"); + }else{ + PRINT(" "); + } + PRINT(" Max Min Mean Std Dev\n"); + PRINT(" --------- "); + + if(o.show_perrank_statistics){ + PRINT(" --- --- ---- "); + } + PRINT(" --- --- ---- -------\n"); for (int i = start; i < stop; i++) { min = 1e308; max = 0; @@ -1526,11 +1541,16 @@ static void summarize_results_rank0(int iterations, mdtest_results_t * all_resu sd = sqrt(var); access = mdtest_test_name(i); if (i != 2) { - fprintf(out_logfile, " %-22s ", access); - fprintf(out_logfile, "%14.3f ", max); - fprintf(out_logfile, "%14.3f ", min); - fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%18.3f ", imax); + fprintf(out_logfile, " %-18s ", access); + + if(o.show_perrank_statistics){ + fprintf(out_logfile, "%14.3f ", max); + fprintf(out_logfile, "%14.3f ", min); + fprintf(out_logfile, "%14.3f ", mean); + fprintf(out_logfile, " "); + } + fprintf(out_logfile, " "); + fprintf(out_logfile, "%14.3f ", imax); fprintf(out_logfile, "%14.3f ", imin); fprintf(out_logfile, "%14.3f ", imean); fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 
0 : sd); @@ -1578,10 +1598,13 @@ static void summarize_results_rank0(int iterations, mdtest_results_t * all_resu sd = sqrt(var); access = mdtest_test_name(i); fprintf(out_logfile, " %-22s ", access); - fprintf(out_logfile, "%14.3f ", max); - fprintf(out_logfile, "%14.3f ", min); - fprintf(out_logfile, "%14.3f ", mean); - fprintf(out_logfile, "%18.3f ", imax); + if(o.show_perrank_statistics){ + fprintf(out_logfile, "%14.3f ", max); + fprintf(out_logfile, "%14.3f ", min); + fprintf(out_logfile, "%14.3f ", mean); + fprintf(out_logfile, " "); + } + fprintf(out_logfile, "%14.3f ", imax); fprintf(out_logfile, "%14.3f ", imin); fprintf(out_logfile, "%14.3f ", sum / iterations); fprintf(out_logfile, "%14.3f\n", iterations == 1 ? 0 : sd); @@ -2221,6 +2244,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {0, "allocateBufferOnGPU", "Allocate the buffer on the GPU.", OPTION_FLAG, 'd', & o.gpu_memory_flags}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, {0, "saveRankPerformanceDetails", "Save the individual rank information into this CSV file.", OPTION_OPTIONAL_ARGUMENT, 's', & o.saveRankDetailsCSV}, + {0, "showRankStatistics", "Include statistics per rank", OPTION_FLAG, 'd', & o.show_perrank_statistics}, + LAST_OPTION }; options_all_t * global_options = airoi_create_all_module_options(options); From 0410a38e985e0862a9fd9abec017abffc4c5fc43 Mon Sep 17 00:00:00 2001 From: "Julian M. 
Kunkel" Date: Tue, 13 Apr 2021 11:01:30 +0200 Subject: [PATCH 148/154] IOR: add several sanity checks for stonewalling #345 --- src/ior.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/ior.c b/src/ior.c index 011e7e3..cf96cd8 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1442,6 +1442,14 @@ static void ValidateTests(IOR_param_t * test, MPI_Comm com) IOR_param_t defaults; init_IOR_Param_t(&defaults, com); + if (test->stoneWallingStatusFile && test->keepFile == 0) + ERR("a StoneWallingStatusFile is only sensible when splitting write/read into multiple executions of ior, please use -k"); + if (test->stoneWallingStatusFile && test->stoneWallingWearOut == 0 && test->writeFile) + ERR("the StoneWallingStatusFile is only sensible for a write test when using stoneWallingWearOut"); + if (test->deadlineForStonewalling == 0 && test->stoneWallingWearOut > 0) + ERR("the stoneWallingWearOut is only sensible when setting a stonewall deadline with -D"); + if (test->stoneWallingStatusFile && test->testscripts) + WARN("the StoneWallingStatusFile only preserves the last experiment, make sure that each run uses a separate status file!"); if (test->repetitions <= 0) WARN_RESET("too few test repetitions", test, &defaults, repetitions); From 39fcb5970d554359cfda6d4b94d31181bfe3c70f Mon Sep 17 00:00:00 2001 From: efajardo Date: Tue, 20 Apr 2021 15:38:04 -0700 Subject: [PATCH 149/154] Adding number of tasks to the CSV output --- src/ior-output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ior-output.c b/src/ior-output.c index d1c842e..1b21a00 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -21,7 +21,7 @@ void PrintTableHeader(){ fprintf(out_resultfile, "access bw(MiB/s) IOPS Latency(s) block(KiB) xfer(KiB) open(s) wr/rd(s) close(s) total(s) iter\n"); fprintf(out_resultfile, "------ --------- ---- ---------- ---------- --------- -------- -------- -------- -------- ----\n"); }else if(outputFormat == OUTPUT_CSV){ - fprintf(out_resultfile, 
"access,bw(MiB/s),IOPS,Latency,block(KiB),xfer(KiB),open(s),wr/rd(s),close(s),total(s),iter\n"); + fprintf(out_resultfile, "access,bw(MiB/s),IOPS,Latency,block(KiB),xfer(KiB),open(s),wr/rd(s),close(s),total(s),numTasks,iter\n"); } } @@ -260,6 +260,7 @@ void PrintReducedResult(IOR_test_t *test, int access, double bw, double iops, do PrintKeyValDouble("wrRdTime", diff_subset[1]); PrintKeyValDouble("closeTime", diff_subset[2]); PrintKeyValDouble("totalTime", totalTime); + PrintKeyValInt("Numtasks", test->params.numTasks); fprintf(out_resultfile, "%d\n", rep); } From be80b4e618ebc8af24ae8c1da82b82c48a387098 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Fri, 23 Apr 2021 12:20:11 +0200 Subject: [PATCH 150/154] S3-libs3 error handling improved, added a heuristic for testing as well. --- src/aiori-S3-libs3.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 489da74..0d12805 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -171,7 +171,7 @@ static void S3_Sync(aiori_mod_opt_t * options) static S3Status S3ListResponseCallback(const char *ownerId, const char *ownerDisplayName, const char *bucketName, int64_t creationDateSeconds, void *callbackData){ uint64_t * count = (uint64_t*) callbackData; - *count++; + *count += 1; return S3StatusOK; } @@ -381,11 +381,37 @@ static void S3_Delete(char *path, aiori_mod_opt_t * options) }while(req.truncated); S3_delete_bucket(o->s3_protocol, S3UriStylePath, o->access_key, o->secret_key, NULL, o->host, p, o->authRegion, NULL, o->timeout, & responseHandler, NULL); }else{ - s3_delete_req req = {0, o, 0, NULL}; - do{ - S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); - }while(req.truncated); - S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); + char * del_heuristics = getenv("S3LIB_DELETE_HEURISTICS"); + 
if(del_heuristics){ + struct stat buf; + S3_head_object(& o->bucket_context, p, NULL, o->timeout, & statResponseHandler, & buf); + if(s3status != S3StatusOK){ + // As the file does not exist, can return safely + CHECK_ERROR(p); + return; + } + int threshold = atoi(del_heuristics); + if (buf.st_size > threshold){ + // there may exist fragments, so try to delete them + s3_delete_req req = {0, o, 0, NULL}; + do{ + S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); + }while(req.truncated); + } + S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); + }else{ + // Regular deletion, must remove all created fragments + S3_delete_object(& o->bucket_context, p, NULL, o->timeout, & responseHandler, NULL); + if(s3status != S3StatusOK){ + // As the file does not exist, can return safely + CHECK_ERROR(p); + return; + } + s3_delete_req req = {0, o, 0, NULL}; + do{ + S3_list_bucket(& o->bucket_context, p, req.nextMarker, NULL, INT_MAX, NULL, o->timeout, & list_delete_handler, & req); + }while(req.truncated); + } } CHECK_ERROR(p); } From a647f48b38e1584b6e91cbc375395c8ab42776de Mon Sep 17 00:00:00 2001 From: Julian Kunkel Date: Mon, 26 Apr 2021 18:10:47 +0200 Subject: [PATCH 151/154] AIORI POSIX use open flag.
#363 (#364) Fixes #363 --- src/aiori-POSIX.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 5040a53..72f7f53 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -528,7 +528,14 @@ int POSIX_Mknod(char *testFileName) */ aiori_fd_t *POSIX_Open(char *testFileName, int flags, aiori_mod_opt_t * param) { - int fd_oflag = O_BINARY | O_RDWR; + int fd_oflag = O_BINARY; + if(flags & IOR_RDONLY){ + fd_oflag |= O_RDONLY; + }else if(flags & IOR_WRONLY){ + fd_oflag |= O_WRONLY; + }else{ + fd_oflag |= O_RDWR; + } posix_fd * pfd = safeMalloc(sizeof(posix_fd)); posix_options_t * o = (posix_options_t*) param; if (o->direct_io == TRUE){ From 47f3709c68939817a682f1ef75ff1a11ab47bea4 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Tue, 27 Apr 2021 14:50:19 +0200 Subject: [PATCH 152/154] MDTest bugfix options -f -l -s and add extra option check #365 --- src/mdtest.c | 44 ++++++++++++++++++++++++++++-------------- testing/basic-tests.sh | 1 + 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/mdtest.c b/src/mdtest.c index be22035..dee3633 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1933,7 +1933,7 @@ void create_remove_directory_tree(int create, } } -static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t * summary_table){ +static void mdtest_iteration(int i, int j, mdtest_results_t * summary_table){ rank_progress_t progress_o; memset(& progress_o, 0 , sizeof(progress_o)); progress_o.stone_wall_timer_seconds = 0; @@ -2168,7 +2168,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * int i, j; int numNodes; int numTasksOnNode0 = 0; - MPI_Group worldgroup, testgroup; + MPI_Group worldgroup; struct { int first; int last; @@ -2486,6 +2486,12 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * first = o.size; last = o.size; } + if(first > last){ + FAIL("process number: first > last doesn't make sense"); 
+ } + if(last > o.size){ + FAIL("process number: last > number of processes doesn't make sense"); + } /* setup summary table for recording results */ o.summary_table = (mdtest_results_t *) safeMalloc(iterations * sizeof(mdtest_results_t)); @@ -2506,12 +2512,27 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * strcpy(o.rm_name, "mdtest.shared."); MPI_Comm_group(testComm, &worldgroup); + + last = o.size < last ? o.size : last; + + /* Run the tests */ + for (i = first; i <= last; i += stride) { + sleep(1); + + if(i < last){ + MPI_Group testgroup; + range.last = i - 1; + MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup); + MPI_Comm_create(world_com, testgroup, &testComm); + MPI_Group_free(&testgroup); + if(testComm == MPI_COMM_NULL){ + continue; + } + }else{ + MPI_Comm_dup(world_com, & testComm); + } + MPI_Comm_size(testComm, &o.size); - /* Run the tests */ - for (i = first; i <= last && i <= o.size; i += stride) { - range.last = i - 1; - MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup); - MPI_Comm_create(testComm, testgroup, &testComm); if (rank == 0) { uint64_t items_all = i * o.items; if(o.num_dirs_in_tree_calc){ @@ -2536,16 +2557,12 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * for (j = 0; j < iterations; j++) { // keep track of the current status for stonewalling - mdtest_iteration(i, j, testgroup, & o.summary_table[j]); + mdtest_iteration(i, j, & o.summary_table[j]); } summarize_results(iterations, aggregated_results); if(o.saveRankDetailsCSV){ StoreRankInformation(iterations, aggregated_results); } - if (i == 1 && stride > 1) { - i = 0; - } - int total_errors = 0; MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm); if(rank == 0 && total_errors){ @@ -2553,9 +2570,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * } MPI_Comm_free(&testComm); - MPI_Group_free(&testgroup); } - + 
MPI_Group_free(&worldgroup); testComm = world_com; diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index 78663f3..2f82ced 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -15,6 +15,7 @@ MDTEST 1 -a POSIX MDTEST 2 -a POSIX -W 2 MDTEST 1 -C -T -r -F -I 1 -z 1 -b 1 -L -u MDTEST 1 -C -T -I 1 -z 1 -b 1 -u +MDTEST 2 -n 1 -f 1 -l 2 IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 2000k IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 200k From e8eb77166a9a3c557b296411214e0797062d4e5e Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 28 Apr 2021 13:41:19 +0200 Subject: [PATCH 153/154] Bugfix advanced tests to work with new validation with stonewall. --- testing/complex-tests.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/complex-tests.sh b/testing/complex-tests.sh index a04c14d..e4692dd 100755 --- a/testing/complex-tests.sh +++ b/testing/complex-tests.sh @@ -10,9 +10,9 @@ TYPE="advanced" source $ROOT/test-lib.sh #stonewalling tests -IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15 -IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still! -IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30 +IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15 -k +IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 -k # max 15 still! +IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30 -k MDTEST 2 -I 20 -a DUMMY -W 1 -x stonewall-md.log -C MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -T -v From 81ac754c68de22a306702d26e266e98b0a057586 Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Mon, 3 May 2021 18:46:00 +0200 Subject: [PATCH 154/154] Bugfix in AIORI-S3-libs3 to prevent name collisions. Also allow to output verification errors. 
--- src/aiori-S3-libs3.c | 6 ++++++ src/mdtest.c | 12 ++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/aiori-S3-libs3.c b/src/aiori-S3-libs3.c index 0d12805..98d5df9 100644 --- a/src/aiori-S3-libs3.c +++ b/src/aiori-S3-libs3.c @@ -92,6 +92,12 @@ static void def_file_name(s3_options_t * o, char * out_name, char const * path){ }else if(c == '/'){ *out_name = '_'; out_name++; + }else{ + // encode special characters + *out_name = 'a' + (c / 26); + out_name++; + *out_name = 'a' + (c % 26); + out_name++; } path++; } diff --git a/src/mdtest.c b/src/mdtest.c index dee3633..3c49a85 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -412,7 +412,11 @@ static void create_file (const char *path, uint64_t itemNum) { if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) { EWARNF("unable to verify write (read/back) file %s", curr_item); } - o.verification_error += verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); + int error = verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank, o.dataPacketType); + o.verification_error += error; + if(error){ + VERBOSE(1,1,"verification error in file: %s", curr_item); + } } } @@ -734,7 +738,11 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) { if (o.shared_file) { pretend_rank = rank; } - o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank, o.dataPacketType); + int error = verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank, o.dataPacketType); + o.verification_error += error; + if(error){ + VERBOSE(1,1,"verification error in file: %s", item); + } } }