From 0bffd14de78a43c45e88e5e79e7b4d1ddde2ce5d Mon Sep 17 00:00:00 2001 From: "Julian M. Kunkel" Date: Wed, 24 Jun 2020 11:10:42 +0100 Subject: [PATCH] Added --warningAsErrors option to IOR and MDTest and refactored WARNINGs in IOR. #174 --- src/aiori-POSIX.c | 3 +-- src/ior-output.c | 2 +- src/ior.c | 49 +++++++++++++++++---------------------------- src/ior.h | 2 +- src/iordef.h | 42 +++++++++++++++++++------------------- src/mdtest.c | 3 ++- src/parse_options.c | 6 +++--- 7 files changed, 47 insertions(+), 60 deletions(-) diff --git a/src/aiori-POSIX.c b/src/aiori-POSIX.c index 615cd9f..648b7c1 100755 --- a/src/aiori-POSIX.c +++ b/src/aiori-POSIX.c @@ -669,8 +669,7 @@ void POSIX_Delete(char *testFileName, aiori_mod_opt_t * param) if(hints->dryRun) return; if (unlink(testFileName) != 0){ - EWARNF("[RANK %03d]: unlink() of file \"%s\" failed\n", - rank, testFileName); + EWARNF("[RANK %03d]: unlink() of file \"%s\" failed", rank, testFileName); } } diff --git a/src/ior-output.c b/src/ior-output.c index b890cd9..25366eb 100644 --- a/src/ior-output.c +++ b/src/ior-output.c @@ -365,7 +365,7 @@ void ShowTestStart(IOR_param_t *test) PrintKeyValInt("storeFileOffset", test->storeFileOffset); PrintKeyValInt("keepFile", test->keepFile); PrintKeyValInt("keepFileWithError", test->keepFileWithError); - PrintKeyValInt("quitOnError", test->quitOnError); + PrintKeyValInt("warningAsErrors", test->warningAsErrors); PrintKeyValInt("verbose", verbose); PrintKeyVal("data packet type", data_packets[test->dataPacketType]); PrintKeyValInt("setTimeStampSignature/incompressibleSeed", test->setTimeStampSignature); /* Seed value was copied into setTimeStampSignature as well */ diff --git a/src/ior.c b/src/ior.c index 08f95ef..5d1632c 100755 --- a/src/ior.c +++ b/src/ior.c @@ -78,6 +78,8 @@ static void ior_set_xfer_hints(IOR_param_t * p){ } } +int aiori_warning_as_errors = 0; + static void test_initialize(IOR_test_t * test){ verbose = test->params.verbose; backend = test->params.backend; @@ -85,6 +87,7 @@ static void test_initialize(IOR_test_t * test){ backend->initialize(test->params.backend_options); } ior_set_xfer_hints(& test->params); + aiori_warning_as_errors = test->params.warningAsErrors; if (rank == 0 && verbose >= VERBOSE_0) { ShowTestStart(& test->params); @@ -111,7 +114,6 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out /* setup tests, and validate parameters */ tests_head = ParseCommandLine(argc, argv); InitTests(tests_head, world_com); - verbose = tests_head->params.verbose; PrintHeader(argc, argv); @@ -159,7 +161,6 @@ int ior_main(int argc, char **argv) /* setup tests, and validate parameters */ InitTests(tests_head, mpi_comm_world); - verbose = tests_head->params.verbose; PrintHeader(argc, argv); @@ -281,10 +282,8 @@ DisplayOutliers(int numTasks, if (ret != 0) strcpy(hostname, "unknown"); - fprintf(out_logfile, "WARNING: for %s, task %d, %s %s is %f\n", - hostname, rank, accessString, timeString, timerVal); - fprintf(out_logfile, " (mean=%f, stddev=%f)\n", mean, sd); - fflush(out_logfile); + EWARNF("for %s, task %d, %s %s is %f (mean=%f, stddev=%f)\n", + hostname, rank, accessString, timeString, timerVal, mean, sd); } } @@ -333,18 +332,11 @@ static void CheckFileSize(IOR_test_t *test, IOR_offset_t dataMoved, int rep, != point->aggFileSizeFromXfer) || (point->aggFileSizeFromStat != point->aggFileSizeFromXfer)) { - fprintf(out_logfile, - "WARNING: Expected aggregate file size = %lld.\n", - (long long) params->expectedAggFileSize); - fprintf(out_logfile, - "WARNING: Stat() of aggregate file size = %lld.\n", - (long long) point->aggFileSizeFromStat); - fprintf(out_logfile, - "WARNING: Using actual aggregate bytes moved = %lld.\n", - (long long) point->aggFileSizeFromXfer); + EWARNF("Expected aggregate file size = %lld", (long long) params->expectedAggFileSize); + EWARNF("Stat() of aggregate file size = %lld", (long long) point->aggFileSizeFromStat); + EWARNF("Using actual aggregate bytes moved = %lld", (long long) point->aggFileSizeFromXfer); if(params->deadlineForStonewalling){ - fprintf(out_logfile, - "WARNING: maybe caused by deadlineForStonewalling\n"); + EWARN("Maybe caused by deadlineForStonewalling"); } } } @@ -425,8 +417,7 @@ CompareBuffers(void *expectedBuffer, if (inError) { inError = 0; GetTestFileName(testFileName, test); - fprintf(out_logfile, - "[%d] FAILED comparison of buffer containing %d-byte ints:\n", + EWARNF("[%d] FAILED comparison of buffer containing %d-byte ints:\n", rank, (int)sizeof(unsigned long long int)); fprintf(out_logfile, "[%d] File name = %s\n", rank, testFileName); fprintf(out_logfile, "[%d] In transfer %lld, ", rank, @@ -449,8 +440,6 @@ CompareBuffers(void *expectedBuffer, if (j == length) fprintf(out_logfile, "[end of buffer]"); fprintf(out_logfile, "\n"); - if (test->quitOnError == TRUE) - ERR("data check error, aborting execution"); } return (errorCount); } @@ -476,7 +465,7 @@ static int CountErrors(IOR_param_t * test, int access, int errors) WARN("overflow in errors counted"); allErrors = -1; } - fprintf(out_logfile, "WARNING: incorrect data on %s (%d errors found).\n", + EWARNF("Incorrect data on %s (%d errors found).\n", access == WRITECHECK ? "write" : "read", allErrors); fprintf(out_logfile, "Used Time Stamp %u (0x%x) for Data Signature\n", @@ -778,7 +767,7 @@ void GetTestFileName(char *testFileName, IOR_param_t * test) strcpy(initialTestFileName, test->testFileName); if(test->dualMount){ GetProcessorAndCore(&socket, &core); - sprintf(tmpString, "%s%d/%s",initialTestFileName, + sprintf(tmpString, "%s%d/%s",initialTestFileName, socket, "data"); strcpy(initialTestFileName, tmpString); } @@ -977,6 +966,9 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) int mpiNumTasks = 0; int mpiNumTasksOnNode0 = 0; + verbose = tests->params.verbose; + aiori_warning_as_errors = tests->params.warningAsErrors; + /* * These default values are the same for every test and expensive to * retrieve so just do it once. @@ -1005,11 +997,9 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) params->numTasks = mpiNumTasks; } else if (params->numTasks > mpiNumTasks) { if (rank == 0) { - fprintf(out_logfile, - "WARNING: More tasks requested (%d) than available (%d),", + EWARNF("More tasks requested (%d) than available (%d),", params->numTasks, mpiNumTasks); - fprintf(out_logfile, " running with %d tasks.\n", - mpiNumTasks); + EWARNF(" running with %d tasks.\n", mpiNumTasks); } params->numTasks = mpiNumTasks; } @@ -1451,7 +1441,7 @@ static void TestIoSys(IOR_test_t *test) if(params->stoneWallingStatusFile){ params->stoneWallingWearOutIterations = ReadStoneWallingIterations(params->stoneWallingStatusFile); if(params->stoneWallingWearOutIterations == -1 && rank == 0){ - fprintf(out_logfile, "WARNING: Could not read back the stonewalling status from the file!\n"); + WARN("Could not read back the stonewalling status from the file!"); params->stoneWallingWearOutIterations = 0; } } @@ -1637,9 +1627,6 @@ static void ValidateTests(IOR_param_t * test) && (test->blockSize < sizeof(IOR_size_t) || test->transferSize < sizeof(IOR_size_t))) ERR("block/transfer size may not be smaller than IOR_size_t for NCMPI"); - if ((strcasecmp(test->api, "POSIX") != 0) && test->singleXferAttempt) - WARN_RESET("retry only available in POSIX", - test, &defaults, singleXferAttempt); if (((strcasecmp(test->api, "POSIX") != 0) && (strcasecmp(test->api, "MPIIO") != 0) && (strcasecmp(test->api, "MMAP") != 0) diff --git a/src/ior.h b/src/ior.h index c3d9ad4..a5c34b9 100755 --- a/src/ior.h +++ b/src/ior.h @@ -117,7 +117,6 @@ typedef struct int keepFile; /* don't delete the testfile on exit */ int keepFileWithError; /* don't delete the testfile with errors */ int errorFound; /* error found in data check */ - int quitOnError; /* quit code when error in check */ IOR_offset_t segmentCount; /* number of segments (or HDF5 datasets) */ IOR_offset_t blockSize; /* contiguous bytes to write per task */ IOR_offset_t transferSize; /* size of transfer in bytes */ @@ -175,6 +174,7 @@ typedef struct int id; /* test's unique ID */ int intraTestBarriers; /* barriers between open/op and op/close */ + int warningAsErrors; /* treat any warning as an error */ aiori_xfer_hint_t hints; } IOR_param_t; diff --git a/src/iordef.h b/src/iordef.h index 4c46b29..0805208 100755 --- a/src/iordef.h +++ b/src/iordef.h @@ -115,15 +115,12 @@ enum OutputFormat_t{ #define DELIMITERS " \t\r\n=" /* ReadScript() */ #define FILENAME_DELIMITER '@' /* ParseFileName() */ -/* MACROs for debugging */ -#define HERE fprintf(stdout, "** LINE %d (TASK=%d) **\n", \ - __LINE__, rank); - typedef long long int IOR_offset_t; typedef long long int IOR_size_t; #define IOR_format "%016llx" +extern FILE * out_logfile; /******************************** M A C R O S *********************************/ @@ -134,34 +131,37 @@ typedef long long int IOR_size_t; #define WARN_RESET(MSG, TO_STRUCT_PTR, FROM_STRUCT_PTR, MEMBER) do { \ (TO_STRUCT_PTR)->MEMBER = (FROM_STRUCT_PTR)->MEMBER; \ if (rank == 0) { \ - fprintf(stdout, "ior WARNING: %s. Using value of %d.\n", \ + fprintf(out_logfile, "ior WARNING: %s. Using value of %d.\n", \ MSG, (TO_STRUCT_PTR)->MEMBER); \ } \ - fflush(stdout); \ + fflush(out_logfile); \ } while (0) +extern int aiori_warning_as_errors; #define WARN(MSG) do { \ + if(aiori_warning_as_errors){ ERR(MSG); } \ if (verbose > VERBOSE_2) { \ - fprintf(stdout, "ior WARNING: %s, (%s:%d).\n", \ + fprintf(out_logfile, "ior WARNING: %s, (%s:%d).\n", \ MSG, __FILE__, __LINE__); \ } else { \ - fprintf(stdout, "ior WARNING: %s.\n", MSG); \ + fprintf(out_logfile, "ior WARNING: %s.\n", MSG); \ } \ - fflush(stdout); \ + fflush(out_logfile); \ } while (0) /* warning with format string and errno printed */ #define EWARNF(FORMAT, ...) do { \ + if(aiori_warning_as_errors){ ERRF(FORMAT, __VA_ARGS__); } \ if (verbose > VERBOSE_2) { \ - fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s (%s:%d).\n", \ - __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ + fprintf(out_logfile, "ior WARNING: " FORMAT ", (%s:%d).\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ } else { \ - fprintf(stdout, "ior WARNING: " FORMAT ", errno %d, %s \n", \ - __VA_ARGS__, errno, strerror(errno)); \ + fprintf(out_logfile, "ior WARNING: " FORMAT "\n", \ + __VA_ARGS__); \ } \ - fflush(stdout); \ + fflush(out_logfile); \ } while (0) @@ -173,9 +173,9 @@ typedef long long int IOR_size_t; /* display error message with format string and terminate execution */ #define ERRF(FORMAT, ...) do { \ - fprintf(stdout, "ior ERROR: " FORMAT ", errno %d, %s (%s:%d)\n", \ - __VA_ARGS__, errno, strerror(errno), __FILE__, __LINE__); \ - fflush(stdout); \ + fprintf(out_logfile, "ior ERROR: " FORMAT ", (%s:%d)\n", \ + __VA_ARGS__, __FILE__, __LINE__); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } while (0) @@ -188,9 +188,9 @@ typedef long long int IOR_size_t; /* display a simple error message (i.e. errno is not set) and terminate execution */ #define ERR(MSG) do { \ - fprintf(stdout, "ior ERROR: %s, (%s:%d)\n", \ + fprintf(out_logfile, "ior ERROR: %s, (%s:%d)\n", \ MSG, __FILE__, __LINE__); \ - fflush(stdout); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } while (0) @@ -207,9 +207,9 @@ typedef long long int IOR_size_t; \ if (MPI_STATUS != MPI_SUCCESS) { \ MPI_Error_string(MPI_STATUS, resultString, &resultLength); \ - fprintf(stdout, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ + fprintf(out_logfile, "ior ERROR: " FORMAT ", MPI %s, (%s:%d)\n", \ __VA_ARGS__, resultString, __FILE__, __LINE__); \ - fflush(stdout); \ + fflush(out_logfile); \ MPI_Abort(MPI_COMM_WORLD, -1); \ } \ } while(0) diff --git a/src/mdtest.c b/src/mdtest.c index 5488834..3eef40c 100644 --- a/src/mdtest.c +++ b/src/mdtest.c @@ -1079,7 +1079,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro } if (rank == 0) { if(expected_items == -1){ - fprintf(out_logfile, "WARNING: could not read stonewall status file\n"); + WARN("Could not read stonewall status file"); }else { VERBOSE(1,1, "Read stonewall status; items: "LLU"\n", items); } @@ -1949,6 +1949,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE * {'Y', NULL, "call the sync command after each phase (included in the timing; note it causes all IO to be flushed from your node)", OPTION_FLAG, 'd', & call_sync}, {'z', NULL, "depth of hierarchical directory structure", OPTION_OPTIONAL_ARGUMENT, 'd', & depth}, {'Z', NULL, "print time instead of rate", OPTION_FLAG, 'd', & print_time}, + {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & aiori_warning_as_errors}, LAST_OPTION }; options_all_t * global_options = airoi_create_all_module_options(options); diff --git a/src/parse_options.c b/src/parse_options.c index ce5421c..31fac13 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -175,8 +175,8 @@ void DecodeDirective(char *line, IOR_param_t *params, options_all_t * module_opt params->keepFileWithError = atoi(value); } else if (strcasecmp(option, "multiFile") == 0) { params->multiFile = atoi(value); - } else if (strcasecmp(option, "quitonerror") == 0) { - params->quitOnError = atoi(value); + } else if (strcasecmp(option, "warningAsErrors") == 0) { + params->warningAsErrors = atoi(value); } else if (strcasecmp(option, "segmentcount") == 0) { params->segmentCount = string_to_bytes(value); } else if (strcasecmp(option, "blocksize") == 0) { @@ -418,7 +418,6 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'N', NULL, "numTasks -- number of tasks that are participating in the test (overrides MPI)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->numTasks}, {'o', NULL, "testFile -- full name for test", OPTION_OPTIONAL_ARGUMENT, 's', & params->testFileName}, {'O', NULL, "string of IOR directives (e.g. -O checkRead=1,lustreStripeCount=32)", OPTION_OPTIONAL_ARGUMENT, 'p', & decodeDirectiveWrapper}, - {'q', NULL, "quitOnError -- during file error-checking, abort on error", OPTION_FLAG, 'd', & params->quitOnError}, {'Q', NULL, "taskPerNodeOffset for read tests use with -C & -Z options (-C constant N, -Z at least N)", OPTION_OPTIONAL_ARGUMENT, 'd', & params->taskPerNodeOffset}, {'r', NULL, "readFile -- read existing file", OPTION_FLAG, 'd', & params->readFile}, {'R', NULL, "checkRead -- verify that the output of read matches the expected signature (used with -G)", OPTION_FLAG, 'd', & params->checkRead}, @@ -435,6 +434,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", OPTION_FLAG, 'd', & params->reorderTasksRandom}, + {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, {.help=" -O summaryFile=FILE -- store result data into this file", .arg = OPTION_OPTIONAL_ARGUMENT}, {.help=" -O summaryFormat=[default,JSON,CSV] -- use the format for outputing the summary", .arg = OPTION_OPTIONAL_ARGUMENT}, {0, "dryRun", "do not perform any I/Os just run evtl. inputs print dummy output", OPTION_FLAG, 'd', & params->dryRun},