diff --git a/src/aiori.c b/src/aiori.c index 897abb6..6c9a971 100644 --- a/src/aiori.c +++ b/src/aiori.c @@ -128,6 +128,8 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) { ior_aiori_t **tmp = available_aiori; char delimiter = ' '; + *APIs = 0; + *APIs_legacy = 0; while (*tmp != NULL) { @@ -136,7 +138,6 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) tmp++; continue; } - if (delimiter == ' ') { APIs += sprintf(APIs, "%s", (*tmp)->name); @@ -148,6 +149,7 @@ void aiori_supported_apis(char * APIs, char * APIs_legacy, enum bench_type type) if ((*tmp)->name_legacy != NULL) APIs_legacy += sprintf(APIs_legacy, "%c%s", delimiter, (*tmp)->name_legacy); + tmp++; } } diff --git a/src/ior-internal.h b/src/ior-internal.h index fa7212e..c0af544 100644 --- a/src/ior-internal.h +++ b/src/ior-internal.h @@ -25,8 +25,7 @@ void PrintTestEnds(); void PrintTableHeader(); /* End of ior-output */ -IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank); -IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access); +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count); struct results { double min; diff --git a/src/ior.c b/src/ior.c index 0daddff..b2ba1a4 100755 --- a/src/ior.c +++ b/src/ior.c @@ -1025,9 +1025,6 @@ static void InitTests(IOR_test_t *tests, MPI_Comm com) } init_clock(); - - /* seed random number generator */ - SeedRandGen(mpi_comm_world); } /* @@ -1600,7 +1597,12 @@ static void ValidateTests(IOR_param_t * test) } if (test->blockSize < test->transferSize) ERR("block size must not be smaller than transfer size"); - + if (test->randomOffset && test->blockSize == test->transferSize) + ERR("IOR will randomize access within a block and repeats the same pattern for all segments, therefore choose blocksize > transferSize"); + if (! 
test->randomOffset && test->randomPrefillBlocksize) + ERR("Setting the randomPrefill option without using random is not useful"); + if (test->randomPrefillBlocksize && (test->blockSize % test->randomPrefillBlocksize != 0)) + ERR("The randomPrefill option must divide the blockSize"); /* specific APIs */ if ((strcasecmp(test->api, "MPIIO") == 0) && (test->blockSize < sizeof(IOR_size_t) @@ -1657,51 +1659,9 @@ static void ValidateTests(IOR_param_t * test) /** * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. - * They are sequential and the last element is set to -1 as end marker. - * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount - * @param pretendRank int pretended Rank for shifting the offsets correctly - * @return IOR_offset_t - */ -IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) -{ - IOR_offset_t i, j, k = 0; - IOR_offset_t offsets; - IOR_offset_t *offsetArray; - - /* count needed offsets */ - offsets = (test->blockSize / test->transferSize) * test->segmentCount; - - /* setup empty array */ - offsetArray = - (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t)); - if (offsetArray == NULL) - ERR("malloc() failed"); - offsetArray[offsets] = -1; /* set last offset with -1 */ - - /* fill with offsets */ - for (i = 0; i < test->segmentCount; i++) { - for (j = 0; j < (test->blockSize / test->transferSize); j++) { - offsetArray[k] = j * test->transferSize; - if (test->filePerProc) { - offsetArray[k] += i * test->blockSize; - } else { - offsetArray[k] += - (i * test->numTasks * test->blockSize) - + (pretendRank * test->blockSize); - } - k++; - } - } - - return (offsetArray); -} - -/** - * Returns a precomputed array of IOR_offset_t for the inner benchmark loop. - * They get created sequentially and mixed up in the end. The last array element - * is set to -1 as end marker. 
- * It should be noted that as the seeds get synchronised across all processes - * every process computes the same random order if used with filePerProc. + * They get created sequentially and mixed up in the end. + * It should be noted that as the seeds get synchronised across all processes if not FilePerProcess is set + * every process computes the same random order. * For a shared file all transfers get randomly assigned to ranks. The processes * can also have differen't numbers of transfers. This might lead to a bigger * diversion in accesse as it dose with filePerProc. This is expected but @@ -1709,87 +1669,87 @@ IOR_offset_t *GetOffsetArraySequential(IOR_param_t * test, int pretendRank) * @param test IOR_param_t for getting transferSize, blocksize and SegmentCount * @param pretendRank int pretended Rank for shifting the offsets correctly * @return IOR_offset_t - * @return */ -IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, int access) +IOR_offset_t *GetOffsetArrayRandom(IOR_param_t * test, int pretendRank, IOR_offset_t * out_count) { int seed; - IOR_offset_t i, value, tmp; - IOR_offset_t offsets = 0; + IOR_offset_t i; + IOR_offset_t offsets; IOR_offset_t offsetCnt = 0; - IOR_offset_t fileSize; IOR_offset_t *offsetArray; - /* set up seed, each process can determine which regions to access individually */ - if (test->randomSeed == -1) { - test->randomSeed = seed = rand(); - } else { - seed = test->randomSeed + pretendRank; - } - srand(seed); - - fileSize = test->blockSize * test->segmentCount; - if (test->filePerProc == FALSE) { - fileSize *= test->numTasks; + if (test->filePerProc) { + /* set up seed, each process can determine which regions to access individually */ + if (test->randomSeed == -1) { + seed = time(NULL); + test->randomSeed = seed; + } else { + seed = test->randomSeed + pretendRank; + } + }else{ + /* Shared file requires that the seed is synchronized */ + if (test->randomSeed == -1) { + // all processes need to have the 
same seed. + if(rank == 0){ + seed = time(NULL); + } + MPI_CHECK(MPI_Bcast(& seed, 1, MPI_INT, 0, test->testComm), "cannot broadcast random seed value"); + test->randomSeed = seed; + }else{ + seed = test->randomSeed; + } } + srand(seed); /* all draws below use rand(), which is seeded by srand(); srandom() only seeds random() */ /* count needed offsets (pass 1) */ - if (test->filePerProc == FALSE) { - for (i = 0; i < fileSize; i += test->transferSize) { - // this counts which process get how many transferes in - // a shared file - if ((rand() % test->numTasks) == pretendRank) { - offsets++; - } + if (test->filePerProc) { + offsets = test->blockSize / test->transferSize; + }else{ + offsets = 0; + for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) { + // this counts which process get how many transferes in the shared file + if ((rand() % test->numTasks) == pretendRank) { + offsets++; + } } - } else { - offsets += fileSize / test->transferSize; } /* setup empty array */ - offsetArray = - (IOR_offset_t *) malloc((offsets + 1) * sizeof(IOR_offset_t)); - if (offsetArray == NULL) - ERR("malloc() failed"); - offsetArray[offsets] = -1; /* set last offset with -1 */ + offsetArray = (IOR_offset_t *) safeMalloc(offsets * sizeof(IOR_offset_t)); + + *out_count = offsets; if (test->filePerProc) { - /* fill array */ - for (i = 0; i < offsets; i++) { - offsetArray[i] = i * test->transferSize; - } + /* fill array */ + for (i = 0; i < offsets; i++) { + offsetArray[i] = i * test->transferSize; + } } else { - /* fill with offsets (pass 2) */ - srand(seed); /* need same seedto get same transfers as counted in the beginning*/ - for (i = 0; i < fileSize; i += test->transferSize) { - if ((rand() % test->numTasks) == pretendRank) { - offsetArray[offsetCnt] = i; - offsetCnt++; - } + /* fill with offsets (pass 2) */ + srand(seed); /* must re-seed rand() itself: pass 2 has to replay exactly the draws counted in pass 1, otherwise offsetArray is over- or under-filled */ + for (i = 0; i < test->blockSize * test->numTasks; i += test->transferSize) { + if ((rand() % test->numTasks) == pretendRank) { + offsetArray[offsetCnt] = i;
+ offsetCnt++; } + } } /* reorder array */ for (i = 0; i < offsets; i++) { + IOR_offset_t value, tmp; value = rand() % offsets; tmp = offsetArray[value]; offsetArray[value] = offsetArray[i]; offsetArray[i] = tmp; } - SeedRandGen(test->testComm); /* synchronize seeds across tasks */ return (offsetArray); } -static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offsetArray, int pretendRank, - IOR_offset_t * transferCount, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){ +static IOR_offset_t WriteOrReadSingle(IOR_offset_t offset, int pretendRank, IOR_offset_t transfer, IOR_offset_t * transferCount, int * errors, IOR_param_t * test, aiori_fd_t * fd, IOR_io_buffers* ioBuffers, int access){ IOR_offset_t amtXferred = 0; - IOR_offset_t transfer; void *buffer = ioBuffers->buffer; - - IOR_offset_t offset = offsetArray[pairCnt]; // this looks inappropriate - - transfer = test->transferSize; if (access == WRITE) { /* fills each transfer with a unique pattern * containing the offset into the file */ @@ -1831,6 +1791,27 @@ static IOR_offset_t WriteOrReadSingle(IOR_offset_t pairCnt, IOR_offset_t *offset return amtXferred; } +static void prefillSegment(IOR_param_t *test, void * randomPrefillBuffer, int pretendRank, aiori_fd_t *fd, IOR_io_buffers *ioBuffers, int startSegment, int endSegment){ + // prefill the whole file already with an invalid pattern + int offsets = test->blockSize / test->randomPrefillBlocksize; + void * oldBuffer = ioBuffers->buffer; + IOR_offset_t transferCount = 0; /* passed by address to WriteOrReadSingle(); start from a defined value as WriteOrRead() does */ + int errors = 0; /* likewise an in/out argument; the prefill does not use the resulting count */ + ioBuffers->buffer = randomPrefillBuffer; + for (int i = startSegment; i < endSegment; i++){ + for (int j = 0; j < offsets; j++) { + IOR_offset_t offset = j * test->randomPrefillBlocksize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + WriteOrReadSingle(offset, pretendRank, test->randomPrefillBlocksize, &
transferCount, & errors, test, fd, ioBuffers, WRITE); + } + } + ioBuffers->buffer = oldBuffer; +} + /* * Write or Read data to file(s). This loops through the strides, writing * out the data to each block in transfer sizes, until the remainder left is 0. @@ -1841,41 +1822,87 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, int errors = 0; IOR_offset_t transferCount = 0; uint64_t pairCnt = 0; - IOR_offset_t *offsetArray; int pretendRank; IOR_offset_t dataMoved = 0; /* for data rate calculation */ double startForStonewall; int hitStonewall; + int i, j; IOR_point_t *point = ((access == WRITE) || (access == WRITECHECK)) ? &results->write : &results->read; /* initialize values */ pretendRank = (rank + rankOffset) % test->numTasks; + // offsetArray = GetOffsetArraySequential(test, pretendRank); + + IOR_offset_t offsets; + IOR_offset_t * offsets_rnd; if (test->randomOffset) { - offsetArray = GetOffsetArrayRandom(test, pretendRank, access); - } else { - offsetArray = GetOffsetArraySequential(test, pretendRank); + offsets_rnd = GetOffsetArrayRandom(test, pretendRank, & offsets); + }else{ + offsets = (test->blockSize / test->transferSize); } + void * randomPrefillBuffer = NULL; + if(test->randomPrefillBlocksize && (access == WRITE || access == WRITECHECK)){ + randomPrefillBuffer = aligned_buffer_alloc(test->randomPrefillBlocksize); + // store invalid data into the buffer + memset(randomPrefillBuffer, -1, test->randomPrefillBlocksize); + } + + // start timer after random offset was generated startForStonewall = GetTimeStamp(); hitStonewall = 0; - /* loop over offsets to access */ - while ((offsetArray[pairCnt] != -1) && !hitStonewall ) { - dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); - pairCnt++; + if(randomPrefillBuffer && test->deadlineForStonewalling == 0){ + double t_start = GetTimeStamp(); + prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, 0, 
test->segmentCount); + if(rank == 0 && verbose > VERBOSE_1){ + fprintf(out_logfile, "Random prefill took: %fs\n", GetTimeStamp() - t_start); + } + // must synchronize processes to ensure they are not running ahead + MPI_Barrier(test->testComm); + } - hitStonewall = ((test->deadlineForStonewalling != 0 - && (GetTimeStamp() - startForStonewall) - > test->deadlineForStonewalling)) || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; + for (i = 0; i < test->segmentCount && !hitStonewall; i++) { + if(randomPrefillBuffer && test->deadlineForStonewalling != 0){ + // prefill the whole segment with data, this needs to be done collectively + double t_start = GetTimeStamp(); + prefillSegment(test, randomPrefillBuffer, pretendRank, fd, ioBuffers, i, i+1); + MPI_Barrier(test->testComm); + if(rank == 0 && verbose > VERBOSE_1){ + fprintf(out_logfile, "Random: synchronizing segment count with barrier and prefill took: %fs\n", GetTimeStamp() - t_start); + } + } + for (j = 0; j < offsets && !hitStonewall ; j++) { + IOR_offset_t offset; + if (test->randomOffset) { + if(test->filePerProc){ + offset = offsets_rnd[j] + (i * test->blockSize); + }else{ + offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize); + } + }else{ + offset = j * test->transferSize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + } + dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); + pairCnt++; - if ( test->collective && test->deadlineForStonewalling ) { - // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop - // it absolutely must be an 'all or none': - MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); - } + hitStonewall = ((test->deadlineForStonewalling != 0 + && (GetTimeStamp() - 
startForStonewall) > test->deadlineForStonewalling)) + || (test->stoneWallingWearOutIterations != 0 && pairCnt == test->stoneWallingWearOutIterations) ; + if ( test->collective && test->deadlineForStonewalling ) { + // if collective-mode, you'll get a HANG, if some rank 'accidentally' leave this loop + // it absolutely must be an 'all or none': + MPI_CHECK(MPI_Bcast(&hitStonewall, 1, MPI_INT, 0, MPI_COMM_WORLD), "hitStonewall broadcast failed"); + } + } } if (test->stoneWallingWearOut){ if (verbose >= VERBOSE_1){ @@ -1902,21 +1929,50 @@ static IOR_offset_t WriteOrRead(IOR_param_t *test, IOR_results_t *results, } if(pairCnt != point->pairs_accessed){ // some work needs still to be done ! - for(; pairCnt < point->pairs_accessed; pairCnt++ ) { - dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access); + /* the stonewalled loops above stepped i (and j) once more before exiting: resume in the segment that was cut short */ + i--; + if (j == offsets) { + j = 0; /* that segment was already complete, continue with the next one */ + i++; + } + for ( ; pairCnt < point->pairs_accessed; i++) { + for ( ; j < offsets && pairCnt < point->pairs_accessed ; j++) { + IOR_offset_t offset; + if (test->randomOffset) { + if(test->filePerProc){ + offset = offsets_rnd[j] + (i * test->blockSize); + }else{ + offset = offsets_rnd[j] + (i * test->numTasks * test->blockSize); + } + }else{ + offset = j * test->transferSize; + if (test->filePerProc) { + offset += i * test->blockSize; + } else { + offset += (i * test->numTasks * test->blockSize) + (pretendRank * test->blockSize); + } + } + dataMoved += WriteOrReadSingle(offset, pretendRank, test->transferSize, & transferCount, & errors, test, fd, ioBuffers, access); + pairCnt++; + } + j = 0; /* every further segment starts at its first offset; without this the outer loop never terminates */ } } }else{ point->pairs_accessed = pairCnt; } - totalErrorCount += CountErrors(test, access, errors); - free(offsetArray); - if (access == WRITE && test->fsync == TRUE) { backend->fsync(fd, test->backend_options); /*fsync after all accesses */ } + if(randomPrefillBuffer){ + aligned_buffer_free(randomPrefillBuffer); + } + if(test->randomOffset){ + free(offsets_rnd); /* allocated by GetOffsetArrayRandom(); only assigned when randomOffset is set */ + } + return (dataMoved); } diff --git a/src/ior.h b/src/ior.h index 843884d..33034c9 100755 --- a/src/ior.h +++ b/src/ior.h
@@ -127,6 +127,7 @@ typedef struct IOR_offset_t blockSize; /* contiguous bytes to write per task */ IOR_offset_t transferSize; /* size of transfer in bytes */ IOR_offset_t expectedAggFileSize; /* calculated aggregate file size */ + IOR_offset_t randomPrefillBlocksize; /* prefill option for random IO, the amount of data used for prefill */ int summary_every_test; /* flag to print summary every test, not just at end */ int uniqueDir; /* use unique directory for each fpp */ @@ -168,7 +169,7 @@ typedef struct int hdfs_block_size; /* internal blk-size. (0 gets default) */ char* URI; /* "path" to target object */ - + /* RADOS variables */ rados_t rados_cluster; /* RADOS cluster handle */ rados_ioctx_t rados_ioctx; /* I/O context for our pool in the RADOS cluster */ diff --git a/src/parse_options.c b/src/parse_options.c index 87e3c91..1a2ad7e 100755 --- a/src/parse_options.c +++ b/src/parse_options.c @@ -384,7 +384,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ char APIs[1024]; char APIs_legacy[1024]; aiori_supported_apis(APIs, APIs_legacy, IOR); - char apiStr[1024]; + char * apiStr = safeMalloc(1024); sprintf(apiStr, "API for I/O [%s]", APIs); option_help o [] = { @@ -433,6 +433,7 @@ option_help * createGlobalOptions(IOR_param_t * params){ {'y', NULL, "dualMount -- use dual mount points for a filesystem", OPTION_FLAG, 'd', & params->dualMount}, {'Y', NULL, "fsyncPerWrite -- perform sync operation after every write operation", OPTION_FLAG, 'd', & params->fsyncPerWrite}, {'z', NULL, "randomOffset -- access is to random, not sequential, offsets within a file", OPTION_FLAG, 'd', & params->randomOffset}, + {0, "randomPrefill", "For random -z access only: Prefill the file with this blocksize, e.g., 2m", OPTION_OPTIONAL_ARGUMENT, 'l', & params->randomPrefillBlocksize}, {0, "random-offset-seed", "The seed for -z", OPTION_OPTIONAL_ARGUMENT, 'd', & params->randomSeed}, {'Z', NULL, "reorderTasksRandom -- changes task ordering to random ordering for readback", 
OPTION_FLAG, 'd', & params->reorderTasksRandom}, {0, "warningAsErrors", "Any warning should lead to an error.", OPTION_FLAG, 'd', & params->warningAsErrors}, diff --git a/src/utilities.c b/src/utilities.c index b1b8cda..0ec2390 100755 --- a/src/utilities.c +++ b/src/utilities.c @@ -652,27 +652,6 @@ int Regex(char *string, char *pattern) return (retValue); } -/* - * Seed random generator. - */ -void SeedRandGen(MPI_Comm testComm) -{ - unsigned int randomSeed; - - if (rank == 0) { -#ifdef _WIN32 - rand_s(&randomSeed); -#else - struct timeval randGenTimer; - gettimeofday(&randGenTimer, (struct timezone *)NULL); - randomSeed = randGenTimer.tv_usec; -#endif - } - MPI_CHECK(MPI_Bcast(&randomSeed, 1, MPI_INT, 0, - testComm), "cannot broadcast random seed value"); - srandom(randomSeed); -} - /* * System info for Windows. */ diff --git a/src/utilities.h b/src/utilities.h index 678837a..83563c5 100755 --- a/src/utilities.h +++ b/src/utilities.h @@ -40,7 +40,6 @@ char *CurrentTimeString(void); int Regex(char *, char *); void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options); void DumpBuffer(void *, size_t); -void SeedRandGen(MPI_Comm); void SetHints (MPI_Info *, char *); void ShowHints (MPI_Info *); char *HumanReadable(IOR_offset_t value, int base); diff --git a/testing/basic-tests.sh b/testing/basic-tests.sh index 1a0841e..cf09082 100755 --- a/testing/basic-tests.sh +++ b/testing/basic-tests.sh @@ -16,15 +16,17 @@ MDTEST 2 -a POSIX -W 2 MDTEST 1 -C -T -r -F -I 1 -z 1 -b 1 -L -u MDTEST 1 -C -T -I 1 -z 1 -b 1 -u -IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 1000k -IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 100k -IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 100k +IOR 1 -a POSIX -w -z -F -Y -e -i1 -m -t 100k -b 2000k +IOR 1 -a POSIX -w -z -F -k -e -i2 -m -t 100k -b 200k +IOR 1 -a MMAP -r -z -F -k -e -i1 -m -t 100k -b 200k -IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 
100k -b 100k -IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 100k -IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 100k +IOR 2 -a POSIX -w -C -k -e -i1 -m -t 100k -b 200k + +IOR 2 -a POSIX -w -z -C -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -w -z -C -Q 1 -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -r -z -Z -Q 2 -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -r -z -Z -Q 3 -X 13 -F -k -e -i1 -m -t 100k -b 200k +IOR 2 -a POSIX -w -z -Z -Q 1 -X -13 -F -e -i1 -m -t 100k -b 200k IOR 2 -f "$ROOT/test_comments.ior" diff --git a/testing/test_comments.ior b/testing/test_comments.ior index eaf7997..1472e8f 100644 --- a/testing/test_comments.ior +++ b/testing/test_comments.ior @@ -2,16 +2,16 @@ IOR START api=posix writeFile =1 - randomOffset=1 + randomOffset=1 reorderTasks=1 - filePerProc=1 + filePerProc=1 keepFile=1 fsync=1 repetitions=1 multiFile=1 # tab-prefixed comment -transferSize=100k -blockSize=100k +transferSize=10k +blockSize=20k # space-prefixed comment run --dummy.delay-create=1000