IOR can now store the number of stonewalling iterations actually performed in a status file.

master
Julian M. Kunkel 2018-07-07 14:01:11 +01:00
parent 378789737f
commit 812b798f05
7 changed files with 98 additions and 36 deletions

View File

@ -100,7 +100,11 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out
TestIoSys(tptr);
if(rank == 0 && tptr->params.stoneWallingWearOut){
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
/* stoneWallingStatusFile is a char array inside the parameter struct, so the
 * array itself can never compare equal to NULL; test for a non-empty name.
 * NOTE(review): assumes the struct is zero-initialised when the option is
 * not given -- confirm. */
if (tptr->params.stoneWallingStatusFile[0] != '\0'){
        /* persist the iteration count so that a later read can reuse it */
        StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
}else{
        fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
}
}
tptr->results->errors = totalErrorCount;
}
@ -197,7 +201,9 @@ int ior_main(int argc, char **argv)
}
TestIoSys(tptr);
if(rank == 0 && tptr->params.stoneWallingWearOut){
/* stoneWallingStatusFile is a char array inside the parameter struct, so the
 * array itself can never compare equal to NULL; test for a non-empty name.
 * NOTE(review): assumes the struct is zero-initialised when the option is
 * not given -- confirm. */
if (tptr->params.stoneWallingStatusFile[0] != '\0'){
        /* persist the iteration count so that a later read can reuse it */
        StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
}else{
        fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
}
}
@ -796,6 +802,7 @@ static void DisplayUsage(char **argv)
" -D N deadlineForStonewalling -- seconds before stopping write or read phase",
" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read",
" -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
" -E useExistingTestFile -- do not remove test file before write access",
" -f S scriptFile -- test script name",
@ -2707,6 +2714,13 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
&& ((GetTimeStamp() - startForStonewall)
> test->deadlineForStonewalling));
/* Only consult the status file when one was configured. The member is a char
 * array, so it has to be tested for emptiness; comparing the array itself
 * against NULL is always true.
 * NOTE(review): assumes the struct is zero-initialised when the option is
 * not given -- confirm. */
if(access == READ && test->stoneWallingStatusFile[0] != '\0'){
        test->stoneWallingWearOutIterations = ReadStoneWallingIterations(test->stoneWallingStatusFile);
        /* the helper reports failure as -1; the comparison still matches after
         * the value has been stored in the unsigned field */
        if(test->stoneWallingWearOutIterations == -1){
                ERR("Could not read back the stonewalling status from the file!");
        }
}
/* loop over offsets to access */
while ((offsetArray[pairCnt] != -1) && !hitStonewall ) {
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);

View File

@ -135,6 +135,8 @@ typedef struct
int deadlineForStonewalling; /* max time in seconds to run any test phase */
int stoneWallingWearOut; /* wear out the stonewalling: once the timeout is over, each process has to write the same amount */
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stoneWallingWearOut, needed for reading the data back */
char stoneWallingStatusFile[MAXPATHLEN]; /* file that keeps the stonewalling iteration count of a write so that a later read can use it */
int maxTimeDuration; /* max time in minutes to run each test */
int outlierThreshold; /* warn on outlier N seconds from mean */
int verbose; /* verbosity */

View File

@ -174,39 +174,7 @@ typedef struct{
/* for making/removing unique directory && stating/deleting subdirectory */
enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
/*
 * FAIL(msg): print a failure line to out_logfile and call MPI_Abort() on
 * testComm. The line holds a timestamp, the rank, the hostname, the location
 * (function name on Linux, line number elsewhere), msg and the text for errno.
 *
 * errno is captured first: the order in which the fprintf() arguments are
 * evaluated is unspecified, and print_timestamp() performs I/O that may
 * overwrite errno before strerror() gets to see it.
 */
#ifdef __linux__
#define FAIL(msg) do { \
        const int fail_saved_errno_ = errno; \
        fprintf(out_logfile, "%s: Process %d(%s): FAILED in %s, %s: %s\n", \
                print_timestamp(), rank, hostname, __func__, \
                msg, strerror(fail_saved_errno_)); \
        fflush(out_logfile); \
        MPI_Abort(testComm, 1); \
} while(0)
#else
#define FAIL(msg) do { \
        const int fail_saved_errno_ = errno; \
        fprintf(out_logfile, "%s: Process %d(%s): FAILED at %d, %s: %s\n", \
                print_timestamp(), rank, hostname, __LINE__, \
                msg, strerror(fail_saved_errno_)); \
        fflush(out_logfile); \
        MPI_Abort(testComm, 1); \
} while(0)
#endif
/*
 * Format the current local time as "month/day/year time" into a static
 * buffer and return that buffer; every call overwrites the previous result.
 * With verbose >= 1, rank 0 logs a trace line first. out_logfile is flushed
 * on every call.
 */
static char *print_timestamp() {
        static char datestring[80];
        const int trace_entry = (rank == 0) && (verbose >= 1);

        if (trace_entry) {
                fprintf( out_logfile, "V-1: Entering print_timestamp...\n" );
        }
        fflush(out_logfile);

        time_t now = time(NULL);
        struct tm *local_now = localtime(&now);
        strftime(datestring, 80, "%m/%d/%Y %T", local_now);

        return datestring;
}
#if MPI_VERSION >= 3
int count_tasks_per_node(void) {
@ -2199,7 +2167,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
nodeCount = size / count_tasks_per_node();
if (rank == 0) {
fprintf(out_logfile, "-- started at %s --\n\n", print_timestamp());
fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp());
fprintf(out_logfile, "mdtest-%s was launched with %d total task(s) on %d node(s)\n",
RELEASE_VERS, size, nodeCount);
fflush(out_logfile);
@ -2570,7 +2538,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
if(CHECK_STONE_WALL(& progress)){
fprintf(out_logfile, "\n-- hit stonewall\n");
}
fprintf(out_logfile, "\n-- finished at %s --\n", print_timestamp());
fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp());
fflush(out_logfile);
}

View File

@ -186,6 +186,8 @@ void DecodeDirective(char *line, IOR_param_t *params)
params->stoneWallingWearOut = atoi(value);
} else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) {
params->stoneWallingWearOutIterations = atoll(value);
} else if (strcasecmp(option, "stoneWallingStatusFile") == 0) {
strcpy(params->stoneWallingStatusFile, value);
} else if (strcasecmp(option, "maxtimeduration") == 0) {
params->maxTimeDuration = atoi(value);
} else if (strcasecmp(option, "outlierthreshold") == 0) {

View File

@ -539,3 +539,50 @@ void init_clock(){
/* check for skew between tasks' start times */
wall_clock_deviation = TimeDeviation();
}
/*
 * Format the current local time as "month/day/year time".
 *
 * Returns a pointer to a static buffer that is overwritten by the next call.
 * If the time cannot be converted or formatted, the buffer holds an empty
 * string. With verbose >= 1, rank 0 logs a trace line first; out_logfile is
 * flushed on every call.
 */
char * PrintTimestamp() {
        static char datestring[80];
        time_t cur_timestamp;
        struct tm * local;

        if (( rank == 0 ) && ( verbose >= 1 )) {
                fprintf( out_logfile, "V-1: Entering PrintTimestamp...\n" );
        }

        fflush(out_logfile);

        cur_timestamp = time(NULL);
        local = localtime(&cur_timestamp);

        /* localtime() may return NULL, and a zero return from strftime()
         * leaves the buffer contents unspecified: fall back to "" */
        if (local == NULL
            || strftime(datestring, sizeof(datestring), "%m/%d/%Y %T", local) == 0) {
                datestring[0] = '\0';
        }

        return datestring;
}
/*
 * Read the stonewalling iteration count stored in `filename`.
 *
 * Rank 0 opens and parses the file; the result is then broadcast over
 * mpi_comm_world, so the function has to be called by all ranks of that
 * communicator.
 *
 * Returns the stored count, or -1 on every rank if the file cannot be
 * opened or does not start with an integer.
 */
int64_t ReadStoneWallingIterations(char * const filename){
        long long data = -1;

        if(rank == 0){
                FILE * in = fopen(filename, "r");
                if (in != NULL){
                        if (fscanf(in, "%lld", & data) != 1){
                                /* do not trust a partially assigned value */
                                data = -1;
                        }
                        fclose(in);
                }
        }

        /* Rank 0 must take part in the broadcast even when reading failed;
         * otherwise the other ranks would wait in the collective forever. */
        MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
        return data;
}
/*
 * Write the stonewalling iteration count `count` to `filename` as a decimal
 * number, replacing any previous content.
 *
 * Only rank 0 writes; every other rank returns immediately. Any failure to
 * open, write or close the file is reported through FAIL().
 */
void StoreStoneWallingIterations(char * const filename, int64_t count){
        if(rank != 0){
                return;
        }

        FILE * out = fopen(filename, "w");
        if (out == NULL){
                FAIL("Cannot write to the stonewalling file!");
                return; /* never hand a NULL stream to fprintf() */
        }

        /* A short write or a failing flush on close would leave a truncated
         * file that a later read phase cannot parse, so check both. */
        int failed = fprintf(out, "%lld", (long long) count) < 0;
        if (fclose(out) != 0){
                failed = 1;
        }
        if (failed){
                FAIL("Cannot write to the stonewalling file!");
        }
}

View File

@ -27,6 +27,24 @@ extern MPI_Comm testComm;
extern MPI_Comm mpi_comm_world;
extern FILE * out_logfile;
/*
 * FAIL(msg): print a failure line to out_logfile and call MPI_Abort() on
 * testComm. The line holds a timestamp, the rank, the location (function
 * name on Linux, line number elsewhere), msg and the text for errno.
 *
 * errno is captured first: the order in which the fprintf() arguments are
 * evaluated is unspecified, and PrintTimestamp() performs I/O that may
 * overwrite errno before strerror() gets to see it.
 */
#ifdef __linux__
#define FAIL(msg) do { \
        const int fail_saved_errno_ = errno; \
        fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \
                PrintTimestamp(), rank, __func__, \
                msg, strerror(fail_saved_errno_)); \
        fflush(out_logfile); \
        MPI_Abort(testComm, 1); \
} while(0)
#else
#define FAIL(msg) do { \
        const int fail_saved_errno_ = errno; \
        fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \
                PrintTimestamp(), rank, __LINE__, \
                msg, strerror(fail_saved_errno_)); \
        fflush(out_logfile); \
        MPI_Abort(testComm, 1); \
} while(0)
#endif
void set_o_direct_flag(int *fd);
char *CurrentTimeString(void);
@ -38,8 +56,13 @@ void SeedRandGen(MPI_Comm);
void SetHints (MPI_Info *, char *);
void ShowHints (MPI_Info *);
/* Returns -1, if cannot be read */
int64_t ReadStoneWallingIterations(char * const filename);
void StoreStoneWallingIterations(char * const filename, int64_t count);
void init_clock(void);
double GetTimeStamp(void);
char * PrintTimestamp(); // TODO remove this function
extern double wall_clock_deviation;
extern double wall_clock_delta;

View File

@ -8,6 +8,12 @@ ROOT=${0%/*}
source $ROOT/test-lib.sh
#stonewalling tests
# Write with a 1 second stonewalling deadline (-D 1) and wear-out enabled; the
# iteration count of this run is kept in stonewall.log.
IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15
# Read using the count from stonewall.log, with the deadline still set; -s 30
# is larger than the -s 15 used for the write above.
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
# Same read, this time without a stonewalling deadline.
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
#shared tests
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k