IOR allows to store actual performaned stonewalling count into a status file.
parent
378789737f
commit
812b798f05
16
src/ior.c
16
src/ior.c
|
@ -100,8 +100,12 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out
|
|||
TestIoSys(tptr);
|
||||
|
||||
if(rank == 0 && tptr->params.stoneWallingWearOut){
|
||||
if (tptr->params.stoneWallingStatusFile){
|
||||
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
|
||||
}else{
|
||||
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
||||
}
|
||||
}
|
||||
tptr->results->errors = totalErrorCount;
|
||||
}
|
||||
|
||||
|
@ -197,7 +201,9 @@ int ior_main(int argc, char **argv)
|
|||
}
|
||||
TestIoSys(tptr);
|
||||
|
||||
if(rank == 0 && tptr->params.stoneWallingWearOut){
|
||||
if (tptr->params.stoneWallingStatusFile){
|
||||
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
|
||||
}else{
|
||||
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
||||
}
|
||||
}
|
||||
|
@ -796,6 +802,7 @@ static void DisplayUsage(char **argv)
|
|||
" -D N deadlineForStonewalling -- seconds before stopping write or read phase",
|
||||
" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
|
||||
" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
|
||||
" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read"
|
||||
" -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
|
||||
" -E useExistingTestFile -- do not remove test file before write access",
|
||||
" -f S scriptFile -- test script name",
|
||||
|
@ -2707,6 +2714,13 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
|
|||
&& ((GetTimeStamp() - startForStonewall)
|
||||
> test->deadlineForStonewalling));
|
||||
|
||||
if(access == READ && test->stoneWallingStatusFile){
|
||||
test->stoneWallingWearOutIterations = ReadStoneWallingIterations(test->stoneWallingStatusFile);
|
||||
if(test->stoneWallingWearOutIterations == -1){
|
||||
ERR("Could not read back the stonewalling status from the file!");
|
||||
}
|
||||
}
|
||||
|
||||
/* loop over offsets to access */
|
||||
while ((offsetArray[pairCnt] != -1) && !hitStonewall ) {
|
||||
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
|
||||
|
|
|
@ -135,6 +135,8 @@ typedef struct
|
|||
int deadlineForStonewalling; /* max time in seconds to run any test phase */
|
||||
int stoneWallingWearOut; /* wear out the stonewalling, once the timout is over, each process has to write the same amount */
|
||||
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
|
||||
char stoneWallingStatusFile[MAXPATHLEN];
|
||||
|
||||
int maxTimeDuration; /* max time in minutes to run each test */
|
||||
int outlierThreshold; /* warn on outlier N seconds from mean */
|
||||
int verbose; /* verbosity */
|
||||
|
|
36
src/mdtest.c
36
src/mdtest.c
|
@ -174,39 +174,7 @@ typedef struct{
|
|||
/* for making/removing unique directory && stating/deleting subdirectory */
|
||||
enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
|
||||
|
||||
#ifdef __linux__
|
||||
#define FAIL(msg) do { \
|
||||
fprintf(out_logfile, "%s: Process %d(%s): FAILED in %s, %s: %s\n", \
|
||||
print_timestamp(), rank, hostname, __func__, \
|
||||
msg, strerror(errno)); \
|
||||
fflush(out_logfile); \
|
||||
MPI_Abort(testComm, 1); \
|
||||
} while(0)
|
||||
#else
|
||||
#define FAIL(msg) do { \
|
||||
fprintf(out_logfile, "%s: Process %d(%s): FAILED at %d, %s: %s\n", \
|
||||
print_timestamp(), rank, hostname, __LINE__, \
|
||||
msg, strerror(errno)); \
|
||||
fflush(out_logfile); \
|
||||
MPI_Abort(testComm, 1); \
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
static char *print_timestamp() {
|
||||
static char datestring[80];
|
||||
time_t cur_timestamp;
|
||||
|
||||
|
||||
if (( rank == 0 ) && ( verbose >= 1 )) {
|
||||
fprintf( out_logfile, "V-1: Entering print_timestamp...\n" );
|
||||
}
|
||||
|
||||
fflush(out_logfile);
|
||||
cur_timestamp = time(NULL);
|
||||
strftime(datestring, 80, "%m/%d/%Y %T", localtime(&cur_timestamp));
|
||||
|
||||
return datestring;
|
||||
}
|
||||
|
||||
#if MPI_VERSION >= 3
|
||||
int count_tasks_per_node(void) {
|
||||
|
@ -2199,7 +2167,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
|||
nodeCount = size / count_tasks_per_node();
|
||||
|
||||
if (rank == 0) {
|
||||
fprintf(out_logfile, "-- started at %s --\n\n", print_timestamp());
|
||||
fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp());
|
||||
fprintf(out_logfile, "mdtest-%s was launched with %d total task(s) on %d node(s)\n",
|
||||
RELEASE_VERS, size, nodeCount);
|
||||
fflush(out_logfile);
|
||||
|
@ -2570,7 +2538,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
|||
if(CHECK_STONE_WALL(& progress)){
|
||||
fprintf(out_logfile, "\n-- hit stonewall\n");
|
||||
}
|
||||
fprintf(out_logfile, "\n-- finished at %s --\n", print_timestamp());
|
||||
fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp());
|
||||
fflush(out_logfile);
|
||||
}
|
||||
|
||||
|
|
|
@ -186,6 +186,8 @@ void DecodeDirective(char *line, IOR_param_t *params)
|
|||
params->stoneWallingWearOut = atoi(value);
|
||||
} else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) {
|
||||
params->stoneWallingWearOutIterations = atoll(value);
|
||||
} else if (strcasecmp(option, "stoneWallingStatusFile") == 0) {
|
||||
strcpy(params->stoneWallingStatusFile, value);
|
||||
} else if (strcasecmp(option, "maxtimeduration") == 0) {
|
||||
params->maxTimeDuration = atoi(value);
|
||||
} else if (strcasecmp(option, "outlierthreshold") == 0) {
|
||||
|
|
|
@ -539,3 +539,50 @@ void init_clock(){
|
|||
/* check for skew between tasks' start times */
|
||||
wall_clock_deviation = TimeDeviation();
|
||||
}
|
||||
|
||||
char * PrintTimestamp() {
|
||||
static char datestring[80];
|
||||
time_t cur_timestamp;
|
||||
|
||||
if (( rank == 0 ) && ( verbose >= 1 )) {
|
||||
fprintf( out_logfile, "V-1: Entering PrintTimestamp...\n" );
|
||||
}
|
||||
|
||||
fflush(out_logfile);
|
||||
cur_timestamp = time(NULL);
|
||||
strftime(datestring, 80, "%m/%d/%Y %T", localtime(&cur_timestamp));
|
||||
|
||||
return datestring;
|
||||
}
|
||||
|
||||
int64_t ReadStoneWallingIterations(char * const filename){
|
||||
long long data;
|
||||
if(rank != 0){
|
||||
MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
|
||||
return data;
|
||||
}else{
|
||||
FILE * out = fopen(filename, "r");
|
||||
if (out == NULL){
|
||||
return -1;
|
||||
}
|
||||
int ret = fscanf(out, "%lld", & data);
|
||||
if (ret != 1){
|
||||
return -1;
|
||||
}
|
||||
fclose(out);
|
||||
MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
void StoreStoneWallingIterations(char * const filename, int64_t count){
|
||||
if(rank != 0){
|
||||
return;
|
||||
}
|
||||
FILE * out = fopen(filename, "w");
|
||||
if (out == NULL){
|
||||
FAIL("Cannot write to the stonewalling file!");
|
||||
}
|
||||
fprintf(out, "%lld", (long long) count);
|
||||
fclose(out);
|
||||
}
|
||||
|
|
|
@ -27,6 +27,24 @@ extern MPI_Comm testComm;
|
|||
extern MPI_Comm mpi_comm_world;
|
||||
extern FILE * out_logfile;
|
||||
|
||||
#ifdef __linux__
|
||||
#define FAIL(msg) do { \
|
||||
fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n", \
|
||||
PrintTimestamp(), rank, __func__, \
|
||||
msg, strerror(errno)); \
|
||||
fflush(out_logfile); \
|
||||
MPI_Abort(testComm, 1); \
|
||||
} while(0)
|
||||
#else
|
||||
#define FAIL(msg) do { \
|
||||
fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n", \
|
||||
PrintTimestamp(), rank, __LINE__, \
|
||||
msg, strerror(errno)); \
|
||||
fflush(out_logfile); \
|
||||
MPI_Abort(testComm, 1); \
|
||||
} while(0)
|
||||
#endif
|
||||
|
||||
void set_o_direct_flag(int *fd);
|
||||
|
||||
char *CurrentTimeString(void);
|
||||
|
@ -38,8 +56,13 @@ void SeedRandGen(MPI_Comm);
|
|||
void SetHints (MPI_Info *, char *);
|
||||
void ShowHints (MPI_Info *);
|
||||
|
||||
/* Returns -1, if cannot be read */
|
||||
int64_t ReadStoneWallingIterations(char * const filename);
|
||||
void StoreStoneWallingIterations(char * const filename, int64_t count);
|
||||
|
||||
void init_clock(void);
|
||||
double GetTimeStamp(void);
|
||||
char * PrintTimestamp(); // TODO remove this function
|
||||
|
||||
extern double wall_clock_deviation;
|
||||
extern double wall_clock_delta;
|
||||
|
|
|
@ -8,6 +8,12 @@ ROOT=${0%/*}
|
|||
|
||||
source $ROOT/test-lib.sh
|
||||
|
||||
#stonewalling tests
|
||||
IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15
|
||||
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
|
||||
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
|
||||
|
||||
|
||||
#shared tests
|
||||
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
|
||||
IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k
|
||||
|
|
Loading…
Reference in New Issue