IOR can now store the actually performed stonewalling count in a status file.
parent
378789737f
commit
812b798f05
18
src/ior.c
18
src/ior.c
|
@ -100,7 +100,11 @@ IOR_test_t * ior_run(int argc, char **argv, MPI_Comm world_com, FILE * world_out
|
||||||
TestIoSys(tptr);
|
TestIoSys(tptr);
|
||||||
|
|
||||||
if(rank == 0 && tptr->params.stoneWallingWearOut){
|
if(rank == 0 && tptr->params.stoneWallingWearOut){
|
||||||
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
if (tptr->params.stoneWallingStatusFile){
|
||||||
|
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
|
||||||
|
}else{
|
||||||
|
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
tptr->results->errors = totalErrorCount;
|
tptr->results->errors = totalErrorCount;
|
||||||
}
|
}
|
||||||
|
@ -197,7 +201,9 @@ int ior_main(int argc, char **argv)
|
||||||
}
|
}
|
||||||
TestIoSys(tptr);
|
TestIoSys(tptr);
|
||||||
|
|
||||||
if(rank == 0 && tptr->params.stoneWallingWearOut){
|
if (tptr->params.stoneWallingStatusFile){
|
||||||
|
StoreStoneWallingIterations(tptr->params.stoneWallingStatusFile, tptr->results->pairs_accessed);
|
||||||
|
}else{
|
||||||
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
fprintf(out_logfile, "Pairs deadlineForStonewallingaccessed: %lld\n", (long long) tptr->results->pairs_accessed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -796,6 +802,7 @@ static void DisplayUsage(char **argv)
|
||||||
" -D N deadlineForStonewalling -- seconds before stopping write or read phase",
|
" -D N deadlineForStonewalling -- seconds before stopping write or read phase",
|
||||||
" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
|
" -O stoneWallingWearOut=1 -- once the stonewalling timout is over, all process finish to access the amount of data",
|
||||||
" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
|
" -O stoneWallingWearOutIterations=N -- stop after processing this number of iterations, needed for reading data back written with stoneWallingWearOut",
|
||||||
|
" -O stoneWallingStatusFile=FILE -- this file keeps the number of iterations from stonewalling during write and allows to use them for read"
|
||||||
" -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
|
" -e fsync -- perform fsync/msync upon POSIX/MMAP write close",
|
||||||
" -E useExistingTestFile -- do not remove test file before write access",
|
" -E useExistingTestFile -- do not remove test file before write access",
|
||||||
" -f S scriptFile -- test script name",
|
" -f S scriptFile -- test script name",
|
||||||
|
@ -2707,6 +2714,13 @@ static IOR_offset_t WriteOrRead(IOR_param_t * test, IOR_results_t * results, voi
|
||||||
&& ((GetTimeStamp() - startForStonewall)
|
&& ((GetTimeStamp() - startForStonewall)
|
||||||
> test->deadlineForStonewalling));
|
> test->deadlineForStonewalling));
|
||||||
|
|
||||||
|
if(access == READ && test->stoneWallingStatusFile){
|
||||||
|
test->stoneWallingWearOutIterations = ReadStoneWallingIterations(test->stoneWallingStatusFile);
|
||||||
|
if(test->stoneWallingWearOutIterations == -1){
|
||||||
|
ERR("Could not read back the stonewalling status from the file!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* loop over offsets to access */
|
/* loop over offsets to access */
|
||||||
while ((offsetArray[pairCnt] != -1) && !hitStonewall ) {
|
while ((offsetArray[pairCnt] != -1) && !hitStonewall ) {
|
||||||
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
|
dataMoved += WriteOrReadSingle(pairCnt, offsetArray, pretendRank, & transferCount, & errors, test, fd, ioBuffers, access);
|
||||||
|
|
|
@ -135,6 +135,8 @@ typedef struct
|
||||||
int deadlineForStonewalling; /* max time in seconds to run any test phase */
|
int deadlineForStonewalling; /* max time in seconds to run any test phase */
|
||||||
int stoneWallingWearOut; /* wear out the stonewalling, once the timout is over, each process has to write the same amount */
|
int stoneWallingWearOut; /* wear out the stonewalling, once the timout is over, each process has to write the same amount */
|
||||||
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
|
uint64_t stoneWallingWearOutIterations; /* the number of iterations for the stonewallingWearOut, needed for readBack */
|
||||||
|
char stoneWallingStatusFile[MAXPATHLEN];
|
||||||
|
|
||||||
int maxTimeDuration; /* max time in minutes to run each test */
|
int maxTimeDuration; /* max time in minutes to run each test */
|
||||||
int outlierThreshold; /* warn on outlier N seconds from mean */
|
int outlierThreshold; /* warn on outlier N seconds from mean */
|
||||||
int verbose; /* verbosity */
|
int verbose; /* verbosity */
|
||||||
|
|
36
src/mdtest.c
36
src/mdtest.c
|
@ -174,39 +174,7 @@ typedef struct{
|
||||||
/* for making/removing unique directory && stating/deleting subdirectory */
|
/* for making/removing unique directory && stating/deleting subdirectory */
|
||||||
enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
|
enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR};
|
||||||
|
|
||||||
#ifdef __linux__
|
|
||||||
#define FAIL(msg) do { \
|
|
||||||
fprintf(out_logfile, "%s: Process %d(%s): FAILED in %s, %s: %s\n", \
|
|
||||||
print_timestamp(), rank, hostname, __func__, \
|
|
||||||
msg, strerror(errno)); \
|
|
||||||
fflush(out_logfile); \
|
|
||||||
MPI_Abort(testComm, 1); \
|
|
||||||
} while(0)
|
|
||||||
#else
|
|
||||||
#define FAIL(msg) do { \
|
|
||||||
fprintf(out_logfile, "%s: Process %d(%s): FAILED at %d, %s: %s\n", \
|
|
||||||
print_timestamp(), rank, hostname, __LINE__, \
|
|
||||||
msg, strerror(errno)); \
|
|
||||||
fflush(out_logfile); \
|
|
||||||
MPI_Abort(testComm, 1); \
|
|
||||||
} while(0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static char *print_timestamp() {
|
|
||||||
static char datestring[80];
|
|
||||||
time_t cur_timestamp;
|
|
||||||
|
|
||||||
|
|
||||||
if (( rank == 0 ) && ( verbose >= 1 )) {
|
|
||||||
fprintf( out_logfile, "V-1: Entering print_timestamp...\n" );
|
|
||||||
}
|
|
||||||
|
|
||||||
fflush(out_logfile);
|
|
||||||
cur_timestamp = time(NULL);
|
|
||||||
strftime(datestring, 80, "%m/%d/%Y %T", localtime(&cur_timestamp));
|
|
||||||
|
|
||||||
return datestring;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if MPI_VERSION >= 3
|
#if MPI_VERSION >= 3
|
||||||
int count_tasks_per_node(void) {
|
int count_tasks_per_node(void) {
|
||||||
|
@ -2199,7 +2167,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
nodeCount = size / count_tasks_per_node();
|
nodeCount = size / count_tasks_per_node();
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
fprintf(out_logfile, "-- started at %s --\n\n", print_timestamp());
|
fprintf(out_logfile, "-- started at %s --\n\n", PrintTimestamp());
|
||||||
fprintf(out_logfile, "mdtest-%s was launched with %d total task(s) on %d node(s)\n",
|
fprintf(out_logfile, "mdtest-%s was launched with %d total task(s) on %d node(s)\n",
|
||||||
RELEASE_VERS, size, nodeCount);
|
RELEASE_VERS, size, nodeCount);
|
||||||
fflush(out_logfile);
|
fflush(out_logfile);
|
||||||
|
@ -2570,7 +2538,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
if(CHECK_STONE_WALL(& progress)){
|
if(CHECK_STONE_WALL(& progress)){
|
||||||
fprintf(out_logfile, "\n-- hit stonewall\n");
|
fprintf(out_logfile, "\n-- hit stonewall\n");
|
||||||
}
|
}
|
||||||
fprintf(out_logfile, "\n-- finished at %s --\n", print_timestamp());
|
fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp());
|
||||||
fflush(out_logfile);
|
fflush(out_logfile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -186,6 +186,8 @@ void DecodeDirective(char *line, IOR_param_t *params)
|
||||||
params->stoneWallingWearOut = atoi(value);
|
params->stoneWallingWearOut = atoi(value);
|
||||||
} else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) {
|
} else if (strcasecmp(option, "stoneWallingWearOutIterations") == 0) {
|
||||||
params->stoneWallingWearOutIterations = atoll(value);
|
params->stoneWallingWearOutIterations = atoll(value);
|
||||||
|
} else if (strcasecmp(option, "stoneWallingStatusFile") == 0) {
|
||||||
|
strcpy(params->stoneWallingStatusFile, value);
|
||||||
} else if (strcasecmp(option, "maxtimeduration") == 0) {
|
} else if (strcasecmp(option, "maxtimeduration") == 0) {
|
||||||
params->maxTimeDuration = atoi(value);
|
params->maxTimeDuration = atoi(value);
|
||||||
} else if (strcasecmp(option, "outlierthreshold") == 0) {
|
} else if (strcasecmp(option, "outlierthreshold") == 0) {
|
||||||
|
|
|
@ -539,3 +539,50 @@ void init_clock(){
|
||||||
/* check for skew between tasks' start times */
|
/* check for skew between tasks' start times */
|
||||||
wall_clock_deviation = TimeDeviation();
|
wall_clock_deviation = TimeDeviation();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
char * PrintTimestamp() {
|
||||||
|
static char datestring[80];
|
||||||
|
time_t cur_timestamp;
|
||||||
|
|
||||||
|
if (( rank == 0 ) && ( verbose >= 1 )) {
|
||||||
|
fprintf( out_logfile, "V-1: Entering PrintTimestamp...\n" );
|
||||||
|
}
|
||||||
|
|
||||||
|
fflush(out_logfile);
|
||||||
|
cur_timestamp = time(NULL);
|
||||||
|
strftime(datestring, 80, "%m/%d/%Y %T", localtime(&cur_timestamp));
|
||||||
|
|
||||||
|
return datestring;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t ReadStoneWallingIterations(char * const filename){
|
||||||
|
long long data;
|
||||||
|
if(rank != 0){
|
||||||
|
MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
|
||||||
|
return data;
|
||||||
|
}else{
|
||||||
|
FILE * out = fopen(filename, "r");
|
||||||
|
if (out == NULL){
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int ret = fscanf(out, "%lld", & data);
|
||||||
|
if (ret != 1){
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
fclose(out);
|
||||||
|
MPI_Bcast( & data, 1, MPI_LONG_LONG_INT, 0, mpi_comm_world);
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void StoreStoneWallingIterations(char * const filename, int64_t count){
|
||||||
|
if(rank != 0){
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FILE * out = fopen(filename, "w");
|
||||||
|
if (out == NULL){
|
||||||
|
FAIL("Cannot write to the stonewalling file!");
|
||||||
|
}
|
||||||
|
fprintf(out, "%lld", (long long) count);
|
||||||
|
fclose(out);
|
||||||
|
}
|
||||||
|
|
|
@ -27,6 +27,24 @@ extern MPI_Comm testComm;
|
||||||
extern MPI_Comm mpi_comm_world;
|
extern MPI_Comm mpi_comm_world;
|
||||||
extern FILE * out_logfile;
|
extern FILE * out_logfile;
|
||||||
|
|
||||||
|
/*
 * FAIL(msg): write a timestamped failure line to out_logfile containing the
 * rank, the failure location, msg and strerror(errno), flush the log and
 * abort the run via MPI_Abort(testComm, 1).
 *
 * The location is the enclosing function name when __linux__ is defined and
 * the source line number otherwise.
 */
#ifdef __linux__
#define FAIL(msg) do {                                                      \
        fprintf(out_logfile, "%s: Process %d: FAILED in %s, %s: %s\n",     \
                PrintTimestamp(), rank, __func__,                           \
                msg, strerror(errno));                                      \
        fflush(out_logfile);                                                \
        MPI_Abort(testComm, 1);                                             \
    } while(0)
#else
#define FAIL(msg) do {                                                      \
        fprintf(out_logfile, "%s: Process %d: FAILED at %d, %s: %s\n",     \
                PrintTimestamp(), rank, __LINE__,                           \
                msg, strerror(errno));                                      \
        fflush(out_logfile);                                                \
        MPI_Abort(testComm, 1);                                             \
    } while(0)
#endif
|
||||||
|
|
||||||
void set_o_direct_flag(int *fd);
|
void set_o_direct_flag(int *fd);
|
||||||
|
|
||||||
char *CurrentTimeString(void);
|
char *CurrentTimeString(void);
|
||||||
|
@ -38,8 +56,13 @@ void SeedRandGen(MPI_Comm);
|
||||||
void SetHints (MPI_Info *, char *);
|
void SetHints (MPI_Info *, char *);
|
||||||
void ShowHints (MPI_Info *);
|
void ShowHints (MPI_Info *);
|
||||||
|
|
||||||
|
/* Returns -1, if cannot be read */
|
||||||
|
int64_t ReadStoneWallingIterations(char * const filename);
|
||||||
|
void StoreStoneWallingIterations(char * const filename, int64_t count);
|
||||||
|
|
||||||
void init_clock(void);
|
void init_clock(void);
|
||||||
double GetTimeStamp(void);
|
double GetTimeStamp(void);
|
||||||
|
char * PrintTimestamp(); // TODO remove this function
|
||||||
|
|
||||||
extern double wall_clock_deviation;
|
extern double wall_clock_deviation;
|
||||||
extern double wall_clock_delta;
|
extern double wall_clock_delta;
|
||||||
|
|
|
@ -8,6 +8,12 @@ ROOT=${0%/*}
|
||||||
|
|
||||||
source $ROOT/test-lib.sh
|
source $ROOT/test-lib.sh
|
||||||
|
|
||||||
|
#stonewalling tests
|
||||||
|
IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut=1 -D 1 -t 1000 -b 1000 -s 15
|
||||||
|
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
|
||||||
|
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
|
||||||
|
|
||||||
|
|
||||||
#shared tests
|
#shared tests
|
||||||
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
|
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
|
||||||
IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k
|
IOR 2 -a POSIX -w -k -e -i1 -m -t 100k -b 100k
|
||||||
|
|
Loading…
Reference in New Issue