MDTest stonewalling with status file.
Appears to work; the stonewall timer is honored only in the creation phase. This implies a limit on the number of objects that are then used in the other phases.
master
parent
58f13ba352
commit
fd4c306b98
|
@ -21,6 +21,9 @@ static void *DUMMY_Create(char *testFileName, IOR_param_t * param)
|
||||||
if(verbose > 4){
|
if(verbose > 4){
|
||||||
fprintf(out_logfile, "DUMMY create: %s = %p\n", testFileName, current);
|
fprintf(out_logfile, "DUMMY create: %s = %p\n", testFileName, current);
|
||||||
}
|
}
|
||||||
|
if (rank == 0){
|
||||||
|
usleep(100000);
|
||||||
|
}
|
||||||
return current++;
|
return current++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
135
src/mdtest.c
135
src/mdtest.c
|
@ -100,6 +100,8 @@ static char unique_rm_dir[MAX_PATHLEN];
|
||||||
static char unique_rm_uni_dir[MAX_PATHLEN];
|
static char unique_rm_uni_dir[MAX_PATHLEN];
|
||||||
static char *write_buffer;
|
static char *write_buffer;
|
||||||
static char *read_buffer;
|
static char *read_buffer;
|
||||||
|
static char *stoneWallingStatusFile;
|
||||||
|
|
||||||
|
|
||||||
static int barriers;
|
static int barriers;
|
||||||
static int create_only;
|
static int create_only;
|
||||||
|
@ -155,9 +157,10 @@ typedef struct{
|
||||||
double start_time;
|
double start_time;
|
||||||
|
|
||||||
int stone_wall_timer_seconds;
|
int stone_wall_timer_seconds;
|
||||||
|
|
||||||
|
long long unsigned items_start;
|
||||||
long long unsigned items_done;
|
long long unsigned items_done;
|
||||||
|
|
||||||
int items_start;
|
|
||||||
uint64_t items_per_dir;
|
uint64_t items_per_dir;
|
||||||
} rank_progress_t;
|
} rank_progress_t;
|
||||||
|
|
||||||
|
@ -222,8 +225,6 @@ void parse_dirpath(char *dirpath_arg) {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void unique_dir_access(int opt, char *to) {
|
void unique_dir_access(int opt, char *to) {
|
||||||
|
|
||||||
|
|
||||||
if (( rank == 0 ) && ( verbose >= 1 )) {
|
if (( rank == 0 ) && ( verbose >= 1 )) {
|
||||||
fprintf( out_logfile, "V-1: Entering unique_dir_access...\n" );
|
fprintf( out_logfile, "V-1: Entering unique_dir_access...\n" );
|
||||||
fflush( out_logfile );
|
fflush( out_logfile );
|
||||||
|
@ -382,7 +383,7 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa
|
||||||
fflush( out_logfile );
|
fflush( out_logfile );
|
||||||
}
|
}
|
||||||
|
|
||||||
for (uint64_t i = progress->items_start ; i < progress->items_per_dir ; ++i) {
|
for (uint64_t i = progress->items_start; i < progress->items_per_dir ; ++i) {
|
||||||
if (!dirs) {
|
if (!dirs) {
|
||||||
if (create) {
|
if (create) {
|
||||||
create_file (path, itemNum + i);
|
create_file (path, itemNum + i);
|
||||||
|
@ -397,7 +398,7 @@ void create_remove_items_helper(const int dirs, const int create, const char *pa
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
progress->items_done = items_per_dir;
|
progress->items_done = progress->items_per_dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* helper function to do collective operations */
|
/* helper function to do collective operations */
|
||||||
|
@ -408,7 +409,7 @@ void collective_helper(const int dirs, const int create, const char* path, uint6
|
||||||
fprintf( out_logfile, "V-1: Entering collective_helper...\n" );
|
fprintf( out_logfile, "V-1: Entering collective_helper...\n" );
|
||||||
fflush( out_logfile );
|
fflush( out_logfile );
|
||||||
}
|
}
|
||||||
for (uint64_t i = 0 ; i < items_per_dir ; ++i) {
|
for (uint64_t i = progress->items_start ; i < progress->items_per_dir ; ++i) {
|
||||||
if (dirs) {
|
if (dirs) {
|
||||||
create_remove_dirs (path, create, itemNum + i);
|
create_remove_dirs (path, create, itemNum + i);
|
||||||
continue;
|
continue;
|
||||||
|
@ -440,7 +441,7 @@ void collective_helper(const int dirs, const int create, const char* path, uint6
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
progress->items_done = items_per_dir;
|
progress->items_done = progress->items_per_dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* recusive function to create and remove files/directories from the
|
/* recusive function to create and remove files/directories from the
|
||||||
|
@ -1020,6 +1021,36 @@ void directory_test(const int iteration, const int ntasks, const char *path, ran
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns if the stonewall was hit */
|
||||||
|
int updateStoneWallIterations(int iteration, rank_progress_t * progress, double tstart){
|
||||||
|
int hit = 0;
|
||||||
|
if (verbose >= 1 ) {
|
||||||
|
fprintf( out_logfile, "V-1: rank %d stonewall hit with %lld items\n", rank, progress->items_done );
|
||||||
|
fflush( out_logfile );
|
||||||
|
}
|
||||||
|
progress->items_start = progress->items_done;
|
||||||
|
long long unsigned max_iter = 0;
|
||||||
|
MPI_Allreduce(& progress->items_done, & max_iter, 1, MPI_INT, MPI_MAX, testComm);
|
||||||
|
summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = MPI_Wtime() - tstart;
|
||||||
|
|
||||||
|
// continue to the maximum...
|
||||||
|
long long min_accessed = 0;
|
||||||
|
MPI_Reduce(& progress->items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm);
|
||||||
|
long long sum_accessed = 0;
|
||||||
|
MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm);
|
||||||
|
|
||||||
|
if (rank == 0 && items != (sum_accessed / size)) {
|
||||||
|
summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed;
|
||||||
|
summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size;
|
||||||
|
fprintf( out_logfile, "V-1: continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size);
|
||||||
|
fflush( out_logfile );
|
||||||
|
hit = 1;
|
||||||
|
}
|
||||||
|
progress->items_per_dir = max_iter;
|
||||||
|
|
||||||
|
return hit;
|
||||||
|
}
|
||||||
|
|
||||||
void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) {
|
void file_test(const int iteration, const int ntasks, const char *path, rank_progress_t * progress) {
|
||||||
int size;
|
int size;
|
||||||
double t[5] = {0};
|
double t[5] = {0};
|
||||||
|
@ -1061,37 +1092,27 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
|
||||||
/* create files */
|
/* create files */
|
||||||
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
|
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
|
||||||
if(stone_wall_timer_seconds){
|
if(stone_wall_timer_seconds){
|
||||||
/* TODO */
|
int hit = updateStoneWallIterations(iteration, progress, t[0]);
|
||||||
if (verbose >= 1 ) {
|
|
||||||
fprintf( out_logfile, "V-1: rank %d stonewall hit with %lld items\n", rank, progress->items_done );
|
if (hit){
|
||||||
fflush( out_logfile );
|
progress->stone_wall_timer_seconds = 0;
|
||||||
|
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
|
||||||
|
// now reset the values
|
||||||
|
progress->stone_wall_timer_seconds = stone_wall_timer_seconds;
|
||||||
|
items = progress->items_per_dir;
|
||||||
}
|
}
|
||||||
long long unsigned max_iter = 0;
|
if (stoneWallingStatusFile){
|
||||||
MPI_Allreduce(& progress->items_done, & max_iter, 1, MPI_INT, MPI_MAX, testComm);
|
StoreStoneWallingIterations(stoneWallingStatusFile, progress->items_per_dir);
|
||||||
summary_table[iteration].stonewall_time[MDTEST_FILE_CREATE_NUM] = MPI_Wtime() - t[0];
|
|
||||||
|
|
||||||
// continue to the maximum...
|
|
||||||
long long min_accessed = 0;
|
|
||||||
MPI_Reduce(& progress->items_done, & min_accessed, 1, MPI_LONG_LONG_INT, MPI_MIN, 0, testComm);
|
|
||||||
|
|
||||||
long long sum_accessed = 0;
|
|
||||||
MPI_Reduce(& progress->items_done, & sum_accessed, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, testComm);
|
|
||||||
|
|
||||||
if (rank == 0 && items != sum_accessed / size) {
|
|
||||||
summary_table[iteration].stonewall_item_sum[MDTEST_FILE_CREATE_NUM] = sum_accessed;
|
|
||||||
summary_table[iteration].stonewall_item_min[MDTEST_FILE_CREATE_NUM] = min_accessed * size;
|
|
||||||
fprintf( out_logfile, "V-1: continue stonewall hit min: %lld max: %lld avg: %.1f \n", min_accessed, max_iter, ((double) sum_accessed) / size);
|
|
||||||
fflush( out_logfile );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
progress->stone_wall_timer_seconds = 0;
|
|
||||||
progress->items_start = progress->items_done;
|
|
||||||
progress->items_per_dir = max_iter;
|
|
||||||
create_remove_items(0, 0, 1, 0, temp_path, 0, progress);
|
|
||||||
progress->stone_wall_timer_seconds = stone_wall_timer_seconds;
|
|
||||||
items = max_iter;
|
|
||||||
progress->items_done = max_iter;
|
|
||||||
}
|
}
|
||||||
|
}else{
|
||||||
|
if (stoneWallingStatusFile){
|
||||||
|
/* The number of items depends on the stonewalling file */
|
||||||
|
items = ReadStoneWallingIterations(stoneWallingStatusFile);
|
||||||
|
if (verbose >= 1 && rank == 0) {
|
||||||
|
printf("read stonewall file items: "LLU"\n", items);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (barriers) {
|
if (barriers) {
|
||||||
|
@ -1116,11 +1137,7 @@ void file_test(const int iteration, const int ntasks, const char *path, rank_pro
|
||||||
}
|
}
|
||||||
|
|
||||||
/* stat files */
|
/* stat files */
|
||||||
if (random_seed > 0) {
|
mdtest_stat((random_seed > 0 ? 1 : 0), 0, temp_path, progress);
|
||||||
mdtest_stat(1,0,temp_path, progress);
|
|
||||||
} else {
|
|
||||||
mdtest_stat(0,0,temp_path, progress);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (barriers) {
|
if (barriers) {
|
||||||
|
@ -1279,7 +1296,8 @@ void print_help (void) {
|
||||||
"\t-v: verbosity (each instance of option increments by one)\n"
|
"\t-v: verbosity (each instance of option increments by one)\n"
|
||||||
"\t-V: verbosity value\n"
|
"\t-V: verbosity value\n"
|
||||||
"\t-w: bytes to write to each file after it is created\n"
|
"\t-w: bytes to write to each file after it is created\n"
|
||||||
"\t-W: number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations\n"
|
"\t-W: number in seconds; stonewall timer, write as many seconds and ensure all processes did the same number of operations (currently only stops during create phase)\n"
|
||||||
|
"\t-x: StoneWallingStatusFile; contains the number of iterations of the creation phase, can be used to split phases across runs\n"
|
||||||
"\t-y: sync file after writing\n"
|
"\t-y: sync file after writing\n"
|
||||||
"\t-z: depth of hierarchical directory structure\n"
|
"\t-z: depth of hierarchical directory structure\n"
|
||||||
"\t-Z: print time instead of rate\n"
|
"\t-Z: print time instead of rate\n"
|
||||||
|
@ -1769,7 +1787,14 @@ void create_remove_directory_tree(int create,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t * summary_table, rank_progress_t * progress){
|
static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t * summary_table){
|
||||||
|
rank_progress_t progress_o;
|
||||||
|
memset(& progress_o, 0 , sizeof(progress_o));
|
||||||
|
progress_o.start_time = GetTimeStamp();
|
||||||
|
progress_o.stone_wall_timer_seconds = stone_wall_timer_seconds;
|
||||||
|
progress_o.items_per_dir = items_per_dir;
|
||||||
|
rank_progress_t * progress = & progress_o;
|
||||||
|
|
||||||
/* start and end times of directory tree create/remove */
|
/* start and end times of directory tree create/remove */
|
||||||
double startCreate, endCreate;
|
double startCreate, endCreate;
|
||||||
int k, c;
|
int k, c;
|
||||||
|
@ -2024,6 +2049,7 @@ void mdtest_init_args(){
|
||||||
barriers = 1;
|
barriers = 1;
|
||||||
branch_factor = 1;
|
branch_factor = 1;
|
||||||
throttle = 1;
|
throttle = 1;
|
||||||
|
stoneWallingStatusFile = NULL;
|
||||||
create_only = 0;
|
create_only = 0;
|
||||||
stat_only = 0;
|
stat_only = 0;
|
||||||
read_only = 0;
|
read_only = 0;
|
||||||
|
@ -2107,7 +2133,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
|
|
||||||
verbose = 0;
|
verbose = 0;
|
||||||
option_t *optList, *thisOpt;
|
option_t *optList, *thisOpt;
|
||||||
optList = GetOptList(argc, argv, "a:b:BcCd:De:Ef:Fhi:I:l:Ln:N:p:rR::s:StTuvV:w:W:yz:Z");
|
optList = GetOptList(argc, argv, "a:b:BcCd:De:Ef:Fhi:I:l:Ln:N:p:rR::s:StTuvV:w:W:x:yz:Z");
|
||||||
|
|
||||||
|
|
||||||
while (optList != NULL) {
|
while (optList != NULL) {
|
||||||
|
@ -2186,6 +2212,8 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
write_bytes = ( size_t )strtoul( optarg, ( char ** )NULL, 10 ); break;
|
write_bytes = ( size_t )strtoul( optarg, ( char ** )NULL, 10 ); break;
|
||||||
case 'W':
|
case 'W':
|
||||||
stone_wall_timer_seconds = atoi( optarg ); break;
|
stone_wall_timer_seconds = atoi( optarg ); break;
|
||||||
|
case 'x':
|
||||||
|
stoneWallingStatusFile = strdup(optarg); break;
|
||||||
case 'y':
|
case 'y':
|
||||||
sync_file = 1; break;
|
sync_file = 1; break;
|
||||||
case 'z':
|
case 'z':
|
||||||
|
@ -2404,13 +2432,6 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
|
|
||||||
MPI_Comm_group(testComm, &worldgroup);
|
MPI_Comm_group(testComm, &worldgroup);
|
||||||
|
|
||||||
// keep track of the current status for stonewalling
|
|
||||||
rank_progress_t progress;
|
|
||||||
memset(& progress, 0 , sizeof(progress));
|
|
||||||
progress.start_time = GetTimeStamp();
|
|
||||||
progress.stone_wall_timer_seconds = stone_wall_timer_seconds;
|
|
||||||
progress.items_per_dir = items_per_dir;
|
|
||||||
|
|
||||||
/* Run the tests */
|
/* Run the tests */
|
||||||
for (i = first; i <= last && i <= size; i += stride) {
|
for (i = first; i <= last && i <= size; i += stride) {
|
||||||
range.last = i - 1;
|
range.last = i - 1;
|
||||||
|
@ -2437,26 +2458,16 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
|
||||||
}
|
}
|
||||||
|
|
||||||
for (j = 0; j < iterations; j++) {
|
for (j = 0; j < iterations; j++) {
|
||||||
mdtest_iteration(i, j, testgroup, & summary_table[j], & progress);
|
// keep track of the current status for stonewalling
|
||||||
if(CHECK_STONE_WALL(& progress)){
|
mdtest_iteration(i, j, testgroup, & summary_table[j]);
|
||||||
iterations = j + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
items = progress.items_done;
|
|
||||||
summarize_results(iterations);
|
summarize_results(iterations);
|
||||||
if (i == 1 && stride > 1) {
|
if (i == 1 && stride > 1) {
|
||||||
i = 0;
|
i = 0;
|
||||||
}
|
}
|
||||||
if(CHECK_STONE_WALL(& progress)){
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rank == 0) {
|
if (rank == 0) {
|
||||||
if(CHECK_STONE_WALL(& progress)){
|
|
||||||
fprintf(out_logfile, "\n-- hit stonewall\n");
|
|
||||||
}
|
|
||||||
fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp());
|
fprintf(out_logfile, "\n-- finished at %s --\n", PrintTimestamp());
|
||||||
fflush(out_logfile);
|
fflush(out_logfile);
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,9 @@ IOR 2 -a DUMMY -w -O stoneWallingStatusFile=stonewall.log -O stoneWallingWearOut
|
||||||
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
|
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -D 1 -t 1000 -b 1000 -s 30 # max 15 still!
|
||||||
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
|
IOR 2 -a DUMMY -r -O stoneWallingStatusFile=stonewall.log -t 1000 -b 1000 -s 30
|
||||||
|
|
||||||
|
MDTEST 2 -I 20 -a DUMMY -W 1 -x stonewall-md.log -C
|
||||||
|
MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -T -v
|
||||||
|
MDTEST 2 -I 20 -a DUMMY -x stonewall-md.log -D -v
|
||||||
|
|
||||||
#shared tests
|
#shared tests
|
||||||
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
|
IOR 2 -a POSIX -w -z -Y -e -i1 -m -t 100k -b 100k
|
||||||
|
|
Loading…
Reference in New Issue