Merge pull request #322 from hpc/feature-mdtest-randnum

MDTest data verification improvements.
master
Julian Kunkel 2021-01-22 15:38:44 +00:00 committed by GitHub
commit 4c6f33f0f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 120 additions and 60 deletions

View File

@ -115,6 +115,8 @@ struct benchmark_options{
int ignore_precreate_errors;
int rank;
int size;
int verify_read;
int random_buffer_offset;
float relative_waiting_factor;
int adaptive_waiting_mode;
@ -133,16 +135,17 @@ static void def_obj_name(char * out_name, int n, int d, int i){
}
/*
 * Reset the global benchmark options `o` to their defaults.
 *
 * The whole struct is assigned from a compound literal, so every member that
 * is not named below is zero-initialised; no separate memset or field-by-field
 * assignment is needed beforehand (it would be overwritten anyway).
 * random_buffer_offset starts at -1, the "not set by the user" marker that is
 * later replaced by a time-based value.
 */
void init_options(){
  o = (struct benchmark_options){
    .interface = "POSIX",
    .prefix = "./out",
    .num = 1000,
    .random_buffer_offset = -1,
    .precreate = 3000,
    .dset_count = 10,
    .offset = 1,
    .iterations = 3,
    .file_size = 3901,
    .run_info_file = "md-workbench.status"};
}
static void mdw_wait(double runtime){
@ -549,7 +552,7 @@ void run_precreate(phase_stat_t * s, int current_index){
}
char * buf = malloc(o.file_size);
memset(buf, o.rank % 256, o.file_size);
generate_memory_pattern(buf, o.file_size, o.random_buffer_offset, o.rank);
double op_timer; // timer for individual operations
size_t pos = -1; // position inside the individual measurement array
double op_time;
@ -565,6 +568,7 @@ void run_precreate(phase_stat_t * s, int current_index){
if (NULL == aiori_fh){
FAIL("Unable to open file %s", obj_name);
}
update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, o.rank);
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_create.suc++;
}else{
@ -643,11 +647,19 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
if (NULL == aiori_fh){
FAIL("Unable to open file %s", obj_name);
}
if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_read.suc++;
if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) {
if(o.verify_read){
if(verify_memory_pattern(f * o.dset_count + d, buf, o.file_size, o.random_buffer_offset, readRank) == 0){
s->obj_read.suc++;
}else{
s->obj_read.err++;
}
}else{
s->obj_read.suc++;
}
}else{
s->obj_read.err++;
ERRF("%d: Error while reading the obj: %s\n", o.rank, obj_name);
EWARNF("%d: Error while reading the obj: %s", o.rank, obj_name);
}
o.backend->close(aiori_fh, o.backend_options);
@ -676,19 +688,23 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
op_timer = GetTimeStamp();
aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options);
if (NULL == aiori_fh){
FAIL("Unable to open file %s", obj_name);
}
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_create.suc++;
}else{
s->obj_create.err++;
if (! o.ignore_precreate_errors){
ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name);
if (NULL != aiori_fh){
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_create.suc++;
}else{
s->obj_create.err++;
if (! o.ignore_precreate_errors){
ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name);
}
}
o.backend->close(aiori_fh, o.backend_options);
}else{
if (! o.ignore_precreate_errors){
ERRF("Unable to open file %s", obj_name);
}
EWARNF("Unable to open file %s", obj_name);
s->obj_create.err++;
}
o.backend->close(aiori_fh, o.backend_options);
bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time);
if(o.relative_waiting_factor > 1e-9) {
mdw_wait(op_time);
@ -787,6 +803,7 @@ static option_help options [] = {
{0, "latency-all", "Keep the latency files from all ranks.", OPTION_FLAG, 'd', & o.latency_keep_all},
{'P', "precreate-per-set", "Number of object to precreate per data set.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.precreate},
{'D', "data-sets", "Number of data sets covered per process and iteration.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.dset_count},
{'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset},
{'o', NULL, "Output directory", OPTION_OPTIONAL_ARGUMENT, 's', & o.prefix},
{'q', "quiet", "Avoid irrelevant printing.", OPTION_FLAG, 'd', & o.quiet_output},
//{'m', "lim-free-mem", "Allocate memory until this limit (in MiB) is reached.", OPTION_OPTIONAL_ARGUMENT, 'd', & o.limit_memory},
@ -800,6 +817,7 @@ static option_help options [] = {
{'3', "run-cleanup", "Run cleanup phase (only run explicit phases)", OPTION_FLAG, 'd', & o.phase_cleanup},
{'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer},
{'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out},
{'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read},
{0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number},
{0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats},
{0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only},
@ -891,6 +909,10 @@ mdworkbench_results_t* md_workbench_run(int argc, char ** argv, MPI_Comm world_c
ERR("Invalid options, if running only the benchmark phase using -2 with stonewall option then use stonewall wear-out");
exit(1);
}
if( o.random_buffer_offset == -1 ){
o.random_buffer_offset = time(NULL);
MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, o.com);
}
if(o.backend->xfer_hints){
o.backend->xfer_hints(& o.hints);

View File

@ -124,6 +124,7 @@ typedef struct {
int leaf_only;
unsigned branch_factor;
int depth;
int random_buffer_offset; /* user settable value, otherwise random */
/*
* This is likely a small value, but it's sometimes computed by
@ -219,18 +220,10 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ...
}
}
/*
 * Fill buffer[1 .. bytes-1] with a position dependent pattern: byte i holds
 * the value i + 1, truncated to a char.
 *
 * buffer[0] is intentionally left untouched; it is reserved for the item
 * number, which the caller stores there before each write.
 *
 * @param buffer destination buffer of at least `bytes` bytes
 * @param bytes  size of the buffer; 0 and 1 leave the buffer unmodified
 */
void generate_memory_pattern(char * buffer, size_t bytes){
  // size_t index: avoids the signed/unsigned comparison and int overflow for huge buffers
  for(size_t i = 1; i < bytes; i++){
    buffer[i] = (char) (i + 1);
  }
}
void offset_timers(double * t, int tcount) {
double toffset;
int i;
VERBOSE(1,-1,"V-1: Entering offset_timers..." );
toffset = GetTimeStamp() - t[tcount];
@ -349,22 +342,6 @@ static void remove_file (const char *path, uint64_t itemNum) {
}
}
/*
 * Check a buffer that was read back against the expected mdtest pattern and
 * count mismatches in o.verification_error.
 *
 * The header (first 8 bytes for buffers of at least 8 bytes, otherwise the
 * first byte) must equal the item number. Every following byte i must equal
 * (char)(i + 1). At most one header error and one payload error are counted
 * per call, since the payload scan stops at the first mismatch.
 *
 * @param item   item number expected in the header
 * @param buffer data to check
 * @param bytes  number of valid bytes in buffer
 */
void mdtest_verify_data(int item, char * buffer, size_t bytes){
  const int wide_header = bytes >= 8;
  int header_ok;
  if(wide_header){
    header_ok = ((uint64_t*) buffer)[0] == item;
  }else{
    header_ok = buffer[0] == (char) item;
  }
  if(! header_ok){
    VERBOSE(2, -1, "Error verifying first element for item: %d", item);
    o.verification_error++;
  }

  // payload starts right behind the header
  size_t pos = wide_header ? 8 : 1;
  while(pos < bytes){
    if(buffer[pos] != (char) (pos + 1)){
      VERBOSE(5, -1, "Error verifying byte %zu for item %d", pos, item);
      o.verification_error++;
      break;
    }
    pos++;
  }
}
static void create_file (const char *path, uint64_t itemNum) {
char curr_item[MAX_PATHLEN];
@ -418,11 +395,8 @@ static void create_file (const char *path, uint64_t itemNum) {
* offset 0 (zero).
*/
o.hints.fsyncPerWrite = o.sync_file;
if(o.write_bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible
((uint64_t*) o.write_buffer)[0] = itemNum;
}else{
o.write_buffer[0] = (char) itemNum;
}
update_write_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
EWARNF("unable to write file %s", curr_item);
}
@ -432,7 +406,7 @@ static void create_file (const char *path, uint64_t itemNum) {
if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
EWARNF("unable to verify write (read/back) file %s", curr_item);
}
mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes);
o.verification_error += verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
}
}
@ -753,7 +727,11 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
continue;
}
if(o.verify_read){
mdtest_verify_data(item_num, read_buffer, o.read_bytes);
int pretend_rank = (2 * o.nstride + rank) % o.size;
if (o.shared_file) {
pretend_rank = rank;
}
o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank);
}else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){
// do a lightweight check, which cost is neglectable
o.verification_error++;
@ -2040,7 +2018,8 @@ static void mdtest_iteration(int i, int j, MPI_Group testgroup, mdtest_results_t
/*
 * Reset the global mdtest options `o` to their defaults.
 *
 * Members not named in the compound literal are zero-initialised.
 * random_buffer_offset starts at -1, the "not set by the user" marker that
 * mdtest_run() replaces with a time-based value.
 */
void mdtest_init_args(){
  o = (mdtest_options_t) {
     .barriers = 1,
     .branch_factor = 1,
     .random_buffer_offset = -1
  };
}
@ -2094,6 +2073,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
#ifdef HAVE_LUSTRE_LUSTREAPI
{'g', NULL, "global default directory layout for test subdirectories (deletes inherited striping layout)", OPTION_FLAG, 'd', & o.global_dir_layout},
#endif /* HAVE_LUSTRE_LUSTREAPI */
{'G', NULL, "Offset for the data in the read/write buffer, if not set, a random value is used", OPTION_OPTIONAL_ARGUMENT, 'd', & o.random_buffer_offset},
{'i', NULL, "number of iterations the test will run", OPTION_OPTIONAL_ARGUMENT, 'd', & iterations},
{'I', NULL, "number of items per directory in tree", OPTION_OPTIONAL_ARGUMENT, 'l', & o.items_per_dir},
{'k', NULL, "use mknod to create file", OPTION_FLAG, 'd', & o.make_node},
@ -2180,6 +2160,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
}
o.random_seed += rank;
}
if( o.random_buffer_offset == -1 ){
o.random_buffer_offset = time(NULL);
MPI_Bcast(& o.random_buffer_offset, 1, MPI_INT, 0, testComm);
}
if ((o.items > 0) && (o.items_per_dir > 0) && (! o.unique_dir_per_task)) {
o.directory_loops = o.items / o.items_per_dir;
}else{
@ -2305,7 +2289,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
if (alloc_res) {
FAIL("out of memory");
}
generate_memory_pattern(o.write_buffer, o.write_bytes);
generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
}
/* setup directory path to work in */
@ -2431,8 +2415,10 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
FAIL("Unable to remove test directory path %s", o.testdirpath);
}
if(o.verification_error){
VERBOSE(0, -1, "\nERROR: verifying the data read! Take the performance values with care!\n");
int total_errors;
MPI_Reduce(& o.verification_error, & total_errors, 1, MPI_INT, MPI_SUM, 0, testComm);
if(total_errors){
VERBOSE(0, -1, "\nERROR: verifying the data on read (%lld errors)! Take the performance values with care!\n", total_errors);
}
VERBOSE(0,-1,"-- finished at %s --\n", PrintTimestamp());

View File

@ -71,6 +71,53 @@ enum OutputFormat_t outputFormat;
/***************************** F U N C T I O N S ******************************/
/*
 * Stamp the item number into the head of a write buffer so that every object
 * written carries a distinguishing header.
 *
 * Buffers of at least 8 bytes get the full 64-bit item number in their first
 * 8 bytes; shorter buffers only get the low byte of the item number in
 * buf[0]. An empty buffer is left untouched.
 *
 * @param item        item number to store
 * @param buf         write buffer; accessed as uint64_t when bytes >= 8, so it
 *                    must be suitably aligned for that
 * @param bytes       size of buf in bytes
 * @param buff_offset currently unused
 * @param rank        currently unused
 */
void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank){
  (void) buff_offset;
  (void) rank;
  if(bytes == 0){
    return; // nothing to stamp; writing buf[0] would be out of bounds
  }
  if(bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible
    ((uint64_t*) buf)[0] = item;
  }else{
    buf[0] = (char) item;
  }
}
/*
 * Per-rank base value that is added to every 64-bit pattern word.
 * The upper 32 bits carry rank + 1, the user/time supplied offset is added on
 * top. The shift is parenthesised explicitly: `a << 32 + b` would parse as
 * `a << (32 + b)`, which is undefined for large offsets.
 */
static uint64_t memory_pattern_rank_base(int rank, int buff_offset){
  return ((uint64_t)(rank + 1) << 32) + (uint64_t) buff_offset;
}

/*
 * Fill buf with the data pattern that verify_memory_pattern() checks.
 *
 * The buffer is treated as an array of 64-bit words:
 *  - word 0 is not written here; it is reserved for the item number
 *  - word i (i >= 1) is set to (i + 1) + ((rank + 1) << 32) + buff_offset
 *  - bytes behind the last full word are set to (char) of their byte index
 *    (for buffers shorter than 8 bytes this includes byte 0, which the item
 *    number overwrites later)
 *
 * @param buf         buffer to fill; must be suitably aligned for uint64_t access
 * @param bytes       size of buf in bytes
 * @param buff_offset offset mixed into every word
 * @param rank        rank mixed into the upper half of every word
 */
void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank){
  uint64_t * buffi = (uint64_t*) buf;
  const uint64_t ranki = memory_pattern_rank_base(rank, buff_offset);
  const size_t words = bytes / 8;
  // the first 8 bytes are set to item number
  for(size_t i = 1; i < words; i++){
    buffi[i] = (i + 1) + ranki;
  }
  for(size_t i = words * 8; i < bytes; i++){
    buf[i] = (char) i;
  }
}

/*
 * Check a buffer against the pattern produced by generate_memory_pattern()
 * plus the item number header.
 *
 * @param item        item number expected in the header (first 8 bytes, or
 *                    the first byte for buffers shorter than 8 bytes)
 * @param buffer      data to check; must be suitably aligned for uint64_t access
 * @param bytes       number of valid bytes in buffer
 * @param buff_offset offset that was used when the data was generated
 * @param pretendRank rank that is expected to have written the data
 * @return 0 if all is correct (or bytes is 0), otherwise 1
 */
int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank){
  if(bytes == 0){
    return 0; // nothing to verify; reading buffer[0] would be out of bounds
  }
  int error = 0;
  const size_t words = bytes / 8;
  const uint64_t * buffi = (const uint64_t*) buffer;
  // always read all data to ensure that performance numbers stay the same
  if(words > 0){
    if(buffi[0] != (uint64_t) item){
      error = 1;
    }
  }else if(buffer[0] != (char) item){
    error = 1;
  }

  // first half of 64 bits use the rank, here need to apply rank shifting
  const uint64_t rank_mod = memory_pattern_rank_base(pretendRank, buff_offset);
  // the first 8 bytes are set to item number
  for(size_t i = 1; i < words; i++){
    const uint64_t expected = (i + 1) + rank_mod;
    if(buffi[i] != expected){
      error = 1;
    }
  }
  // Trailing bytes; in a buffer shorter than 8 bytes, byte 0 holds the item
  // number (checked above) and must not be compared against the index pattern.
  for(size_t i = (words > 0) ? words * 8 : 1; i < bytes; i++){
    if(buffer[i] != (char) i){
      error = 1;
    }
  }
  return error;
}
void* safeMalloc(uint64_t size){
void * d = malloc(size);
if (d == NULL){

View File

@ -35,6 +35,11 @@ extern enum OutputFormat_t outputFormat; /* format of the output */
void* safeMalloc(uint64_t size);
void set_o_direct_flag(int *fd);
/*
 * Store the item number at the start of a write buffer: in the first 8 bytes
 * if bytes >= 8, otherwise in the first byte only.
 * buff_offset and rank are accepted but not used by the current implementation.
 */
void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank);
/*
 * Fill a buffer with a pattern derived from the position, buff_offset and rank.
 * The first 8 bytes of buffers with at least 8 bytes are left untouched
 * (they are reserved for the item number).
 */
void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank);
/*
 * Check a data buffer against the item number header and the generated
 * pattern for the given buff_offset and pretendRank.
 * The whole buffer is always scanned, even after the first mismatch.
 * @return 0 if all is correct, otherwise 1
 */
int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank);
char *CurrentTimeString(void);
int Regex(char *, char *);
void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options);