Extracted memory pattern comparison, added mem check option to md-workbench.

master
Julian M. Kunkel 2021-01-22 14:05:58 +00:00
parent effcb4131c
commit 7061b60ed8
4 changed files with 87 additions and 64 deletions

View File

@ -115,6 +115,7 @@ struct benchmark_options{
int ignore_precreate_errors;
int rank;
int size;
int verify_read;
float relative_waiting_factor;
int adaptive_waiting_mode;
@ -549,7 +550,7 @@ void run_precreate(phase_stat_t * s, int current_index){
}
char * buf = malloc(o.file_size);
memset(buf, o.rank % 256, o.file_size);
generate_memory_pattern(buf, o.file_size, 0, o.rank);
double op_timer; // timer for individual operations
size_t pos = -1; // position inside the individual measurement array
double op_time;
@ -565,6 +566,7 @@ void run_precreate(phase_stat_t * s, int current_index){
if (NULL == aiori_fh){
FAIL("Unable to open file %s", obj_name);
}
update_write_memory_pattern(f * o.dset_count + d, buf, o.file_size, 0, o.rank);
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_create.suc++;
}else{
@ -643,11 +645,19 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
if (NULL == aiori_fh){
FAIL("Unable to open file %s", obj_name);
}
if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_read.suc++;
if ( o.file_size == (int) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options) ) {
if(o.verify_read){
if(verify_memory_pattern(f * o.dset_count + d, buf, o.file_size, 0, readRank) == 0){
s->obj_read.suc++;
}else{
s->obj_read.err++;
}
}else{
s->obj_read.suc++;
}
}else{
s->obj_read.err++;
ERRF("%d: Error while reading the obj: %s\n", o.rank, obj_name);
EWARNF("%d: Error while reading the obj: %s", o.rank, obj_name);
}
o.backend->close(aiori_fh, o.backend_options);
@ -676,19 +686,23 @@ void run_benchmark(phase_stat_t * s, int * current_index_p){
op_timer = GetTimeStamp();
aiori_fh = o.backend->create(obj_name, IOR_WRONLY | IOR_CREAT, o.backend_options);
if (NULL == aiori_fh){
FAIL("Unable to open file %s", obj_name);
}
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_create.suc++;
}else{
s->obj_create.err++;
if (! o.ignore_precreate_errors){
ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name);
if (NULL != aiori_fh){
if ( o.file_size == (int) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) buf, o.file_size, 0, o.backend_options)) {
s->obj_create.suc++;
}else{
s->obj_create.err++;
if (! o.ignore_precreate_errors){
ERRF("%d: Error while creating the obj: %s\n", o.rank, obj_name);
}
}
o.backend->close(aiori_fh, o.backend_options);
}else{
if (! o.ignore_precreate_errors){
ERRF("Unable to open file %s", obj_name);
}
EWARNF("Unable to open file %s", obj_name);
s->obj_create.err++;
}
o.backend->close(aiori_fh, o.backend_options);
bench_runtime = add_timed_result(op_timer, s->phase_start_timer, s->time_create, pos, & s->max_op_time, & op_time);
if(o.relative_waiting_factor > 1e-9) {
mdw_wait(op_time);
@ -800,6 +814,7 @@ static option_help options [] = {
{'3', "run-cleanup", "Run cleanup phase (only run explicit phases)", OPTION_FLAG, 'd', & o.phase_cleanup},
{'w', "stonewall-timer", "Stop each benchmark iteration after the specified seconds (if not used with -W this leads to process-specific progress!)", OPTION_OPTIONAL_ARGUMENT, 'd', & o.stonewall_timer},
{'W', "stonewall-wear-out", "Stop with stonewall after specified time and use a soft wear-out phase -- all processes perform the same number of iterations", OPTION_FLAG, 'd', & o.stonewall_timer_wear_out},
{'X', "verify-read", "Verify the data on read", OPTION_FLAG, 'd', & o.verify_read},
{0, "start-item", "The iteration number of the item to start with, allowing to offset the operations", OPTION_OPTIONAL_ARGUMENT, 'l', & o.start_item_number},
{0, "print-detailed-stats", "Print detailed machine parsable statistics.", OPTION_FLAG, 'd', & o.print_detailed_stats},
{0, "read-only", "Run read-only during benchmarking phase (no deletes/writes), probably use with -2", OPTION_FLAG, 'd', & o.read_only},

View File

@ -220,24 +220,10 @@ void VerboseMessage (int root_level, int any_level, int line, char * format, ...
}
}
// Fill a buffer with a position-dependent test pattern.
//   buf   - buffer to fill; it is accessed as an array of uint64_t
//   bytes - size of buf in bytes
// The 64-bit word at index 0 is left untouched by this function. Words
// 1 .. bytes/8-1 receive (index + 1) plus a value derived from `rank` and
// `o.random_buffer_offset` (both are defined outside this function); any
// trailing bytes beyond the last full word receive their own offset
// truncated to a char.
void generate_memory_pattern(char * buf, size_t bytes){
uint64_t * buffi = (uint64_t*) buf;
// first half of 64 bits use the rank
// NOTE(review): `+` binds tighter than `<<` in C, so this shifts (rank + 1)
// by (32 + o.random_buffer_offset) bits rather than adding the offset to
// (rank + 1) << 32; the verification code must use the identical expression.
uint64_t ranki = (uint64_t)(rank + 1) << 32 + o.random_buffer_offset;
// the first 8 bytes are set to item number
for(size_t i=1; i < bytes/8; i++){
buffi[i] = (i + 1) + ranki;
}
// remaining bytes that do not form a complete 64-bit word
for(size_t i=(bytes/8)*8; i < bytes; i++){
buf[i] = (char) i;
}
}
void offset_timers(double * t, int tcount) {
double toffset;
int i;
VERBOSE(1,-1,"V-1: Entering offset_timers..." );
toffset = GetTimeStamp() - t[tcount];
@ -356,33 +342,6 @@ static void remove_file (const char *path, uint64_t itemNum) {
}
}
// Check a buffer against the expected test pattern and count a mismatch.
//   item        - item number expected at the start of the buffer
//   buffer      - data to check; it is accessed as an array of uint64_t
//   bytes       - size of buffer in bytes
//   pretendRank - rank value used to compute the expected words
// On the first mismatch o.verification_error is incremented once and the
// function returns; nothing is reported to the caller directly.
void mdtest_verify_data(int item, char * buffer, size_t bytes, int pretendRank){
// the first word (or the first byte of a buffer shorter than 8 bytes) holds the item number
if((bytes >= 8 && ((uint64_t*) buffer)[0] != item) || (bytes < 8 && buffer[0] != (char) item)){
VERBOSE(2, -1, "Error verifying first element for item: %d", item);
o.verification_error++;
return;
}
uint64_t * buffi = (uint64_t*) buffer;
// first half of 64 bits use the rank, here need to apply rank shifting
// NOTE(review): `+` binds tighter than `<<`, so the shift count here is
// (32 + o.random_buffer_offset); this only verifies correctly because the
// generating code uses the same expression.
uint64_t rank_mod = (uint64_t)(pretendRank + 1) << 32 + o.random_buffer_offset;
// the first 8 bytes are set to item number
for(size_t i=1; i < bytes/8; i++){
uint64_t exp = (i + 1) + rank_mod;
if(buffi[i] != exp){
VERBOSE(5, -1, "Error verifying offset %zu for item %d", i*8, item);
o.verification_error++;
return;
}
}
// NOTE(review): for bytes < 8 this loop starts at index 0 and compares
// buffer[0] with (char) 0, although buffer[0] was checked against the item
// number above -- confirm whether small buffers can ever pass.
for(size_t i=(bytes/8)*8; i < bytes; i++){
if(buffer[i] != (char) i){
VERBOSE(5, -1, "Error verifying byte %zu for item %d", i, item);
o.verification_error++;
return;
}
}
}
static void create_file (const char *path, uint64_t itemNum) {
char curr_item[MAX_PATHLEN];
@ -436,11 +395,8 @@ static void create_file (const char *path, uint64_t itemNum) {
* offset 0 (zero).
*/
o.hints.fsyncPerWrite = o.sync_file;
if(o.write_bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible
((uint64_t*) o.write_buffer)[0] = itemNum;
}else{
o.write_buffer[0] = (char) itemNum;
}
update_write_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
if ( o.write_bytes != (size_t) o.backend->xfer(WRITE, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
EWARNF("unable to write file %s", curr_item);
}
@ -450,7 +406,7 @@ static void create_file (const char *path, uint64_t itemNum) {
if (o.write_bytes != (size_t) o.backend->xfer(READ, aiori_fh, (IOR_size_t *) o.write_buffer, o.write_bytes, 0, o.backend_options)) {
EWARNF("unable to verify write (read/back) file %s", curr_item);
}
mdtest_verify_data(itemNum, o.write_buffer, o.write_bytes, rank);
o.verification_error += verify_memory_pattern(itemNum, o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
}
}
@ -775,7 +731,7 @@ void mdtest_read(int random, int dirs, const long dir_iter, char *path) {
if (o.shared_file) {
pretend_rank = rank;
}
mdtest_verify_data(item_num, read_buffer, o.read_bytes, pretend_rank);
o.verification_error += verify_memory_pattern(item_num, read_buffer, o.read_bytes, o.random_buffer_offset, pretend_rank);
}else if((o.read_bytes >= 8 && ((uint64_t*) read_buffer)[0] != item_num) || (o.read_bytes < 8 && read_buffer[0] != (char) item_num)){
// do a lightweight check, which cost is neglectable
o.verification_error++;
@ -2333,7 +2289,7 @@ mdtest_results_t * mdtest_run(int argc, char **argv, MPI_Comm world_com, FILE *
if (alloc_res) {
FAIL("out of memory");
}
generate_memory_pattern(o.write_buffer, o.write_bytes);
generate_memory_pattern(o.write_buffer, o.write_bytes, o.random_buffer_offset, rank);
}
/* setup directory path to work in */

View File

@ -71,6 +71,53 @@ enum OutputFormat_t outputFormat;
/***************************** F U N C T I O N S ******************************/
/**
 * Stamp the item number into the beginning of a write buffer so that the
 * data of every item is as unique as possible.
 *
 * Buffers of at least 8 bytes receive the full 64-bit item number in their
 * first word; shorter non-empty buffers receive only its low byte in
 * buf[0]. An empty buffer (bytes == 0) is left untouched.
 *
 * @param item        item number to embed
 * @param buf         buffer to update; accessed as uint64_t when bytes >= 8
 * @param bytes       size of buf in bytes
 * @param buff_offset not used by this function
 * @param rank        not used by this function
 */
void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank){
  (void) buff_offset;
  (void) rank;
  if(bytes == 0){
    // nothing may be written into a zero-length buffer
    return;
  }
  if(bytes >= 8){ // set the item number as first element of the buffer to be as much unique as possible
    ((uint64_t*) buf)[0] = item;
  }else{
    buf[0] = (char) item;
  }
}
/*
 * Base value added to every pattern word: (rank + 1) in the upper 32 bits
 * plus the buffer offset. Generation and verification both go through this
 * helper so that they cannot drift apart.
 * The shift is parenthesised on purpose: `a << 32 + b` parses as
 * `a << (32 + b)` because `+` binds tighter than `<<`.
 */
static uint64_t memory_pattern_base(int buff_offset, int rank){
  return ((uint64_t)(rank + 1) << 32) + (uint64_t) buff_offset;
}

/**
 * Fill a buffer with a deterministic, position-dependent test pattern.
 *
 * The 64-bit word at index 0 is not touched here. Every further complete
 * word i receives (i + 1) plus the rank/offset base value; bytes beyond the
 * last complete word receive their own offset truncated to char.
 *
 * @param buf         buffer to fill; accessed as an array of uint64_t
 * @param bytes       size of buf in bytes
 * @param buff_offset offset mixed into every pattern word
 * @param rank        rank mixed into the upper half of every pattern word
 */
void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank){
  uint64_t * buffi = (uint64_t*) buf;
  // first half of 64 bits use the rank
  const uint64_t ranki = memory_pattern_base(buff_offset, rank);
  const size_t size = bytes / 8;
  // the first 8 bytes are set to item number
  for(size_t i=1; i < size; i++){
    buffi[i] = (i + 1) + ranki;
  }
  // trailing bytes that do not form a complete word
  for(size_t i=size*8; i < bytes; i++){
    buf[i] = (char) i;
  }
}

/**
 * Check a buffer against the pattern written by generate_memory_pattern()
 * with the item number stored at its start.
 *
 * @param item        item number expected in the first word (or in the
 *                    first byte when bytes < 8)
 * @param buffer      data to check; accessed as an array of uint64_t
 * @param bytes       size of buffer in bytes; an empty buffer is correct
 * @param buff_offset offset that was used to generate the pattern
 * @param pretendRank rank that was used to generate the pattern
 * @return 0 if all data is correct, otherwise 1
 */
int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank){
  if(bytes == 0){
    // nothing to read, hence nothing that could be wrong
    return 0;
  }
  int error = 0;
  const uint64_t * buffi = (const uint64_t*) buffer;
  const size_t size = bytes / 8;
  size_t tail_start = size * 8;
  // always read all data to ensure that performance numbers stay the same
  if(size > 0){
    if(buffi[0] != (uint64_t) item){
      error = 1;
    }
  }else{
    if(buffer[0] != (char) item){
      error = 1;
    }
    // byte 0 carries the item number, so it must not be compared with the
    // byte pattern again
    tail_start = 1;
  }
  // first half of 64 bits use the rank, here need to apply rank shifting
  const uint64_t rank_mod = memory_pattern_base(buff_offset, pretendRank);
  // the first 8 bytes are set to item number
  for(size_t i=1; i < size; i++){
    const uint64_t exp = (i + 1) + rank_mod;
    if(buffi[i] != exp){
      error = 1;
    }
  }
  for(size_t i=tail_start; i < bytes; i++){
    if(buffer[i] != (char) i){
      error = 1;
    }
  }
  return error;
}
void* safeMalloc(uint64_t size){
void * d = malloc(size);
if (d == NULL){

View File

@ -35,6 +35,11 @@ extern enum OutputFormat_t outputFormat; /* format of the output */
void* safeMalloc(uint64_t size);
void set_o_direct_flag(int *fd);
/* store the item number at the start of buf: in the first 8 bytes if bytes >= 8, otherwise in the first byte */
void update_write_memory_pattern(uint64_t item, char * buf, size_t bytes, int buff_offset, int rank);
/* fill buf with a test pattern derived from rank and buff_offset; the first 64-bit word is left for the item number */
void generate_memory_pattern(char * buf, size_t bytes, int buff_offset, int rank);
/* check a data buffer, @return 0 if all is correct, otherwise 1 */
int verify_memory_pattern(int item, char * buffer, size_t bytes, int buff_offset, int pretendRank);
char *CurrentTimeString(void);
int Regex(char *, char *);
void ShowFileSystemSize(char * filename, const struct ior_aiori * backend, void * backend_options);