/* * * Copyright (c) 2014, James S. Plank and Kevin Greenan * All rights reserved. * * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure * Coding Techniques * * Revision 2.0: Galois Field backend now links to GF-Complete * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * - Neither the name of the University of Tennessee nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* Jerasure's authors: Revision 2.x - 2014: James S. Plank and Kevin M. Greenan. Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman. Revision 1.0 - 2007: James S. Plank. */ /* This program takes as input an inputfile, k, m, a coding technique, w, and packetsize. It creates k+m files from the original file so that k of these files are parts of the original file and m of the files are encoded based on the given coding technique. The format of the created files is the file name with "_k#" or "_m#" and then the extension. (For example, inputfile test.txt would yield file "test_k1.txt".) */ #include #include #include #include #include #include #include #include #include #include #include #include "jerasure.h" #include "reed_sol.h" #include "cauchy.h" #include "liberation.h" #include "timing.h" #define N 10 enum Coding_Technique {Reed_Sol_Van, Reed_Sol_R6_Op, Cauchy_Orig, Cauchy_Good, Liberation, Blaum_Roth, Liber8tion, RDP, EVENODD, No_Coding}; char *Methods[N] = {"reed_sol_van", "reed_sol_r6_op", "cauchy_orig", "cauchy_good", "liberation", "blaum_roth", "liber8tion", "no_coding"}; /* Global variables for signal handler */ int readins, n; enum Coding_Technique method; /* Function prototypes */ int is_prime(int w); void ctrl_bs_handler(int dummy); int jfread(void *ptr, int size, int nmembers, FILE *stream) { if (stream != NULL) return fread(ptr, size, nmembers, stream); MOA_Fill_Random_Region(ptr, size); return size; } int main (int argc, char **argv) { FILE *fp, *fp2; // file pointers char *block; // padding file int size, newsize; // size of file and temp size struct stat status; // finding file size enum Coding_Technique tech; // coding technique (parameter) int k, m, w, packetsize; // parameters int buffersize; // paramter int i; // loop control variables int blocksize; // size of k+m files int total; int extra; /* Jerasure Arguments */ char **data; char **coding; int *matrix; int *bitmatrix; int **schedule; /* Creation of file name variables */ char temp[5]; char *s1, *s2, *extension; char *fname; int md; char *curdir; /* Timing variables */ struct timing t1, t2, t3, t4; double tsec; double totalsec; struct timing start; /* Find buffersize */ int up, down; signal(SIGQUIT, ctrl_bs_handler); /* Start timing */ timing_set(&t1); totalsec = 0.0; matrix = NULL; bitmatrix = NULL; schedule = NULL; /* Error check Arguments*/ if (argc != 8) { fprintf(stderr, "usage: inputfile k m coding_technique w packetsize buffersize\n"); fprintf(stderr, "\nChoose one of the following coding techniques: \nreed_sol_van, \nreed_sol_r6_op, \ncauchy_orig, \ncauchy_good, \nliberation, \nblaum_roth, \nliber8tion"); fprintf(stderr, "\n\nPacketsize is ignored for the reed_sol's"); fprintf(stderr, "\nBuffersize of 0 means the buffersize is chosen automatically.\n"); fprintf(stderr, "\nIf you just want to test speed, use an inputfile of \"-number\" where number is the size of the fake file you want to test.\n\n"); exit(0); } /* Conversion of parameters and error checking */ if (sscanf(argv[2], "%d", &k) == 0 || k <= 0) { fprintf(stderr, "Invalid value for k\n"); exit(0); } if (sscanf(argv[3], "%d", &m) == 0 || m < 0) { fprintf(stderr, "Invalid value for m\n"); exit(0); } if (sscanf(argv[5],"%d", &w) == 0 || w <= 0) { fprintf(stderr, "Invalid value for w.\n"); exit(0); } if (argc == 6) { packetsize = 0; } else { if (sscanf(argv[6], "%d", &packetsize) == 0 || packetsize < 0) { fprintf(stderr, "Invalid value for packetsize.\n"); exit(0); } } if (argc != 8) { buffersize = 0; } else { if (sscanf(argv[7], "%d", &buffersize) == 0 || buffersize < 0) { fprintf(stderr, "Invalid value for buffersize\n"); exit(0); } } /* Determine proper buffersize by finding the closest valid buffersize to the input value */ if (buffersize != 0) { if (packetsize != 0 && buffersize%(sizeof(long)*w*k*packetsize) != 0) { up = buffersize; down = buffersize; while (up%(sizeof(long)*w*k*packetsize) != 0 && (down%(sizeof(long)*w*k*packetsize) != 0)) { up++; if (down == 0) { down--; } } if (up%(sizeof(long)*w*k*packetsize) == 0) { buffersize = up; } else { if (down != 0) { buffersize = down; } } } else if (packetsize == 0 && buffersize%(sizeof(long)*w*k) != 0) { up = buffersize; down = buffersize; while (up%(sizeof(long)*w*k) != 0 && down%(sizeof(long)*w*k) != 0) { up++; down--; } if (up%(sizeof(long)*w*k) == 0) { buffersize = up; } else { buffersize = down; } } } /* Setting of coding technique and error checking */ if (strcmp(argv[4], "no_coding") == 0) { tech = No_Coding; } else if (strcmp(argv[4], "reed_sol_van") == 0) { tech = Reed_Sol_Van; if (w != 8 && w != 16 && w != 32) { fprintf(stderr, "w must be one of {8, 16, 32}\n"); exit(0); } } else if (strcmp(argv[4], "reed_sol_r6_op") == 0) { if (m != 2) { fprintf(stderr, "m must be equal to 2\n"); exit(0); } if (w != 8 && w != 16 && w != 32) { fprintf(stderr, "w must be one of {8, 16, 32}\n"); exit(0); } tech = Reed_Sol_R6_Op; } else if (strcmp(argv[4], "cauchy_orig") == 0) { tech = Cauchy_Orig; if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } } else if (strcmp(argv[4], "cauchy_good") == 0) { tech = Cauchy_Good; if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } } else if (strcmp(argv[4], "liberation") == 0) { if (k > w) { fprintf(stderr, "k must be less than or equal to w\n"); exit(0); } if (w <= 2 || !(w%2) || !is_prime(w)) { fprintf(stderr, "w must be greater than two and w must be prime\n"); exit(0); } if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } if ((packetsize%(sizeof(long))) != 0) { fprintf(stderr, "packetsize must be a multiple of sizeof(long)\n"); exit(0); } tech = Liberation; } else if (strcmp(argv[4], "blaum_roth") == 0) { if (k > w) { fprintf(stderr, "k must be less than or equal to w\n"); exit(0); } if (w <= 2 || !((w+1)%2) || !is_prime(w+1)) { fprintf(stderr, "w must be greater than two and w+1 must be prime\n"); exit(0); } if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } if ((packetsize%(sizeof(long))) != 0) { fprintf(stderr, "packetsize must be a multiple of sizeof(long)\n"); exit(0); } tech = Blaum_Roth; } else if (strcmp(argv[4], "liber8tion") == 0) { if (packetsize == 0) { fprintf(stderr, "Must include packetsize\n"); exit(0); } if (w != 8) { fprintf(stderr, "w must equal 8\n"); exit(0); } if (m != 2) { fprintf(stderr, "m must equal 2\n"); exit(0); } if (k > w) { fprintf(stderr, "k must be less than or equal to w\n"); exit(0); } tech = Liber8tion; } else { fprintf(stderr, "Not a valid coding technique. Choose one of the following: reed_sol_van, reed_sol_r6_op, cauchy_orig, cauchy_good, liberation, blaum_roth, liber8tion, no_coding\n"); exit(0); } /* Set global variable method for signal handler */ method = tech; /* Get current working directory for construction of file names */ curdir = (char*)malloc(sizeof(char)*1000); getcwd(curdir, 1000); if (argv[1][0] != '-') { /* Open file and error check */ fp = fopen(argv[1], "rb"); if (fp == NULL) { fprintf(stderr, "Unable to open file.\n"); exit(0); } /* Create Coding directory */ i = mkdir("Coding", S_IRWXU); if (i == -1 && errno != EEXIST) { fprintf(stderr, "Unable to create Coding directory.\n"); exit(0); } /* Determine original size of file */ stat(argv[1], &status); size = status.st_size; } else { if (sscanf(argv[1]+1, "%d", &size) != 1 || size <= 0) { fprintf(stderr, "Files starting with '-' should be sizes for randomly created input\n"); exit(1); } fp = NULL; MOA_Seed(time(0)); } newsize = size; /* Find new size by determining next closest multiple */ if (packetsize != 0) { if (size%(k*w*packetsize*sizeof(long)) != 0) { while (newsize%(k*w*packetsize*sizeof(long)) != 0) newsize++; } } else { if (size%(k*w*sizeof(long)) != 0) { while (newsize%(k*w*sizeof(long)) != 0) newsize++; } } if (buffersize != 0) { while (newsize%buffersize != 0) { newsize++; } } /* Determine size of k+m files */ blocksize = newsize/k; /* Allow for buffersize and determine number of read-ins */ if (size > buffersize && buffersize != 0) { if (newsize%buffersize != 0) { readins = newsize/buffersize; } else { readins = newsize/buffersize; } block = (char *)malloc(sizeof(char)*buffersize); blocksize = buffersize/k; } else { readins = 1; buffersize = size; block = (char *)malloc(sizeof(char)*newsize); } /* Break inputfile name into the filename and extension */ s1 = (char*)malloc(sizeof(char)*(strlen(argv[1])+20)); s2 = strrchr(argv[1], '/'); if (s2 != NULL) { s2++; strcpy(s1, s2); } else { strcpy(s1, argv[1]); } s2 = strchr(s1, '.'); if (s2 != NULL) { extension = strdup(s2); *s2 = '\0'; } else { extension = strdup(""); } /* Allocate for full file name */ fname = (char*)malloc(sizeof(char)*(strlen(argv[1])+strlen(curdir)+20)); sprintf(temp, "%d", k); md = strlen(temp); /* Allocate data and coding */ data = (char **)malloc(sizeof(char*)*k); coding = (char **)malloc(sizeof(char*)*m); for (i = 0; i < m; i++) { coding[i] = (char *)malloc(sizeof(char)*blocksize); if (coding[i] == NULL) { perror("malloc"); exit(1); } } /* Create coding matrix or bitmatrix and schedule */ timing_set(&t3); switch(tech) { case No_Coding: break; case Reed_Sol_Van: matrix = reed_sol_vandermonde_coding_matrix(k, m, w); break; case Reed_Sol_R6_Op: break; case Cauchy_Orig: matrix = cauchy_original_coding_matrix(k, m, w); bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Cauchy_Good: matrix = cauchy_good_general_coding_matrix(k, m, w); bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Liberation: bitmatrix = liberation_coding_bitmatrix(k, w); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Blaum_Roth: bitmatrix = blaum_roth_coding_bitmatrix(k, w); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Liber8tion: bitmatrix = liber8tion_coding_bitmatrix(k); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case RDP: case EVENODD: assert(0); } timing_set(&start); timing_set(&t4); totalsec += timing_delta(&t3, &t4); /* Read in data until finished */ n = 1; total = 0; while (n <= readins) { /* Check if padding is needed, if so, add appropriate number of zeros */ if (total < size && total+buffersize <= size) { total += jfread(block, sizeof(char), buffersize, fp); } else if (total < size && total+buffersize > size) { extra = jfread(block, sizeof(char), buffersize, fp); for (i = extra; i < buffersize; i++) { block[i] = '0'; } } else if (total == size) { for (i = 0; i < buffersize; i++) { block[i] = '0'; } } /* Set pointers to point to file data */ for (i = 0; i < k; i++) { data[i] = block+(i*blocksize); } timing_set(&t3); /* Encode according to coding method */ switch(tech) { case No_Coding: break; case Reed_Sol_Van: jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize); break; case Reed_Sol_R6_Op: reed_sol_r6_encode(k, w, data, coding, blocksize); break; case Cauchy_Orig: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Cauchy_Good: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Liberation: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Blaum_Roth: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Liber8tion: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case RDP: case EVENODD: assert(0); } timing_set(&t4); /* Write data and encoded data to k+m files */ for (i = 1; i <= k; i++) { if (fp == NULL) { bzero(data[i-1], blocksize); } else { sprintf(fname, "%s/Coding/%s_k%0*d%s", curdir, s1, md, i, extension); if (n == 1) { fp2 = fopen(fname, "wb"); } else { fp2 = fopen(fname, "ab"); } fwrite(data[i-1], sizeof(char), blocksize, fp2); fclose(fp2); } } for (i = 1; i <= m; i++) { if (fp == NULL) { bzero(data[i-1], blocksize); } else { sprintf(fname, "%s/Coding/%s_m%0*d%s", curdir, s1, md, i, extension); if (n == 1) { fp2 = fopen(fname, "wb"); } else { fp2 = fopen(fname, "ab"); } fwrite(coding[i-1], sizeof(char), blocksize, fp2); fclose(fp2); } } n++; /* Calculate encoding time */ totalsec += timing_delta(&t3, &t4); } /* Create metadata file */ if (fp != NULL) { sprintf(fname, "%s/Coding/%s_meta.txt", curdir, s1); fp2 = fopen(fname, "wb"); fprintf(fp2, "%s\n", argv[1]); fprintf(fp2, "%d\n", size); fprintf(fp2, "%d %d %d %d %d\n", k, m, w, packetsize, buffersize); fprintf(fp2, "%s\n", argv[4]); fprintf(fp2, "%d\n", tech); fprintf(fp2, "%d\n", readins); fclose(fp2); } /* Free allocated memory */ free(s1); free(fname); free(block); free(curdir); /* Calculate rate in MB/sec and print */ timing_set(&t2); tsec = timing_delta(&t1, &t2); printf("Encoding (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/totalsec); printf("En_Total (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/tsec); return 0; } /* is_prime returns 1 if number if prime, 0 if not prime */ int is_prime(int w) { int prime55[] = {2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71, 73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179, 181,191,193,197,199,211,223,227,229,233,239,241,251,257}; int i; for (i = 0; i < 55; i++) { if (w%prime55[i] == 0) { if (w == prime55[i]) return 1; else { return 0; } } } assert(0); } /* Handles ctrl-\ event */ void ctrl_bs_handler(int dummy) { time_t mytime; mytime = time(0); fprintf(stderr, "\n%s\n", ctime(&mytime)); fprintf(stderr, "You just typed ctrl-\\ in encoder.c.\n"); fprintf(stderr, "Total number of read ins = %d\n", readins); fprintf(stderr, "Current read in: %d\n", n); fprintf(stderr, "Method: %s\n\n", Methods[method]); signal(SIGQUIT, ctrl_bs_handler); }