/* * * Copyright (c) 2013, James S. Plank and Kevin Greenan * All rights reserved. * * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure * Coding Techniques * * Revision 2.0: Galois Field backend now links to GF-Complete * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * - Neither the name of the University of Tennessee nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* This program takes as input an inputfile, k, m, a coding technique, w, and packetsize. It creates k+m files from the original file so that k of these files are parts of the original file and m of the files are encoded based on the given coding technique. The format of the created files is the file name with "_k#" or "_m#" and then the extension. (For example, inputfile test.txt would yield file "test_k1.txt".) */ #include #include #include #include #include #include #include #include "jerasure.h" #include "reed_sol.h" #include "galois.h" #include "cauchy.h" #include "liberation.h" #define N 10 enum Coding_Technique {Reed_Sol_Van, Reed_Sol_R6_Op, Cauchy_Orig, Cauchy_Good, Liberation, Blaum_Roth, Liber8tion, RDP, EVENODD, No_Coding}; char *Methods[N] = {"reed_sol_van", "reed_sol_r6_op", "cauchy_orig", "cauchy_good", "liberation", "blaum_roth", "liber8tion", "no_coding"}; /* Global variables for signal handler */ int readins, n; enum Coding_Technique method; /* Function prototypes */ int is_prime(int w); void ctrl_bs_handler(int dummy); int jfread(void *ptr, int size, int nmembers, FILE *stream) { int nd; int *li, i; if (stream != NULL) return fread(ptr, size, nmembers, stream); nd = size/sizeof(int); li = (int *) ptr; for (i = 0; i < nd; i++) li[i] = mrand48(); return size; } int main (int argc, char **argv) { FILE *fp, *fp2; // file pointers char *memblock; // reading in file char *block; // padding file int size, newsize; // size of file and temp size struct stat status; // finding file size enum Coding_Technique tech; // coding technique (parameter) int k, m, w, packetsize; // parameters int buffersize; // paramter int i, j; // loop control variables int blocksize; // size of k+m files int total; int extra; /* Jerasure Arguments */ char **data; char **coding; int *matrix; int *bitmatrix; int **schedule; int *erasure; int *erased; /* Creation of file name variables */ char temp[5]; char *s1, *s2, *extension; char *fname; int md; char *curdir; /* Timing variables */ struct timeval t1, t2, t3, t4; struct timezone tz; double tsec; double totalsec; struct timeval start, stop; /* Find buffersize */ int up, down; signal(SIGQUIT, ctrl_bs_handler); /* Start timing */ gettimeofday(&t1, &tz); totalsec = 0.0; matrix = NULL; bitmatrix = NULL; schedule = NULL; /* Error check Arguments*/ if (argc != 8) { fprintf(stderr, "usage: inputfile k m coding_technique w (packetsize) (buffersize)\n"); fprintf(stderr, "\nChoose one of the following coding techniques: \nreed_sol_van, \nreed_sol_r6_op, \ncauchy_orig, \ncauchy_good, \nliberation, \nblaum_roth, \nliber8tion"); exit(0); } /* Conversion of parameters and error checking */ if (sscanf(argv[2], "%d", &k) == 0 || k <= 0) { fprintf(stderr, "Invalid value for k\n"); exit(0); } if (sscanf(argv[3], "%d", &m) == 0 || m < 0) { fprintf(stderr, "Invalid value for m\n"); exit(0); } if (sscanf(argv[5],"%d", &w) == 0 || w <= 0) { fprintf(stderr, "Invalid value for w.\n"); exit(0); } if (argc == 6) { packetsize = 0; } else { if (sscanf(argv[6], "%d", &packetsize) == 0 || packetsize < 0) { fprintf(stderr, "Invalid value for packetsize.\n"); exit(0); } } if (argc != 8) { buffersize = 0; } else { if (sscanf(argv[7], "%d", &buffersize) == 0 || buffersize < 0) { fprintf(stderr, "Invalid value for buffersize\n"); exit(0); } } /* Determine proper buffersize by finding the closest valid buffersize to the input value */ if (buffersize != 0) { if (packetsize != 0 && buffersize%(sizeof(int)*w*k*packetsize) != 0) { up = buffersize; down = buffersize; while (up%(sizeof(int)*w*k*packetsize) != 0 && (down%(sizeof(int)*w*k*packetsize) != 0)) { up++; if (down == 0) { down--; } } if (up%(sizeof(int)*w*k*packetsize) == 0) { buffersize = up; } else { if (down != 0) { buffersize = down; } } } else if (packetsize == 0 && buffersize%(sizeof(int)*w*k) != 0) { up = buffersize; down = buffersize; while (up%(sizeof(int)*w*k) != 0 && down%(sizeof(int)*w*k) != 0) { up++; down--; } if (up%(sizeof(int)*w*k) == 0) { buffersize = up; } else { buffersize = down; } } } /* Setting of coding technique and error checking */ if (strcmp(argv[4], "no_coding") == 0) { tech = No_Coding; } else if (strcmp(argv[4], "reed_sol_van") == 0) { tech = Reed_Sol_Van; if (w != 8 && w != 16 && w != 32) { fprintf(stderr, "w must be one of {8, 16, 32}\n"); exit(0); } } else if (strcmp(argv[4], "reed_sol_r6_op") == 0) { if (m != 2) { fprintf(stderr, "m must be equal to 2\n"); exit(0); } if (w != 8 && w != 16 && w != 32) { fprintf(stderr, "w must be one of {8, 16, 32}\n"); exit(0); } tech = Reed_Sol_R6_Op; } else if (strcmp(argv[4], "cauchy_orig") == 0) { tech = Cauchy_Orig; if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } } else if (strcmp(argv[4], "cauchy_good") == 0) { tech = Cauchy_Good; if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } } else if (strcmp(argv[4], "liberation") == 0) { if (k > w) { fprintf(stderr, "k must be less than or equal to w\n"); exit(0); } if (w <= 2 || !(w%2) || !is_prime(w)) { fprintf(stderr, "w must be greater than two and w must be prime\n"); exit(0); } if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } if ((packetsize%(sizeof(int))) != 0) { fprintf(stderr, "packetsize must be a multiple of sizeof(int)\n"); exit(0); } tech = Liberation; } else if (strcmp(argv[4], "blaum_roth") == 0) { if (k > w) { fprintf(stderr, "k must be less than or equal to w\n"); exit(0); } if (w <= 2 || !((w+1)%2) || !is_prime(w+1)) { fprintf(stderr, "w must be greater than two and w+1 must be prime\n"); exit(0); } if (packetsize == 0) { fprintf(stderr, "Must include packetsize.\n"); exit(0); } if ((packetsize%(sizeof(int))) != 0) { fprintf(stderr, "packetsize must be a multiple of sizeof(int)\n"); exit(0); } tech = Blaum_Roth; } else if (strcmp(argv[4], "liber8tion") == 0) { if (packetsize == 0) { fprintf(stderr, "Must include packetsize\n"); exit(0); } if (w != 8) { fprintf(stderr, "w must equal 8\n"); exit(0); } if (m != 2) { fprintf(stderr, "m must equal 2\n"); exit(0); } if (k > w) { fprintf(stderr, "k must be less than or equal to w\n"); exit(0); } tech = Liber8tion; } else { fprintf(stderr, "Not a valid coding technique. Choose one of the following: reed_sol_van, reed_sol_r6_op, cauchy_orig, cauchy_good, liberation, blaum_roth, liber8tion, no_coding\n"); exit(0); } /* Set global variable method for signal handler */ method = tech; /* Get current working directory for construction of file names */ curdir = (char*)malloc(sizeof(char)*1000); getcwd(curdir, 1000); if (argv[1][0] != '-') { /* Open file and error check */ fp = fopen(argv[1], "rb"); if (fp == NULL) { fprintf(stderr, "Unable to open file.\n"); exit(0); } /* Create Coding directory */ i = mkdir("Coding", S_IRWXU); if (i == -1 && errno != EEXIST) { fprintf(stderr, "Unable to create Coding directory.\n"); exit(0); } /* Determine original size of file */ stat(argv[1], &status); size = status.st_size; } else { if (sscanf(argv[1]+1, "%d", &size) != 1 || size <= 0) { fprintf(stderr, "Files starting with '-' should be sizes for randomly created input\n"); exit(1); } fp = NULL; srand48(time(0)); } newsize = size; /* Find new size by determining next closest multiple */ if (packetsize != 0) { if (size%(k*w*packetsize*sizeof(int)) != 0) { while (newsize%(k*w*packetsize*sizeof(int)) != 0) newsize++; } } else { if (size%(k*w*sizeof(int)) != 0) { while (newsize%(k*w*sizeof(int)) != 0) newsize++; } } if (buffersize != 0) { while (newsize%buffersize != 0) { newsize++; } } /* Determine size of k+m files */ blocksize = newsize/k; /* Allow for buffersize and determine number of read-ins */ if (size > buffersize && buffersize != 0) { if (newsize%buffersize != 0) { readins = newsize/buffersize; } else { readins = newsize/buffersize; } block = (char *)malloc(sizeof(char)*buffersize); blocksize = buffersize/k; } else { readins = 1; buffersize = size; block = (char *)malloc(sizeof(char)*newsize); } /* Break inputfile name into the filename and extension */ s1 = (char*)malloc(sizeof(char)*(strlen(argv[1])+20)); s2 = strrchr(argv[1], '/'); if (s2 != NULL) { s2++; strcpy(s1, s2); } else { strcpy(s1, argv[1]); } s2 = strchr(s1, '.'); if (s2 != NULL) { extension = strdup(s2); *s2 = '\0'; } else { extension = strdup(""); } /* Allocate for full file name */ fname = (char*)malloc(sizeof(char)*(strlen(argv[1])+strlen(curdir)+20)); sprintf(temp, "%d", k); md = strlen(temp); /* Allocate data and coding */ data = (char **)malloc(sizeof(char*)*k); coding = (char **)malloc(sizeof(char*)*m); for (i = 0; i < m; i++) { coding[i] = (char *)malloc(sizeof(char)*blocksize); if (coding[i] == NULL) { perror("malloc"); exit(1); } } /* Create coding matrix or bitmatrix and schedule */ gettimeofday(&t3, &tz); switch(tech) { case No_Coding: break; case Reed_Sol_Van: matrix = reed_sol_vandermonde_coding_matrix(k, m, w); break; case Cauchy_Orig: matrix = cauchy_original_coding_matrix(k, m, w); bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Cauchy_Good: matrix = cauchy_good_general_coding_matrix(k, m, w); bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Liberation: bitmatrix = liberation_coding_bitmatrix(k, w); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Blaum_Roth: bitmatrix = blaum_roth_coding_bitmatrix(k, w); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; case Liber8tion: bitmatrix = liber8tion_coding_bitmatrix(k); schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix); break; } gettimeofday(&start, &tz); gettimeofday(&t4, &tz); tsec = 0.0; tsec += t4.tv_usec; tsec -= t3.tv_usec; tsec /= 1000000.0; tsec += t4.tv_sec; tsec -= t3.tv_sec; totalsec += tsec; /* Read in data until finished */ n = 1; total = 0; while (n <= readins) { /* Check if padding is needed, if so, add appropriate number of zeros */ if (total < size && total+buffersize <= size) { total += jfread(block, sizeof(char), buffersize, fp); } else if (total < size && total+buffersize > size) { extra = jfread(block, sizeof(char), buffersize, fp); for (i = extra; i < buffersize; i++) { block[i] = '0'; } } else if (total == size) { for (i = 0; i < buffersize; i++) { block[i] = '0'; } } /* Set pointers to point to file data */ for (i = 0; i < k; i++) { data[i] = block+(i*blocksize); } gettimeofday(&t3, &tz); /* Encode according to coding method */ switch(tech) { case No_Coding: break; case Reed_Sol_Van: jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize); break; case Reed_Sol_R6_Op: reed_sol_r6_encode(k, w, data, coding, blocksize); break; case Cauchy_Orig: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Cauchy_Good: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Liberation: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Blaum_Roth: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; case Liber8tion: jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize); break; } gettimeofday(&t4, &tz); /* Write data and encoded data to k+m files */ for (i = 1; i <= k; i++) { if (fp == NULL) { bzero(data[i-1], blocksize); } else { sprintf(fname, "%s/Coding/%s_k%0*d%s", curdir, s1, md, i, extension); if (n == 1) { fp2 = fopen(fname, "wb"); } else { fp2 = fopen(fname, "ab"); } fwrite(data[i-1], sizeof(char), blocksize, fp2); fclose(fp2); } } for (i = 1; i <= m; i++) { if (fp == NULL) { bzero(data[i-1], blocksize); } else { sprintf(fname, "%s/Coding/%s_m%0*d%s", curdir, s1, md, i, extension); if (n == 1) { fp2 = fopen(fname, "wb"); } else { fp2 = fopen(fname, "ab"); } fwrite(coding[i-1], sizeof(char), blocksize, fp2); fclose(fp2); } } n++; /* Calculate encoding time */ tsec = 0.0; tsec += t4.tv_usec; tsec -= t3.tv_usec; tsec /= 1000000.0; tsec += t4.tv_sec; tsec -= t3.tv_sec; totalsec += tsec; } /* Create metadata file */ if (fp != NULL) { sprintf(fname, "%s/Coding/%s_meta.txt", curdir, s1); fp2 = fopen(fname, "wb"); fprintf(fp2, "%s\n", argv[1]); fprintf(fp2, "%d\n", size); fprintf(fp2, "%d %d %d %d %d\n", k, m, w, packetsize, buffersize); fprintf(fp2, "%s\n", argv[4]); fprintf(fp2, "%d\n", tech); fprintf(fp2, "%d\n", readins); fclose(fp2); } /* Free allocated memory */ free(s1); free(fname); free(block); free(curdir); /* Calculate rate in MB/sec and print */ gettimeofday(&t2, &tz); tsec = 0.0; tsec += t2.tv_usec; tsec -= t1.tv_usec; tsec /= 1000000.0; tsec += t2.tv_sec; tsec -= t1.tv_sec; printf("Encoding (MB/sec): %0.10f\n", (size/1024/1024)/totalsec); printf("En_Total (MB/sec): %0.10f\n", (size/1024/1024)/tsec); } /* is_prime returns 1 if number if prime, 0 if not prime */ int is_prime(int w) { int prime55[] = {2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71, 73,79,83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179, 181,191,193,197,199,211,223,227,229,233,239,241,251,257}; int i; for (i = 0; i < 55; i++) { if (w%prime55[i] == 0) { if (w == prime55[i]) return 1; else { return 0; } } } } /* Handles ctrl-\ event */ void ctrl_bs_handler(int dummy) { time_t mytime; mytime = time(0); fprintf(stderr, "\n%s\n", ctime(&mytime)); fprintf(stderr, "You just typed ctrl-\\ in encoder.c.\n"); fprintf(stderr, "Total number of read ins = %d\n", readins); fprintf(stderr, "Current read in: %d\n", n); fprintf(stderr, "Method: %s\n\n", Methods[method]); signal(SIGQUIT, ctrl_bs_handler); }