/*
 * jerasure/Examples/encoder.c
 * (628 lines, 16 KiB, C)
 */

/* *
* Copyright (c) 2013, James S. Plank and Kevin Greenan
* All rights reserved.
*
* Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
* Coding Techniques
*
* Revision 2.0: Galois Field backend now links to GF-Complete
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* - Neither the name of the University of Tennessee nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
This program takes as input an inputfile, k, m, a coding
technique, w, packetsize, and buffersize. It creates k+m files
from the original file so that k of these files are parts of
the original file and m of the files are encoded based on
the given coding technique. The created files are written to a
"Coding" directory under the current working directory. Each one
is named after the input file, with "_k#" or "_m#" inserted
before the extension.
(For example, inputfile test.txt would yield file "test_k1.txt".)
*/
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include "jerasure.h"
#include "reed_sol.h"
#include "galois.h"
#include "cauchy.h"
#include "liberation.h"
/* Number of entries in enum Coding_Technique (and in Methods[]). */
#define N 10
/* Coding techniques. main() only accepts a subset of these on the command
   line; RDP and EVENODD are not selectable there. */
enum Coding_Technique {Reed_Sol_Van, Reed_Sol_R6_Op, Cauchy_Orig, Cauchy_Good, Liberation, Blaum_Roth, Liber8tion, RDP, EVENODD, No_Coding};
/* Printable name of each technique, indexed by enum Coding_Technique.
   Designated initializers keep every name tied to its enumerator, so that
   Methods[method] is never NULL and never names the wrong technique. */
char *Methods[N] = {
    [Reed_Sol_Van]   = "reed_sol_van",
    [Reed_Sol_R6_Op] = "reed_sol_r6_op",
    [Cauchy_Orig]    = "cauchy_orig",
    [Cauchy_Good]    = "cauchy_good",
    [Liberation]     = "liberation",
    [Blaum_Roth]     = "blaum_roth",
    [Liber8tion]     = "liber8tion",
    [RDP]            = "rdp",
    [EVENODD]        = "evenodd",
    [No_Coding]      = "no_coding"
};
/* Global variables for signal handler: total number of read-ins, the
   read-in currently being processed, and the selected technique. */
int readins, n;
enum Coding_Technique method;
/* Function prototypes */
int is_prime(int w);
void ctrl_bs_handler(int dummy);
/* fread() look-alike that can also synthesize input.
 *
 * ptr      - destination buffer, at least size*nmembers bytes long
 * size     - size in bytes of one member
 * nmembers - number of members requested
 * stream   - file to read from, or NULL to fill the buffer with
 *            pseudo-random bytes drawn from mrand48()
 *
 * Returns the number of members stored, exactly like fread(): whatever
 * fread() reports for a real stream, and nmembers for the random source
 * (0 when size or nmembers is not positive).
 */
int jfread(void *ptr, int size, int nmembers, FILE *stream)
{
    unsigned char *out;
    size_t total, done, chunk;
    int word;

    if (stream != NULL) return (int) fread(ptr, size, nmembers, stream);
    if (size <= 0 || nmembers <= 0) return 0;

    out = (unsigned char *) ptr;
    total = (size_t) size * (size_t) nmembers;
    for (done = 0; done < total; done += chunk) {
        /* mrand48() yields 32 random bits; copy them bytewise so the buffer
           needs no particular alignment and a short tail is still filled. */
        word = (int) mrand48();
        chunk = total - done;
        if (chunk > sizeof(word)) chunk = sizeof(word);
        memcpy(out + done, &word, chunk);
    }
    return nmembers;
}
/* Print msg on stderr and terminate with a failure status. */
static void fail(const char *msg)
{
    fputs(msg, stderr);
    exit(1);
}

/* malloc() that never returns NULL: a zero-byte request is promoted to one
   byte, and an allocation failure terminates the program. */
static void *checked_malloc(size_t bytes)
{
    void *p = malloc(bytes != 0 ? bytes : 1);
    if (p == NULL) { perror("malloc"); exit(1); }
    return p;
}

/* Seconds elapsed between two gettimeofday() samples. */
static double elapsed_seconds(const struct timeval *from, const struct timeval *to)
{
    double sec = 0.0;
    sec += to->tv_usec;
    sec -= from->tv_usec;
    sec /= 1000000.0;
    sec += to->tv_sec;
    sec -= from->tv_sec;
    return sec;
}

/* Smallest multiple of unit that is >= value (value is assumed >= 0). */
static int round_up_to_multiple(int value, size_t unit)
{
    size_t rem;
    if (unit == 0) return value;
    rem = (size_t) value % unit;
    if (rem == 0) return value;
    return (int) ((size_t) value + (unit - rem));
}

/* Positive multiple of unit closest to requested; a tie goes to the larger
   one. Zero is never chosen as long as the next multiple up fits in an int,
   because a buffersize of 0 means "no buffering" to the rest of main(). */
static int nearest_multiple(int requested, size_t unit)
{
    size_t want, below, above;

    if (requested <= 0 || unit == 0) return requested;
    want = (size_t) requested;
    below = (want / unit) * unit;
    if (below == want) return requested;
    above = below + unit;
    if (above > (size_t) INT_MAX) return (int) below;
    if (below != 0 && want - below < above - want) return (int) below;
    return (int) above;
}

/* Write len bytes of buf to path. The file is created/truncated when first
   is non-zero and appended to otherwise. Any I/O error is fatal. */
static void write_chunk(const char *path, int first, const char *buf, int len)
{
    FILE *out = fopen(path, first ? "wb" : "ab");
    if (out == NULL) {
        fprintf(stderr, "Unable to open %s for writing.\n", path);
        exit(1);
    }
    if (fwrite(buf, sizeof(char), len, out) != (size_t) len) {
        fprintf(stderr, "Unable to write to %s.\n", path);
        exit(1);
    }
    if (fclose(out) != 0) {
        fprintf(stderr, "Unable to write to %s.\n", path);
        exit(1);
    }
}

/* Encoder entry point.
 *
 * argv: inputfile k m coding_technique w packetsize buffersize
 *   - all seven arguments are required; packetsize and buffersize may be 0
 *     (no packet size / read the whole file in one go);
 *   - an inputfile of the form "-<bytes>" encodes that many bytes of
 *     generated data and writes no files (timing only).
 *
 * For a real input file the program writes k data files, m coding files and
 * a "_meta.txt" description into ./Coding, then prints the throughput.
 * Returns 0 on success; every error path exits with status 1.
 */
int main (int argc, char **argv) {
    FILE *fp;                       // input file (NULL for generated input)
    FILE *meta;                     // metadata file
    char *block;                    // read buffer: file data plus padding
    int size, newsize;              // original size and padded size
    struct stat status;             // finding file size
    enum Coding_Technique tech;     // coding technique (parameter)
    int k, m, w, packetsize;        // parameters
    int buffersize;                 // parameter
    int i;                          // loop control variable
    int blocksize;                  // bytes given to each of the k+m files per read-in
    int total;                      // bytes consumed from the input so far
    int extra;                      // members delivered by the current read
    size_t unit;                    // granularity required of buffersize and newsize
    size_t fname_len;

    /* Jerasure Arguments */
    char **data;
    char **coding;
    int *matrix;
    int *bitmatrix;
    int **schedule;

    /* Creation of file name variables */
    char temp[16];                  // decimal text of k; holds any int
    char *s1, *s2, *extension;
    char *fname;
    int md;                         // digits used for the zero-padded file index
    char *curdir;

    /* Timing variables */
    struct timeval t1, t2, t3, t4;
    double tsec;
    double totalsec;

    signal(SIGQUIT, ctrl_bs_handler);

    /* Start timing */
    gettimeofday(&t1, NULL);
    totalsec = 0.0;
    matrix = NULL;
    bitmatrix = NULL;
    schedule = NULL;

    /* Error check Arguments*/
    if (argc != 8) {
        fprintf(stderr, "usage: inputfile k m coding_technique w (packetsize) (buffersize)\n");
        fprintf(stderr, "\nChoose one of the following coding techniques: \nreed_sol_van, \nreed_sol_r6_op, \ncauchy_orig, \ncauchy_good, \nliberation, \nblaum_roth, \nliber8tion");
        exit(1);
    }

    /* Conversion of parameters and error checking. sscanf() returns EOF
       (not 0) for an empty argument, so success is tested as "== 1". */
    if (sscanf(argv[2], "%d", &k) != 1 || k <= 0) {
        fail("Invalid value for k\n");
    }
    if (sscanf(argv[3], "%d", &m) != 1 || m < 0) {
        fail("Invalid value for m\n");
    }
    if (sscanf(argv[5], "%d", &w) != 1 || w <= 0) {
        fail("Invalid value for w.\n");
    }
    if (sscanf(argv[6], "%d", &packetsize) != 1 || packetsize < 0) {
        fail("Invalid value for packetsize.\n");
    }
    if (sscanf(argv[7], "%d", &buffersize) != 1 || buffersize < 0) {
        fail("Invalid value for buffersize\n");
    }

    /* Every buffer handed to the coder must be a multiple of this. */
    unit = sizeof(int) * (size_t) w * (size_t) k;
    if (packetsize != 0) {
        unit *= (size_t) packetsize;
    }

    /* Determine proper buffersize by finding the closest valid buffersize to the input value */
    if (buffersize != 0) {
        buffersize = nearest_multiple(buffersize, unit);
    }

    /* Setting of coding technique and error checking */
    if (strcmp(argv[4], "no_coding") == 0) {
        tech = No_Coding;
    }
    else if (strcmp(argv[4], "reed_sol_van") == 0) {
        tech = Reed_Sol_Van;
        if (w != 8 && w != 16 && w != 32) {
            fail("w must be one of {8, 16, 32}\n");
        }
    }
    else if (strcmp(argv[4], "reed_sol_r6_op") == 0) {
        if (m != 2) {
            fail("m must be equal to 2\n");
        }
        if (w != 8 && w != 16 && w != 32) {
            fail("w must be one of {8, 16, 32}\n");
        }
        tech = Reed_Sol_R6_Op;
    }
    else if (strcmp(argv[4], "cauchy_orig") == 0) {
        tech = Cauchy_Orig;
        if (packetsize == 0) {
            fail("Must include packetsize.\n");
        }
    }
    else if (strcmp(argv[4], "cauchy_good") == 0) {
        tech = Cauchy_Good;
        if (packetsize == 0) {
            fail("Must include packetsize.\n");
        }
    }
    else if (strcmp(argv[4], "liberation") == 0) {
        if (k > w) {
            fail("k must be less than or equal to w\n");
        }
        if (w <= 2 || !(w%2) || !is_prime(w)) {
            fail("w must be greater than two and w must be prime\n");
        }
        if (packetsize == 0) {
            fail("Must include packetsize.\n");
        }
        if ((packetsize%(sizeof(int))) != 0) {
            fail("packetsize must be a multiple of sizeof(int)\n");
        }
        tech = Liberation;
    }
    else if (strcmp(argv[4], "blaum_roth") == 0) {
        if (k > w) {
            fail("k must be less than or equal to w\n");
        }
        if (w <= 2 || !((w+1)%2) || !is_prime(w+1)) {
            fail("w must be greater than two and w+1 must be prime\n");
        }
        if (packetsize == 0) {
            fail("Must include packetsize.\n");
        }
        if ((packetsize%(sizeof(int))) != 0) {
            fail("packetsize must be a multiple of sizeof(int)\n");
        }
        tech = Blaum_Roth;
    }
    else if (strcmp(argv[4], "liber8tion") == 0) {
        if (packetsize == 0) {
            fail("Must include packetsize\n");
        }
        if (w != 8) {
            fail("w must equal 8\n");
        }
        if (m != 2) {
            fail("m must equal 2\n");
        }
        if (k > w) {
            fail("k must be less than or equal to w\n");
        }
        tech = Liber8tion;
    }
    else {
        fail("Not a valid coding technique. Choose one of the following: reed_sol_van, reed_sol_r6_op, cauchy_orig, cauchy_good, liberation, blaum_roth, liber8tion, no_coding\n");
    }

    /* Set global variable method for signal handler */
    method = tech;

    /* Get current working directory for construction of file names */
    curdir = (char *) checked_malloc(sizeof(char)*1000);
    if (getcwd(curdir, 1000) == NULL) {
        perror("getcwd");
        exit(1);
    }

    if (argv[1][0] != '-') {
        /* Open file and error check */
        fp = fopen(argv[1], "rb");
        if (fp == NULL) {
            fail("Unable to open file.\n");
        }

        /* Create Coding directory */
        i = mkdir("Coding", S_IRWXU);
        if (i == -1 && errno != EEXIST) {
            fail("Unable to create Coding directory.\n");
        }

        /* Determine original size of file */
        if (stat(argv[1], &status) != 0) {
            fail("Unable to determine file size.\n");
        }
        /* All size arithmetic below is done in int. */
        if (status.st_size < 0 || status.st_size > INT_MAX) {
            fail("Input file is too large.\n");
        }
        size = (int) status.st_size;
    } else {
        if (sscanf(argv[1]+1, "%d", &size) != 1 || size <= 0) {
            fprintf(stderr, "Files starting with '-' should be sizes for randomly created input\n");
            exit(1);
        }
        fp = NULL;
        srand48(time(0));
    }

    /* Find new size by determining next closest multiple */
    newsize = round_up_to_multiple(size, unit);
    if (buffersize != 0) {
        newsize = round_up_to_multiple(newsize, (size_t) buffersize);
    }

    /* Determine size of k+m files */
    blocksize = newsize/k;

    /* Allow for buffersize and determine number of read-ins */
    if (size > buffersize && buffersize != 0) {
        readins = newsize/buffersize;
        block = (char *) checked_malloc(sizeof(char)*buffersize);
        blocksize = buffersize/k;
    }
    else {
        readins = 1;
        buffersize = size;
        block = (char *) checked_malloc(sizeof(char)*newsize);
        /* The single read below only fills the first `size` bytes, yet the
           whole block is encoded and written: pad the tail up front. */
        memset(block + size, '0', (size_t) (newsize - size));
    }

    /* Break inputfile name into the filename and extension */
    s1 = (char *) checked_malloc(sizeof(char)*(strlen(argv[1])+20));
    s2 = strrchr(argv[1], '/');
    if (s2 != NULL) {
        s2++;
        strcpy(s1, s2);
    }
    else {
        strcpy(s1, argv[1]);
    }
    s2 = strchr(s1, '.');
    if (s2 != NULL) {
        extension = strdup(s2);
        *s2 = '\0';
    } else {
        extension = strdup("");
    }
    if (extension == NULL) { perror("strdup"); exit(1); }

    /* Allocate for full file name: curdir + "/Coding/" + name + "_k" +
       index digits + extension fits with room to spare. */
    fname_len = strlen(argv[1]) + strlen(curdir) + 32;
    fname = (char *) checked_malloc(sizeof(char)*fname_len);
    snprintf(temp, sizeof temp, "%d", k);
    md = strlen(temp);

    /* Allocate data and coding */
    data = (char **) checked_malloc(sizeof(char*)*k);
    coding = (char **) checked_malloc(sizeof(char*)*m);
    for (i = 0; i < m; i++) {
        coding[i] = (char *) checked_malloc(sizeof(char)*blocksize);
    }

    /* Create coding matrix or bitmatrix and schedule */
    gettimeofday(&t3, NULL);
    switch(tech) {
        case No_Coding:
            break;
        case Reed_Sol_Van:
            matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
            break;
        case Cauchy_Orig:
            matrix = cauchy_original_coding_matrix(k, m, w);
            bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
            schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
            break;
        case Cauchy_Good:
            matrix = cauchy_good_general_coding_matrix(k, m, w);
            bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
            schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
            break;
        case Liberation:
            bitmatrix = liberation_coding_bitmatrix(k, w);
            schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
            break;
        case Blaum_Roth:
            bitmatrix = blaum_roth_coding_bitmatrix(k, w);
            schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
            break;
        case Liber8tion:
            bitmatrix = liber8tion_coding_bitmatrix(k);
            schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
            break;
        default:
            /* Reed_Sol_R6_Op needs no precomputed matrix; RDP and EVENODD
               are never selected by the parser above. */
            break;
    }
    gettimeofday(&t4, NULL);
    totalsec += elapsed_seconds(&t3, &t4);

    /* Read in data until finished */
    n = 1;
    total = 0;
    while (n <= readins) {
        /* Check if padding is needed, if so, add appropriate
           number of zeros */
        if (total < size) {
            extra = jfread(block, sizeof(char), buffersize, fp);
            if (extra < 0) extra = 0;
            if (extra > buffersize) extra = buffersize;
            /* Only a full-buffer read advances the running total; the
               partial read is always the last one. */
            if (total+buffersize <= size) {
                total += extra;
            }
            /* Pad whatever the read did not deliver (end of file or a
               short read) so no uninitialised bytes are encoded. */
            memset(block + extra, '0', (size_t) (buffersize - extra));
        }
        else if (total == size) {
            memset(block, '0', (size_t) buffersize);
        }

        /* Set pointers to point to file data */
        for (i = 0; i < k; i++) {
            data[i] = block+(i*blocksize);
        }

        gettimeofday(&t3, NULL);
        /* Encode according to coding method */
        switch(tech) {
            case No_Coding:
                break;
            case Reed_Sol_Van:
                jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize);
                break;
            case Reed_Sol_R6_Op:
                reed_sol_r6_encode(k, w, data, coding, blocksize);
                break;
            case Cauchy_Orig:
            case Cauchy_Good:
            case Liberation:
            case Blaum_Roth:
            case Liber8tion:
                jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
                break;
            default:
                break;
        }
        gettimeofday(&t4, NULL);

        /* Write data and encoded data to k+m files */
        for (i = 1; i <= k; i++) {
            if (fp == NULL) {
                memset(data[i-1], 0, (size_t) blocksize);
            } else {
                snprintf(fname, fname_len, "%s/Coding/%s_k%0*d%s", curdir, s1, md, i, extension);
                write_chunk(fname, n == 1, data[i-1], blocksize);
            }
        }
        for (i = 1; i <= m; i++) {
            if (fp == NULL) {
                /* Clear the coding block (not data[]: that array has only
                   k entries and m may exceed k). */
                memset(coding[i-1], 0, (size_t) blocksize);
            } else {
                snprintf(fname, fname_len, "%s/Coding/%s_m%0*d%s", curdir, s1, md, i, extension);
                write_chunk(fname, n == 1, coding[i-1], blocksize);
            }
        }
        n++;

        /* Calculate encoding time */
        totalsec += elapsed_seconds(&t3, &t4);
    }

    /* Create metadata file */
    if (fp != NULL) {
        snprintf(fname, fname_len, "%s/Coding/%s_meta.txt", curdir, s1);
        meta = fopen(fname, "wb");
        if (meta == NULL) {
            fprintf(stderr, "Unable to open %s for writing.\n", fname);
            exit(1);
        }
        fprintf(meta, "%s\n", argv[1]);
        fprintf(meta, "%d\n", size);
        fprintf(meta, "%d %d %d %d %d\n", k, m, w, packetsize, buffersize);
        fprintf(meta, "%s\n", argv[4]);
        fprintf(meta, "%d\n", tech);
        fprintf(meta, "%d\n", readins);
        fclose(meta);
        fclose(fp);
    }

    /* Free allocated memory */
    for (i = 0; i < m; i++) {
        free(coding[i]);
    }
    free(coding);
    free(data);
    free(extension);
    free(s1);
    free(fname);
    free(block);
    free(curdir);

    /* Calculate rate in MB/sec and print */
    gettimeofday(&t2, NULL);
    tsec = elapsed_seconds(&t1, &t2);
    /* Divide in floating point: integer division reported 0 MB for any
       input smaller than 1 MiB. */
    printf("Encoding (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/totalsec);
    printf("En_Total (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/tsec);
    return 0;
}
/* is_prime returns 1 if w is prime, 0 if it is not.
   Values below 2 (including negatives) are not prime. Trial division is
   used so that every int gets a defined answer, not only those that have a
   factor in a fixed table of small primes. */
int is_prime(int w) {
    int d;

    if (w < 2) return 0;
    /* "d <= w / d" is "d*d <= w" without the risk of overflowing d*d. */
    for (d = 2; d <= w / d; d++) {
        if (w % d == 0) return 0;
    }
    return 1;
}
/* Handles ctrl-\ event */
void ctrl_bs_handler(int dummy) {
time_t mytime;
mytime = time(0);
fprintf(stderr, "\n%s\n", ctime(&mytime));
fprintf(stderr, "You just typed ctrl-\\ in encoder.c.\n");
fprintf(stderr, "Total number of read ins = %d\n", readins);
fprintf(stderr, "Current read in: %d\n", n);
fprintf(stderr, "Method: %s\n\n", Methods[method]);
signal(SIGQUIT, ctrl_bs_handler);
}