jerasure/Examples/encoder.c

637 lines
16 KiB
C
Raw Normal View History

2013-10-01 21:25:12 +04:00
/* Examples/encoder.c
* Catherine D. Schuman, James S. Plank
Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure Coding Techniques
Revision 1.2A
May 24, 2011
James S. Plank
Department of Electrical Engineering and Computer Science
University of Tennessee
Knoxville, TN 37996
plank@cs.utk.edu
Copyright (c) 2011, James S. Plank
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
- Neither the name of the University of Tennessee nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
/*
This program takes as input an inputfile, k, m, a coding
technique, w, and packetsize. It creates k+m files from
the original file so that k of these files are parts of
the original file and m of the files are encoded based on
the given coding technique. The format of the created files
is the file name with "_k#" or "_m#" and then the extension.
(For example, inputfile test.txt would yield file "test_k1.txt".)
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "jerasure.h"
#include "reed_sol.h"
#include "galois.h"
#include "cauchy.h"
#include "liberation.h"
/* Number of entries in enum Coding_Technique (and in Methods below). */
#define N 10
enum Coding_Technique {Reed_Sol_Van, Reed_Sol_R6_Op, Cauchy_Orig, Cauchy_Good, Liberation, Blaum_Roth, Liber8tion, RDP, EVENODD, No_Coding};
/* Printable name of each coding technique, indexed by enum Coding_Technique.
   Designated initializers keep every name attached to its enum value, so
   Methods[No_Coding] is "no_coding" rather than an unset (NULL) slot. */
char *Methods[N] = {
	[Reed_Sol_Van]   = "reed_sol_van",
	[Reed_Sol_R6_Op] = "reed_sol_r6_op",
	[Cauchy_Orig]    = "cauchy_orig",
	[Cauchy_Good]    = "cauchy_good",
	[Liberation]     = "liberation",
	[Blaum_Roth]     = "blaum_roth",
	[Liber8tion]     = "liber8tion",
	[RDP]            = "rdp",
	[EVENODD]        = "evenodd",
	[No_Coding]      = "no_coding"
};
/* Global variables for signal handler:
   readins - total number of buffer-sized reads main will perform
   n       - 1-based index of the read currently being processed
   method  - coding technique selected on the command line */
int readins, n;
enum Coding_Technique method;
/* Function prototypes */
int is_prime(int w);
void ctrl_bs_handler(int dummy);
/* jfread: fread() wrapper that can also synthesize input.
 *
 * ptr      - destination buffer, at least size*nmembers bytes long
 * size     - size in bytes of one item
 * nmembers - number of items requested
 * stream   - file to read from, or NULL to generate pseudo-random data
 *
 * With a non-NULL stream this is exactly fread() and returns the number
 * of items read.  With a NULL stream the whole size*nmembers byte region
 * is filled from mrand48() and nmembers is returned, so both modes report
 * progress in the same unit (items).  A non-positive size or nmembers in
 * random mode fills nothing and returns 0.
 */
int jfread(void *ptr, int size, int nmembers, FILE *stream)
{
	unsigned char *out;
	size_t remaining;
	size_t chunk;
	int word;

	if (stream != NULL) return fread(ptr, size, nmembers, stream);
	if (size <= 0 || nmembers <= 0) return 0;

	out = (unsigned char *) ptr;
	remaining = (size_t) size * (size_t) nmembers;
	while (remaining > 0) {
		word = (int) mrand48();
		/* memcpy keeps unaligned buffers and a trailing partial word safe */
		chunk = remaining < sizeof(word) ? remaining : sizeof(word);
		memcpy(out, &word, chunk);
		out += chunk;
		remaining -= chunk;
	}
	return nmembers;
}
/* Prints msg on stderr and terminates the program with a failure status. */
static void enc_die(const char *msg)
{
	fputs(msg, stderr);
	exit(1);
}

/* malloc() that never returns NULL: reports the failure and exits instead.
   A zero-byte request is bumped to one byte so NULL always means failure. */
static void *enc_xmalloc(size_t bytes)
{
	void *p = malloc(bytes != 0 ? bytes : 1);
	if (p == NULL) {
		perror("malloc");
		exit(1);
	}
	return p;
}

/* Returns sizeof(int)*w*k*packetsize (packetsize 0 counts as 1): the
   granularity that file and buffer sizes are aligned to.  Exits if the
   product does not fit in an int. */
static size_t enc_stripe_unit(int k, int w, int packetsize)
{
	int factors[3];
	size_t unit = sizeof(int);
	int i;

	factors[0] = w;
	factors[1] = k;
	factors[2] = (packetsize != 0) ? packetsize : 1;
	for (i = 0; i < 3; i++) {
		if (unit > (size_t) INT_MAX / (size_t) factors[i]) {
			enc_die("k, w and packetsize are too large.\n");
		}
		unit *= (size_t) factors[i];
	}
	return unit;
}

/* Returns the smallest multiple of unit that is >= value (value >= 0,
   unit > 0).  Exits if the result does not fit in an int. */
static int enc_round_up(int value, size_t unit)
{
	size_t v = (size_t) value;
	size_t rem = v % unit;

	if (rem == 0) return value;
	if (unit - rem > (size_t) INT_MAX - v) {
		enc_die("Padded input size is too large.\n");
	}
	return (int) (v + (unit - rem));
}

/* Returns the positive multiple of unit closest to value (value > 0,
   0 < unit <= INT_MAX).  Ties go upwards and zero is never returned. */
static int enc_nearest_multiple(int value, size_t unit)
{
	size_t v = (size_t) value;
	size_t below = v - (v % unit);
	size_t above;

	if (below == v) return value;
	above = below + unit;
	if (above > (size_t) INT_MAX) return (int) below; /* below != 0 here since unit <= INT_MAX */
	if (below == 0 || above - v <= v - below) return (int) above;
	return (int) below;
}

/* Seconds elapsed between two gettimeofday() samples. */
static double enc_elapsed(const struct timeval *from, const struct timeval *to)
{
	double sec = (double) to->tv_usec - (double) from->tv_usec;
	sec /= 1000000.0;
	sec += (double) to->tv_sec - (double) from->tv_sec;
	return sec;
}

/* Writes bytes bytes of buf to path, truncating the file when first is
   non-zero and appending otherwise.  Exits on any I/O failure. */
static void enc_write_chunk(const char *path, int first, const char *buf, int bytes)
{
	FILE *out = fopen(path, first ? "wb" : "ab");

	if (out == NULL) {
		fprintf(stderr, "Unable to open %s for writing.\n", path);
		exit(1);
	}
	if (fwrite(buf, sizeof(char), (size_t) bytes, out) != (size_t) bytes) {
		fprintf(stderr, "Unable to write %s.\n", path);
		exit(1);
	}
	if (fclose(out) != 0) {
		fprintf(stderr, "Unable to write %s.\n", path);
		exit(1);
	}
}

/* Encoder entry point.
 *
 * usage: inputfile k m coding_technique w (packetsize) (buffersize)
 *
 * packetsize and buffersize are optional and default to 0.  An inputfile
 * argument that starts with '-' is taken as a byte count of generated
 * input; in that mode nothing is written to disk and only the timing is
 * reported.
 *
 * For a real input file the program creates ./Coding, splits the padded
 * input into k data files "<name>_k<i><ext>", writes m coding files
 * "<name>_m<i><ext>" and a "<name>_meta.txt" description, then prints the
 * encoding throughput.  Any invalid argument or I/O failure prints a
 * message on stderr and exits with status 1; success returns 0.
 */
int main (int argc, char **argv) {
	FILE *fp;                    // input file, NULL in generated-input mode
	char *block;                 // buffer holding one read-in (padded)
	size_t block_bytes;          // allocated size of block
	int size, newsize;           // original and padded input size
	struct stat status;          // finding file size
	enum Coding_Technique tech;  // coding technique (parameter)
	int k, m, w, packetsize;     // parameters
	int buffersize;              // parameter
	size_t unit;                 // sizeof(int)*w*k*packetsize alignment
	int i;                       // loop control variable
	int blocksize;               // bytes handed to each of the k+m files per read-in
	int total;                   // bytes consumed from the input so far
	int extra;                   // bytes obtained by the final, short read

	/* Jerasure Arguments */
	char **data;
	char **coding;
	int *matrix = NULL;
	int *bitmatrix = NULL;
	int **schedule = NULL;

	/* Creation of file name variables */
	char *s1, *s2, *extension;
	char *fname;
	size_t fname_len;
	int md;
	char *curdir;

	/* Timing variables */
	struct timeval t1, t2, t3, t4;
	double tsec;
	double totalsec = 0.0;

	signal(SIGQUIT, ctrl_bs_handler);

	/* Start timing */
	gettimeofday(&t1, NULL);

	/* Error check Arguments: the two trailing arguments are optional */
	if (argc < 6 || argc > 8) {
		fprintf(stderr, "usage: inputfile k m coding_technique w (packetsize) (buffersize)\n");
		fprintf(stderr, "\nChoose one of the following coding techniques: \nreed_sol_van, \nreed_sol_r6_op, \ncauchy_orig, \ncauchy_good, \nliberation, \nblaum_roth, \nliber8tion\n");
		exit(1);
	}

	/* Conversion of parameters and error checking.  sscanf returns EOF
	   (not 0) on an empty argument, hence the "!= 1" tests. */
	if (sscanf(argv[2], "%d", &k) != 1 || k <= 0) {
		enc_die("Invalid value for k\n");
	}
	if (sscanf(argv[3], "%d", &m) != 1 || m < 0) {
		enc_die("Invalid value for m\n");
	}
	if (sscanf(argv[5], "%d", &w) != 1 || w <= 0) {
		enc_die("Invalid value for w.\n");
	}
	if (argc < 7) {
		packetsize = 0;
	}
	else if (sscanf(argv[6], "%d", &packetsize) != 1 || packetsize < 0) {
		enc_die("Invalid value for packetsize.\n");
	}
	if (argc < 8) {
		buffersize = 0;
	}
	else if (sscanf(argv[7], "%d", &buffersize) != 1 || buffersize < 0) {
		enc_die("Invalid value for buffersize\n");
	}

	/* Determine proper buffersize by finding the closest valid buffersize
	   (a positive multiple of the stripe unit) to the input value */
	unit = enc_stripe_unit(k, w, packetsize);
	if (buffersize != 0) {
		buffersize = enc_nearest_multiple(buffersize, unit);
	}

	/* Setting of coding technique and error checking */
	if (strcmp(argv[4], "no_coding") == 0) {
		tech = No_Coding;
	}
	else if (strcmp(argv[4], "reed_sol_van") == 0) {
		tech = Reed_Sol_Van;
		if (w != 8 && w != 16 && w != 32) {
			enc_die("w must be one of {8, 16, 32}\n");
		}
	}
	else if (strcmp(argv[4], "reed_sol_r6_op") == 0) {
		if (m != 2) {
			enc_die("m must be equal to 2\n");
		}
		if (w != 8 && w != 16 && w != 32) {
			enc_die("w must be one of {8, 16, 32}\n");
		}
		tech = Reed_Sol_R6_Op;
	}
	else if (strcmp(argv[4], "cauchy_orig") == 0) {
		tech = Cauchy_Orig;
		if (packetsize == 0) {
			enc_die("Must include packetsize.\n");
		}
	}
	else if (strcmp(argv[4], "cauchy_good") == 0) {
		tech = Cauchy_Good;
		if (packetsize == 0) {
			enc_die("Must include packetsize.\n");
		}
	}
	else if (strcmp(argv[4], "liberation") == 0) {
		if (k > w) {
			enc_die("k must be less than or equal to w\n");
		}
		if (w <= 2 || !(w%2) || !is_prime(w)) {
			enc_die("w must be greater than two and w must be prime\n");
		}
		if (packetsize == 0) {
			enc_die("Must include packetsize.\n");
		}
		if ((packetsize%(sizeof(int))) != 0) {
			enc_die("packetsize must be a multiple of sizeof(int)\n");
		}
		tech = Liberation;
	}
	else if (strcmp(argv[4], "blaum_roth") == 0) {
		if (k > w) {
			enc_die("k must be less than or equal to w\n");
		}
		if (w <= 2 || !((w+1)%2) || !is_prime(w+1)) {
			enc_die("w must be greater than two and w+1 must be prime\n");
		}
		if (packetsize == 0) {
			enc_die("Must include packetsize.\n");
		}
		if ((packetsize%(sizeof(int))) != 0) {
			enc_die("packetsize must be a multiple of sizeof(int)\n");
		}
		tech = Blaum_Roth;
	}
	else if (strcmp(argv[4], "liber8tion") == 0) {
		if (packetsize == 0) {
			enc_die("Must include packetsize\n");
		}
		if (w != 8) {
			enc_die("w must equal 8\n");
		}
		if (m != 2) {
			enc_die("m must equal 2\n");
		}
		if (k > w) {
			enc_die("k must be less than or equal to w\n");
		}
		tech = Liber8tion;
	}
	else {
		tech = No_Coding; /* not reached: enc_die exits */
		enc_die("Not a valid coding technique. Choose one of the following: reed_sol_van, reed_sol_r6_op, cauchy_orig, cauchy_good, liberation, blaum_roth, liber8tion, no_coding\n");
	}

	/* Set global variable method for signal handler */
	method = tech;

	/* Get current working directory for construction of file names */
	curdir = (char *) enc_xmalloc(sizeof(char)*1000);
	if (getcwd(curdir, 1000) == NULL) {
		perror("getcwd");
		exit(1);
	}

	if (argv[1][0] != '-') {
		/* Open file and error check */
		fp = fopen(argv[1], "rb");
		if (fp == NULL) {
			enc_die("Unable to open file.\n");
		}

		/* Create Coding directory */
		i = mkdir("Coding", S_IRWXU);
		if (i == -1 && errno != EEXIST) {
			enc_die("Unable to create Coding directory.\n");
		}

		/* Determine original size of file; all size arithmetic below is
		   done in int, so larger files are rejected rather than truncated */
		if (stat(argv[1], &status) != 0) {
			enc_die("Unable to determine size of file.\n");
		}
		if (status.st_size < 0 || status.st_size > INT_MAX) {
			enc_die("Input file is too large.\n");
		}
		size = (int) status.st_size;
	} else {
		if (sscanf(argv[1]+1, "%d", &size) != 1 || size <= 0) {
			enc_die("Files starting with '-' should be sizes for randomly created input\n");
		}
		fp = NULL;
		srand48(time(0));
	}

	/* Find new size by determining next closest multiple of the stripe
	   unit, and then of the buffer size when buffering is requested */
	newsize = enc_round_up(size, unit);
	if (buffersize != 0) {
		newsize = enc_round_up(newsize, (size_t) buffersize);
	}

	/* Determine size of k+m files */
	blocksize = newsize/k;

	/* Allow for buffersize and determine number of read-ins */
	if (size > buffersize && buffersize != 0) {
		readins = newsize/buffersize;
		block_bytes = (size_t) buffersize;
		blocksize = buffersize/k;
	}
	else {
		readins = 1;
		buffersize = size;
		block_bytes = (size_t) newsize;
	}
	block = (char *) enc_xmalloc(sizeof(char)*block_bytes);
	/* Pre-fill with the pad character (ASCII '0', as used by the padding
	   code below) so the tail beyond the input is never uninitialised */
	memset(block, '0', block_bytes);

	/* Break inputfile name into the filename and extension */
	s1 = (char *) enc_xmalloc(sizeof(char)*(strlen(argv[1])+10));
	s2 = strrchr(argv[1], '/');
	if (s2 != NULL) {
		s2++;
		strcpy(s1, s2);
	}
	else {
		strcpy(s1, argv[1]);
	}
	s2 = strchr(s1, '.');
	if (s2 != NULL) {
		extension = strdup(s2);
		*s2 = '\0';
	} else {
		extension = strdup("");
	}
	if (extension == NULL) {
		perror("strdup");
		exit(1);
	}

	/* Allocate for full file name: directory, "/Coding/", base name,
	   "_k"/"_m" plus index or "_meta.txt", extension and terminator */
	fname_len = strlen(curdir) + strlen(s1) + strlen(extension) + 64;
	fname = (char *) enc_xmalloc(sizeof(char)*fname_len);

	/* Width used to zero-pad the file index: number of digits in k */
	md = snprintf(NULL, 0, "%d", k);

	/* Allocate data and coding */
	data = (char **) enc_xmalloc(sizeof(char*)*k);
	coding = (char **) enc_xmalloc(sizeof(char*)*m);
	for (i = 0; i < m; i++) {
		coding[i] = (char *) enc_xmalloc(sizeof(char)*blocksize);
		/* defined contents even when no encoder fills the buffer */
		memset(coding[i], 0, (size_t) blocksize);
	}

	/* Create coding matrix or bitmatrix and schedule */
	gettimeofday(&t3, NULL);
	switch(tech) {
		case No_Coding:
			break;
		case Reed_Sol_Van:
			matrix = reed_sol_vandermonde_coding_matrix(k, m, w);
			break;
		case Reed_Sol_R6_Op:
			/* nothing to precompute; encoding takes only k and w */
			break;
		case Cauchy_Orig:
			matrix = cauchy_original_coding_matrix(k, m, w);
			bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
			schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
			break;
		case Cauchy_Good:
			matrix = cauchy_good_general_coding_matrix(k, m, w);
			bitmatrix = jerasure_matrix_to_bitmatrix(k, m, w, matrix);
			schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
			break;
		case Liberation:
			bitmatrix = liberation_coding_bitmatrix(k, w);
			schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
			break;
		case Blaum_Roth:
			bitmatrix = blaum_roth_coding_bitmatrix(k, w);
			schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
			break;
		case Liber8tion:
			bitmatrix = liber8tion_coding_bitmatrix(k);
			schedule = jerasure_smart_bitmatrix_to_schedule(k, m, w, bitmatrix);
			break;
		default:
			break;
	}
	gettimeofday(&t4, NULL);
	totalsec += enc_elapsed(&t3, &t4);

	/* Read in data until finished */
	n = 1;
	total = 0;
	while (n <= readins) {
		/* Check if padding is needed, if so, add appropriate
		   number of '0' characters */
		if (total < size && total+buffersize <= size) {
			total += jfread(block, sizeof(char), buffersize, fp);
		}
		else if (total < size && total+buffersize > size) {
			extra = jfread(block, sizeof(char), buffersize, fp);
			for (i = extra; i < buffersize; i++) {
				block[i] = '0';
			}
		}
		else if (total == size) {
			for (i = 0; i < buffersize; i++) {
				block[i] = '0';
			}
		}

		/* Set pointers to point to file data */
		for (i = 0; i < k; i++) {
			data[i] = block+(i*blocksize);
		}

		gettimeofday(&t3, NULL);
		/* Encode according to coding method */
		switch(tech) {
			case No_Coding:
				break;
			case Reed_Sol_Van:
				jerasure_matrix_encode(k, m, w, matrix, data, coding, blocksize);
				break;
			case Reed_Sol_R6_Op:
				reed_sol_r6_encode(k, w, data, coding, blocksize);
				break;
			case Cauchy_Orig:
			case Cauchy_Good:
			case Liberation:
			case Blaum_Roth:
			case Liber8tion:
				jerasure_schedule_encode(k, m, w, schedule, data, coding, blocksize, packetsize);
				break;
			default:
				break;
		}
		gettimeofday(&t4, NULL);

		/* Write data and encoded data to k+m files.  In generated-input
		   mode nothing is written; the buffers are only cleared. */
		for (i = 1; i <= k; i++) {
			if (fp == NULL) {
				memset(data[i-1], 0, (size_t) blocksize);
			} else {
				snprintf(fname, fname_len, "%s/Coding/%s_k%0*d%s", curdir, s1, md, i, extension);
				enc_write_chunk(fname, n == 1, data[i-1], blocksize);
			}
		}
		for (i = 1; i <= m; i++) {
			if (fp == NULL) {
				/* clear the coding buffer itself: data[] has only k entries */
				memset(coding[i-1], 0, (size_t) blocksize);
			} else {
				snprintf(fname, fname_len, "%s/Coding/%s_m%0*d%s", curdir, s1, md, i, extension);
				enc_write_chunk(fname, n == 1, coding[i-1], blocksize);
			}
		}
		n++;

		/* Calculate encoding time */
		totalsec += enc_elapsed(&t3, &t4);
	}

	/* Create metadata file */
	if (fp != NULL) {
		FILE *meta;

		snprintf(fname, fname_len, "%s/Coding/%s_meta.txt", curdir, s1);
		meta = fopen(fname, "wb");
		if (meta == NULL) {
			fprintf(stderr, "Unable to open %s for writing.\n", fname);
			exit(1);
		}
		fprintf(meta, "%s\n", argv[1]);
		fprintf(meta, "%d\n", size);
		fprintf(meta, "%d %d %d %d %d\n", k, m, w, packetsize, buffersize);
		fprintf(meta, "%s\n", argv[4]);
		fprintf(meta, "%d\n", tech);
		fprintf(meta, "%d\n", readins);
		if (fclose(meta) != 0) {
			fprintf(stderr, "Unable to write %s.\n", fname);
			exit(1);
		}
		fclose(fp);
	}

	/* Free the memory allocated directly by main */
	for (i = 0; i < m; i++) {
		free(coding[i]);
	}
	free(coding);
	free(data);
	free(extension);
	free(s1);
	free(fname);
	free(block);
	free(curdir);

	/* Calculate rate in MB/sec and print.  The division is done in
	   floating point so inputs smaller than 1 MiB do not report 0. */
	gettimeofday(&t2, NULL);
	tsec = enc_elapsed(&t1, &t2);
	printf("Encoding (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/totalsec);
	printf("En_Total (MB/sec): %0.10f\n", (((double) size)/1024.0/1024.0)/tsec);
	return 0;
}
/* is_prime returns 1 if w is a prime number, 0 otherwise.
   Values below 2 (including zero and negatives) are not prime.
   Uses trial division by odd numbers up to sqrt(w), so every int
   is answered explicitly instead of only those covered by a table. */
int is_prime(int w) {
	int d;

	if (w < 2) return 0;
	if (w < 4) return 1;          /* 2 and 3 */
	if (w % 2 == 0) return 0;
	/* "d <= w / d" is d*d <= w without the risk of overflowing int */
	for (d = 3; d <= w / d; d += 2) {
		if (w % d == 0) return 0;
	}
	return 1;
}
/* Handles ctrl-\ event */
void ctrl_bs_handler(int dummy) {
time_t mytime;
mytime = time(0);
fprintf(stderr, "\n%s\n", ctime(&mytime));
fprintf(stderr, "You just typed ctrl-\\ in encoder.c.\n");
fprintf(stderr, "Total number of read ins = %d\n", readins);
fprintf(stderr, "Current read in: %d\n", n);
fprintf(stderr, "Method: %s\n\n", Methods[method]);
signal(SIGQUIT, ctrl_bs_handler);
}