Added LOG_ZERO_EXT and modified LOG_ZERO. The new LOG_ZERO doesn't have an entry
for two times the sentinel, because that entry is only needed for single
multiplication (both operands zero).
I haven't fixed w=16 yet with this.  Monday maybe?  Timings on our lab machines
show no big difference.  We'd only expect a difference in single multiplies, and
it's in the noise really.

UNIX> gf_time 8 MDG 0 10240 10240 LOG - -
Seed: 0
      Multiply:             0.231191 s   Mops:    100.000       432.542 Mega-ops/s
        Divide:             0.229992 s   Mops:    100.000       434.797 Mega-ops/s
 Region-Random: XOR: 0      0.095446 s     MB:    100.000      1047.712 MB/s
 Region-Random: XOR: 1      0.115485 s     MB:    100.000       865.914 MB/s
UNIX> gf_time 8 MDG 0 10240 10240 LOG_ZERO - -
Seed: 0
      Multiply:             0.228568 s   Mops:    100.000       437.506 Mega-ops/s
        Divide:             0.227718 s   Mops:    100.000       439.140 Mega-ops/s
 Region-Random: XOR: 0      0.085062 s     MB:    100.000      1175.613 MB/s
 Region-Random: XOR: 1      0.095891 s     MB:    100.000      1042.846 MB/s
UNIX> gf_time 8 MDG 0 10240 10240 LOG_ZERO_EXT - -
Seed: 0
      Multiply:             0.228960 s   Mops:    100.000       436.758 Mega-ops/s
        Divide:             0.227758 s   Mops:    100.000       439.063 Mega-ops/s
 Region-Random: XOR: 0      0.085180 s     MB:    100.000      1173.981 MB/s
 Region-Random: XOR: 1      0.095931 s     MB:    100.000      1042.421 MB/s
UNIX>
master
Jim Plank 2013-03-08 16:31:42 -05:00
parent 47896e9ddc
commit d05a931f04
7 changed files with 158 additions and 145 deletions

View File

@ -15,6 +15,10 @@ EXECUTABLES = gf_mult gf_div gf_add gf_unit gf_time gf_methods gf_poly \
CFLAGS = -O3 -msse4 -maes -mpclmul -DINTEL_SSE4 -DINTEL_PCLMUL
LDFLAGS = -O3 -msse4 -maes -mpclmul
# Use these if you don't have INTEL_PCLMUL
# CFLAGS = -O3 -msse4 -DINTEL_SSE4
# LDFLAGS = -O3 -msse4
RM = /bin/rm -f
LIBOBJS = gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o \

116
gf_div.c
View File

@ -1,116 +0,0 @@
/*
* gf_div.c
*
* Divides two numbers in GF(2^w)
*/
#include <stdio.h>
#include <getopt.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "gf_complete.h"
#include "gf_method.h"
/*
 * Print the gf_div command-line help on stderr, then the optional extra
 * message s (skipped when s is NULL), and terminate with exit status 1.
 * This function never returns.
 */
void usage(char *s)
{
  static const char *const help_lines[] = {
    "usage: gf_div a b w [method] - does division of a and b in GF(2^w)\n",
    " If w has an h on the end, treat a, b and the quotient as hexadecimal (no 0x required)\n",
    "\n",
    " legal w are: 1-32, 64 and 128\n",
    " 128 is hex only (i.e. '128' will be an error - do '128h')\n",
    "\n",
    " For method specification, type gf_methods\n"
  };
  const size_t n_lines = sizeof(help_lines) / sizeof(help_lines[0]);
  size_t k;

  for (k = 0; k < n_lines; k++) {
    fputs(help_lines[k], stderr);
  }

  /* Caller-supplied diagnostic, printed verbatim after the help text. */
  if (s) {
    fputs(s, stderr);
  }

  exit(1);
}
/*
 * Parse a hexadecimal string of at most 32 digits into a 128-bit value.
 *
 *   s - NUL-terminated hex string (no 0x required). It is modified
 *       temporarily while parsing and restored before returning.
 *   v - receives the result: v[0] is the high 64 bits, v[1] the low 64 bits.
 *       v is only written when parsing succeeds.
 *
 * Returns 1 on success and 0 on failure (string longer than 32 characters,
 * empty string, or no hex digits where a number is expected).
 */
int read_128(char *s, uint64_t *v)
{
  unsigned long long hi = 0;
  unsigned long long lo = 0;
  size_t l;
  char save;
  int t;

  l = strlen(s);
  if (l > 32) return 0;

  if (l > 16) {
    /* The last 16 characters are the low word. */
    if (sscanf(s + (l-16), "%llx", &lo) != 1) return 0;

    /* Cut the string at the word boundary so the high word can be scanned alone. */
    save = s[l-16];
    s[l-16] = '\0';
    t = sscanf(s, "%llx", &hi);
    s[l-16] = save;

    /* sscanf reports EOF (not 0) on empty input, so require exactly one conversion. */
    if (t != 1) return 0;
  } else {
    if (sscanf(s, "%llx", &lo) != 1) return 0;
  }

  /* Scan into unsigned long long and convert, rather than casting uint64_t pointers. */
  v[0] = (uint64_t) hi;
  v[1] = (uint64_t) lo;
  return 1;
}
/*
 * Write a 128-bit value to stdout as hexadecimal, followed by a newline.
 * v[0] is the high 64 bits and v[1] the low 64 bits.  When the high word is
 * zero only the low word is printed; otherwise the low word is zero-padded
 * to 16 digits so the two words concatenate correctly.
 */
void print_128(uint64_t *v)
{
  const unsigned long long hi = (unsigned long long) v[0];
  const unsigned long long lo = (unsigned long long) v[1];

  if (hi == 0) {
    printf("%llx\n", lo);
    return;
  }

  printf("%llx%016llx\n", hi, lo);
}
/*
 * Entry point: gf_div a b w [method]
 *
 * Reads w from argv[3] (an 'h' anywhere in that argument selects hexadecimal
 * input/output), sets up the field via create_gf_from_argv() starting at
 * argv[4], divides a by b with the divide routine matching the word size and
 * prints the quotient on stdout.  Every malformed argument is reported
 * through usage(), which does not return.
 */
int main(int argc, char **argv)
{
  int hex, w;
  uint32_t a, b, c, top;
  unsigned long long a64, b64, c64;   /* exact match for the %llx / %llu conversions */
  uint64_t a128[2], b128[2], c128[2];
  const char *format;
  gf_t gf;

  if (argc < 4) usage(NULL);

  /* sscanf returns EOF (not 0) on empty input, so demand exactly one
     conversion; otherwise w would be used uninitialized. */
  if (sscanf(argv[3], "%d", &w) != 1) usage("Bad w\n");
  if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w\n");

  hex = (strchr(argv[3], 'h') != NULL);

  if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("\nBad Method\n");

  /* 128-bit values can only be given in hexadecimal. */
  if (!hex && w == 128) usage(NULL);

  if (w <= 32) {
    format = (hex) ? "%x" : "%u";
    if (sscanf(argv[1], format, &a) != 1) usage("Bad a\n");
    if (sscanf(argv[2], format, &b) != 1) usage("Bad b\n");

    if (w < 32) {
      /* Operands must fit in w bits; the unsigned shift is well defined for w up to 31. */
      top = ((uint32_t) 1) << w;
      if (a >= top) usage("a is too large\n");
      if (b >= top) usage("b is too large\n");
    }

    c = gf.divide.w32(&gf, a, b);
    printf(format, c);
    printf("\n");
  } else if (w == 64) {
    format = (hex) ? "%llx" : "%llu";
    if (sscanf(argv[1], format, &a64) != 1) usage("Bad a\n");
    if (sscanf(argv[2], format, &b64) != 1) usage("Bad b\n");

    c64 = gf.divide.w64(&gf, a64, b64);
    printf(format, c64);
    printf("\n");
  } else if (w == 128) {
    /* read_128 yields 1 only when the whole value was parsed. */
    if (read_128(argv[1], a128) != 1) usage("Bad a\n");
    if (read_128(argv[2], b128) != 1) usage("Bad b\n");

    gf.divide.w128(&gf, a128, b128, c128);
    print_128(c128);
  }

  exit(0);
}

View File

@ -249,19 +249,53 @@ void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *o
void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
{
void *top;
gf_general_t g;
uint8_t *r8;
uint16_t *r16;
uint32_t *r32;
uint64_t *r64;
int i;
/* If w is 8, 16, 32, 64 or 128, this is easy --
just fill the regions with random bytes.
top = rb+size;
/* If w is 8, 16, 32, 64 or 128, fill the regions with random bytes.
However, don't allow for zeros in rb, because that will screw up
division.
Otherwise, treat every four bytes as an uint32_t
and fill it with a random value mod (1 << w).
*/
if (w == 8 || w == 16 || w == 32 || w == 64 || w == 128) {
MOA_Fill_Random_Region (ra, size);
MOA_Fill_Random_Region (rb, size);
while (rb < top) {
gf_general_set_random(&g, w, 0);
switch (w) {
case 8:
r8 = (uint8_t *) rb;
*r8 = g.w32;
break;
case 16:
r16 = (uint16_t *) rb;
*r16 = g.w32;
break;
case 32:
r32 = (uint32_t *) rb;
*r32 = g.w32;
break;
case 64:
r64 = (uint64_t *) rb;
*r64 = g.w64;
break;
case 128:
r64 = (uint64_t *) rb;
r64[0] = g.w128[0];
r64[1] = g.w128[1];
break;
}
rb += (w/8);
}
} else {
r32 = (uint32_t *) ra;
for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1);

View File

@ -28,6 +28,7 @@ void methods_to_stderr()
fprintf(stderr, " TABLE: Full multiplication table\n");
fprintf(stderr, " LOG: Discrete logs\n");
fprintf(stderr, " LOG_ZERO: Discrete logs with a large table for zeros\n");
fprintf(stderr, " LOG_ZERO_EXT: Discrete logs with an extra large table for zeros\n");
fprintf(stderr, " SPLIT g_a g_b: Split tables defined by g_a and g_b\n");
fprintf(stderr, " COMPOSITE k rec METHOD: Composite field. GF((2^l)^k), l=w/k.\n");
fprintf(stderr, " rec = 0 means inline single multiplication\n");
@ -100,6 +101,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
mult_type = GF_MULT_LOG_TABLE;
arg1 = 1;
starting++;
} else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) {
mult_type = GF_MULT_LOG_TABLE;
arg1 = 2;
starting++;
} else if (strcmp(argv[starting], "SPLIT") == 0) {
mult_type = GF_MULT_SPLIT_TABLE;
if (argc < starting+5) return 0;

View File

@ -12,9 +12,9 @@
#include "gf_complete.h"
#include "gf_method.h"
#define NMULTS (14)
#define NMULTS (15)
static char *mults[NMULTS] = { "SHIFT", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" };
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" };
#define NREGIONS (96)
static char *regions[NREGIONS] = { "-", "SINGLE", "DOUBLE", "QUAD",

View File

@ -224,6 +224,7 @@ gf_w64_clm_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
void
gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
#ifdef INTEL_PCLMUL
gf_internal_t *h;
int i, j, k;
uint8_t *s8, *d8, *dtop;
@ -305,6 +306,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
}
}
gf_do_final_region_alignment(&rd);
#endif
}
void

132
gf_w8.c
View File

@ -24,11 +24,19 @@ struct gf_w8_logtable_data {
};
struct gf_w8_logzero_table_data {
uint16_t log_tbl[GF_FIELD_SIZE];
short log_tbl[GF_FIELD_SIZE]; /* Make this signed, so that we can divide easily */
uint8_t antilog_tbl[512+512+1];
uint8_t *div_tbl;
uint8_t *inv_tbl;
};
/* Lookup tables for the compact LOG_ZERO method; gf_w8_log_init fills this
   layout when arg1 == 1.  Field order matters -- see the note on antilog_tbl. */
struct gf_w8_logzero_small_table_data {
short log_tbl[GF_FIELD_SIZE]; /* Make this signed, so that we can divide easily
                                 (differences of two logs may be negative).
                                 gf_w8_log_init stores the sentinel 510 in log_tbl[0]. */
uint8_t antilog_tbl[255*3];   /* gf_w8_log_init writes each antilog at i and at
                                 i+GF_MULT_GROUP_SIZE (presumably 255 -- confirm) and
                                 zero-fills the 255 bytes starting at index 510, so a sum
                                 that includes one sentinel reads 0.
                                 NOTE(review): the largest difference seen through div_tbl
                                 (510 - 0) maps to antilog_tbl[765], one past this array,
                                 i.e. onto inv_tbl[0]; do not reorder or pad these fields
                                 unless every lookup guards zero operands. */
uint8_t inv_tbl[GF_FIELD_SIZE]; /* inv_tbl[a] is what gf_w8_logzero_small_inverse returns;
                                   gf_w8_log_init sets inv_tbl[0] to 0 as a placeholder. */
uint8_t *div_tbl;             /* Set to antilog_tbl + 255 by gf_w8_log_init so it can be
                                 indexed directly with log_tbl[a] - log_tbl[b]. */
};
/* Don't change the order of these relative to gf_w8_half_table_data */
struct gf_w8_default_data {
@ -217,7 +225,7 @@ gf_w8_logzero_multiply (gf_t *gf, uint32_t a, uint32_t b)
struct gf_w8_logzero_table_data *ltd;
ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
return ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])];
return ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]];
}
static
@ -228,7 +236,30 @@ gf_w8_logzero_divide (gf_t *gf, uint32_t a, uint32_t b)
struct gf_w8_logzero_table_data *ltd;
ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
return ltd->antilog_tbl[(unsigned)((255 + ltd->log_tbl[a]) - ltd->log_tbl[b])];
return ltd->div_tbl[ltd->log_tbl[a] - ltd->log_tbl[b]];
}
/*
 * Single multiplication for the compact LOG_ZERO tables: returns the antilog
 * of log_tbl[a] + log_tbl[b].  b == 0 is answered directly with 0, so at
 * most one zero-sentinel can enter the sum used to index antilog_tbl.
 */
static
inline
uint32_t
gf_w8_logzero_small_multiply (gf_t *gf, uint32_t a, uint32_t b)
{
  gf_internal_t *h = (gf_internal_t *) gf->scratch;
  struct gf_w8_logzero_small_table_data *tbl =
      (struct gf_w8_logzero_small_table_data *) h->private;
  int log_sum;

  if (b == 0) {
    return 0;
  }

  log_sum = tbl->log_tbl[a] + tbl->log_tbl[b];
  return tbl->antilog_tbl[log_sum];
}
/*
 * Single division for the compact LOG_ZERO tables: returns
 * div_tbl[log_tbl[a] - log_tbl[b]].
 *
 * a == 0 is answered directly with 0.  gf_w8_log_init stores 510 in
 * log_tbl[0] and points div_tbl at antilog_tbl + 255, so without the guard
 * 0 / 1 (log_tbl[1] == 0) would index antilog_tbl[765], one element past the
 * 255*3-entry array.  Every other 0 / b lookup lands in the zero-filled tail
 * and yields 0 as well, so results for b != 0 are unchanged.
 *
 * As before, b == 0 is not checked; callers must not divide by zero.
 */
static
inline
uint32_t
gf_w8_logzero_small_divide (gf_t *gf, uint32_t a, uint32_t b)
{
  struct gf_w8_logzero_small_table_data *std;

  std = (struct gf_w8_logzero_small_table_data *) ((gf_internal_t *) gf->scratch)->private;
  if (a == 0) return 0;
  return std->div_tbl[std->log_tbl[a] - std->log_tbl[b]];
}
static
@ -277,6 +308,16 @@ gf_w8_logzero_inverse (gf_t *gf, uint32_t a)
return (ltd->inv_tbl[a]);
}
/*
 * Inverse lookup for the compact LOG_ZERO tables: returns inv_tbl[a] from the
 * table data stored in the field's scratch area.
 */
static
uint32_t
gf_w8_logzero_small_inverse (gf_t *gf, uint32_t a)
{
  gf_internal_t *h = (gf_internal_t *) gf->scratch;
  struct gf_w8_logzero_small_table_data *tbl =
      (struct gf_w8_logzero_small_table_data *) h->private;

  return tbl->inv_tbl[a];
}
static
void
gf_w8_log_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
@ -314,23 +355,37 @@ gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int
uint8_t lv, b, c;
uint8_t *s8, *d8;
struct gf_w8_logzero_table_data *ltd;
struct gf_w8_logzero_small_table_data *std;
short *log;
uint8_t *alt;
gf_internal_t *h;
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
h = (gf_internal_t *) gf->scratch;
if (h->arg1 == 1) {
std = (struct gf_w8_logzero_small_table_data *) h->private;
log = std->log_tbl;
alt = std->antilog_tbl;
} else {
ltd = (struct gf_w8_logzero_table_data *) h->private;
log = ltd->log_tbl;
alt = ltd->antilog_tbl;
}
s8 = (uint8_t *) src;
d8 = (uint8_t *) dest;
lv = ltd->log_tbl[val];
lv = log[val];
if (xor) {
for (i = 0; i < bytes; i++) {
d8[i] ^= (ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
d8[i] ^= (alt[lv + log[s8[i]]]);
}
} else {
for (i = 0; i < bytes; i++) {
d8[i] = (ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
d8[i] = (alt[lv + log[s8[i]]]);
}
}
}
@ -341,6 +396,7 @@ int gf_w8_log_init(gf_t *gf)
gf_internal_t *h;
struct gf_w8_logtable_data *ltd;
struct gf_w8_logzero_table_data *ztd;
struct gf_w8_logzero_small_table_data *std;
uint8_t *alt;
uint8_t *inv;
int i, b;
@ -350,25 +406,35 @@ int gf_w8_log_init(gf_t *gf)
ltd = h->private;
alt = ltd->antilog_tbl;
inv = ltd->inv_tbl;
} else if (h->arg1 == 1) {
std = h->private;
alt = std->antilog_tbl;
std->div_tbl = (alt + 255);
inv = std->inv_tbl;
} else {
ztd = h->private;
alt = ztd->antilog_tbl;
ztd->inv_tbl = (alt + 512 + 256);
ztd->div_tbl = (alt + 255);
inv = ztd->inv_tbl;
}
if (h->arg1 == 1) {
ztd->log_tbl[0] = 512;
} else {
if (h->arg1 == 0) {
ltd->log_tbl[0] = 0;
} else if (h->arg1 == 1) {
std->log_tbl[0] = 510;
} else {
ztd->log_tbl[0] = 512;
}
b = 1;
for (i = 0; i < GF_MULT_GROUP_SIZE; i++) {
if (h->arg1 == 1) {
ztd->log_tbl[b] = i;
} else {
if (h->arg1 == 0) {
ltd->log_tbl[b] = i;
} else if (h->arg1 == 1) {
std->log_tbl[b] = i;
} else {
ztd->log_tbl[b] = i;
}
alt[i] = b;
alt[i+GF_MULT_GROUP_SIZE] = b;
@ -377,22 +443,39 @@ int gf_w8_log_init(gf_t *gf)
b = b ^ h->prim_poly;
}
}
if (h->arg1 == 1) {
if (h->arg1 == 1) bzero(alt+510, 255);
if (h->arg1 == 2) {
bzero(alt+512, 255);
alt[512+512] = 0;
}
inv[0] = 0; /* Not really, but we need to fill it with something */
inv[1] = 1;
for (i = 2; i < GF_FIELD_SIZE; i++) {
b = (h->arg1 == 1) ? ztd->log_tbl[i] : ltd->log_tbl[i];
inv[i] = alt[GF_MULT_GROUP_SIZE-b];
i = 1;
b = GF_MULT_GROUP_SIZE;
do {
inv[i] = alt[b];
i <<= 1;
if (i & (1 << 8)) i ^= h->prim_poly;
b--;
} while (i != 1);
if (h->arg1 == 0) {
gf->inverse.w32 = gf_w8_log_inverse;
gf->divide.w32 = gf_w8_log_divide;
gf->multiply.w32 = gf_w8_log_multiply;
gf->multiply_region.w32 = gf_w8_log_multiply_region;
} else if (h->arg1 == 1) {
gf->inverse.w32 = gf_w8_logzero_small_inverse;
gf->divide.w32 = gf_w8_logzero_small_divide;
gf->multiply.w32 = gf_w8_logzero_small_multiply;
gf->multiply_region.w32 = gf_w8_logzero_multiply_region;
} else {
gf->inverse.w32 = gf_w8_logzero_inverse;
gf->divide.w32 = gf_w8_logzero_divide;
gf->multiply.w32 = gf_w8_logzero_multiply;
gf->multiply_region.w32 = gf_w8_logzero_multiply_region;
}
gf->inverse.w32 = (h->arg1 == 0) ? gf_w8_log_inverse : gf_w8_logzero_inverse;
gf->divide.w32 = (h->arg1 == 0) ? gf_w8_log_divide : gf_w8_logzero_divide;
gf->multiply.w32 = (h->arg1 == 0) ? gf_w8_log_multiply : gf_w8_logzero_multiply;
gf->multiply_region.w32 = (h->arg1 == 0) ? gf_w8_log_multiply_region : gf_w8_logzero_multiply_region;
return 1;
}
@ -1818,9 +1901,10 @@ int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1
return -1;
break;
case GF_MULT_LOG_TABLE:
if ((arg1 != 0 && arg1 != 1) || arg2 != 0) return -1;
if ((arg1 != 0 && arg1 != 1 && arg1 != 2) || arg2 != 0) return -1;
if (region_type != 0 && region_type != GF_REGION_CAUCHY) return -1;
if (arg1 == 0) return sizeof(gf_internal_t) + sizeof(struct gf_w8_logtable_data) + 64;
if (arg1 == 1) return sizeof(gf_internal_t) + sizeof(struct gf_w8_logzero_small_table_data) + 64;
return sizeof(gf_internal_t) + sizeof(struct gf_w8_logzero_table_data) + 64;
break;
case GF_MULT_SHIFT: