diff --git a/GNUmakefile b/GNUmakefile
index e722c01..bb7ead8 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -5,19 +5,21 @@
 
 SRCS = gf_w4.c gf_w8.c gf_w16.c gf_w32.c gf_w64.c gf_w128.c gf_wgen.c gf.c gf_unit.c \
        gf_time.c gf_mult.c gf_method.c gf_methods.c gf_div.c gf_rand.c gf_general.c \
-       gf_poly.c gf_example_1.c gf_add.c gf_example_2.c gf_example_3.c gf_example_4.c
+       gf_poly.c gf_example_1.c gf_add.c gf_example_2.c gf_example_3.c gf_example_4.c \
+       gf_inline_time.c
 
 HDRS = gf_complete.h gf_int.h
 
 EXECUTABLES = gf_mult gf_div gf_add gf_unit gf_time gf_methods gf_poly \
-              gf_example_1 gf_example_2 gf_example_3 gf_example_4
+              gf_example_1 gf_example_2 gf_example_3 gf_example_4 gf_inline_time
 
-CFLAGS = -O3 -msse4 -maes -mpclmul -DINTEL_SSE4 -DINTEL_PCLMUL
-LDFLAGS = -O3 -msse4 -maes -mpclmul
+# Use these if you have INTEL_PCLMUL
+# CFLAGS = -O3 -msse4 -maes -mpclmul -DINTEL_SSE4 -DINTEL_PCLMUL
+# LDFLAGS = -O3 -msse4 -maes -mpclmul
 
 # Use these if you don't have INTEL_PCLMUL
-# CFLAGS = -O3 -msse4 -DINTEL_SSE4
-# LDFLAGS = -O3 -msse4
+CFLAGS = -O3 -msse4 -DINTEL_SSE4
+LDFLAGS = -O3 -msse4
 
 RM = /bin/rm -f
 
@@ -38,6 +40,7 @@ gf_complete.a: $(LIBOBJS)
 
 gf_methods: gf_methods.o gf_complete.a
 gf_time: gf_time.o gf_complete.a
+gf_inline_time: gf_inline_time.o gf_complete.a
 gf_unit: gf_unit.o gf_complete.a
 gf_example_1: gf_example_1.o gf_complete.a
 gf_example_2: gf_example_2.o gf_complete.a
@@ -57,6 +60,7 @@ spotless: clean
 gf_div.o: gf_complete.h gf_method.h
 gf_methods.o: gf_complete.h gf_method.h
 gf_time.o: gf_complete.h gf_method.h gf_rand.h gf_general.h
+gf_inline_time.o: gf_complete.h gf_rand.h
 gf_wgen.o: gf_int.h gf_complete.h
 gf_w4.o: gf_int.h gf_complete.h
 gf_w8.o: gf_int.h gf_complete.h
diff --git a/gf_complete.h b/gf_complete.h
index 2336cfc..ac6688e 100644
--- a/gf_complete.h
+++ b/gf_complete.h
@@ -130,3 +130,36 @@ extern int gf_scratch_size(int w,
                            int arg2);
 
 extern int gf_free(GFP gf, int recursive);
+
+/* This is support for inline single multiplications and divisions.
+   I know it's yucky, but if you've got to be fast, you've got to be fast.
+   We'll support inlines for w=4, w=8 and w=16.
+
+   To use inline multiplication and division with w=4 or 8, you should use the
+   default gf_t, or one with a single table.  Otherwise, gf_w4/8_get_mult_table()
+   will return NULL.
+
+   For w=16, the gf_t must use log-table multiplication; otherwise the w=16
+   table getters return NULL.  GF_W16_INLINE_MULT takes the table from
+   gf_w16_get_mult_alog_table(), and GF_W16_INLINE_DIV takes the table from
+   gf_w16_get_div_alog_table().
+
+   The macros expand their arguments in place, and the w=16 macros evaluate
+   a and b more than once: do not pass expressions with side effects. */
+
+uint8_t *gf_w4_get_mult_table(GFP gf);
+uint8_t *gf_w4_get_div_table(GFP gf);
+
+#define GF_W4_INLINE_MULTDIV(table, a, b) ((table)[((a)<<4)|(b)])
+
+uint8_t *gf_w8_get_mult_table(GFP gf);
+uint8_t *gf_w8_get_div_table(GFP gf);
+
+#define GF_W8_INLINE_MULTDIV(table, a, b) ((table)[(((uint32_t) (a))<<8)|(b)])
+
+uint16_t *gf_w16_get_log_table(GFP gf);
+uint16_t *gf_w16_get_mult_alog_table(GFP gf);
+uint16_t *gf_w16_get_div_alog_table(GFP gf);
+
+#define GF_W16_INLINE_MULT(log, alog, a, b) (((a) == 0 || (b) == 0) ? 0 : ((alog)[(uint32_t)(log)[a]+(uint32_t)(log)[b]]))
+#define GF_W16_INLINE_DIV(log, alog, a, b) (((a) == 0 || (b) == 0) ? 0 : ((alog)[(int)(log)[a]-(int)(log)[b]]))
diff --git a/gf_time.c b/gf_time.c
index 1c72bcd..8313b05 100644
--- a/gf_time.c
+++ b/gf_time.c
@@ -16,8 +16,6 @@
 #include "gf_rand.h"
 #include "gf_general.h"
 
-#define REGION_SIZE (4096)
-
 void
 timer_start (double *t)
 {
diff --git a/gf_unit.c b/gf_unit.c
index 4eb3d2a..8fe253c 100644
--- a/gf_unit.c
+++ b/gf_unit.c
@@ -54,6 +54,8 @@ int main(int argc, char **argv)
   time_t t0;
   gf_internal_t *h;
   gf_general_t *a, *b, *c, *d, *ai, *bi;
+  uint8_t a8, b8, c8, *mult4 = NULL, *div4 = NULL, *mult8 = NULL, *div8 = NULL;
+  uint16_t a16, b16, c16, d16, *log16 = NULL, *alog16 = NULL;
   char as[50], bs[50], cs[50], ds[50], ais[50], bis[50];
   uint32_t mask;
   char *ra, *rb, *rc, *rd, *target;
@@ -97,6 +99,21 @@ int main(int argc, char **argv)
 
   if (!gf_init_easy(&gf_def, w)) problem("No default for this value of w");
 
+  if (w == 4) {
+    mult4 = gf_w4_get_mult_table(&gf);
+    div4 = gf_w4_get_div_table(&gf);
+  }
+
+  if (w == 8) {
+    mult8 = gf_w8_get_mult_table(&gf);
+    div8 = gf_w8_get_div_table(&gf);
+  }
+
+  if (w == 16) {
+    log16 = gf_w16_get_log_table(&gf);
+    alog16 = gf_w16_get_mult_alog_table(&gf);
+  }
+
   if (verbose) printf("Seed: %ld\n", t0);
 
   if (single) {
@@ -132,6 +149,45 @@ int main(int argc, char **argv)
       tested = 0;
       gf_general_multiply(&gf, a, b, c);
 
+      /* If w is 4, 8 or 16, then there are inline multiplication/division methods.
+         Test them here. */
+
+      if (w == 4 && mult4 != NULL) {
+        a8 = a->w32;
+        b8 = b->w32;
+        c8 = GF_W4_INLINE_MULTDIV(mult4, a8, b8);
+        if (c8 != c->w32) {
+          printf("Error in inline multiplication. %d * %d. Inline = %d. Default = %d.\n",
+                 a8, b8, c8, c->w32);
+          exit(1);
+        }
+      }
+
+      if (w == 8 && mult8 != NULL) {
+        a8 = a->w32;
+        b8 = b->w32;
+        c8 = GF_W8_INLINE_MULTDIV(mult8, a8, b8);
+        if (c8 != c->w32) {
+          printf("Error in inline multiplication. %d * %d. Inline = %d. Default = %d.\n",
+                 a8, b8, c8, c->w32);
+          exit(1);
+        }
+      }
+
+      if (w == 16 && log16 != NULL) {
+        a16 = a->w32;
+        b16 = b->w32;
+        c16 = GF_W16_INLINE_MULT(log16, alog16, a16, b16);
+        if (c16 != c->w32) {
+          printf("Error in inline multiplication. %d * %d. Inline = %d. Default = %d.\n",
+                 a16, b16, c16, c->w32);
+          printf("%d %d\n", log16[a16], log16[b16]);
+          top = log16[a16] + log16[b16];
+          printf("%d %d\n", top, alog16[top]);
+          exit(1);
+        }
+      }
+
       /* If this is not composite, then first test against the default: */
 
       if (h->mult_type != GF_MULT_COMPOSITE) {
diff --git a/gf_w16.c b/gf_w16.c
index 5752415..d6fffc3 100644
--- a/gf_w16.c
+++ b/gf_w16.c
@@ -17,9 +17,10 @@
 #define GF_S_GF_8_2 (63)
 
 struct gf_logtable_data {
-  int log_tbl[GF_FIELD_SIZE];
+  uint16_t log_tbl[GF_FIELD_SIZE];
   uint16_t antilog_tbl[GF_FIELD_SIZE * 2];
   uint16_t inv_tbl[GF_FIELD_SIZE];
+  uint16_t *d_antilog;   /* antilog_tbl + GF_MULT_GROUP_SIZE: indexable by log(a) - log(b) */
 };
 
 struct gf_zero_logtable_data {
@@ -308,7 +309,7 @@ gf_w16_log_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
   struct gf_logtable_data *ltd;
 
   ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
-  return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]];
+  return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[(int) ltd->log_tbl[a] + (int) ltd->log_tbl[b]];
 }
 
 static
@@ -322,8 +323,8 @@ gf_w16_log_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
   if (a == 0 || b == 0) return 0;
   ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private;
 
-  log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE);
-  return (ltd->antilog_tbl[log_sum]);
+  log_sum = (int) ltd->log_tbl[a] - (int) ltd->log_tbl[b];
+  return (ltd->d_antilog[log_sum]);
 }
 
 static
@@ -347,6 +348,7 @@ int gf_w16_log_init(gf_t *gf)
   ltd = h->private;
 
   ltd->log_tbl[0] = 0;
+  ltd->d_antilog = ltd->antilog_tbl + GF_MULT_GROUP_SIZE;
 
   b = 1;
   for (i = 0; i < GF_MULT_GROUP_SIZE; i++) {
@@ -1945,3 +1947,51 @@ int gf_w16_init(gf_t *gf)
   }
   return 1;
 }
+
+/* Inline setup functions */
+
+/* Returns the log table for GF_W16_INLINE_MULT and GF_W16_INLINE_DIV, or NULL
+   when gf is not using log-table multiplication. */
+uint16_t *gf_w16_get_log_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_logtable_data *ltd;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w16_log_multiply) {
+    ltd = (struct gf_logtable_data *) h->private;
+    return (uint16_t *) ltd->log_tbl;
+  }
+  return NULL;
+}
+
+/* Returns the antilog table for GF_W16_INLINE_MULT, or NULL when gf is not
+   using log-table multiplication. */
+uint16_t *gf_w16_get_mult_alog_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_logtable_data *ltd;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w16_log_multiply) {
+    ltd = (struct gf_logtable_data *) h->private;
+    return (uint16_t *) ltd->antilog_tbl;
+  }
+  return NULL;
+}
+
+/* Returns the antilog table for GF_W16_INLINE_DIV, or NULL when gf is not
+   using log-table multiplication.  The pointer is offset into the antilog
+   table so that it can be indexed by log(a) - log(b), which may be negative. */
+uint16_t *gf_w16_get_div_alog_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_logtable_data *ltd;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w16_log_multiply) {
+    ltd = (struct gf_logtable_data *) h->private;
+    return (uint16_t *) ltd->d_antilog;
+  }
+  return NULL;
+}
diff --git a/gf_w4.c b/gf_w4.c
index b80da4d..1175e01 100644
--- a/gf_w4.c
+++ b/gf_w4.c
@@ -146,7 +146,6 @@ gf_val_32_t gf_w4_matrix (gf_t *gf, gf_val_32_t b)
   return gf_bitmatrix_inverse(b, 4, ((gf_internal_t *) (gf->scratch))->prim_poly);
 }
 
-
 /* ------------------------------------------------------------
   IMPLEMENTATION: LOG_TABLE:
 
@@ -2010,3 +2009,36 @@ gf_w4_init (gf_t *gf)
   }
   return 1;
 }
+
+/* Inline setup functions */
+
+/* Returns the multiplication table for GF_W4_INLINE_MULTDIV, or NULL when
+   gf is not using single-table multiplication. */
+uint8_t *gf_w4_get_mult_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_single_table_data *std;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w4_single_table_multiply) {
+    std = (struct gf_single_table_data *) h->private;
+    return (uint8_t *) std->mult;
+  }
+  return NULL;
+}
+
+/* Returns the division table for GF_W4_INLINE_MULTDIV, or NULL when
+   gf is not using single-table multiplication. */
+uint8_t *gf_w4_get_div_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_single_table_data *std;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w4_single_table_multiply) {
+    std = (struct gf_single_table_data *) h->private;
+    return (uint8_t *) std->div;
+  }
+  return NULL;
+}
+
diff --git a/gf_w8.c b/gf_w8.c
index 0dbd472..306f911 100644
--- a/gf_w8.c
+++ b/gf_w8.c
@@ -1971,3 +1971,44 @@ int gf_w8_init(gf_t *gf)
 
   return 1;
 }
+
+
+/* Inline setup functions */
+
+/* Returns the multiplication table for GF_W8_INLINE_MULTDIV, or NULL when gf
+   uses neither the default nor the single-table multiplication. */
+uint8_t *gf_w8_get_mult_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_w8_default_data *ftd;
+  struct gf_w8_single_table_data *std;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w8_default_multiply) {
+    ftd = (struct gf_w8_default_data *) h->private;
+    return (uint8_t *) ftd->multtable;
+  } else if (gf->multiply.w32 == gf_w8_table_multiply) {
+    std = (struct gf_w8_single_table_data *) h->private;
+    return (uint8_t *) std->multtable;
+  }
+  return NULL;
+}
+
+/* Returns the division table for GF_W8_INLINE_MULTDIV, or NULL when gf
+   uses neither the default nor the single-table multiplication. */
+uint8_t *gf_w8_get_div_table(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_w8_default_data *ftd;
+  struct gf_w8_single_table_data *std;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (gf->multiply.w32 == gf_w8_default_multiply) {
+    ftd = (struct gf_w8_default_data *) h->private;
+    return (uint8_t *) ftd->divtable;
+  } else if (gf->multiply.w32 == gf_w8_table_multiply) {
+    std = (struct gf_w8_single_table_data *) h->private;
+    return (uint8_t *) std->divtable;
+  }
+  return NULL;
+}