Added LOG_ZERO_EXT and modified LOG_ZERO. The new LOG_ZERO doesn't have an entry
for two times the sentinel, because that entry is only needed for single multiplication. I haven't fixed w=16 yet with this. Monday maybe?

Timings on our lab machines show no big difference. We'd only expect a difference in single multiplies, and it's in the noise really.

UNIX> gf_time 8 MDG 0 10240 10240 LOG - -
Seed: 0
Multiply: 0.231191 s Mops: 100.000 432.542 Mega-ops/s
Divide: 0.229992 s Mops: 100.000 434.797 Mega-ops/s
Region-Random: XOR: 0 0.095446 s MB: 100.000 1047.712 MB/s
Region-Random: XOR: 1 0.115485 s MB: 100.000 865.914 MB/s
UNIX> gf_time 8 MDG 0 10240 10240 LOG_ZERO - -
Seed: 0
Multiply: 0.228568 s Mops: 100.000 437.506 Mega-ops/s
Divide: 0.227718 s Mops: 100.000 439.140 Mega-ops/s
Region-Random: XOR: 0 0.085062 s MB: 100.000 1175.613 MB/s
Region-Random: XOR: 1 0.095891 s MB: 100.000 1042.846 MB/s
UNIX> gf_time 8 MDG 0 10240 10240 LOG_ZERO_EXT - -
Seed: 0
Multiply: 0.228960 s Mops: 100.000 436.758 Mega-ops/s
Divide: 0.227758 s Mops: 100.000 439.063 Mega-ops/s
Region-Random: XOR: 0 0.085180 s MB: 100.000 1173.981 MB/s
Region-Random: XOR: 1 0.095931 s MB: 100.000 1042.421 MB/s
UNIX>
2013-03-08 16:31:42 -05:00 · 2013-03-08 16:31:42 -05:00 · d05a931f04
parent 47896e9ddc
commit d05a931f04
7 changed files with 158 additions and 145 deletions
--- a/4
+++ b/4
@ -15,6 +15,10 @@ EXECUTABLES = gf_mult gf_div gf_add gf_unit gf_time gf_methods gf_poly \
 CFLAGS = -O3 -msse4 -maes -mpclmul -DINTEL_SSE4 -DINTEL_PCLMUL
 LDFLAGS = -O3 -msse4 -maes -mpclmul

+# Use these if you don't have INTEL_PCLMUL
+# CFLAGS = -O3 -msse4 -DINTEL_SSE4
+# LDFLAGS = -O3 -msse4 
+
 RM = /bin/rm -f

 LIBOBJS = gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o \
--- a/gf_div.c
+++ b/gf_div.c
@ -1,116 +0,0 @@
-/*
- * gf_div.c
- *
- * Multiplies two numbers in gf_2^w
- */
-
-#include <stdio.h>
-#include <getopt.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "gf_complete.h"
-#include "gf_method.h"
-
-void usage(char *s)
-{
-  fprintf(stderr, "usage: gf_div a b w [method] - does division of a and b in GF(2^w)\n");
-  fprintf(stderr, "       If w has an h on the end, treat a, b and the quotient as hexadecimal (no 0x required)\n");
-  fprintf(stderr, "\n");
-  fprintf(stderr, "       legal w are: 1-32, 64 and 128\n");
-  fprintf(stderr, "           128 is hex only (i.e. '128' will be an error - do '128h')\n");
-  fprintf(stderr, "\n");
-  fprintf(stderr, "       For method specification, type gf_methods\n");
-
-  if (s != NULL) fprintf(stderr, "%s", s);
-  exit(1);
-}
-
-int read_128(char *s, uint64_t *v)
-{
-  int l, t;
-  char save;
-
-  l = strlen(s);
-  if (l > 32) return 0;
-
-  if (l > 16) {
-    if (sscanf(s + (l-16), "%llx", (long long unsigned int *) &(v[1])) == 0) return 0;
-    save = s[l-16];
-    s[l-16] = '\0';
-    t = sscanf(s, "%llx", (long long unsigned int *) &(v[0]));
-    s[l-16] = save;
-    return t;
-  } else {
-    v[0] = 0;
-    return sscanf(s, "%llx", (long long unsigned int *)&(v[1]));
-  }
-  return 1;
-}
-
-void print_128(uint64_t *v) 
-{
-  if (v[0] > 0) {
-    printf("%llx", (long long unsigned int) v[0]);
-    printf("%016llx", (long long unsigned int) v[1]);
-  } else {
-    printf("%llx", (long long unsigned int) v[1]);
-  }
-  printf("\n");
-}
-
-
-int main(int argc, char **argv)
-{
-  int hex, al, bl, w;
-  uint32_t a, b, c, top;
-  uint64_t a64, b64, c64;
-  uint64_t a128[2], b128[2], c128[2];
-  char *format;
-  gf_t gf;
-
-  if (argc < 4) usage(NULL);
-  if (sscanf(argv[3], "%d", &w) == 0) usage("Bad w\n");
-
-  if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w");
-
-  hex = (strchr(argv[3], 'h') != NULL);
-  if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("\nBad Method\n");
-
-  if (!hex && w == 128) usage(NULL);
- 
-  if (w <= 32) {
-    format = (hex) ? "%x" : "%u";
-    if (sscanf(argv[1], format, &a) == 0) usage("Bad a\n");
-    if (sscanf(argv[2], format, &b) == 0) usage("Bad b\n");
-
-    if (w < 32) {
-      top = (w == 31) ? 0x80000000 : (1 << w);
-      if (w != 32 && a >= top) usage("a is too large\n");
-      if (w != 32 && b >= top) usage("b is too large\n");
-    }
-  
-    c = gf.divide.w32(&gf, a, b);
-    printf(format, c);
-    printf("\n");
-
-  } else if (w == 64) {
-    format = (hex) ? "%llx" : "%llu";
-    if (sscanf(argv[1], format, &a64) == 0) usage("Bad a\n");
-    if (sscanf(argv[2], format, &b64) == 0) usage("Bad b\n");
-    c64 = gf.divide.w64(&gf, a64, b64);
-
-    printf(format, c64);
-    printf("\n");
-
-  } else if (w == 128) {
-
-    if (read_128(argv[1], a128) == 0) usage("Bad a\n");
-    if (read_128(argv[2], b128) == 0) usage("Bad b\n");
-    gf.divide.w128(&gf, a128, b128, c128);
-
-    print_128(c128);
-  }
-  exit(0);
-}
--- a/gf_general.c
+++ b/gf_general.c
@ -249,19 +249,53 @@ void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *o

 void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
 {
+  void *top;
+  gf_general_t g;
+  uint8_t *r8;
+  uint16_t *r16;
  uint32_t *r32;
+  uint64_t *r64;
  int i;

-  /* If w is 8, 16, 32, 64 or 128, this is easy -- 
-     just fill the regions with random bytes.
+  top = rb+size;

+  /* If w is 8, 16, 32, 64 or 128, fill the regions with random bytes.
+     However, don't allow for zeros in rb, because that will screw up
+     division.
+     
     Otherwise, treat every four bytes as an uint32_t
     and fill it with a random value mod (1 << w).
   */

  if (w == 8 || w == 16 || w == 32 || w == 64 || w == 128) {
    MOA_Fill_Random_Region (ra, size);
-    MOA_Fill_Random_Region (rb, size);
+    while (rb < top) {
+      gf_general_set_random(&g, w, 0);
+      switch (w) {
+        case 8: 
+          r8 = (uint8_t *) rb;
+          *r8 = g.w32;
+          break;
+        case 16: 
+          r16 = (uint16_t *) rb;
+          *r16 = g.w32;
+          break;
+        case 32: 
+          r32 = (uint32_t *) rb;
+          *r32 = g.w32;
+          break;
+        case 64:
+          r64 = (uint64_t *) rb;
+          *r64 = g.w64;
+          break;
+        case 128: 
+          r64 = (uint64_t *) rb;
+          r64[0] = g.w128[0];
+          r64[1] = g.w128[1];
+          break;
+      }
+      rb += (w/8);
+    }
  } else {
    r32 = (uint32_t *) ra;
    for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1);
--- a/gf_method.c
+++ b/gf_method.c
@ -28,6 +28,7 @@ void methods_to_stderr()
  fprintf(stderr, "       TABLE: Full multiplication table\n");
  fprintf(stderr, "       LOG:   Discrete logs\n");
  fprintf(stderr, "       LOG_ZERO: Discrete logs with a large table for zeros\n");
+  fprintf(stderr, "       LOG_ZERO_EXT: Discrete logs with an extra large table for zeros\n");
  fprintf(stderr, "       SPLIT g_a g_b: Split tables defined by g_a and g_b\n");
  fprintf(stderr, "       COMPOSITE k rec METHOD: Composite field.  GF((2^l)^k), l=w/k.\n");
  fprintf(stderr, "                               rec = 0 means inline single multiplication\n");
@ -100,6 +101,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
    mult_type = GF_MULT_LOG_TABLE;
    arg1 = 1;
    starting++;
+  } else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) {
+    mult_type = GF_MULT_LOG_TABLE;
+    arg1 = 2;
+    starting++;
  } else if (strcmp(argv[starting], "SPLIT") == 0) {
    mult_type = GF_MULT_SPLIT_TABLE;
    if (argc < starting+5) return 0;
--- a/gf_methods.c
+++ b/gf_methods.c
@ -12,9 +12,9 @@
 #include "gf_complete.h"
 #include "gf_method.h"

-#define NMULTS (14)
+#define NMULTS (15)
 static char *mults[NMULTS] = { "SHIFT", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
-                               "TABLE", "LOG", "LOG_ZERO", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" };
+                               "TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" };

 #define NREGIONS (96) 
 static char *regions[NREGIONS] = { "-", "SINGLE", "DOUBLE", "QUAD",
--- a/gf_w64.c
+++ b/gf_w64.c
@ -224,6 +224,7 @@ gf_w64_clm_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
 void
 gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
 {
+#ifdef  INTEL_PCLMUL
  gf_internal_t *h;
  int i, j, k;
  uint8_t *s8, *d8, *dtop;
@ -305,6 +306,7 @@ gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int by
    }
  }
  gf_do_final_region_alignment(&rd);
+#endif
 }

 void
--- a/gf_w8.c
+++ b/gf_w8.c
@ -24,11 +24,19 @@ struct gf_w8_logtable_data {
 };

 struct gf_w8_logzero_table_data {
-    uint16_t        log_tbl[GF_FIELD_SIZE];
+    short           log_tbl[GF_FIELD_SIZE];  /* Make this signed, so that we can divide easily */
    uint8_t         antilog_tbl[512+512+1];
+    uint8_t         *div_tbl;
    uint8_t         *inv_tbl;
 };

+struct gf_w8_logzero_small_table_data {
+    short           log_tbl[GF_FIELD_SIZE];  /* Make this signed, so that we can divide easily */
+    uint8_t         antilog_tbl[255*3];
+    uint8_t         inv_tbl[GF_FIELD_SIZE];
+    uint8_t         *div_tbl;
+};
+
 /* Don't change the order of these relative to gf_w8_half_table_data */

 struct gf_w8_default_data {
@ -217,7 +225,7 @@ gf_w8_logzero_multiply (gf_t *gf, uint32_t a, uint32_t b)
  struct gf_w8_logzero_table_data *ltd;

  ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
-  return ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])];
+  return ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]];
 }

 static
@ -228,7 +236,30 @@ gf_w8_logzero_divide (gf_t *gf, uint32_t a, uint32_t b)
  struct gf_w8_logzero_table_data *ltd;

  ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
-  return ltd->antilog_tbl[(unsigned)((255 + ltd->log_tbl[a]) - ltd->log_tbl[b])];
+  return ltd->div_tbl[ltd->log_tbl[a] - ltd->log_tbl[b]];
+}
+
+static
+inline
+uint32_t
+gf_w8_logzero_small_multiply (gf_t *gf, uint32_t a, uint32_t b)
+{
+  struct gf_w8_logzero_small_table_data *std;
+
+  std = (struct gf_w8_logzero_small_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  if (b == 0) return 0;
+  return std->antilog_tbl[std->log_tbl[a] + std->log_tbl[b]];
+}
+
+static
+inline
+uint32_t
+gf_w8_logzero_small_divide (gf_t *gf, uint32_t a, uint32_t b)
+{
+  struct gf_w8_logzero_small_table_data *std;
+
+  std = (struct gf_w8_logzero_small_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  return std->div_tbl[std->log_tbl[a] - std->log_tbl[b]];
 }

 static
@ -277,6 +308,16 @@ gf_w8_logzero_inverse (gf_t *gf, uint32_t a)
  return (ltd->inv_tbl[a]);
 }

+static
+uint32_t
+gf_w8_logzero_small_inverse (gf_t *gf, uint32_t a)
+{
+  struct gf_w8_logzero_small_table_data *std;
+
+  std = (struct gf_w8_logzero_small_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  return (std->inv_tbl[a]);
+}
+
 static
 void
 gf_w8_log_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
@ -314,23 +355,37 @@ gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int
  uint8_t lv, b, c;
  uint8_t *s8, *d8;
  struct gf_w8_logzero_table_data *ltd;
+  struct gf_w8_logzero_small_table_data *std;
+  short *log;
+  uint8_t *alt;
+  gf_internal_t *h;

  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }

-  ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  h = (gf_internal_t *) gf->scratch;
+
+  if (h->arg1 == 1) {
+    std = (struct gf_w8_logzero_small_table_data *) h->private;
+    log = std->log_tbl;
+    alt = std->antilog_tbl;
+  } else {
+    ltd = (struct gf_w8_logzero_table_data *) h->private;
+    log = ltd->log_tbl;
+    alt = ltd->antilog_tbl;
+  }
  s8 = (uint8_t *) src;
  d8 = (uint8_t *) dest;

-  lv = ltd->log_tbl[val];
+  lv = log[val];

  if (xor) {
    for (i = 0; i < bytes; i++) {
-      d8[i] ^= (ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
+      d8[i] ^= (alt[lv + log[s8[i]]]);
    }
  } else {
    for (i = 0; i < bytes; i++) {
-      d8[i] = (ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
+      d8[i] = (alt[lv + log[s8[i]]]);
    }
  }
 }
@ -341,6 +396,7 @@ int gf_w8_log_init(gf_t *gf)
  gf_internal_t *h;
  struct gf_w8_logtable_data *ltd;
  struct gf_w8_logzero_table_data *ztd;
+  struct gf_w8_logzero_small_table_data *std;
  uint8_t *alt;
  uint8_t *inv;
  int i, b;
@ -350,25 +406,35 @@ int gf_w8_log_init(gf_t *gf)
    ltd = h->private;
    alt = ltd->antilog_tbl;
    inv = ltd->inv_tbl;
+  } else if (h->arg1 == 1) {
+    std = h->private;
+    alt = std->antilog_tbl;
+    std->div_tbl = (alt + 255);
+    inv = std->inv_tbl;
  } else {
    ztd = h->private;
    alt = ztd->antilog_tbl;
    ztd->inv_tbl = (alt + 512 + 256);
+    ztd->div_tbl = (alt + 255);
    inv = ztd->inv_tbl;
  }
  
-  if (h->arg1 == 1) {
-    ztd->log_tbl[0] = 512;
-  } else {
+  if (h->arg1 == 0) {
    ltd->log_tbl[0] = 0;
+  } else if (h->arg1 == 1) {
+    std->log_tbl[0] = 510;
+  } else {
+    ztd->log_tbl[0] = 512;
  }

  b = 1;
  for (i = 0; i < GF_MULT_GROUP_SIZE; i++) {
-      if (h->arg1 == 1) {
-        ztd->log_tbl[b] = i;
-      } else {
+      if (h->arg1 == 0) {
        ltd->log_tbl[b] = i;
+      } else if (h->arg1 == 1) {
+        std->log_tbl[b] = i;
+      } else {
+        ztd->log_tbl[b] = i;
      }
      alt[i] = b;
      alt[i+GF_MULT_GROUP_SIZE] = b;
@ -377,22 +443,39 @@ int gf_w8_log_init(gf_t *gf)
          b = b ^ h->prim_poly;
      }
  }
-  if (h->arg1 == 1) {
+  if (h->arg1 == 1) bzero(alt+510, 255);
+
+  if (h->arg1 == 2) {
    bzero(alt+512, 255);
    alt[512+512] = 0;
  }

  inv[0] = 0;  /* Not really, but we need to fill it with something  */
-  inv[1] = 1;
-  for (i = 2; i < GF_FIELD_SIZE; i++) {
-    b = (h->arg1 == 1) ? ztd->log_tbl[i] : ltd->log_tbl[i];
-    inv[i] = alt[GF_MULT_GROUP_SIZE-b];
+  i = 1;
+  b = GF_MULT_GROUP_SIZE;
+  do {
+    inv[i] = alt[b];
+    i <<= 1;
+    if (i & (1 << 8)) i ^= h->prim_poly;
+    b--;
+  } while (i != 1);
+    
+  if (h->arg1 == 0) {
+    gf->inverse.w32 = gf_w8_log_inverse;
+    gf->divide.w32 = gf_w8_log_divide;
+    gf->multiply.w32 = gf_w8_log_multiply;
+    gf->multiply_region.w32 = gf_w8_log_multiply_region;
+  } else if (h->arg1 == 1) {
+    gf->inverse.w32 = gf_w8_logzero_small_inverse;
+    gf->divide.w32 = gf_w8_logzero_small_divide;
+    gf->multiply.w32 = gf_w8_logzero_small_multiply;
+    gf->multiply_region.w32 = gf_w8_logzero_multiply_region;
+  } else {
+    gf->inverse.w32 = gf_w8_logzero_inverse;
+    gf->divide.w32 = gf_w8_logzero_divide;
+    gf->multiply.w32 = gf_w8_logzero_multiply;
+    gf->multiply_region.w32 = gf_w8_logzero_multiply_region;
  }
-
-  gf->inverse.w32 = (h->arg1 == 0) ? gf_w8_log_inverse : gf_w8_logzero_inverse;
-  gf->divide.w32 = (h->arg1 == 0) ? gf_w8_log_divide : gf_w8_logzero_divide;
-  gf->multiply.w32 = (h->arg1 == 0) ? gf_w8_log_multiply : gf_w8_logzero_multiply;
-  gf->multiply_region.w32 = (h->arg1 == 0) ? gf_w8_log_multiply_region : gf_w8_logzero_multiply_region;
  return 1;
 }

@ -1818,9 +1901,10 @@ int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1
      return -1;
      break;
    case GF_MULT_LOG_TABLE:
-      if ((arg1 != 0 && arg1 != 1) || arg2 != 0) return -1;
+      if ((arg1 != 0 && arg1 != 1 && arg1 != 2) || arg2 != 0) return -1;
      if (region_type != 0 && region_type != GF_REGION_CAUCHY) return -1;
      if (arg1 == 0) return sizeof(gf_internal_t) + sizeof(struct gf_w8_logtable_data) + 64;
+      if (arg1 == 1) return sizeof(gf_internal_t) + sizeof(struct gf_w8_logzero_small_table_data) + 64;
      return sizeof(gf_internal_t) + sizeof(struct gf_w8_logzero_table_data) + 64;
      break;
    case GF_MULT_SHIFT: