diff --git a/include/gf_complete.h b/include/gf_complete.h
index 57b439e..0469b77 100644
--- a/include/gf_complete.h
+++ b/include/gf_complete.h
@@ -33,17 +33,18 @@
    Not all are implemented for all values of w.
    See the paper for an explanation of how they work. */
 
-typedef enum {GF_MULT_DEFAULT,
-              GF_MULT_SHIFT,
-              GF_MULT_CARRY_FREE,
-              GF_MULT_GROUP,
+typedef enum {GF_MULT_DEFAULT,
+              GF_MULT_SHIFT,
+              GF_MULT_CARRY_FREE,
+              GF_MULT_CARRY_FREE_GK, //ADAM
+              GF_MULT_GROUP,
               GF_MULT_BYTWO_p,
               GF_MULT_BYTWO_b,
-              GF_MULT_TABLE,
-              GF_MULT_LOG_TABLE,
+              GF_MULT_TABLE,
+              GF_MULT_LOG_TABLE,
               GF_MULT_LOG_ZERO,
               GF_MULT_LOG_ZERO_EXT,
-              GF_MULT_SPLIT_TABLE,
+              GF_MULT_SPLIT_TABLE,
               GF_MULT_COMPOSITE } gf_mult_type_t;
 
 /* These are the different ways to optimize region
diff --git a/src/gf.c b/src/gf.c
index da714d2..1955559 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -286,6 +286,16 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
     return 1;
   }
 
+  //ADAM
+  if (mult_type == GF_MULT_CARRY_FREE_GK) {
+    if (w != 4 && w != 8 && w != 16 &&
+        w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; }
+    if (raltmap)              { _gf_errno = GF_E_ALT_CFM; return 0; }
+    if (rsse || rnosse)       { _gf_errno = GF_E_SSE_CFM; return 0; }
+    if (!pclmul)              { _gf_errno = GF_E_PCLMULX; return 0; }
+    return 1;
+  }
+
   if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
     if (raltmap)              { _gf_errno = GF_E_ALT_BY2; return 0; }
     if (rsse && !sse2)        { _gf_errno = GF_E_BY2_SSE; return 0; }
diff --git a/src/gf_method.c b/src/gf_method.c
index a7bcacf..90d62af 100644
--- a/src/gf_method.c
+++ b/src/gf_method.c
@@ -47,6 +47,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
       } else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
         mult_type = GF_MULT_CARRY_FREE;
         starting++;
+      //ADAM
+      } else if (strcmp(argv[starting], "CARRY_FREE_GK") == 0) {
+        mult_type = GF_MULT_CARRY_FREE_GK;
+        starting++;
       } else if (strcmp(argv[starting], "GROUP") == 0) {
         mult_type = GF_MULT_GROUP;
         if (argc < starting + 3) {
diff --git a/src/gf_w32.c b/src/gf_w32.c
index 8f7790c..c90c7fb 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -399,7 +399,104 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b)
    extra memory.
  */
 
+//ADAM
+/* Carry-free multiplication of a32 and b32 using the two 64-bit constants
+   q+ and g* that gf_w32_cfmgk_init() stores in h->private.  The product of
+   the first carry-less multiply is reduced with two further carry-less
+   multiplies (by q+, then by g*) and an XOR; the low 32 bits are returned.
+   Returns 0 when built without INTEL_SSE4_PCLMUL. */
+static
+inline
+gf_val_32_t
+gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
+{
+  gf_val_32_t rv = 0;
+#if defined(INTEL_SSE4_PCLMUL)
+
+  __m128i a, b;
+  __m128i result;
+  __m128i w;
+  __m128i g, q;
+  gf_internal_t * h = gf->scratch;
+  uint64_t g_star, q_plus;
+
+  q_plus = *(uint64_t *) h->private;
+  g_star = *((uint64_t *) h->private + 1);
+
+  a = _mm_insert_epi32 (_mm_setzero_si128(), a32, 0);
+  b = _mm_insert_epi32 (a, b32, 0);
+  g = _mm_insert_epi64 (a, g_star, 0);
+  q = _mm_insert_epi64 (a, q_plus, 0);
+
+  result = _mm_clmulepi64_si128 (a, b, 0);
+  w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+  w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+  result = _mm_xor_si128 (result, w);
+
+  /* Extracts 32 bit value from result. */
+  rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
+#endif
+  return rv;
+}
+
 
+//ADAM
+#if defined(INTEL_SSE4_PCLMUL)
+
+/* Multiplies each 32-bit word of src by val with the same reduction as
+   gf_w32_cfmgk_multiply() and stores the result in dest, or XORs it into
+   dest when xor is non-zero.  val == 0 and val == 1 are delegated to
+   gf_multby_zero() / gf_multby_one().  Only whole 32-bit words are handled. */
+static
+void
+gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
+{
+
+  int i;
+  uint32_t *s32;
+  uint32_t *d32;
+
+  __m128i a, b;
+  __m128i result;
+  __m128i w;
+  __m128i g, q;
+  gf_internal_t * h = gf->scratch;
+  uint64_t g_star, q_plus;
+
+  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
+  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
+
+  q_plus = *(uint64_t *) h->private;
+  g_star = *((uint64_t *) h->private + 1);
+
+  /* Initialise a before g and q: both are built by inserting into a copy of it. */
+  a = _mm_insert_epi32 (_mm_setzero_si128(), val, 0);
+  g = _mm_insert_epi64 (a, g_star, 0);
+  q = _mm_insert_epi64 (a, q_plus, 0);
+  s32 = (uint32_t *) src;
+  d32 = (uint32_t *) dest;
+
+  if (xor) {
+    for (i = 0; i < bytes/sizeof(uint32_t); i++) {
+      b = _mm_insert_epi32 (a, s32[i], 0);
+      result = _mm_clmulepi64_si128 (a, b, 0);
+      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+      result = _mm_xor_si128 (result, w);
+      d32[i] ^= ((gf_val_32_t)_mm_extract_epi32(result, 0));
+    }
+  } else {
+    for (i = 0; i < bytes/sizeof(uint32_t); i++) {
+      b = _mm_insert_epi32 (a, s32[i], 0);
+      result = _mm_clmulepi64_si128 (a, b, 0);
+      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+      result = _mm_xor_si128 (result, w);
+      d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
+    }
+  }
+}
+#endif
 
 
 static
@@ -446,6 +543,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
 #endif
   return rv;
 }
+
 static
 inline
 gf_val_32_t
@@ -552,6 +650,48 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
   return product;
 }
 
+//ADAM
+/* Sets up GF_MULT_CARRY_FREE_GK for w=32.  With INTEL_SSE4_PCLMUL it installs
+   the cfmgk multiply and region routines, stores q+ and g* (the low 32 bits
+   of prim_poly) in h->private and returns 1; without it, returns 0. */
+ static
+int gf_w32_cfmgk_init(gf_t *gf)
+{
+  gf->inverse.w32 = gf_w32_euclid;
+  gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
+
+#if defined(INTEL_SSE4_PCLMUL)
+  gf_internal_t *h;
+
+  h = (gf_internal_t *) gf->scratch;
+  gf->multiply.w32 = gf_w32_cfmgk_multiply;
+  gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single;
+
+  //setup in the private section the q+ and g* ADAM
+  uint64_t *q_plus = (uint64_t *) h->private;
+  uint64_t *g_star = (uint64_t *) h->private + 1;
+
+  //q+
+  uint64_t tmp = h->prim_poly << 32;
+  *q_plus = 1ULL << 32;
+
+  int i;
+  for(i = 63; i >= 32; i--)
+    if((1ULL << i) & tmp)
+    {
+      *q_plus |= 1ULL << (i-32);
+      tmp ^= h->prim_poly << (i-32);
+    }
+
+  //g*
+  *g_star = h->prim_poly & ((1ULL << 32) - 1);
+
+  return 1;
+#endif
+
+  return 0;
+}
+
 static
 int gf_w32_cfm_init(gf_t *gf)
 {
@@ -2656,6 +2796,10 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
     case GF_MULT_CARRY_FREE:
       return sizeof(gf_internal_t);
       break;
+    //ADAM
+    case GF_MULT_CARRY_FREE_GK:
+      return sizeof(gf_internal_t) + sizeof(uint64_t)*2;
+      break;
     case GF_MULT_SHIFT:
       return sizeof(gf_internal_t);
       break;
@@ -2703,14 +2847,15 @@ int gf_w32_init(gf_t *gf)
   gf->multiply_region.w32 = NULL;
 
   switch(h->mult_type) {
-    case GF_MULT_CARRY_FREE:  if (gf_w32_cfm_init(gf) == 0) return 0; break;
-    case GF_MULT_SHIFT:       if (gf_w32_shift_init(gf) == 0) return 0; break;
-    case GF_MULT_COMPOSITE:   if (gf_w32_composite_init(gf) == 0) return 0; break;
+    case GF_MULT_CARRY_FREE:    if (gf_w32_cfm_init(gf) == 0) return 0; break;
+    case GF_MULT_CARRY_FREE_GK: if (gf_w32_cfmgk_init(gf) == 0) return 0; break; //ADAM
+    case GF_MULT_SHIFT:         if (gf_w32_shift_init(gf) == 0) return 0; break;
+    case GF_MULT_COMPOSITE:     if (gf_w32_composite_init(gf) == 0) return 0; break;
     case GF_MULT_DEFAULT:
-    case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
-    case GF_MULT_GROUP:       if (gf_w32_group_init(gf) == 0) return 0; break;
+    case GF_MULT_SPLIT_TABLE:   if (gf_w32_split_init(gf) == 0) return 0; break;
+    case GF_MULT_GROUP:         if (gf_w32_group_init(gf) == 0) return 0; break;
     case GF_MULT_BYTWO_p:
-    case GF_MULT_BYTWO_b:     if (gf_w32_bytwo_init(gf) == 0) return 0; break;
+    case GF_MULT_BYTWO_b:       if (gf_w32_bytwo_init(gf) == 0) return 0; break;
     default: return 0;
   }
   if (h->divide_type == GF_DIVIDE_EUCLID) {
diff --git a/tools/gf_methods.c b/tools/gf_methods.c
index 6664bec..921febf 100644
--- a/tools/gf_methods.c
+++ b/tools/gf_methods.c
@@ -20,8 +20,9 @@
 #define BNMULTS (8)
 static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48", "TABLE", "LOG",
        "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
-#define NMULTS (16)
-static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
+//ADAM
+#define NMULTS (17)
+static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
                                "TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
                                "SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };
 