From fb0bbdcf62a8cc82268207285a2f94808672dfdb Mon Sep 17 00:00:00 2001 From: Jim Plank Date: Tue, 31 Dec 2013 20:08:18 -0500 Subject: [PATCH] Fixed the problem with PCLMUL and gf_complete.h. Removed ARCH_64 from everything but 128/GROUP/SSE. Fortunately, no one ever uses that. --- include/gf_complete.h | 8 +------- src/gf_w128.c | 21 ++++++++++++--------- src/gf_w16.c | 16 ++++++++-------- src/gf_w32.c | 16 ++++++++-------- src/gf_w4.c | 4 ++-- src/gf_w64.c | 14 +++++++------- src/gf_w8.c | 14 +++++++------- 7 files changed, 45 insertions(+), 48 deletions(-) diff --git a/include/gf_complete.h b/include/gf_complete.h index ef685f9..57b439e 100644 --- a/include/gf_complete.h +++ b/include/gf_complete.h @@ -24,14 +24,8 @@ #include #endif -#ifdef INTEL_PCLMUL +#ifdef INTEL_SSE4_PCLMUL #include - #ifdef INTEL_SSE4 - #define INTEL_SSE4_PCLMUL - #endif - #ifdef INTEL_SSSE3 - #define INTEL_SSSE3_PCLMUL - #endif #endif diff --git a/src/gf_w128.c b/src/gf_w128.c index fc08a3f..c888f44 100644 --- a/src/gf_w128.c +++ b/src/gf_w128.c @@ -91,7 +91,7 @@ int xor) gf_val_128_t d128; uint64_t c128[2]; gf_region_data rd; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a,b; __m128i result0,result1; __m128i prim_poly; @@ -296,7 +296,7 @@ gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_12 void gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a,b; __m128i result0,result1; @@ -382,7 +382,7 @@ gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_ void gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { -#if defined(INTEL_SSE4) && defined(ARCH_64) +#if defined(INTEL_SSE4) int i; __m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one; /*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/ @@ -440,7 +440,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_ void gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) { -#if defined(INTEL_SSE4) && defined(ARCH_64) +#if defined(INTEL_SSE4) __m128i a, b, lmask, hmask, pp, c, middle_one; gf_internal_t *h; uint64_t topbit, middlebit; @@ -987,7 +987,7 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128) static void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128) { -#if defined(INTEL_SSE4) && defined(ARCH_64) +#if defined(INTEL_SSE4) int i, j; int g_m; uint64_t lbit, middlebit; @@ -1277,7 +1277,7 @@ static void gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor) { -#if defined(INTEL_SSE4) && defined(ARCH_64) +#if defined(INTEL_SSE4) int i; int i_r, i_m, t_m; int mask_m, mask_r, mask_s; @@ -1706,7 +1706,7 @@ int gf_w128_composite_init(gf_t *gf) static int gf_w128_cfm_init(gf_t *gf) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) gf->inverse.w128 = gf_w128_euclid; gf->multiply.w128 = gf_w128_clm_multiply; gf->multiply_region.w128 = gf_w128_clm_multiply_region_from_single; @@ -1779,7 +1779,7 @@ void gf_w128_group_r_init(gf_t *gf) static void gf_w128_group_r_sse_init(gf_t *gf) { -#if defined(INTEL_SSE4) && defined(ARCH_64) +#if defined(INTEL_SSE4) int i, j; int g_r; uint64_t pp; @@ -1814,7 +1814,7 @@ int gf_w128_split_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; gf->multiply.w128 = gf_w128_bytwo_p_multiply; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) if (!(h->region_type & GF_REGION_NOSSE)){ gf->multiply.w128 = gf_w128_clm_multiply; } @@ -1880,6 +1880,9 @@ int gf_w128_group_init(gf_t *gf) gf->inverse.w128 = gf_w128_euclid; gf->multiply_region.w128 = gf_w128_group_multiply_region; + /* JSP: I've got a problem compiling here -- something about "vmovq", and + I don't have the time to chase it down right now. */ + #if defined(INTEL_SSE4) && defined(ARCH_64) if(!(scratch->region_type & GF_REGION_NOSSE)) { diff --git a/src/gf_w16.c b/src/gf_w16.c index 2d73034..454c6cc 100644 --- a/src/gf_w16.c +++ b/src/gf_w16.c @@ -133,7 +133,7 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val uint16_t *s16; uint16_t *d16; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; __m128i prim_poly; @@ -197,7 +197,7 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val uint16_t *s16; uint16_t *d16; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -266,7 +266,7 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val uint16_t *s16; uint16_t *d16; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -448,7 +448,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -495,7 +495,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -535,7 +535,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -611,7 +611,7 @@ int gf_w16_cfm_init(gf_t *gf) /*Ben: Determining how many reductions to do */ -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) if ((0xfe00 & h->prim_poly) == 0) { gf->multiply.w32 = gf_w16_clm_multiply_2; gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2; @@ -739,7 +739,7 @@ int gf_w16_log_init(gf_t *gf) if (check) { if (h->mult_type != GF_MULT_LOG_TABLE) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) return gf_w16_cfm_init(gf); #endif return gf_w16_shift_init(gf); diff --git a/src/gf_w32.c b/src/gf_w32.c index e2fb0f9..03f285f 100644 --- a/src/gf_w32.c +++ b/src/gf_w32.c @@ -125,7 +125,7 @@ void gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) int i; uint32_t *s32; @@ -175,7 +175,7 @@ void gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) int i; uint32_t *s32; @@ -229,7 +229,7 @@ static void gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) int i; uint32_t *s32; uint32_t *d32; @@ -409,7 +409,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -453,7 +453,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -492,7 +492,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -565,7 +565,7 @@ int gf_w32_cfm_init(gf_t *gf) /*Ben: We also check to see if the prim poly will work for pclmul */ /*Ben: Check to see how many reduction steps it will take*/ -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) if ((0xfffe0000 & h->prim_poly) == 0){ gf->multiply.w32 = gf_w32_clm_multiply_2; gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2; @@ -2176,7 +2176,7 @@ int gf_w32_split_init(gf_t *gf) int i, j, exp, ispclmul, issse3; ispclmul = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) ispclmul = 1; #endif diff --git a/src/gf_w4.c b/src/gf_w4.c index 3e00cd2..2504ec6 100644 --- a/src/gf_w4.c +++ b/src/gf_w4.c @@ -182,7 +182,7 @@ gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -1967,7 +1967,7 @@ int gf_w4_cfm_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) gf->multiply.w32 = gf_w4_clm_multiply; return 1; #endif diff --git a/src/gf_w64.c b/src/gf_w64.c index b8baa8f..73bf164 100644 --- a/src/gf_w64.c +++ b/src/gf_w64.c @@ -96,7 +96,7 @@ xor) gf_val_64_t *s64, *d64, *top; gf_region_data rd; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result, r1; __m128i prim_poly; @@ -187,7 +187,7 @@ xor) gf_val_64_t *s64, *d64, *top; gf_region_data rd; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result, r1; __m128i prim_poly; @@ -385,7 +385,7 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) { gf_val_64_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -427,7 +427,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) { gf_val_64_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -466,7 +466,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) void gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor) { -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) gf_internal_t *h; int i, j, k; uint8_t *s8, *d8, *dtop; @@ -759,7 +759,7 @@ int gf_w64_cfm_init(gf_t *gf) gf->inverse.w64 = gf_w64_euclid; gf->multiply_region.w64 = gf_w64_multiply_region_from_single; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) if ((0xfffffffe00000000ULL & h->prim_poly) == 0){ gf->multiply.w64 = gf_w64_clm_multiply_2; gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2; @@ -2030,7 +2030,7 @@ int gf_w64_split_init(gf_t *gf) gf->multiply.w64 = gf_w64_bytwo_p_multiply; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) if ((!(h->region_type & GF_REGION_NOSSE) && (h->arg1 == 64 || h->arg2 == 64)) || h->mult_type == GF_MULT_DEFAULT){ diff --git a/src/gf_w8.c b/src/gf_w8.c index da34968..7661aad 100644 --- a/src/gf_w8.c +++ b/src/gf_w8.c @@ -211,7 +211,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -257,7 +257,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -296,7 +296,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -373,7 +373,7 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_ uint8_t *s8; uint8_t *d8; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -432,7 +432,7 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_ uint8_t *s8; uint8_t *d8; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -495,7 +495,7 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_ uint8_t *s8; uint8_t *d8; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) __m128i a, b; __m128i result; @@ -592,7 +592,7 @@ int gf_w8_cfm_init(gf_t *gf) h = (gf_internal_t *) gf->scratch; -#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64) +#if defined(INTEL_SSE4_PCLMUL) if ((0xe0 & h->prim_poly) == 0){ gf->multiply.w32 = gf_w8_clm_multiply_2; gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;