Fixed the problem with PCLMUL and gf_complete.h. Removed ARCH_64 from
everything but 128/GROUP/SSE. Fortunately, no one ever uses that.
master
Jim Plank 2013-12-31 20:08:18 -05:00
parent 8900c0e635
commit fb0bbdcf62
7 changed files with 45 additions and 48 deletions

View File

@ -24,14 +24,8 @@
#include <emmintrin.h>
#endif
#ifdef INTEL_PCLMUL
#ifdef INTEL_SSE4_PCLMUL
#include <wmmintrin.h>
#ifdef INTEL_SSE4
#define INTEL_SSE4_PCLMUL
#endif
#ifdef INTEL_SSSE3
#define INTEL_SSSE3_PCLMUL
#endif
#endif

View File

@ -91,7 +91,7 @@ int xor)
gf_val_128_t d128;
uint64_t c128[2];
gf_region_data rd;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a,b;
__m128i result0,result1;
__m128i prim_poly;
@ -296,7 +296,7 @@ gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_12
void
gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a,b;
__m128i result0,result1;
@ -382,7 +382,7 @@ gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_
void
gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
#if defined(INTEL_SSE4) && defined(ARCH_64)
#if defined(INTEL_SSE4)
int i;
__m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
@ -440,7 +440,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
void
gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
#if defined(INTEL_SSE4) && defined(ARCH_64)
#if defined(INTEL_SSE4)
__m128i a, b, lmask, hmask, pp, c, middle_one;
gf_internal_t *h;
uint64_t topbit, middlebit;
@ -987,7 +987,7 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
static
void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128)
{
#if defined(INTEL_SSE4) && defined(ARCH_64)
#if defined(INTEL_SSE4)
int i, j;
int g_m;
uint64_t lbit, middlebit;
@ -1277,7 +1277,7 @@ static
void
gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
#if defined(INTEL_SSE4) && defined(ARCH_64)
#if defined(INTEL_SSE4)
int i;
int i_r, i_m, t_m;
int mask_m, mask_r, mask_s;
@ -1706,7 +1706,7 @@ int gf_w128_composite_init(gf_t *gf)
static
int gf_w128_cfm_init(gf_t *gf)
{
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
gf->inverse.w128 = gf_w128_euclid;
gf->multiply.w128 = gf_w128_clm_multiply;
gf->multiply_region.w128 = gf_w128_clm_multiply_region_from_single;
@ -1779,7 +1779,7 @@ void gf_w128_group_r_init(gf_t *gf)
static
void gf_w128_group_r_sse_init(gf_t *gf)
{
#if defined(INTEL_SSE4) && defined(ARCH_64)
#if defined(INTEL_SSE4)
int i, j;
int g_r;
uint64_t pp;
@ -1814,7 +1814,7 @@ int gf_w128_split_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
gf->multiply.w128 = gf_w128_bytwo_p_multiply;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
if (!(h->region_type & GF_REGION_NOSSE)){
gf->multiply.w128 = gf_w128_clm_multiply;
}
@ -1880,6 +1880,9 @@ int gf_w128_group_init(gf_t *gf)
gf->inverse.w128 = gf_w128_euclid;
gf->multiply_region.w128 = gf_w128_group_multiply_region;
/* JSP: I've got a problem compiling here -- something about "vmovq", and
I don't have the time to chase it down right now. */
#if defined(INTEL_SSE4) && defined(ARCH_64)
if(!(scratch->region_type & GF_REGION_NOSSE))
{

View File

@ -133,7 +133,7 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
__m128i prim_poly;
@ -197,7 +197,7 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -266,7 +266,7 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
uint16_t *s16;
uint16_t *d16;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -448,7 +448,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -495,7 +495,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -535,7 +535,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -611,7 +611,7 @@ int gf_w16_cfm_init(gf_t *gf)
/*Ben: Determining how many reductions to do */
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
if ((0xfe00 & h->prim_poly) == 0) {
gf->multiply.w32 = gf_w16_clm_multiply_2;
gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
@ -739,7 +739,7 @@ int gf_w16_log_init(gf_t *gf)
if (check) {
if (h->mult_type != GF_MULT_LOG_TABLE) {
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
return gf_w16_cfm_init(gf);
#endif
return gf_w16_shift_init(gf);

View File

@ -125,7 +125,7 @@ void
gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
@ -175,7 +175,7 @@ void
gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
@ -229,7 +229,7 @@ static
void
gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
int i;
uint32_t *s32;
uint32_t *d32;
@ -409,7 +409,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -453,7 +453,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -492,7 +492,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -565,7 +565,7 @@ int gf_w32_cfm_init(gf_t *gf)
/*Ben: We also check to see if the prim poly will work for pclmul */
/*Ben: Check to see how many reduction steps it will take*/
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
if ((0xfffe0000 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w32_clm_multiply_2;
gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2;
@ -2176,7 +2176,7 @@ int gf_w32_split_init(gf_t *gf)
int i, j, exp, ispclmul, issse3;
ispclmul = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
ispclmul = 1;
#endif

View File

@ -182,7 +182,7 @@ gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -1967,7 +1967,7 @@ int gf_w4_cfm_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
gf->multiply.w32 = gf_w4_clm_multiply;
return 1;
#endif

View File

@ -96,7 +96,7 @@ xor)
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
@ -187,7 +187,7 @@ xor)
gf_val_64_t *s64, *d64, *top;
gf_region_data rd;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result, r1;
__m128i prim_poly;
@ -385,7 +385,7 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
{
gf_val_64_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -427,7 +427,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
{
gf_val_64_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -466,7 +466,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
void
gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
{
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
gf_internal_t *h;
int i, j, k;
uint8_t *s8, *d8, *dtop;
@ -759,7 +759,7 @@ int gf_w64_cfm_init(gf_t *gf)
gf->inverse.w64 = gf_w64_euclid;
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
if ((0xfffffffe00000000ULL & h->prim_poly) == 0){
gf->multiply.w64 = gf_w64_clm_multiply_2;
gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2;
@ -2030,7 +2030,7 @@ int gf_w64_split_init(gf_t *gf)
gf->multiply.w64 = gf_w64_bytwo_p_multiply;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
if ((!(h->region_type & GF_REGION_NOSSE) &&
(h->arg1 == 64 || h->arg2 == 64)) ||
h->mult_type == GF_MULT_DEFAULT){

View File

@ -211,7 +211,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -257,7 +257,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -296,7 +296,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
{
gf_val_32_t rv = 0;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -373,7 +373,7 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -432,7 +432,7 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -495,7 +495,7 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
uint8_t *s8;
uint8_t *d8;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
__m128i a, b;
__m128i result;
@ -592,7 +592,7 @@ int gf_w8_cfm_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
#if defined(INTEL_SSE4_PCLMUL)
if ((0xe0 & h->prim_poly) == 0){
gf->multiply.w32 = gf_w8_clm_multiply_2;
gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;