Fixed the problem with PCLMUL and gf_complete.h. Removed
ARCH_64 from everything but 128/GROUP/SSE. Fortunately, no one ever uses that.
master
parent
8900c0e635
commit
fb0bbdcf62
|
@ -24,14 +24,8 @@
|
|||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef INTEL_PCLMUL
|
||||
#ifdef INTEL_SSE4_PCLMUL
|
||||
#include <wmmintrin.h>
|
||||
#ifdef INTEL_SSE4
|
||||
#define INTEL_SSE4_PCLMUL
|
||||
#endif
|
||||
#ifdef INTEL_SSSE3
|
||||
#define INTEL_SSSE3_PCLMUL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -91,7 +91,7 @@ int xor)
|
|||
gf_val_128_t d128;
|
||||
uint64_t c128[2];
|
||||
gf_region_data rd;
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
__m128i a,b;
|
||||
__m128i result0,result1;
|
||||
__m128i prim_poly;
|
||||
|
@ -296,7 +296,7 @@ gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_12
|
|||
void
|
||||
gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
|
||||
{
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a,b;
|
||||
__m128i result0,result1;
|
||||
|
@ -382,7 +382,7 @@ gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_
|
|||
void
|
||||
gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
|
||||
{
|
||||
#if defined(INTEL_SSE4) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4)
|
||||
int i;
|
||||
__m128i a, b, pp, one, prod, amask, l_middle_one, u_middle_one;
|
||||
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
|
||||
|
@ -440,7 +440,7 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
|
|||
void
|
||||
gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
|
||||
{
|
||||
#if defined(INTEL_SSE4) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4)
|
||||
__m128i a, b, lmask, hmask, pp, c, middle_one;
|
||||
gf_internal_t *h;
|
||||
uint64_t topbit, middlebit;
|
||||
|
@ -987,7 +987,7 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
|
|||
static
|
||||
void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128)
|
||||
{
|
||||
#if defined(INTEL_SSE4) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4)
|
||||
int i, j;
|
||||
int g_m;
|
||||
uint64_t lbit, middlebit;
|
||||
|
@ -1277,7 +1277,7 @@ static
|
|||
void
|
||||
gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
|
||||
{
|
||||
#if defined(INTEL_SSE4) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4)
|
||||
int i;
|
||||
int i_r, i_m, t_m;
|
||||
int mask_m, mask_r, mask_s;
|
||||
|
@ -1706,7 +1706,7 @@ int gf_w128_composite_init(gf_t *gf)
|
|||
static
|
||||
int gf_w128_cfm_init(gf_t *gf)
|
||||
{
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
gf->inverse.w128 = gf_w128_euclid;
|
||||
gf->multiply.w128 = gf_w128_clm_multiply;
|
||||
gf->multiply_region.w128 = gf_w128_clm_multiply_region_from_single;
|
||||
|
@ -1779,7 +1779,7 @@ void gf_w128_group_r_init(gf_t *gf)
|
|||
static
|
||||
void gf_w128_group_r_sse_init(gf_t *gf)
|
||||
{
|
||||
#if defined(INTEL_SSE4) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4)
|
||||
int i, j;
|
||||
int g_r;
|
||||
uint64_t pp;
|
||||
|
@ -1814,7 +1814,7 @@ int gf_w128_split_init(gf_t *gf)
|
|||
h = (gf_internal_t *) gf->scratch;
|
||||
|
||||
gf->multiply.w128 = gf_w128_bytwo_p_multiply;
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
if (!(h->region_type & GF_REGION_NOSSE)){
|
||||
gf->multiply.w128 = gf_w128_clm_multiply;
|
||||
}
|
||||
|
@ -1880,6 +1880,9 @@ int gf_w128_group_init(gf_t *gf)
|
|||
gf->inverse.w128 = gf_w128_euclid;
|
||||
gf->multiply_region.w128 = gf_w128_group_multiply_region;
|
||||
|
||||
/* JSP: I've got a problem compiling here -- something about "vmovq", and
|
||||
I don't have the time to chase it down right now. */
|
||||
|
||||
#if defined(INTEL_SSE4) && defined(ARCH_64)
|
||||
if(!(scratch->region_type & GF_REGION_NOSSE))
|
||||
{
|
||||
|
|
16
src/gf_w16.c
16
src/gf_w16.c
|
@ -133,7 +133,7 @@ gf_w16_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val
|
|||
uint16_t *s16;
|
||||
uint16_t *d16;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
|
@ -197,7 +197,7 @@ gf_w16_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val
|
|||
uint16_t *s16;
|
||||
uint16_t *d16;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -266,7 +266,7 @@ gf_w16_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val
|
|||
uint16_t *s16;
|
||||
uint16_t *d16;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -448,7 +448,7 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -495,7 +495,7 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -535,7 +535,7 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -611,7 +611,7 @@ int gf_w16_cfm_init(gf_t *gf)
|
|||
|
||||
/*Ben: Determining how many reductions to do */
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
if ((0xfe00 & h->prim_poly) == 0) {
|
||||
gf->multiply.w32 = gf_w16_clm_multiply_2;
|
||||
gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
|
||||
|
@ -739,7 +739,7 @@ int gf_w16_log_init(gf_t *gf)
|
|||
if (check) {
|
||||
if (h->mult_type != GF_MULT_LOG_TABLE) {
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
return gf_w16_cfm_init(gf);
|
||||
#endif
|
||||
return gf_w16_shift_init(gf);
|
||||
|
|
16
src/gf_w32.c
16
src/gf_w32.c
|
@ -125,7 +125,7 @@ void
|
|||
gf_w32_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
|
||||
{
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
int i;
|
||||
uint32_t *s32;
|
||||
|
@ -175,7 +175,7 @@ void
|
|||
gf_w32_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
|
||||
{
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
int i;
|
||||
uint32_t *s32;
|
||||
|
@ -229,7 +229,7 @@ static
|
|||
void
|
||||
gf_w32_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
|
||||
{
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
int i;
|
||||
uint32_t *s32;
|
||||
uint32_t *d32;
|
||||
|
@ -409,7 +409,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -453,7 +453,7 @@ gf_w32_clm_multiply_3 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -492,7 +492,7 @@ gf_w32_clm_multiply_4 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -565,7 +565,7 @@ int gf_w32_cfm_init(gf_t *gf)
|
|||
/*Ben: We also check to see if the prim poly will work for pclmul */
|
||||
/*Ben: Check to see how many reduction steps it will take*/
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
if ((0xfffe0000 & h->prim_poly) == 0){
|
||||
gf->multiply.w32 = gf_w32_clm_multiply_2;
|
||||
gf->multiply_region.w32 = gf_w32_clm_multiply_region_from_single_2;
|
||||
|
@ -2176,7 +2176,7 @@ int gf_w32_split_init(gf_t *gf)
|
|||
int i, j, exp, ispclmul, issse3;
|
||||
|
||||
ispclmul = 0;
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
ispclmul = 1;
|
||||
#endif
|
||||
|
||||
|
|
|
@ -182,7 +182,7 @@ gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -1967,7 +1967,7 @@ int gf_w4_cfm_init(gf_t *gf)
|
|||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
gf->multiply.w32 = gf_w4_clm_multiply;
|
||||
return 1;
|
||||
#endif
|
||||
|
|
14
src/gf_w64.c
14
src/gf_w64.c
|
@ -96,7 +96,7 @@ xor)
|
|||
gf_val_64_t *s64, *d64, *top;
|
||||
gf_region_data rd;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
__m128i a, b;
|
||||
__m128i result, r1;
|
||||
__m128i prim_poly;
|
||||
|
@ -187,7 +187,7 @@ xor)
|
|||
gf_val_64_t *s64, *d64, *top;
|
||||
gf_region_data rd;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
__m128i a, b;
|
||||
__m128i result, r1;
|
||||
__m128i prim_poly;
|
||||
|
@ -385,7 +385,7 @@ gf_w64_clm_multiply_2 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
|
|||
{
|
||||
gf_val_64_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -427,7 +427,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
|
|||
{
|
||||
gf_val_64_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -466,7 +466,7 @@ gf_w64_clm_multiply_4 (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
|
|||
void
|
||||
gf_w64_clm_multiply_region(gf_t *gf, void *src, void *dest, uint64_t val, int bytes, int xor)
|
||||
{
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
gf_internal_t *h;
|
||||
int i, j, k;
|
||||
uint8_t *s8, *d8, *dtop;
|
||||
|
@ -759,7 +759,7 @@ int gf_w64_cfm_init(gf_t *gf)
|
|||
gf->inverse.w64 = gf_w64_euclid;
|
||||
gf->multiply_region.w64 = gf_w64_multiply_region_from_single;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
if ((0xfffffffe00000000ULL & h->prim_poly) == 0){
|
||||
gf->multiply.w64 = gf_w64_clm_multiply_2;
|
||||
gf->multiply_region.w64 = gf_w64_clm_multiply_region_from_single_2;
|
||||
|
@ -2030,7 +2030,7 @@ int gf_w64_split_init(gf_t *gf)
|
|||
|
||||
gf->multiply.w64 = gf_w64_bytwo_p_multiply;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
if ((!(h->region_type & GF_REGION_NOSSE) &&
|
||||
(h->arg1 == 64 || h->arg2 == 64)) ||
|
||||
h->mult_type == GF_MULT_DEFAULT){
|
||||
|
|
14
src/gf_w8.c
14
src/gf_w8.c
|
@ -211,7 +211,7 @@ gf_w8_clm_multiply_2 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -257,7 +257,7 @@ gf_w8_clm_multiply_3 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -296,7 +296,7 @@ gf_w8_clm_multiply_4 (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8)
|
|||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -373,7 +373,7 @@ gf_w8_clm_multiply_region_from_single_2(gf_t *gf, void *src, void *dest, gf_val_
|
|||
uint8_t *s8;
|
||||
uint8_t *d8;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -432,7 +432,7 @@ gf_w8_clm_multiply_region_from_single_3(gf_t *gf, void *src, void *dest, gf_val_
|
|||
uint8_t *s8;
|
||||
uint8_t *d8;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -495,7 +495,7 @@ gf_w8_clm_multiply_region_from_single_4(gf_t *gf, void *src, void *dest, gf_val_
|
|||
uint8_t *s8;
|
||||
uint8_t *d8;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
|
@ -592,7 +592,7 @@ int gf_w8_cfm_init(gf_t *gf)
|
|||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
|
||||
#if defined(INTEL_SSE4_PCLMUL) && defined(ARCH_64)
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
if ((0xe0 & h->prim_poly) == 0){
|
||||
gf->multiply.w32 = gf_w8_clm_multiply_2;
|
||||
gf->multiply_region.w32 = gf_w8_clm_multiply_region_from_single_2;
|
||||
|
|
Loading…
Reference in New Issue