Removed GROUP/128/SSE. It wasn't compiling, and it needed an overhaul.

I'll do it someday when I'm bored.
master
Jim Plank 2014-01-01 11:00:40 -05:00
parent fb0bbdcf62
commit f0c32c94bc
5 changed files with 3 additions and 303 deletions

View File

@ -144,7 +144,6 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */
GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */

View File

@ -75,7 +75,6 @@ void gf_error()
case GF_E_GR_W_48: s = "With -m GROUP, w cannot be 4 or 8."; break;
case GF_E_GR_W_16: s = "With -m GROUP, w == 16, arg1 and arg2 must be 4."; break;
case GF_E_GR_128A: s = "With -m GROUP, w == 128, arg1 must be 4, and arg2 in { 4,8,16 }."; break;
case GF_E_GR_SSE4: s = "With -m GROUP, w == 128, you need SSE4."; break;
case GF_E_GR_A_27: s = "With -m GROUP, arg1 and arg2 must be <= 27."; break;
case GF_E_GR_AR_W: s = "With -m GROUP, arg1 and arg2 must be <= w."; break;
case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SSE|NOSSE."; break;
@ -320,7 +319,6 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
if (w == 16 && (arg1 != 4 || arg2 != 4)) { _gf_errno = GF_E_GR_W_16; return 0; }
if (w == 128 && (arg1 != 4 ||
(arg2 != 4 && arg2 != 8 && arg2 != 16))) { _gf_errno = GF_E_GR_128A; return 0; }
if (w == 128 && !sse4) { _gf_errno = GF_E_GR_SSE4; return 0; }
if (arg1 > 27 || arg2 > 27) { _gf_errno = GF_E_GR_A_27; return 0; }
if (arg1 > w || arg2 > w) { _gf_errno = GF_E_GR_AR_W; return 0; }
if (raltmap || rsse || rnosse) { _gf_errno = GF_E_GR____J; return 0; }

View File

@ -144,7 +144,6 @@ typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */
GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */
GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */

View File

@ -984,45 +984,6 @@ void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128)
return;
}
static
/* Builds the GROUP-method multiplication table for GF(2^128): on exit,
   table[k] holds (k * b) in the field for every k in [0, 2^g_m), where
   g_m = scratch->arg1 is the group chunk width in bits.
   No-op (empty body) when compiled without INTEL_SSE4. */
void gf_w128_group_m_sse_init(gf_t *gf, gf_val_128_t b128)
{
#if defined(INTEL_SSE4)
int i, j;
int g_m;
uint64_t lbit, middlebit;
gf_internal_t *scratch;
gf_group_tables_t *gt;
scratch = (gf_internal_t *) gf->scratch;
gt = scratch->private;
g_m = scratch->arg1;
__m128i *table = (__m128i *)(gt->m_table), b, a, ubit, prim_poly;
/* Primitive polynomial lives in the low 64 bits (the implicit x^128 term
   is handled by the overflow test below). */
prim_poly = _mm_insert_epi64(_mm_setzero_si128(), scratch->prim_poly, 0);
/* NOTE(review): b is loaded here but never used below; table[1] is
   rebuilt from b128[] directly via the two inserts. */
b = _mm_loadu_si128((__m128i *)(b128));
/* table[0] = 0, table[1] = b (b128[0] = high 64 bits, b128[1] = low). */
table[0] = _mm_setzero_si128();
table[1] = table[0];
table[1] = _mm_insert_epi64(table[1],b128[0],1);
table[1] = _mm_insert_epi64(table[1],b128[1],0);
/* lbit = bit 63 mask: detects a carry out of the low 64-bit lane. */
lbit = 1;
lbit <<= 63;
/* ubit = 2^64, i.e. the lowest bit of the upper 64-bit lane. */
ubit = _mm_set_epi32(0, 1, 0, 0);
/* For each power of two i, table[i] = 2 * table[i/2] in the field;
   then fill in table[i+j] = table[i] ^ table[j] for all smaller j. */
for (i = 2; i < (1 << g_m); i <<= 1) {
a = table[(i >> 1)];
/* _mm_slli_epi64 shifts the two 64-bit lanes independently, so the bit
   that crosses from lane 0 into lane 1 must be carried by hand. */
middlebit = (_mm_extract_epi64(a, 0x0) & lbit);
a = _mm_slli_epi64(a, 1);
if (middlebit) a = _mm_xor_si128(a, ubit);
table[i] = a;
/* If bit 127 shifted out, reduce modulo the primitive polynomial. */
if (_mm_extract_epi64(table[i >> 1], 0x1) & lbit) table[i] = _mm_xor_si128(table[i], prim_poly);
for (j = 0; j < i; j++) {
table[i + j] = _mm_xor_si128(table[i], table[j]);
}
}
return;
#endif
}
void
gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
@ -1095,88 +1056,6 @@ gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_
c128[1] = p_i[1];
}
void
/* GROUP multiplication in GF(2^128) using SSE4: c128 = a128 * b128.
   Consumes a128 in g_m-bit chunks (g_m = arg1), accumulating partial
   products from m_table; reduction is deferred and applied from r_table
   once every g_r bits (g_r = arg2). a128[0]/c128[0] hold the high 64
   bits, [1] the low 64 bits.
   NOTE(review): without INTEL_SSE4 && ARCH_64 the body is compiled out
   and c128 is never written. */
gf_w128_group_sse_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
#if defined(INTEL_SSE4) && defined(ARCH_64)
int i,j;
int i_r, i_m, t_m;
int mask_m, mask_r, mask_s;
int g_m, g_r;
uint32_t shiftbits;
uint64_t a[2], tbit = 1;
tbit <<= 63;
gf_internal_t *scratch;
gf_group_tables_t *gt;
__m128i p_i, *m_table, *r_table, zero;
zero = _mm_setzero_si128();
scratch = (gf_internal_t *) gf->scratch;
gt = scratch->private;
m_table = (__m128i *)(gt->m_table);
r_table = (__m128i *)(gt->r_table);
g_m = scratch->arg1;
g_r = scratch->arg2;
mask_m = (1 << g_m) - 1;
mask_r = (1 << g_r) - 1;
/* Assumes g_m <= 32 so this mask fits a 32-bit word -- consistent with
   the w==128 argument checks elsewhere (arg1 == 4); TODO confirm. */
mask_s = mask_m << (32-g_m); /*sets g_m leftmost bits to 1*/
/* m_table caches multiples of the last multiplier used (m_table[1] = b);
   rebuild it only when b128 differs from the cached value. */
if (b128[0] != _mm_extract_epi64(m_table[1], 1) || b128[1] != _mm_extract_epi64(m_table[1], 0)) {
gf_w128_group_m_sse_init(gf, b128);
}
p_i = zero;
a[0] = a128[0];
a[1] = a128[1];
t_m = 0;
i_r = 0;
/* Top 64 bits */
for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
/* Next g_m-bit chunk of a, most significant first. */
i_m = (a[0] >> (i * g_m)) & mask_m;
/* Collect the g_m bits about to shift off the top into the deferred
   reduction index. */
i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
/* _mm_slli_epi64 shifts the two 64-bit lanes independently; carry the
   g_m bits that cross from the low lane into the high lane by hand. */
shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
shiftbits >>= 32-g_m;
p_i = _mm_slli_epi64(p_i, g_m);
p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
p_i = _mm_xor_si128(p_i, m_table[i_m]);
t_m += g_m;
/* Every g_r accumulated bits, apply the table-driven reduction. */
if (t_m == g_r) {
p_i = _mm_xor_si128(p_i, r_table[i_r]);
t_m = 0;
i_r = 0;
} else {
i_r <<= g_m;
}
}
/* Bottom 64 bits: same loop over a[1]. */
for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
i_m = (a[1] >> (i * g_m)) & mask_m;
i_r ^= (((uint64_t)_mm_extract_epi64(p_i,1)) >> (64 - g_m)) & mask_r;
shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
shiftbits >>= 32-g_m;
p_i = _mm_slli_epi64(p_i, g_m);
p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
p_i = _mm_xor_si128(p_i, m_table[i_m]);
t_m += g_m;
if (t_m == g_r) {
p_i = _mm_xor_si128(p_i, r_table[i_r]);
t_m = 0;
i_r = 0;
} else {
i_r <<= g_m;
}
}
c128[0] = _mm_extract_epi64(p_i, 1);
c128[1] = _mm_extract_epi64(p_i, 0);
#endif
}
static
void
gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
@ -1273,160 +1152,6 @@ gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val,
}
}
static
void
/* Region version of GROUP/SSE multiplication in GF(2^128): multiplies
   every 128-bit word in src by val and stores (or XORs, if xor != 0)
   the products into dest. bytes is the region length. The per-word
   inner loops are identical to gf_w128_group_sse_multiply.
   No-op (dest untouched) when compiled without INTEL_SSE4. */
gf_w128_group_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, int xor)
{
#if defined(INTEL_SSE4)
int i;
int i_r, i_m, t_m;
int mask_m, mask_r, mask_s;
int g_m, g_r;
uint32_t shiftbits;
uint64_t a[2];
gf_internal_t *scratch;
gf_group_tables_t *gt;
gf_region_data rd;
uint64_t *a128, *c128, *top;
__m128i *m_table, *r_table, p_i, zero;
zero = _mm_setzero_si128();
/* We only do this to check on alignment. */
gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 8);
/* Fast paths: multiplying by zero or one needs no table work.
   val[0] = high 64 bits, val[1] = low 64 bits. */
if (val[0] == 0) {
if (val[1] == 0) { gf_multby_zero(dest, bytes, xor); return; }
if (val[1] == 1) { gf_multby_one(src, dest, bytes, xor); return; }
}
scratch = (gf_internal_t *) gf->scratch;
gt = scratch->private;
m_table = (__m128i *)(gt->m_table);
r_table = (__m128i *)(gt->r_table);
g_m = scratch->arg1;
g_r = scratch->arg2;
mask_m = (1 << g_m) - 1;
mask_r = (1 << g_r) - 1;
/* g_m leftmost bits of a 32-bit word; assumes g_m <= 32 -- TODO confirm. */
mask_s = mask_m << (32-g_m);
/* Rebuild the multiplier table only if val differs from the cached one. */
if (val[0] != _mm_extract_epi64(m_table[1], 1) || val[1] != _mm_extract_epi64(m_table[1], 0)) {
gf_w128_group_m_sse_init(gf, val);
}
a128 = (uint64_t *) src;
c128 = (uint64_t *) dest;
top = (uint64_t *) rd.d_top;
if (xor){
/* XOR variant: products are folded into the existing dest words. */
while (c128 < top) {
p_i = zero;
a[0] = a128[0];
a[1] = a128[1];
t_m = 0;
i_r = 0;
/* Top 64 bits */
for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
/* Next g_m-bit chunk of the source word, most significant first. */
i_m = (a[0] >> (i * g_m)) & mask_m;
/* Deferred reduction: remember the bits that shift off the top. */
i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
/* Hand-carry the bits crossing the 64-bit lane boundary, since
   _mm_slli_epi64 shifts the lanes independently. */
shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
shiftbits >>= 32-g_m;
p_i = _mm_slli_epi64(p_i, g_m);
p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
p_i = _mm_xor_si128(p_i, m_table[i_m]);
t_m += g_m;
/* Apply the table-driven reduction every g_r accumulated bits. */
if (t_m == g_r) {
p_i = _mm_xor_si128(p_i, r_table[i_r]);
t_m = 0;
i_r = 0;
} else {
i_r <<= g_m;
}
}
/* Bottom 64 bits: same loop over a[1]. */
for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
i_m = (a[1] >> (i * g_m)) & mask_m;
i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
shiftbits >>= 32-g_m;
p_i = _mm_slli_epi64(p_i, g_m);
p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
p_i = _mm_xor_si128(p_i, m_table[i_m]);
t_m += g_m;
if (t_m == g_r) {
p_i = _mm_xor_si128(p_i, r_table[i_r]);
t_m = 0;
i_r = 0;
} else {
i_r <<= g_m;
}
}
c128[0] ^= _mm_extract_epi64(p_i, 1);
c128[1] ^= _mm_extract_epi64(p_i, 0);
a128 += 2;
c128 += 2;
}
}else{
/* Overwrite variant: identical computation, plain stores into dest. */
while (c128 < top) {
p_i = zero;
a[0] = a128[0];
a[1] = a128[1];
t_m = 0;
i_r = 0;
/* Top 64 bits */
for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
i_m = (a[0] >> (i * g_m)) & mask_m;
i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
shiftbits >>= 32-g_m;
p_i = _mm_slli_epi64(p_i, g_m);
p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
p_i = _mm_xor_si128(p_i, m_table[i_m]);
t_m += g_m;
if (t_m == g_r) {
p_i = _mm_xor_si128(p_i, r_table[i_r]);
t_m = 0;
i_r = 0;
} else {
i_r <<= g_m;
}
}
/* Bottom 64 bits. */
for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) {
i_m = (a[1] >> (i * g_m)) & mask_m;
i_r ^= ((uint64_t)_mm_extract_epi64(p_i, 1) >> (64 - g_m)) & mask_r;
shiftbits = _mm_extract_epi32(p_i, 1) & mask_s;
shiftbits >>= 32-g_m;
p_i = _mm_slli_epi64(p_i, g_m);
p_i = _mm_xor_si128(p_i, _mm_insert_epi32(zero, shiftbits, 2));
p_i = _mm_xor_si128(p_i, m_table[i_m]);
t_m += g_m;
if (t_m == g_r) {
p_i = _mm_xor_si128(p_i, r_table[i_r]);
t_m = 0;
i_r = 0;
} else {
i_r <<= g_m;
}
}
c128[0] = _mm_extract_epi64(p_i, 1);
c128[1] = _mm_extract_epi64(p_i, 0);
a128 += 2;
c128 += 2;
}
}
#endif
}
/* a^-1 -> b */
void
gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
@ -1880,28 +1605,7 @@ int gf_w128_group_init(gf_t *gf)
gf->inverse.w128 = gf_w128_euclid;
gf->multiply_region.w128 = gf_w128_group_multiply_region;
/* JSP: I've got a problem compiling here -- something about "vmovq", and
I don't have the time to chase it down right now. */
#if defined(INTEL_SSE4) && defined(ARCH_64)
if(!(scratch->region_type & GF_REGION_NOSSE))
{
if ((g_m != 4) && ((g_r != 4) || (g_r != 8)))
return 0;
gt->r_table = (void *)(((uint64_t)gt->r_table + 15) & (~0xfULL)); /* aligns gt->r_table on a 16-bit boundary*/
gt->m_table = gt->r_table + 2*size_r;
gt->m_table[2] = 0;
gt->m_table[3] = 0;
gf->multiply.w128 = gf_w128_group_sse_multiply;
gf->multiply_region.w128 = gf_w128_group_sse_multiply_region;
gf_w128_group_r_sse_init(gf);
}
else
gf_w128_group_r_init(gf);
#else
if(scratch->region_type & GF_REGION_SSE) return 0;
else gf_w128_group_r_init(gf);
#endif
gf_w128_group_r_init(gf);
return 1;
}

View File

@ -20,8 +20,8 @@
#define BNMULTS (8)
static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
"TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
#define NMULTS (15)
static char *MULTS[NMULTS] = { "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
#define NMULTS (16)
static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };