Implemented CARRY_FREE_GK. Sections added are tagged with a comment //ADAM

for easy navigation.
master
Adam Disney 2014-06-06 13:09:04 -04:00
parent 9d53ea590b
commit 6bb1ebb9f4
5 changed files with 163 additions and 15 deletions

View File

@ -33,17 +33,18 @@
Not all are implemented for all values of w.
See the paper for an explanation of how they work. */
typedef enum {GF_MULT_DEFAULT,
GF_MULT_SHIFT,
GF_MULT_CARRY_FREE,
GF_MULT_GROUP,
typedef enum {GF_MULT_DEFAULT,
GF_MULT_SHIFT,
GF_MULT_CARRY_FREE,
GF_MULT_CARRY_FREE_GK, //ADAM
GF_MULT_GROUP,
GF_MULT_BYTWO_p,
GF_MULT_BYTWO_b,
GF_MULT_TABLE,
GF_MULT_LOG_TABLE,
GF_MULT_TABLE,
GF_MULT_LOG_TABLE,
GF_MULT_LOG_ZERO,
GF_MULT_LOG_ZERO_EXT,
GF_MULT_SPLIT_TABLE,
GF_MULT_SPLIT_TABLE,
GF_MULT_COMPOSITE } gf_mult_type_t;
/* These are the different ways to optimize region

View File

@ -286,6 +286,16 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
return 1;
}
//ADAM
if (mult_type == GF_MULT_CARRY_FREE_GK) {
if (w != 4 && w != 8 && w != 16 &&
w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; }
if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; }
if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; }
if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; }
return 1;
}
if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
if (raltmap) { _gf_errno = GF_E_ALT_BY2; return 0; }
if (rsse && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; }

View File

@ -47,6 +47,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
} else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
mult_type = GF_MULT_CARRY_FREE;
starting++;
//ADAM
} else if (strcmp(argv[starting], "CARRY_FREE_GK") == 0) {
mult_type = GF_MULT_CARRY_FREE_GK;
starting++;
} else if (strcmp(argv[starting], "GROUP") == 0) {
mult_type = GF_MULT_GROUP;
if (argc < starting + 3) {

View File

@ -399,7 +399,94 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b)
extra memory.
*/
//ADAM
/*
 * Single-element multiply for w=32 using the PCLMUL carry-less multiplier.
 *
 * The raw carry-less product of a32 and b32 is folded back to 32 bits with
 * two further carry-less multiplies by the constants q+ and g*, which are
 * read from the first and second 64-bit words of the private area behind
 * gf->scratch.
 *
 * Returns the low 32 bits of the folded product.  When the file is built
 * without INTEL_SSE4_PCLMUL the function returns 0.
 */
static
inline
gf_val_32_t
gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
{
#if defined(INTEL_SSE4_PCLMUL)
  gf_internal_t *internal = gf->scratch;
  uint64_t *consts = (uint64_t *) internal->private;
  uint64_t quotient_const = consts[0];   /* q+ */
  uint64_t poly_const = consts[1];       /* g* */
  __m128i zero = _mm_setzero_si128();
  __m128i lhs, rhs, qvec, gvec;
  __m128i product, fold;

  /* Every operand lives in the low lane of an otherwise zero register. */
  lhs  = _mm_insert_epi32 (zero, a32, 0);
  rhs  = _mm_insert_epi32 (zero, b32, 0);
  gvec = _mm_insert_epi64 (zero, poly_const, 0);
  qvec = _mm_insert_epi64 (zero, quotient_const, 0);

  product = _mm_clmulepi64_si128 (lhs, rhs, 0);

  /* Shifting right by 4 bytes drops the low 32 bits before each fold step. */
  fold = _mm_clmulepi64_si128 (qvec, _mm_srli_si128 (product, 4), 0);
  fold = _mm_clmulepi64_si128 (gvec, _mm_srli_si128 (fold, 4), 0);
  product = _mm_xor_si128 (product, fold);

  /* The answer is the low 32-bit lane. */
  return (gf_val_32_t) _mm_extract_epi32 (product, 0);
#else
  return 0;
#endif
}
//ADAM
#if defined(INTEL_SSE4_PCLMUL)
/*
 * Region multiply for w=32 using the PCLMUL carry-less multiplier.
 *
 * Treats src and dest as arrays of 32-bit words and multiplies each source
 * word by val.  When xor is non-zero the product is XOR-ed into dest,
 * otherwise it overwrites dest.  val == 0 and val == 1 are delegated to
 * gf_multby_zero() and gf_multby_one().
 *
 * bytes is the region length in bytes; only whole 32-bit words are
 * processed.  The constants q+ and g* are read from the first and second
 * 64-bit words of the private area behind gf->scratch.
 */
static
void
gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
{
  int i;
  int nwords;
  uint32_t *s32;
  uint32_t *d32;
  __m128i zero;
  __m128i a, b;
  __m128i result;
  __m128i w;
  __m128i g, q;
  gf_internal_t * h = gf->scratch;
  uint64_t g_star, q_plus;

  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }

  q_plus = *(uint64_t *) h->private;
  g_star = *((uint64_t *) h->private + 1);

  /* Build every vector from an explicit zero register.  The previous code
     inserted g* and q+ into `a` before `a` had been assigned, which reads
     an indeterminate value. */
  zero = _mm_setzero_si128();
  a = _mm_insert_epi32 (zero, val, 0);
  g = _mm_insert_epi64 (zero, g_star, 0);
  q = _mm_insert_epi64 (zero, q_plus, 0);

  s32 = (uint32_t *) src;
  d32 = (uint32_t *) dest;

  /* Signed word count avoids a signed/unsigned comparison in the loops. */
  nwords = bytes / (int) sizeof(uint32_t);

  if (xor) {
    for (i = 0; i < nwords; i++) {
      b = _mm_insert_epi32 (zero, s32[i], 0);
      result = _mm_clmulepi64_si128 (a, b, 0);
      /* Two fold steps: shift out the low 32 bits, multiply by q+, then g*. */
      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
      result = _mm_xor_si128 (result, w);
      d32[i] ^= ((gf_val_32_t)_mm_extract_epi32(result, 0));
    }
  } else {
    for (i = 0; i < nwords; i++) {
      b = _mm_insert_epi32 (zero, s32[i], 0);
      result = _mm_clmulepi64_si128 (a, b, 0);
      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
      result = _mm_xor_si128 (result, w);
      d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
    }
  }
}
#endif
static
@ -446,6 +533,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
#endif
return rv;
}
static
inline
gf_val_32_t
@ -552,6 +640,45 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
return product;
}
//ADAM
/*
 * Install the CARRY_FREE_GK function pointers for w=32 and precompute the
 * two constants the multiply routines read from the private area.
 *
 * Layout of h->private (two uint64_t words):
 *   [0]  q+ : bit 32 set, plus the quotient bits produced by the long
 *             division loop below over (prim_poly << 32)
 *   [1]  g* : low 32 bits of prim_poly
 *
 * Returns 1 when built with INTEL_SSE4_PCLMUL.  Without it, only the inverse
 * and the generic region routine are installed and 0 is returned.
 */
static
int gf_w32_cfmgk_init(gf_t *gf)
{
  gf->inverse.w32 = gf_w32_euclid;
  gf->multiply_region.w32 = gf_w32_multiply_region_from_single;

#if defined(INTEL_SSE4_PCLMUL)
  gf_internal_t *h = (gf_internal_t *) gf->scratch;
  uint64_t *consts = (uint64_t *) h->private;
  uint64_t quotient = 1ULL << 32;
  uint64_t remainder = h->prim_poly << 32;
  int bit;

  gf->multiply.w32 = gf_w32_cfmgk_multiply;
  gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single;

  /* Long division, high bit first: each set bit in the upper half of the
     remainder contributes one quotient bit and folds in a shifted copy of
     the polynomial. */
  for (bit = 63; bit >= 32; bit--) {
    if (remainder & (1ULL << bit)) {
      quotient |= 1ULL << (bit - 32);
      remainder ^= h->prim_poly << (bit - 32);
    }
  }

  consts[0] = quotient;                               /* q+ */
  consts[1] = h->prim_poly & ((1ULL << 32) - 1);      /* g* */

  return 1;
#else
  return 0;
#endif
}
static
int gf_w32_cfm_init(gf_t *gf)
{
@ -2656,6 +2783,10 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
case GF_MULT_CARRY_FREE:
return sizeof(gf_internal_t);
break;
//ADAM
case GF_MULT_CARRY_FREE_GK:
return sizeof(gf_internal_t) + sizeof(uint64_t)*2;
break;
case GF_MULT_SHIFT:
return sizeof(gf_internal_t);
break;
@ -2703,14 +2834,15 @@ int gf_w32_init(gf_t *gf)
gf->multiply_region.w32 = NULL;
switch(h->mult_type) {
case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
case GF_MULT_CARRY_FREE_GK: if (gf_w32_cfmgk_init(gf) == 0) return 0; break; //ADAM
case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
case GF_MULT_DEFAULT:
case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
case GF_MULT_BYTWO_p:
case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
default: return 0;
}
if (h->divide_type == GF_DIVIDE_EUCLID) {

View File

@ -20,8 +20,9 @@
#define BNMULTS (8)
static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
"TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
#define NMULTS (16)
static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
//ADAM
#define NMULTS (17)
static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };