Merge remote-tracking branch 'dalgaaf/wip-da-sca-20140513'
commit
f48f2d38af
|
@ -568,7 +568,6 @@ gf_w128_split_4_128_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_
|
|||
printf("\n");
|
||||
}
|
||||
*/
|
||||
i = 0;
|
||||
while (d64 < top) {
|
||||
v[0] = (xor) ? d64[0] : 0;
|
||||
v[1] = (xor) ? d64[1] : 0;
|
||||
|
@ -613,7 +612,6 @@ gf_w128_split_4_128_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_
|
|||
}
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
pp = h->prim_poly;
|
||||
|
||||
/* We only do this to check on alignment. */
|
||||
gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 16);
|
||||
|
@ -713,7 +711,6 @@ gf_w128_split_4_128_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest,
|
|||
}
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
pp = h->prim_poly;
|
||||
|
||||
/* We only do this to check on alignment. */
|
||||
gf_set_region_data(&rd, gf, src, dest, bytes, 0, xor, 256);
|
||||
|
|
|
@ -1268,9 +1268,10 @@ int gf_w16_split_init(gf_t *gf)
|
|||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
|
||||
issse3 = 0;
|
||||
#ifdef INTEL_SSSE3
|
||||
issse3 = 1;
|
||||
#else
|
||||
issse3 = 0;
|
||||
#endif
|
||||
|
||||
if (h->arg1 == 8 && h->arg2 == 8) {
|
||||
|
@ -2270,7 +2271,6 @@ void gf_w16_group_4_4_region_multiply(gf_t *gf, void *src, void *dest, gf_val_32
|
|||
top = (uint16_t *) rd.d_top;
|
||||
|
||||
while (d16 < top) {
|
||||
p = 0;
|
||||
a16 = *s16;
|
||||
p16 = (xor) ? *d16 : 0;
|
||||
ind = a16 >> 12;
|
||||
|
|
15
src/gf_w32.c
15
src/gf_w32.c
|
@ -914,7 +914,6 @@ gf_w32_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
|
|||
d44 = (struct gf_w32_group_data *) h->private;
|
||||
gf_w32_group_set_shift_tables(d44->shift, b, h);
|
||||
|
||||
p = 0;
|
||||
a32 = a;
|
||||
ind = a32 >> 28;
|
||||
a32 <<= 4;
|
||||
|
@ -1609,7 +1608,7 @@ gf_w32_split_16_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_
|
|||
p = (xor) ? *d32 : 0;
|
||||
a = *s32;
|
||||
i = 0;
|
||||
while (a != 0) {
|
||||
while (a != 0 && i < 2) {
|
||||
v = (a & 0xffff);
|
||||
p ^= t[i][v];
|
||||
a >>= 16;
|
||||
|
@ -1745,7 +1744,6 @@ gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
|
|||
xi = _mm_and_si128(si, mask2);
|
||||
xi = _mm_xor_si128(xi, adder);
|
||||
pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi));
|
||||
si = _mm_srli_epi16(si, 2);
|
||||
tindex++;
|
||||
|
||||
vi = _mm_srli_epi32(vi, 8);
|
||||
|
@ -2206,11 +2204,6 @@ gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint
|
|||
v2 = _mm_packus_epi16(tv2, tv0);
|
||||
v3 = _mm_packus_epi16(tv3, tv1);
|
||||
|
||||
p0 = v0;
|
||||
p1 = v1;
|
||||
p2 = v2;
|
||||
p3 = v3;
|
||||
|
||||
si = _mm_and_si128(v0, mask1);
|
||||
p0 = _mm_shuffle_epi8(tables[6][0], si);
|
||||
p1 = _mm_shuffle_epi8(tables[6][1], si);
|
||||
|
@ -2297,14 +2290,16 @@ int gf_w32_split_init(gf_t *gf)
|
|||
uint32_t p, basep;
|
||||
int i, j, exp, ispclmul, issse3;
|
||||
|
||||
ispclmul = 0;
|
||||
#if defined(INTEL_SSE4_PCLMUL)
|
||||
ispclmul = 1;
|
||||
#else
|
||||
ispclmul = 0;
|
||||
#endif
|
||||
|
||||
issse3 = 0;
|
||||
#ifdef INTEL_SSSE3
|
||||
issse3 = 1;
|
||||
#else
|
||||
issse3 = 0;
|
||||
#endif
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
|
|
|
@ -100,7 +100,7 @@ xor)
|
|||
__m128i result, r1;
|
||||
__m128i prim_poly;
|
||||
__m128i w;
|
||||
__m128i m1, m2, m3, m4;
|
||||
__m128i m1, m3, m4;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
|
||||
|
@ -112,8 +112,6 @@ xor)
|
|||
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0xffffffffULL));
|
||||
b = _mm_insert_epi64 (_mm_setzero_si128(), val, 0);
|
||||
m1 = _mm_set_epi32(0, 0, 0, (uint32_t)0xffffffff);
|
||||
m2 = _mm_slli_si128(m1, 4);
|
||||
m2 = _mm_or_si128(m1, m2);
|
||||
m3 = _mm_slli_si128(m1, 8);
|
||||
m4 = _mm_slli_si128(m3, 4);
|
||||
|
||||
|
@ -321,12 +319,9 @@ gf_w64_shift_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64)
|
|||
gf_internal_t *h;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
ppr = h->prim_poly;
|
||||
|
||||
/* Allen: set leading one of primitive polynomial */
|
||||
|
||||
ppl = 1;
|
||||
|
||||
a = a64;
|
||||
bl = 0;
|
||||
br = b64;
|
||||
|
|
|
@ -1209,9 +1209,10 @@ int gf_w8_table_init(gf_t *gf)
|
|||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
|
||||
issse = 0;
|
||||
#ifdef INTEL_SSSE3
|
||||
issse = 1;
|
||||
#else
|
||||
issse = 0;
|
||||
#endif
|
||||
|
||||
if (h->mult_type == GF_MULT_DEFAULT && issse) {
|
||||
|
|
|
@ -141,15 +141,12 @@ int main(int argc, char **argv)
|
|||
if (!gf_init_hard(&gf_def, w, GF_MULT_DEFAULT, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
|
||||
(h->mult_type != GF_MULT_COMPOSITE) ? h->prim_poly : 0, 0, 0, NULL, NULL))
|
||||
problem("No default for this value of w");
|
||||
|
||||
if (w == 4) {
|
||||
mult4 = gf_w4_get_mult_table(&gf);
|
||||
}
|
||||
|
||||
if (w == 8) {
|
||||
} else if (w == 8) {
|
||||
mult8 = gf_w8_get_mult_table(&gf);
|
||||
}
|
||||
|
||||
if (w == 16) {
|
||||
} else if (w == 16) {
|
||||
log16 = gf_w16_get_log_table(&gf);
|
||||
alog16 = gf_w16_get_mult_alog_table(&gf);
|
||||
}
|
||||
|
@ -308,7 +305,6 @@ int main(int argc, char **argv)
|
|||
gf_general_val_to_s(c, w, cs, 1);
|
||||
printf("Error in single multiplication (all numbers in hex):\n\n");
|
||||
printf(" gf.multiply(gf, %s, %s) = %s, which is clearly wrong.\n", as, bs, cs);
|
||||
;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
@ -422,5 +418,15 @@ int main(int argc, char **argv)
|
|||
gf_general_do_region_check(&gf, a, rc+s_start, rd+d_start, target+d_start, bytes, xor);
|
||||
}
|
||||
}
|
||||
|
||||
free(a);
|
||||
free(b);
|
||||
free(c);
|
||||
free(d);
|
||||
free(ra);
|
||||
free(rb);
|
||||
free(rc);
|
||||
free(rd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -116,8 +116,7 @@ int main(int argc, char **argv)
|
|||
printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
|
||||
elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
|
||||
|
||||
}
|
||||
if (w == 8) {
|
||||
} else if (w == 8) {
|
||||
mult8 = gf_w8_get_mult_table(&gf);
|
||||
if (mult8 == NULL) {
|
||||
printf("Couldn't get inline multiplication table.\n");
|
||||
|
@ -139,8 +138,7 @@ int main(int argc, char **argv)
|
|||
}
|
||||
printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
|
||||
elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
|
||||
}
|
||||
if (w == 16) {
|
||||
} else if (w == 16) {
|
||||
log16 = gf_w16_get_log_table(&gf);
|
||||
alog16 = gf_w16_get_mult_alog_table(&gf);
|
||||
if (log16 == NULL) {
|
||||
|
@ -164,5 +162,9 @@ int main(int argc, char **argv)
|
|||
printf("Inline mult: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n",
|
||||
elapsed, dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed);
|
||||
}
|
||||
free (ra);
|
||||
free (rb);
|
||||
free (ra16);
|
||||
free (rb16);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
char *BM = "Bad Method: ";
|
||||
|
||||
|
@ -203,9 +204,14 @@ int main(int argc, char **argv)
|
|||
sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]);
|
||||
usage(string);
|
||||
}
|
||||
if (power < 0) usage("Can't have negative powers\n");
|
||||
if (power > n) n = power;
|
||||
if (power < 0) {
|
||||
usage("Can't have negative powers\n");
|
||||
} else {
|
||||
n = power;
|
||||
}
|
||||
}
|
||||
// in case the for-loop header fails
|
||||
assert (n >= 0);
|
||||
|
||||
poly = (gf_general_t *) malloc(sizeof(gf_general_t)*(n+1));
|
||||
for (i = 0; i <= n; i++) gf_general_set_zero(poly+i, w);
|
||||
|
|
|
@ -188,8 +188,6 @@ int main(int argc, char **argv)
|
|||
if (tmethods[(int)test] == NULL) {
|
||||
printf("No %s method.\n", tstrings[(int)test]);
|
||||
} else {
|
||||
elapsed = 0;
|
||||
|
||||
if (test == '0') gf_general_set_zero(&a, w);
|
||||
if (test == '1') gf_general_set_one(&a, w);
|
||||
if (test == '2') gf_general_set_two(&a, w);
|
||||
|
|
Loading…
Reference in New Issue